Annotation of XML/parser.c, revision 1.190
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.15 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.60 daniel 6: * Daniel.Veillard@w3.org
1.1 veillard 7: */
8:
1.26 daniel 9: #ifdef WIN32
1.138 daniel 10: #include "win32config.h"
1.26 daniel 11: #else
1.121 daniel 12: #include "config.h"
1.26 daniel 13: #endif
1.121 daniel 14:
1.1 veillard 15: #include <stdio.h>
1.121 daniel 16: #include <string.h> /* for memset() only */
17: #ifdef HAVE_CTYPE_H
1.1 veillard 18: #include <ctype.h>
1.121 daniel 19: #endif
20: #ifdef HAVE_STDLIB_H
1.50 daniel 21: #include <stdlib.h>
1.121 daniel 22: #endif
23: #ifdef HAVE_SYS_STAT_H
1.9 httpng 24: #include <sys/stat.h>
1.121 daniel 25: #endif
1.9 httpng 26: #ifdef HAVE_FCNTL_H
27: #include <fcntl.h>
28: #endif
1.10 httpng 29: #ifdef HAVE_UNISTD_H
30: #include <unistd.h>
31: #endif
1.20 daniel 32: #ifdef HAVE_ZLIB_H
33: #include <zlib.h>
34: #endif
1.1 veillard 35:
1.188 daniel 36: #include <libxml/xmlmemory.h>
37: #include <libxml/tree.h>
38: #include <libxml/parser.h>
39: #include <libxml/entities.h>
40: #include <libxml/encoding.h>
41: #include <libxml/valid.h>
42: #include <libxml/parserInternals.h>
43: #include <libxml/xmlIO.h>
1.122 daniel 44: #include "xml-error.h"
1.1 veillard 45:
1.140 daniel 46: #define XML_PARSER_BIG_BUFFER_SIZE 1000
47: #define XML_PARSER_BUFFER_SIZE 100
48:
1.188 daniel 49: const char *xmlParserVersion = LIBXML_VERSION_STRING;
1.160 daniel 50: int xmlGetWarningsDefaultValue = 1;
1.86 daniel 51:
1.139 daniel 52: /*
53: * List of XML prefixed PI allowed by W3C specs
54: */
55:
56: const char *xmlW3CPIs[] = {
57: "xml-stylesheet",
58: NULL
59: };
1.91 daniel 60:
1.151 daniel 61: void xmlParserHandleReference(xmlParserCtxtPtr ctxt);
62: void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
63: xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
64: const xmlChar **str);
1.91 daniel 65: /************************************************************************
66: * *
67: * Input handling functions for progressive parsing *
68: * *
69: ************************************************************************/
70:
71: /* #define DEBUG_INPUT */
1.140 daniel 72: /* #define DEBUG_STACK */
73: /* #define DEBUG_PUSH */
74:
1.91 daniel 75:
1.110 daniel 76: #define INPUT_CHUNK 250
77: /* we need to keep enough input to show errors in context */
78: #define LINE_LEN 80
1.91 daniel 79:
80: #ifdef DEBUG_INPUT
81: #define CHECK_BUFFER(in) check_buffer(in)
82:
83: void check_buffer(xmlParserInputPtr in) {
84: if (in->base != in->buf->buffer->content) {
85: fprintf(stderr, "xmlParserInput: base mismatch problem\n");
86: }
87: if (in->cur < in->base) {
88: fprintf(stderr, "xmlParserInput: cur < base problem\n");
89: }
90: if (in->cur > in->base + in->buf->buffer->use) {
91: fprintf(stderr, "xmlParserInput: cur > base + use problem\n");
92: }
93: fprintf(stderr,"buffer %x : content %x, cur %d, use %d, size %d\n",
94: (int) in, (int) in->buf->buffer->content, in->cur - in->base,
95: in->buf->buffer->use, in->buf->buffer->size);
96: }
97:
1.110 daniel 98: #else
99: #define CHECK_BUFFER(in)
100: #endif
101:
1.91 daniel 102:
103: /**
104: * xmlParserInputRead:
105: * @in: an XML parser input
106: * @len: an indicative size for the lookahead
107: *
108: * This function refresh the input for the parser. It doesn't try to
109: * preserve pointers to the input buffer, and discard already read data
110: *
1.123 daniel 111: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
1.91 daniel 112: * end of this entity
113: */
114: int
115: xmlParserInputRead(xmlParserInputPtr in, int len) {
116: int ret;
117: int used;
118: int index;
119:
120: #ifdef DEBUG_INPUT
121: fprintf(stderr, "Read\n");
122: #endif
123: if (in->buf == NULL) return(-1);
124: if (in->base == NULL) return(-1);
125: if (in->cur == NULL) return(-1);
126: if (in->buf->buffer == NULL) return(-1);
127:
128: CHECK_BUFFER(in);
129:
130: used = in->cur - in->buf->buffer->content;
131: ret = xmlBufferShrink(in->buf->buffer, used);
132: if (ret > 0) {
133: in->cur -= ret;
134: in->consumed += ret;
135: }
136: ret = xmlParserInputBufferRead(in->buf, len);
137: if (in->base != in->buf->buffer->content) {
138: /*
139: * the buffer has been realloced
140: */
141: index = in->cur - in->base;
142: in->base = in->buf->buffer->content;
143: in->cur = &in->buf->buffer->content[index];
144: }
145:
146: CHECK_BUFFER(in);
147:
148: return(ret);
149: }
150:
151: /**
152: * xmlParserInputGrow:
153: * @in: an XML parser input
154: * @len: an indicative size for the lookahead
155: *
156: * This function increase the input for the parser. It tries to
157: * preserve pointers to the input buffer, and keep already read data
158: *
1.123 daniel 159: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
1.91 daniel 160: * end of this entity
161: */
162: int
163: xmlParserInputGrow(xmlParserInputPtr in, int len) {
164: int ret;
165: int index;
166:
167: #ifdef DEBUG_INPUT
168: fprintf(stderr, "Grow\n");
169: #endif
170: if (in->buf == NULL) return(-1);
171: if (in->base == NULL) return(-1);
172: if (in->cur == NULL) return(-1);
173: if (in->buf->buffer == NULL) return(-1);
174:
175: CHECK_BUFFER(in);
176:
177: index = in->cur - in->base;
178: if (in->buf->buffer->use > index + INPUT_CHUNK) {
179:
180: CHECK_BUFFER(in);
181:
182: return(0);
183: }
1.189 daniel 184: if (in->buf->readcallback != NULL)
1.140 daniel 185: ret = xmlParserInputBufferGrow(in->buf, len);
186: else
187: return(0);
1.135 daniel 188:
189: /*
190: * NOTE : in->base may be a "dandling" i.e. freed pointer in this
191: * block, but we use it really as an integer to do some
192: * pointer arithmetic. Insure will raise it as a bug but in
193: * that specific case, that's not !
194: */
1.91 daniel 195: if (in->base != in->buf->buffer->content) {
196: /*
197: * the buffer has been realloced
198: */
199: index = in->cur - in->base;
200: in->base = in->buf->buffer->content;
201: in->cur = &in->buf->buffer->content[index];
202: }
203:
204: CHECK_BUFFER(in);
205:
206: return(ret);
207: }
208:
209: /**
210: * xmlParserInputShrink:
211: * @in: an XML parser input
212: *
213: * This function removes used input for the parser.
214: */
215: void
216: xmlParserInputShrink(xmlParserInputPtr in) {
217: int used;
218: int ret;
219: int index;
220:
221: #ifdef DEBUG_INPUT
222: fprintf(stderr, "Shrink\n");
223: #endif
224: if (in->buf == NULL) return;
225: if (in->base == NULL) return;
226: if (in->cur == NULL) return;
227: if (in->buf->buffer == NULL) return;
228:
229: CHECK_BUFFER(in);
230:
231: used = in->cur - in->buf->buffer->content;
232: if (used > INPUT_CHUNK) {
1.110 daniel 233: ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1.91 daniel 234: if (ret > 0) {
235: in->cur -= ret;
236: in->consumed += ret;
237: }
238: }
239:
240: CHECK_BUFFER(in);
241:
242: if (in->buf->buffer->use > INPUT_CHUNK) {
243: return;
244: }
245: xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
246: if (in->base != in->buf->buffer->content) {
247: /*
248: * the buffer has been realloced
249: */
250: index = in->cur - in->base;
251: in->base = in->buf->buffer->content;
252: in->cur = &in->buf->buffer->content[index];
253: }
254:
255: CHECK_BUFFER(in);
256: }
257:
1.45 daniel 258: /************************************************************************
259: * *
260: * Parser stacks related functions and macros *
261: * *
262: ************************************************************************/
1.79 daniel 263:
264: int xmlSubstituteEntitiesDefaultValue = 0;
1.100 daniel 265: int xmlDoValidityCheckingDefaultValue = 0;
1.180 daniel 266: int xmlKeepBlanksDefaultValue = 1;
1.135 daniel 267: xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
268: const xmlChar ** str);
1.79 daniel 269:
1.1 veillard 270: /*
1.40 daniel 271: * Generic function for accessing stacks in the Parser Context
1.1 veillard 272: */
273:
1.140 daniel 274: #define PUSH_AND_POP(scope, type, name) \
275: scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
1.31 daniel 276: if (ctxt->name##Nr >= ctxt->name##Max) { \
277: ctxt->name##Max *= 2; \
1.119 daniel 278: ctxt->name##Tab = (void *) xmlRealloc(ctxt->name##Tab, \
1.40 daniel 279: ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
280: if (ctxt->name##Tab == NULL) { \
1.31 daniel 281: fprintf(stderr, "realloc failed !\n"); \
1.145 daniel 282: return(0); \
1.31 daniel 283: } \
284: } \
1.40 daniel 285: ctxt->name##Tab[ctxt->name##Nr] = value; \
286: ctxt->name = value; \
287: return(ctxt->name##Nr++); \
1.31 daniel 288: } \
1.140 daniel 289: scope type name##Pop(xmlParserCtxtPtr ctxt) { \
1.69 daniel 290: type ret; \
1.40 daniel 291: if (ctxt->name##Nr <= 0) return(0); \
292: ctxt->name##Nr--; \
1.50 daniel 293: if (ctxt->name##Nr > 0) \
294: ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
295: else \
296: ctxt->name = NULL; \
1.69 daniel 297: ret = ctxt->name##Tab[ctxt->name##Nr]; \
298: ctxt->name##Tab[ctxt->name##Nr] = 0; \
299: return(ret); \
1.31 daniel 300: } \
301:
1.140 daniel 302: PUSH_AND_POP(extern, xmlParserInputPtr, input)
303: PUSH_AND_POP(extern, xmlNodePtr, node)
304: PUSH_AND_POP(extern, xmlChar*, name)
1.40 daniel 305:
1.176 daniel 306: int spacePush(xmlParserCtxtPtr ctxt, int val) {
307: if (ctxt->spaceNr >= ctxt->spaceMax) {
308: ctxt->spaceMax *= 2;
309: ctxt->spaceTab = (void *) xmlRealloc(ctxt->spaceTab,
310: ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
311: if (ctxt->spaceTab == NULL) {
312: fprintf(stderr, "realloc failed !\n");
313: return(0);
314: }
315: }
316: ctxt->spaceTab[ctxt->spaceNr] = val;
317: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
318: return(ctxt->spaceNr++);
319: }
320:
321: int spacePop(xmlParserCtxtPtr ctxt) {
322: int ret;
323: if (ctxt->spaceNr <= 0) return(0);
324: ctxt->spaceNr--;
325: if (ctxt->spaceNr > 0)
326: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
327: else
328: ctxt->space = NULL;
329: ret = ctxt->spaceTab[ctxt->spaceNr];
330: ctxt->spaceTab[ctxt->spaceNr] = -1;
331: return(ret);
332: }
333:
1.55 daniel 334: /*
335: * Macros for accessing the content. Those should be used only by the parser,
336: * and not exported.
337: *
338: * Dirty macros, i.e. one needs to make assumptions on the context to use them
339: *
1.123 daniel 340: * CUR_PTR returns the current pointer to the xmlChar to be parsed.
1.151 daniel 341: * To be used with extreme caution since operations consuming
342: * characters may move the input buffer to a different location !
1.123 daniel 343: * CUR returns ctxt->token when it is set, otherwise the current xmlChar
1.152 daniel 344: * value, i.e. an 8 bit value if compiled in ISO-Latin or UTF-8.
1.151 daniel 345: * This should be used internally by the parser
1.55 daniel 346: * only to compare to ASCII values otherwise it would break when
347: * running with UTF-8 encoding.
1.123 daniel 348: * NXT(n) returns the n'th next xmlChar. Same as CUR it should be used only
1.55 daniel 349: * to compare on ASCII based substring.
1.123 daniel 350: * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1.55 daniel 351: * strings within the parser.
352: *
1.77 daniel 353: * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
1.55 daniel 354: *
355: * NEXT Skip to the next character, this does the proper decoding
356: * in UTF-8 mode. It also pops up unfinished entities on the fly.
1.77 daniel 357: * COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
1.155 daniel 358: * CUR_CHAR Return the current char as an int as well as its length.
 *
 * RAW returns the current xmlChar, or -1 when ctxt->token is set.
 * SHRINK and GROW call xmlParserInputShrink() and xmlParserInputGrow()
 * on the current input, then pop that input if it is at a 0 xmlChar
 * and cannot be grown any further.
 * NEXTL(l) advances the cursor by l xmlChars, updating line and col.
 * COPY_BUF(l,b,i,v) appends the char v, of length l, to b at index i.
 *
 * NOTE(review): SKIP, SHRINK, GROW, NEXTL and COPY_BUF expand to several
 * statements, and SKIP_BLANKS, NEXT, CUR_CHAR and CUR_SCHAR already end
 * with a ';'. Used as the body of an unbraced if/else only the first
 * statement would be guarded - confirm the call sites before changing
 * any of them.
1.55 daniel 359: */
1.45 daniel 360:
1.152 daniel 361: #define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
1.97 daniel 362: #define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
1.55 daniel 363: #define NXT(val) ctxt->input->cur[(val)]
364: #define CUR_PTR ctxt->input->cur
1.154 daniel 365:
1.164 daniel 366: #define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val); \
367: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
1.168 daniel 368: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); \
369: if ((*ctxt->input->cur == 0) && \
370: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
371: xmlPopInput(ctxt)
1.164 daniel 372:
1.97 daniel 373: #define SHRINK xmlParserInputShrink(ctxt->input); \
374: if ((*ctxt->input->cur == 0) && \
375: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
376: xmlPopInput(ctxt)
377:
378: #define GROW xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
379: if ((*ctxt->input->cur == 0) && \
380: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
381: xmlPopInput(ctxt)
1.55 daniel 382:
1.155 daniel 383: #define SKIP_BLANKS xmlSkipBlankChars(ctxt);
1.154 daniel 384:
1.151 daniel 385: #define NEXT xmlNextChar(ctxt);
1.154 daniel 386:
1.153 daniel 387: #define NEXTL(l) \
388: if (*(ctxt->input->cur) == '\n') { \
389: ctxt->input->line++; ctxt->input->col = 1; \
390: } else ctxt->input->col++; \
1.154 daniel 391: ctxt->token = 0; ctxt->input->cur += l; \
392: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
393: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
394:
1.152 daniel 395: #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l);
1.162 daniel 396: #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l);
1.154 daniel 397:
1.152 daniel 398: #define COPY_BUF(l,b,i,v) \
399: if (l == 1) b[i++] = (xmlChar) v; \
400: else i += xmlCopyChar(l,&b[i],v);
1.151 daniel 401:
402: /**
403: * xmlNextChar:
404: * @ctxt: the XML parser context
405: *
406: * Skip to the next char input char.
407: */
1.55 daniel 408:
1.151 daniel 409: void
410: xmlNextChar(xmlParserCtxtPtr ctxt) {
1.176 daniel 411: /*
412: * TODO: 2.11 End-of-Line Handling
413: * the literal two-character sequence "#xD#xA" or a standalone
414: * literal #xD, an XML processor must pass to the application
415: * the single character #xA.
416: */
1.151 daniel 417: if (ctxt->token != 0) ctxt->token = 0;
418: else {
419: if ((*ctxt->input->cur == 0) &&
420: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
421: (ctxt->instate != XML_PARSER_COMMENT)) {
422: /*
423: * If we are at the end of the current entity and
424: * the context allows it, we pop consumed entities
425: * automatically.
426: * TODO: the auto closing should be blocked in other cases
427: */
428: xmlPopInput(ctxt);
429: } else {
430: if (*(ctxt->input->cur) == '\n') {
431: ctxt->input->line++; ctxt->input->col = 1;
432: } else ctxt->input->col++;
433: if (ctxt->encoding == NULL) {
434: /*
435: * We are supposed to handle UTF8, check it's valid
436: * From rfc2044: encoding of the Unicode values on UTF-8:
437: *
438: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
439: * 0000 0000-0000 007F 0xxxxxxx
440: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
441: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
442: *
1.160 daniel 443: * Check for the 0x110000 limit too
1.151 daniel 444: */
445: const unsigned char *cur = ctxt->input->cur;
446: unsigned char c;
1.91 daniel 447:
1.151 daniel 448: c = *cur;
449: if (c & 0x80) {
450: if (cur[1] == 0)
451: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
452: if ((cur[1] & 0xc0) != 0x80)
453: goto encoding_error;
454: if ((c & 0xe0) == 0xe0) {
455: unsigned int val;
456:
457: if (cur[2] == 0)
458: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
459: if ((cur[2] & 0xc0) != 0x80)
460: goto encoding_error;
461: if ((c & 0xf0) == 0xf0) {
462: if (cur[3] == 0)
463: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
464: if (((c & 0xf8) != 0xf0) ||
465: ((cur[3] & 0xc0) != 0x80))
466: goto encoding_error;
467: /* 4-byte code */
468: ctxt->input->cur += 4;
469: val = (cur[0] & 0x7) << 18;
470: val |= (cur[1] & 0x3f) << 12;
471: val |= (cur[2] & 0x3f) << 6;
472: val |= cur[3] & 0x3f;
473: } else {
474: /* 3-byte code */
475: ctxt->input->cur += 3;
476: val = (cur[0] & 0xf) << 12;
477: val |= (cur[1] & 0x3f) << 6;
478: val |= cur[2] & 0x3f;
479: }
480: if (((val > 0xd7ff) && (val < 0xe000)) ||
481: ((val > 0xfffd) && (val < 0x10000)) ||
1.160 daniel 482: (val >= 0x110000)) {
1.151 daniel 483: if ((ctxt->sax != NULL) &&
484: (ctxt->sax->error != NULL))
485: ctxt->sax->error(ctxt->userData,
486: "Char out of allowed range\n");
487: ctxt->errNo = XML_ERR_INVALID_ENCODING;
488: ctxt->wellFormed = 0;
1.180 daniel 489: ctxt->disableSAX = 1;
1.151 daniel 490: }
491: } else
492: /* 2-byte code */
493: ctxt->input->cur += 2;
494: } else
495: /* 1-byte code */
496: ctxt->input->cur++;
497: } else {
498: /*
499: * Assume it's a fixed lenght encoding (1) with
500: * a compatibke encoding for the ASCII set, since
501: * XML constructs only use < 128 chars
502: */
503: ctxt->input->cur++;
504: }
505: ctxt->nbChars++;
506: if (*ctxt->input->cur == 0)
507: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
508: }
509: }
1.154 daniel 510: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
511: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
1.168 daniel 512: if ((*ctxt->input->cur == 0) &&
513: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
514: xmlPopInput(ctxt);
1.151 daniel 515: return;
516: encoding_error:
517: /*
518: * If we detect an UTF8 error that probably mean that the
519: * input encoding didn't get properly advertized in the
520: * declaration header. Report the error and switch the encoding
521: * to ISO-Latin-1 (if you don't like this policy, just declare the
522: * encoding !)
523: */
524: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
525: ctxt->sax->error(ctxt->userData,
526: "Input is not proper UTF-8, indicate encoding !\n");
527: ctxt->errNo = XML_ERR_INVALID_ENCODING;
528:
529: ctxt->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
530: ctxt->input->cur++;
531: return;
532: }
1.42 daniel 533:
1.152 daniel 534: /**
535: * xmlCurrentChar:
536: * @ctxt: the XML parser context
537: * @len: pointer to the length of the char read
538: *
539: * The current char value, if using UTF-8 this may actaully span multiple
1.180 daniel 540: * bytes in the input buffer. Implement the end of line normalization:
541: * 2.11 End-of-Line Handling
542: * Wherever an external parsed entity or the literal entity value
543: * of an internal parsed entity contains either the literal two-character
544: * sequence "#xD#xA" or a standalone literal #xD, an XML processor
545: * must pass to the application the single character #xA.
546: * This behavior can conveniently be produced by normalizing all
547: * line breaks to #xA on input, before parsing.)
1.152 daniel 548: *
549: * Returns the current char value and its lenght
550: */
551:
552: int
553: xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
554: if (ctxt->token != 0) {
555: *len = 0;
556: return(ctxt->token);
557: }
558: if (ctxt->encoding == NULL) {
559: /*
560: * We are supposed to handle UTF8, check it's valid
561: * From rfc2044: encoding of the Unicode values on UTF-8:
562: *
563: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
564: * 0000 0000-0000 007F 0xxxxxxx
565: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
566: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
567: *
1.160 daniel 568: * Check for the 0x110000 limit too
1.152 daniel 569: */
570: const unsigned char *cur = ctxt->input->cur;
571: unsigned char c;
572: unsigned int val;
573:
574: c = *cur;
575: if (c & 0x80) {
576: if (cur[1] == 0)
577: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
578: if ((cur[1] & 0xc0) != 0x80)
579: goto encoding_error;
580: if ((c & 0xe0) == 0xe0) {
581:
582: if (cur[2] == 0)
583: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
584: if ((cur[2] & 0xc0) != 0x80)
585: goto encoding_error;
586: if ((c & 0xf0) == 0xf0) {
587: if (cur[3] == 0)
588: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
589: if (((c & 0xf8) != 0xf0) ||
590: ((cur[3] & 0xc0) != 0x80))
591: goto encoding_error;
592: /* 4-byte code */
593: *len = 4;
594: val = (cur[0] & 0x7) << 18;
595: val |= (cur[1] & 0x3f) << 12;
596: val |= (cur[2] & 0x3f) << 6;
597: val |= cur[3] & 0x3f;
598: } else {
599: /* 3-byte code */
600: *len = 3;
601: val = (cur[0] & 0xf) << 12;
602: val |= (cur[1] & 0x3f) << 6;
603: val |= cur[2] & 0x3f;
604: }
605: } else {
606: /* 2-byte code */
607: *len = 2;
608: val = (cur[0] & 0x1f) << 6;
1.168 daniel 609: val |= cur[1] & 0x3f;
1.152 daniel 610: }
611: if (!IS_CHAR(val)) {
612: if ((ctxt->sax != NULL) &&
613: (ctxt->sax->error != NULL))
614: ctxt->sax->error(ctxt->userData,
615: "Char out of allowed range\n");
616: ctxt->errNo = XML_ERR_INVALID_ENCODING;
617: ctxt->wellFormed = 0;
1.180 daniel 618: ctxt->disableSAX = 1;
1.152 daniel 619: }
620: return(val);
621: } else {
622: /* 1-byte code */
623: *len = 1;
1.180 daniel 624: if (*ctxt->input->cur == 0xD) {
625: if (ctxt->input->cur[1] == 0xA) {
626: ctxt->nbChars++;
627: ctxt->input->cur++;
628: }
629: return(0xA);
630: }
1.152 daniel 631: return((int) *ctxt->input->cur);
632: }
633: }
634: /*
635: * Assume it's a fixed lenght encoding (1) with
636: * a compatibke encoding for the ASCII set, since
637: * XML constructs only use < 128 chars
638: */
639: *len = 1;
1.180 daniel 640: if (*ctxt->input->cur == 0xD) {
641: if (ctxt->input->cur[1] == 0xA) {
642: ctxt->nbChars++;
643: ctxt->input->cur++;
644: }
645: return(0xA);
646: }
1.152 daniel 647: return((int) *ctxt->input->cur);
648: encoding_error:
649: /*
650: * If we detect an UTF8 error that probably mean that the
651: * input encoding didn't get properly advertized in the
652: * declaration header. Report the error and switch the encoding
653: * to ISO-Latin-1 (if you don't like this policy, just declare the
654: * encoding !)
655: */
656: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
657: ctxt->sax->error(ctxt->userData,
658: "Input is not proper UTF-8, indicate encoding !\n");
659: ctxt->errNo = XML_ERR_INVALID_ENCODING;
660:
661: ctxt->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
662: *len = 1;
663: return((int) *ctxt->input->cur);
664: }
665:
666: /**
1.162 daniel 667: * xmlStringCurrentChar:
668: * @ctxt: the XML parser context
669: * @cur: pointer to the beginning of the char
670: * @len: pointer to the length of the char read
671: *
672: * The current char value, if using UTF-8 this may actaully span multiple
673: * bytes in the input buffer.
674: *
675: * Returns the current char value and its lenght
676: */
677:
678: int
679: xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
680: if (ctxt->encoding == NULL) {
681: /*
682: * We are supposed to handle UTF8, check it's valid
683: * From rfc2044: encoding of the Unicode values on UTF-8:
684: *
685: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
686: * 0000 0000-0000 007F 0xxxxxxx
687: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
688: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
689: *
690: * Check for the 0x110000 limit too
691: */
692: unsigned char c;
693: unsigned int val;
694:
695: c = *cur;
696: if (c & 0x80) {
697: if ((cur[1] & 0xc0) != 0x80)
698: goto encoding_error;
699: if ((c & 0xe0) == 0xe0) {
700:
701: if ((cur[2] & 0xc0) != 0x80)
702: goto encoding_error;
703: if ((c & 0xf0) == 0xf0) {
704: if (((c & 0xf8) != 0xf0) ||
705: ((cur[3] & 0xc0) != 0x80))
706: goto encoding_error;
707: /* 4-byte code */
708: *len = 4;
709: val = (cur[0] & 0x7) << 18;
710: val |= (cur[1] & 0x3f) << 12;
711: val |= (cur[2] & 0x3f) << 6;
712: val |= cur[3] & 0x3f;
713: } else {
714: /* 3-byte code */
715: *len = 3;
716: val = (cur[0] & 0xf) << 12;
717: val |= (cur[1] & 0x3f) << 6;
718: val |= cur[2] & 0x3f;
719: }
720: } else {
721: /* 2-byte code */
722: *len = 2;
723: val = (cur[0] & 0x1f) << 6;
724: val |= cur[2] & 0x3f;
725: }
726: if (!IS_CHAR(val)) {
727: if ((ctxt->sax != NULL) &&
728: (ctxt->sax->error != NULL))
729: ctxt->sax->error(ctxt->userData,
730: "Char out of allowed range\n");
731: ctxt->errNo = XML_ERR_INVALID_ENCODING;
732: ctxt->wellFormed = 0;
1.180 daniel 733: ctxt->disableSAX = 1;
1.162 daniel 734: }
735: return(val);
736: } else {
737: /* 1-byte code */
738: *len = 1;
739: return((int) *cur);
740: }
741: }
742: /*
743: * Assume it's a fixed lenght encoding (1) with
744: * a compatibke encoding for the ASCII set, since
745: * XML constructs only use < 128 chars
746: */
747: *len = 1;
748: return((int) *cur);
749: encoding_error:
750: /*
751: * If we detect an UTF8 error that probably mean that the
752: * input encoding didn't get properly advertized in the
753: * declaration header. Report the error and switch the encoding
754: * to ISO-Latin-1 (if you don't like this policy, just declare the
755: * encoding !)
756: */
757: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
758: ctxt->sax->error(ctxt->userData,
759: "Input is not proper UTF-8, indicate encoding !\n");
760: ctxt->errNo = XML_ERR_INVALID_ENCODING;
761:
762: *len = 1;
763: return((int) *cur);
764: }
765:
766: /**
1.152 daniel 767: * xmlCopyChar:
768: * @len: pointer to the length of the char read (or zero)
769: * @array: pointer to an arry of xmlChar
770: * @val: the char value
771: *
772: * append the char value in the array
773: *
774: * Returns the number of xmlChar written
775: */
776:
777: int
778: xmlCopyChar(int len, xmlChar *out, int val) {
779: /*
780: * We are supposed to handle UTF8, check it's valid
781: * From rfc2044: encoding of the Unicode values on UTF-8:
782: *
783: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
784: * 0000 0000-0000 007F 0xxxxxxx
785: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
786: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
787: */
788: if (len == 0) {
789: if (val < 0) len = 0;
1.160 daniel 790: else if (val < 0x80) len = 1;
791: else if (val < 0x800) len = 2;
792: else if (val < 0x10000) len = 3;
793: else if (val < 0x110000) len = 4;
1.152 daniel 794: if (len == 0) {
795: fprintf(stderr, "Internal error, xmlCopyChar 0x%X out of bound\n",
796: val);
797: return(0);
798: }
799: }
800: if (len > 1) {
801: int bits;
802:
803: if (val < 0x80) { *out++= val; bits= -6; }
804: else if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
805: else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6; }
806: else { *out++= (val >> 18) | 0xF0; bits= 12; }
807:
808: for ( ; bits >= 0; bits-= 6)
809: *out++= ((val >> bits) & 0x3F) | 0x80 ;
810:
811: return(len);
812: }
813: *out = (xmlChar) val;
814: return(1);
1.155 daniel 815: }
816:
817: /**
818: * xmlSkipBlankChars:
819: * @ctxt: the XML parser context
820: *
821: * skip all blanks character found at that point in the input streams.
822: * It pops up finished entities in the process if allowable at that point.
823: *
824: * Returns the number of space chars skipped
825: */
826:
827: int
828: xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
829: int cur, res = 0;
830:
831: do {
832: cur = CUR;
833: while (IS_BLANK(cur)) {
834: NEXT;
835: cur = CUR;
836: res++;
837: }
838: while ((cur == 0) && (ctxt->inputNr > 1) &&
839: (ctxt->instate != XML_PARSER_COMMENT)) {
840: xmlPopInput(ctxt);
841: cur = CUR;
842: }
843: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
844: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
845: } while (IS_BLANK(cur));
846: return(res);
1.152 daniel 847: }
848:
1.97 daniel 849: /************************************************************************
850: * *
851: * Commodity functions to handle entities processing *
852: * *
853: ************************************************************************/
1.40 daniel 854:
1.50 daniel 855: /**
856: * xmlPopInput:
857: * @ctxt: an XML parser context
858: *
1.40 daniel 859: * xmlPopInput: the current input pointed by ctxt->input came to an end
860: * pop it and return the next char.
1.45 daniel 861: *
1.123 daniel 862: * Returns the current xmlChar in the parser context
1.40 daniel 863: */
1.123 daniel 864: xmlChar
1.55 daniel 865: xmlPopInput(xmlParserCtxtPtr ctxt) {
1.40 daniel 866: if (ctxt->inputNr == 1) return(0); /* End of main Input */
1.69 daniel 867: xmlFreeInputStream(inputPop(ctxt));
1.97 daniel 868: if ((*ctxt->input->cur == 0) &&
869: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
870: return(xmlPopInput(ctxt));
1.40 daniel 871: return(CUR);
872: }
873:
1.50 daniel 874: /**
875: * xmlPushInput:
876: * @ctxt: an XML parser context
877: * @input: an XML parser input fragment (entity, XML fragment ...).
878: *
1.40 daniel 879: * xmlPushInput: switch to a new input stream which is stacked on top
880: * of the previous one(s).
881: */
1.55 daniel 882: void
883: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1.40 daniel 884: if (input == NULL) return;
885: inputPush(ctxt, input);
1.164 daniel 886: GROW;
1.40 daniel 887: }
888:
1.50 daniel 889: /**
1.69 daniel 890: * xmlFreeInputStream:
1.127 daniel 891: * @input: an xmlParserInputPtr
1.69 daniel 892: *
893: * Free up an input stream.
894: */
895: void
896: xmlFreeInputStream(xmlParserInputPtr input) {
897: if (input == NULL) return;
898:
1.119 daniel 899: if (input->filename != NULL) xmlFree((char *) input->filename);
900: if (input->directory != NULL) xmlFree((char *) input->directory);
1.164 daniel 901: if (input->encoding != NULL) xmlFree((char *) input->encoding);
1.165 daniel 902: if (input->version != NULL) xmlFree((char *) input->version);
1.69 daniel 903: if ((input->free != NULL) && (input->base != NULL))
1.123 daniel 904: input->free((xmlChar *) input->base);
1.93 veillard 905: if (input->buf != NULL)
906: xmlFreeParserInputBuffer(input->buf);
1.69 daniel 907: memset(input, -1, sizeof(xmlParserInput));
1.119 daniel 908: xmlFree(input);
1.69 daniel 909: }
910:
911: /**
1.96 daniel 912: * xmlNewInputStream:
913: * @ctxt: an XML parser context
914: *
915: * Create a new input stream structure
916: * Returns the new input stream or NULL
917: */
918: xmlParserInputPtr
919: xmlNewInputStream(xmlParserCtxtPtr ctxt) {
920: xmlParserInputPtr input;
921:
1.119 daniel 922: input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1.96 daniel 923: if (input == NULL) {
1.190 ! daniel 924: if (ctxt != NULL) {
! 925: ctxt->errNo = XML_ERR_NO_MEMORY;
! 926: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 927: ctxt->sax->error(ctxt->userData,
! 928: "malloc: couldn't allocate a new input stream\n");
! 929: ctxt->errNo = XML_ERR_NO_MEMORY;
! 930: }
1.96 daniel 931: return(NULL);
932: }
1.165 daniel 933: memset(input, 0, sizeof(xmlParserInput));
1.96 daniel 934: input->line = 1;
935: input->col = 1;
1.167 daniel 936: input->standalone = -1;
1.96 daniel 937: return(input);
938: }
939:
940: /**
1.190 ! daniel 941: * xmlNewIOInputStream:
! 942: * @ctxt: an XML parser context
! 943: * @input: an I/O Input
! 944: * @enc: the charset encoding if known
! 945: *
! 946: * Create a new input stream structure encapsulating the @input into
! 947: * a stream suitable for the parser.
! 948: *
! 949: * Returns the new input stream or NULL
! 950: */
! 951: xmlParserInputPtr
! 952: xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
! 953: xmlCharEncoding enc) {
! 954: xmlParserInputPtr inputStream;
! 955:
! 956: inputStream = xmlNewInputStream(ctxt);
! 957: if (inputStream == NULL) {
! 958: return(NULL);
! 959: }
! 960: inputStream->filename = NULL;
! 961: inputStream->buf = input;
! 962: inputStream->base = inputStream->buf->buffer->content;
! 963: inputStream->cur = inputStream->buf->buffer->content;
! 964: if (enc != XML_CHAR_ENCODING_NONE) {
! 965: xmlSwitchEncoding(ctxt, enc);
! 966: }
! 967:
! 968: return(inputStream);
! 969: }
! 970:
! 971: /**
1.50 daniel 972: * xmlNewEntityInputStream:
973: * @ctxt: an XML parser context
974: * @entity: an Entity pointer
975: *
1.82 daniel 976: * Create a new input stream based on an xmlEntityPtr
1.113 daniel 977: *
978: * Returns the new input stream or NULL
1.45 daniel 979: */
1.50 daniel 980: xmlParserInputPtr
981: xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 982: xmlParserInputPtr input;
983:
984: if (entity == NULL) {
1.123 daniel 985: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 986: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 987: ctxt->sax->error(ctxt->userData,
1.45 daniel 988: "internal: xmlNewEntityInputStream entity = NULL\n");
1.123 daniel 989: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.50 daniel 990: return(NULL);
1.45 daniel 991: }
992: if (entity->content == NULL) {
1.159 daniel 993: switch (entity->etype) {
1.113 daniel 994: case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1.123 daniel 995: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.113 daniel 996: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
997: ctxt->sax->error(ctxt->userData,
998: "xmlNewEntityInputStream unparsed entity !\n");
999: break;
1000: case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1001: case XML_EXTERNAL_PARAMETER_ENTITY:
1.116 daniel 1002: return(xmlLoadExternalEntity((char *) entity->SystemID,
1.142 daniel 1003: (char *) entity->ExternalID, ctxt));
1.113 daniel 1004: case XML_INTERNAL_GENERAL_ENTITY:
1005: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1006: ctxt->sax->error(ctxt->userData,
1007: "Internal entity %s without content !\n", entity->name);
1008: break;
1009: case XML_INTERNAL_PARAMETER_ENTITY:
1.123 daniel 1010: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.113 daniel 1011: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1012: ctxt->sax->error(ctxt->userData,
1013: "Internal parameter entity %s without content !\n", entity->name);
1014: break;
1015: case XML_INTERNAL_PREDEFINED_ENTITY:
1.123 daniel 1016: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.113 daniel 1017: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1018: ctxt->sax->error(ctxt->userData,
1019: "Predefined entity %s without content !\n", entity->name);
1020: break;
1021: }
1.50 daniel 1022: return(NULL);
1.45 daniel 1023: }
1.96 daniel 1024: input = xmlNewInputStream(ctxt);
1.45 daniel 1025: if (input == NULL) {
1.50 daniel 1026: return(NULL);
1.45 daniel 1027: }
1.156 daniel 1028: input->filename = (char *) entity->SystemID;
1.45 daniel 1029: input->base = entity->content;
1030: input->cur = entity->content;
1.140 daniel 1031: input->length = entity->length;
1.50 daniel 1032: return(input);
1.45 daniel 1033: }
1034:
1.59 daniel 1035: /**
1036: * xmlNewStringInputStream:
1037: * @ctxt: an XML parser context
1.96 daniel 1038: * @buffer: an memory buffer
1.59 daniel 1039: *
1040: * Create a new input stream based on a memory buffer.
1.68 daniel 1041: * Returns the new input stream
1.59 daniel 1042: */
1043: xmlParserInputPtr
1.123 daniel 1044: xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1.59 daniel 1045: xmlParserInputPtr input;
1046:
1.96 daniel 1047: if (buffer == NULL) {
1.123 daniel 1048: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 1049: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1050: ctxt->sax->error(ctxt->userData,
1.59 daniel 1051: "internal: xmlNewStringInputStream string = NULL\n");
1052: return(NULL);
1053: }
1.96 daniel 1054: input = xmlNewInputStream(ctxt);
1.59 daniel 1055: if (input == NULL) {
1056: return(NULL);
1057: }
1.96 daniel 1058: input->base = buffer;
1059: input->cur = buffer;
1.140 daniel 1060: input->length = xmlStrlen(buffer);
1.59 daniel 1061: return(input);
1062: }
1063:
1.76 daniel 1064: /**
1065: * xmlNewInputFromFile:
1066: * @ctxt: an XML parser context
1067: * @filename: the filename to use as entity
1068: *
1069: * Create a new input stream based on a file.
1070: *
1071: * Returns the new input stream or NULL in case of error
1072: */
1073: xmlParserInputPtr
1.79 daniel 1074: xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1.91 daniel 1075: xmlParserInputBufferPtr buf;
1.76 daniel 1076: xmlParserInputPtr inputStream;
1.111 daniel 1077: char *directory = NULL;
1.76 daniel 1078:
1.96 daniel 1079: if (ctxt == NULL) return(NULL);
1.91 daniel 1080: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1.94 daniel 1081: if (buf == NULL) {
1.140 daniel 1082: char name[XML_PARSER_BIG_BUFFER_SIZE];
1.106 daniel 1083:
1.94 daniel 1084: if ((ctxt->input != NULL) && (ctxt->input->directory != NULL)) {
1085: #ifdef WIN32
1086: sprintf(name, "%s\\%s", ctxt->input->directory, filename);
1087: #else
1088: sprintf(name, "%s/%s", ctxt->input->directory, filename);
1089: #endif
1090: buf = xmlParserInputBufferCreateFilename(name,
1091: XML_CHAR_ENCODING_NONE);
1.106 daniel 1092: if (buf != NULL)
1.142 daniel 1093: directory = xmlParserGetDirectory(name);
1.106 daniel 1094: }
1095: if ((buf == NULL) && (ctxt->directory != NULL)) {
1096: #ifdef WIN32
1097: sprintf(name, "%s\\%s", ctxt->directory, filename);
1098: #else
1099: sprintf(name, "%s/%s", ctxt->directory, filename);
1100: #endif
1101: buf = xmlParserInputBufferCreateFilename(name,
1102: XML_CHAR_ENCODING_NONE);
1103: if (buf != NULL)
1.142 daniel 1104: directory = xmlParserGetDirectory(name);
1.106 daniel 1105: }
1106: if (buf == NULL)
1.94 daniel 1107: return(NULL);
1108: }
1109: if (directory == NULL)
1110: directory = xmlParserGetDirectory(filename);
1.76 daniel 1111:
1.96 daniel 1112: inputStream = xmlNewInputStream(ctxt);
1.76 daniel 1113: if (inputStream == NULL) {
1.119 daniel 1114: if (directory != NULL) xmlFree((char *) directory);
1.76 daniel 1115: return(NULL);
1116: }
1117:
1.119 daniel 1118: inputStream->filename = xmlMemStrdup(filename);
1.94 daniel 1119: inputStream->directory = directory;
1.91 daniel 1120: inputStream->buf = buf;
1.76 daniel 1121:
1.91 daniel 1122: inputStream->base = inputStream->buf->buffer->content;
1123: inputStream->cur = inputStream->buf->buffer->content;
1.106 daniel 1124: if ((ctxt->directory == NULL) && (directory != NULL))
1.134 daniel 1125: ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
1.76 daniel 1126: return(inputStream);
1127: }
1128:
1.77 daniel 1129: /************************************************************************
1130: * *
1.97 daniel 1131: * Commodity functions to handle parser contexts *
1132: * *
1133: ************************************************************************/
1134:
1135: /**
1136: * xmlInitParserCtxt:
1137: * @ctxt: an XML parser context
1138: *
1139: * Initialize a parser context
1140: */
1141:
1142: void
1143: xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1144: {
1145: xmlSAXHandler *sax;
1146:
1.168 daniel 1147: xmlDefaultSAXHandlerInit();
1148:
1.119 daniel 1149: sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
1.97 daniel 1150: if (sax == NULL) {
1151: fprintf(stderr, "xmlInitParserCtxt: out of memory\n");
1152: }
1.180 daniel 1153: memset(sax, 0, sizeof(xmlSAXHandler));
1.97 daniel 1154:
1155: /* Allocate the Input stack */
1.119 daniel 1156: ctxt->inputTab = (xmlParserInputPtr *) xmlMalloc(5 * sizeof(xmlParserInputPtr));
1.97 daniel 1157: ctxt->inputNr = 0;
1158: ctxt->inputMax = 5;
1159: ctxt->input = NULL;
1.165 daniel 1160:
1.97 daniel 1161: ctxt->version = NULL;
1162: ctxt->encoding = NULL;
1163: ctxt->standalone = -1;
1.98 daniel 1164: ctxt->hasExternalSubset = 0;
1165: ctxt->hasPErefs = 0;
1.97 daniel 1166: ctxt->html = 0;
1.98 daniel 1167: ctxt->external = 0;
1.140 daniel 1168: ctxt->instate = XML_PARSER_START;
1.97 daniel 1169: ctxt->token = 0;
1.106 daniel 1170: ctxt->directory = NULL;
1.97 daniel 1171:
1172: /* Allocate the Node stack */
1.119 daniel 1173: ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
1.97 daniel 1174: ctxt->nodeNr = 0;
1175: ctxt->nodeMax = 10;
1176: ctxt->node = NULL;
1177:
1.140 daniel 1178: /* Allocate the Name stack */
1179: ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
1180: ctxt->nameNr = 0;
1181: ctxt->nameMax = 10;
1182: ctxt->name = NULL;
1183:
1.176 daniel 1184: /* Allocate the space stack */
1185: ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
1186: ctxt->spaceNr = 1;
1187: ctxt->spaceMax = 10;
1188: ctxt->spaceTab[0] = -1;
1189: ctxt->space = &ctxt->spaceTab[0];
1190:
1.160 daniel 1191: if (sax == NULL) {
1192: ctxt->sax = &xmlDefaultSAXHandler;
1193: } else {
1.97 daniel 1194: ctxt->sax = sax;
1195: memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
1196: }
1197: ctxt->userData = ctxt;
1198: ctxt->myDoc = NULL;
1199: ctxt->wellFormed = 1;
1.99 daniel 1200: ctxt->valid = 1;
1.100 daniel 1201: ctxt->validate = xmlDoValidityCheckingDefaultValue;
1.179 daniel 1202: ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
1.100 daniel 1203: ctxt->vctxt.userData = ctxt;
1.149 daniel 1204: if (ctxt->validate) {
1205: ctxt->vctxt.error = xmlParserValidityError;
1.160 daniel 1206: if (xmlGetWarningsDefaultValue == 0)
1207: ctxt->vctxt.warning = NULL;
1208: else
1209: ctxt->vctxt.warning = xmlParserValidityWarning;
1.180 daniel 1210: /* Allocate the Node stack */
1211: ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
1212: ctxt->vctxt.nodeNr = 0;
1213: ctxt->vctxt.nodeMax = 4;
1214: ctxt->vctxt.node = NULL;
1.149 daniel 1215: } else {
1216: ctxt->vctxt.error = NULL;
1217: ctxt->vctxt.warning = NULL;
1218: }
1.97 daniel 1219: ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1220: ctxt->record_info = 0;
1.135 daniel 1221: ctxt->nbChars = 0;
1.140 daniel 1222: ctxt->checkIndex = 0;
1.180 daniel 1223: ctxt->inSubset = 0;
1.140 daniel 1224: ctxt->errNo = XML_ERR_OK;
1.185 daniel 1225: ctxt->depth = 0;
1.97 daniel 1226: xmlInitNodeInfoSeq(&ctxt->node_seq);
1227: }
1228:
1229: /**
1230: * xmlFreeParserCtxt:
1231: * @ctxt: an XML parser context
1232: *
1233: * Free all the memory used by a parser context. However the parsed
1234: * document in ctxt->myDoc is not freed.
1235: */
1236:
1237: void
1238: xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
1239: {
1240: xmlParserInputPtr input;
1.140 daniel 1241: xmlChar *oldname;
1.97 daniel 1242:
1243: if (ctxt == NULL) return;
1244:
1245: while ((input = inputPop(ctxt)) != NULL) {
1246: xmlFreeInputStream(input);
1247: }
1.140 daniel 1248: while ((oldname = namePop(ctxt)) != NULL) {
1249: xmlFree(oldname);
1250: }
1.176 daniel 1251: if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
1.140 daniel 1252: if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
1.119 daniel 1253: if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
1254: if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
1255: if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
1256: if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
1.165 daniel 1257: if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
1258: if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
1259: if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
1.180 daniel 1260: if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
1.97 daniel 1261: if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
1.119 daniel 1262: xmlFree(ctxt->sax);
1263: if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
1264: xmlFree(ctxt);
1.97 daniel 1265: }
1266:
1267: /**
1268: * xmlNewParserCtxt:
1269: *
1270: * Allocate and initialize a new parser context.
1271: *
1272: * Returns the xmlParserCtxtPtr or NULL
1273: */
1274:
1275: xmlParserCtxtPtr
1276: xmlNewParserCtxt()
1277: {
1278: xmlParserCtxtPtr ctxt;
1279:
1.119 daniel 1280: ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
1.97 daniel 1281: if (ctxt == NULL) {
1282: fprintf(stderr, "xmlNewParserCtxt : cannot allocate context\n");
1283: perror("malloc");
1284: return(NULL);
1285: }
1.165 daniel 1286: memset(ctxt, 0, sizeof(xmlParserCtxt));
1.97 daniel 1287: xmlInitParserCtxt(ctxt);
1288: return(ctxt);
1289: }
1290:
1291: /**
1292: * xmlClearParserCtxt:
1293: * @ctxt: an XML parser context
1294: *
1295: * Clear (release owned resources) and reinitialize a parser context
1296: */
1297:
1298: void
1299: xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1300: {
1301: xmlClearNodeInfoSeq(&ctxt->node_seq);
1302: xmlInitParserCtxt(ctxt);
1303: }
1304:
1305: /************************************************************************
1306: * *
1.77 daniel 1307: * Commodity functions to handle entities *
1308: * *
1309: ************************************************************************/
1310:
1.174 daniel 1311: /**
1312: * xmlCheckEntity:
1313: * @ctxt: an XML parser context
1314: * @content: the entity content string
1315: *
1316: * Parse an entity content and checks the WF constraints
1317: *
1318: */
1319:
1320: void
1321: xmlCheckEntity(xmlParserCtxtPtr ctxt, const xmlChar *content) {
1322: }
1.97 daniel 1323:
1324: /**
1325: * xmlParseCharRef:
1326: * @ctxt: an XML parser context
1327: *
1328: * parse Reference declarations
1329: *
1330: * [66] CharRef ::= '&#' [0-9]+ ';' |
1331: * '&#x' [0-9a-fA-F]+ ';'
1332: *
1.98 daniel 1333: * [ WFC: Legal Character ]
1334: * Characters referred to using character references must match the
1335: * production for Char.
1336: *
1.135 daniel 1337: * Returns the value parsed (as an int), 0 in case of error
1.77 daniel 1338: */
1.97 daniel 1339: int
1340: xmlParseCharRef(xmlParserCtxtPtr ctxt) {
1341: int val = 0;
1342:
1.111 daniel 1343: if (ctxt->token != 0) {
1344: val = ctxt->token;
1345: ctxt->token = 0;
1346: return(val);
1347: }
1.152 daniel 1348: if ((RAW == '&') && (NXT(1) == '#') &&
1.97 daniel 1349: (NXT(2) == 'x')) {
1350: SKIP(3);
1.152 daniel 1351: while (RAW != ';') {
1352: if ((RAW >= '0') && (RAW <= '9'))
1.97 daniel 1353: val = val * 16 + (CUR - '0');
1.152 daniel 1354: else if ((RAW >= 'a') && (RAW <= 'f'))
1.97 daniel 1355: val = val * 16 + (CUR - 'a') + 10;
1.152 daniel 1356: else if ((RAW >= 'A') && (RAW <= 'F'))
1.97 daniel 1357: val = val * 16 + (CUR - 'A') + 10;
1358: else {
1.123 daniel 1359: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1.97 daniel 1360: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1361: ctxt->sax->error(ctxt->userData,
1362: "xmlParseCharRef: invalid hexadecimal value\n");
1363: ctxt->wellFormed = 0;
1.180 daniel 1364: ctxt->disableSAX = 1;
1.97 daniel 1365: val = 0;
1366: break;
1367: }
1368: NEXT;
1369: }
1.164 daniel 1370: if (RAW == ';') {
1371: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1372: ctxt->nbChars ++;
1373: ctxt->input->cur++;
1374: }
1.152 daniel 1375: } else if ((RAW == '&') && (NXT(1) == '#')) {
1.97 daniel 1376: SKIP(2);
1.152 daniel 1377: while (RAW != ';') {
1378: if ((RAW >= '0') && (RAW <= '9'))
1.97 daniel 1379: val = val * 10 + (CUR - '0');
1380: else {
1.123 daniel 1381: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1.97 daniel 1382: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1383: ctxt->sax->error(ctxt->userData,
1384: "xmlParseCharRef: invalid decimal value\n");
1385: ctxt->wellFormed = 0;
1.180 daniel 1386: ctxt->disableSAX = 1;
1.97 daniel 1387: val = 0;
1388: break;
1389: }
1390: NEXT;
1391: }
1.164 daniel 1392: if (RAW == ';') {
1393: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1394: ctxt->nbChars ++;
1395: ctxt->input->cur++;
1396: }
1.97 daniel 1397: } else {
1.123 daniel 1398: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1.97 daniel 1399: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 1400: ctxt->sax->error(ctxt->userData,
1401: "xmlParseCharRef: invalid value\n");
1.97 daniel 1402: ctxt->wellFormed = 0;
1.180 daniel 1403: ctxt->disableSAX = 1;
1.97 daniel 1404: }
1.98 daniel 1405:
1.97 daniel 1406: /*
1.98 daniel 1407: * [ WFC: Legal Character ]
1408: * Characters referred to using character references must match the
1409: * production for Char.
1.97 daniel 1410: */
1411: if (IS_CHAR(val)) {
1412: return(val);
1413: } else {
1.123 daniel 1414: ctxt->errNo = XML_ERR_INVALID_CHAR;
1.97 daniel 1415: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.123 daniel 1416: ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
1.97 daniel 1417: val);
1418: ctxt->wellFormed = 0;
1.180 daniel 1419: ctxt->disableSAX = 1;
1.97 daniel 1420: }
1421: return(0);
1.77 daniel 1422: }
1423:
1.96 daniel 1424: /**
1.135 daniel 1425: * xmlParseStringCharRef:
1426: * @ctxt: an XML parser context
1427: * @str: a pointer to an index in the string
1428: *
1429: * parse Reference declarations, variant parsing from a string rather
1430: * than an an input flow.
1431: *
1432: * [66] CharRef ::= '&#' [0-9]+ ';' |
1433: * '&#x' [0-9a-fA-F]+ ';'
1434: *
1435: * [ WFC: Legal Character ]
1436: * Characters referred to using character references must match the
1437: * production for Char.
1438: *
1439: * Returns the value parsed (as an int), 0 in case of error, str will be
1440: * updated to the current value of the index
1441: */
1442: int
1443: xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1444: const xmlChar *ptr;
1445: xmlChar cur;
1446: int val = 0;
1447:
1448: if ((str == NULL) || (*str == NULL)) return(0);
1449: ptr = *str;
1450: cur = *ptr;
1.137 daniel 1451: if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1.135 daniel 1452: ptr += 3;
1453: cur = *ptr;
1454: while (cur != ';') {
1455: if ((cur >= '0') && (cur <= '9'))
1456: val = val * 16 + (cur - '0');
1457: else if ((cur >= 'a') && (cur <= 'f'))
1458: val = val * 16 + (cur - 'a') + 10;
1459: else if ((cur >= 'A') && (cur <= 'F'))
1460: val = val * 16 + (cur - 'A') + 10;
1461: else {
1462: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1463: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1464: ctxt->sax->error(ctxt->userData,
1465: "xmlParseCharRef: invalid hexadecimal value\n");
1466: ctxt->wellFormed = 0;
1.180 daniel 1467: ctxt->disableSAX = 1;
1.135 daniel 1468: val = 0;
1469: break;
1470: }
1471: ptr++;
1472: cur = *ptr;
1473: }
1474: if (cur == ';')
1475: ptr++;
1.145 daniel 1476: } else if ((cur == '&') && (ptr[1] == '#')){
1.135 daniel 1477: ptr += 2;
1478: cur = *ptr;
1479: while (cur != ';') {
1480: if ((cur >= '0') && (cur <= '9'))
1481: val = val * 10 + (cur - '0');
1482: else {
1483: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1484: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1485: ctxt->sax->error(ctxt->userData,
1486: "xmlParseCharRef: invalid decimal value\n");
1487: ctxt->wellFormed = 0;
1.180 daniel 1488: ctxt->disableSAX = 1;
1.135 daniel 1489: val = 0;
1490: break;
1491: }
1492: ptr++;
1493: cur = *ptr;
1494: }
1495: if (cur == ';')
1496: ptr++;
1497: } else {
1498: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1499: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1500: ctxt->sax->error(ctxt->userData,
1501: "xmlParseCharRef: invalid value\n");
1502: ctxt->wellFormed = 0;
1.180 daniel 1503: ctxt->disableSAX = 1;
1.135 daniel 1504: return(0);
1505: }
1506: *str = ptr;
1507:
1508: /*
1509: * [ WFC: Legal Character ]
1510: * Characters referred to using character references must match the
1511: * production for Char.
1512: */
1513: if (IS_CHAR(val)) {
1514: return(val);
1515: } else {
1516: ctxt->errNo = XML_ERR_INVALID_CHAR;
1517: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1518: ctxt->sax->error(ctxt->userData,
1519: "CharRef: invalid xmlChar value %d\n", val);
1520: ctxt->wellFormed = 0;
1.180 daniel 1521: ctxt->disableSAX = 1;
1.135 daniel 1522: }
1523: return(0);
1524: }
1525:
1526: /**
1.96 daniel 1527: * xmlParserHandleReference:
1528: * @ctxt: the parser context
1529: *
1.97 daniel 1530: * [67] Reference ::= EntityRef | CharRef
1531: *
1.96 daniel 1532: * [68] EntityRef ::= '&' Name ';'
1533: *
1.98 daniel 1534: * [ WFC: Entity Declared ]
1535: * the Name given in the entity reference must match that in an entity
1536: * declaration, except that well-formed documents need not declare any
1537: * of the following entities: amp, lt, gt, apos, quot.
1538: *
1539: * [ WFC: Parsed Entity ]
1540: * An entity reference must not contain the name of an unparsed entity
1541: *
1.97 daniel 1542: * [66] CharRef ::= '&#' [0-9]+ ';' |
1543: * '&#x' [0-9a-fA-F]+ ';'
1544: *
1.96 daniel 1545: * A PEReference may have been detectect in the current input stream
1546: * the handling is done accordingly to
1547: * http://www.w3.org/TR/REC-xml#entproc
1548: */
1549: void
1550: xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
1.97 daniel 1551: xmlParserInputPtr input;
1.123 daniel 1552: xmlChar *name;
1.97 daniel 1553: xmlEntityPtr ent = NULL;
1554:
1.126 daniel 1555: if (ctxt->token != 0) {
1556: return;
1557: }
1.152 daniel 1558: if (RAW != '&') return;
1.97 daniel 1559: GROW;
1.152 daniel 1560: if ((RAW == '&') && (NXT(1) == '#')) {
1.97 daniel 1561: switch(ctxt->instate) {
1.140 daniel 1562: case XML_PARSER_ENTITY_DECL:
1563: case XML_PARSER_PI:
1.109 daniel 1564: case XML_PARSER_CDATA_SECTION:
1.140 daniel 1565: case XML_PARSER_COMMENT:
1.168 daniel 1566: case XML_PARSER_SYSTEM_LITERAL:
1.140 daniel 1567: /* we just ignore it there */
1568: return;
1569: case XML_PARSER_START_TAG:
1.109 daniel 1570: return;
1.140 daniel 1571: case XML_PARSER_END_TAG:
1.97 daniel 1572: return;
1573: case XML_PARSER_EOF:
1.123 daniel 1574: ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
1.97 daniel 1575: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1576: ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
1577: ctxt->wellFormed = 0;
1.180 daniel 1578: ctxt->disableSAX = 1;
1.97 daniel 1579: return;
1580: case XML_PARSER_PROLOG:
1.140 daniel 1581: case XML_PARSER_START:
1582: case XML_PARSER_MISC:
1.123 daniel 1583: ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
1.97 daniel 1584: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1585: ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
1586: ctxt->wellFormed = 0;
1.180 daniel 1587: ctxt->disableSAX = 1;
1.97 daniel 1588: return;
1589: case XML_PARSER_EPILOG:
1.123 daniel 1590: ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
1.97 daniel 1591: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1592: ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
1593: ctxt->wellFormed = 0;
1.180 daniel 1594: ctxt->disableSAX = 1;
1.97 daniel 1595: return;
1596: case XML_PARSER_DTD:
1.123 daniel 1597: ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
1.97 daniel 1598: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1599: ctxt->sax->error(ctxt->userData,
1600: "CharRef are forbiden in DTDs!\n");
1601: ctxt->wellFormed = 0;
1.180 daniel 1602: ctxt->disableSAX = 1;
1.97 daniel 1603: return;
1604: case XML_PARSER_ENTITY_VALUE:
1605: /*
1606: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1607: * substitution here since we need the literal
1.97 daniel 1608: * entity value to be able to save the internal
1609: * subset of the document.
1610: * This will be handled by xmlDecodeEntities
1611: */
1612: return;
1613: case XML_PARSER_CONTENT:
1614: case XML_PARSER_ATTRIBUTE_VALUE:
1615: ctxt->token = xmlParseCharRef(ctxt);
1616: return;
1617: }
1618: return;
1619: }
1620:
1621: switch(ctxt->instate) {
1.109 daniel 1622: case XML_PARSER_CDATA_SECTION:
1623: return;
1.140 daniel 1624: case XML_PARSER_PI:
1.97 daniel 1625: case XML_PARSER_COMMENT:
1.168 daniel 1626: case XML_PARSER_SYSTEM_LITERAL:
1627: case XML_PARSER_CONTENT:
1.97 daniel 1628: return;
1.140 daniel 1629: case XML_PARSER_START_TAG:
1630: return;
1631: case XML_PARSER_END_TAG:
1632: return;
1.97 daniel 1633: case XML_PARSER_EOF:
1.123 daniel 1634: ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
1.97 daniel 1635: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1636: ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
1637: ctxt->wellFormed = 0;
1.180 daniel 1638: ctxt->disableSAX = 1;
1.97 daniel 1639: return;
1640: case XML_PARSER_PROLOG:
1.140 daniel 1641: case XML_PARSER_START:
1642: case XML_PARSER_MISC:
1.123 daniel 1643: ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
1.97 daniel 1644: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1645: ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
1646: ctxt->wellFormed = 0;
1.180 daniel 1647: ctxt->disableSAX = 1;
1.97 daniel 1648: return;
1649: case XML_PARSER_EPILOG:
1.123 daniel 1650: ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
1.97 daniel 1651: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1652: ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
1653: ctxt->wellFormed = 0;
1.180 daniel 1654: ctxt->disableSAX = 1;
1.97 daniel 1655: return;
1656: case XML_PARSER_ENTITY_VALUE:
1657: /*
1658: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1659: * substitution here since we need the literal
1.97 daniel 1660: * entity value to be able to save the internal
1661: * subset of the document.
1662: * This will be handled by xmlDecodeEntities
1663: */
1664: return;
1665: case XML_PARSER_ATTRIBUTE_VALUE:
1666: /*
1667: * NOTE: in the case of attributes values, we don't do the
1668: * substitution here unless we are in a mode where
1669: * the parser is explicitely asked to substitute
1670: * entities. The SAX callback is called with values
1671: * without entity substitution.
1672: * This will then be handled by xmlDecodeEntities
1673: */
1.113 daniel 1674: return;
1.97 daniel 1675: case XML_PARSER_ENTITY_DECL:
1676: /*
1677: * we just ignore it there
1678: * the substitution will be done once the entity is referenced
1679: */
1680: return;
1681: case XML_PARSER_DTD:
1.123 daniel 1682: ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
1.97 daniel 1683: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1684: ctxt->sax->error(ctxt->userData,
1685: "Entity references are forbiden in DTDs!\n");
1686: ctxt->wellFormed = 0;
1.180 daniel 1687: ctxt->disableSAX = 1;
1.97 daniel 1688: return;
1689: }
1690:
1691: NEXT;
1692: name = xmlScanName(ctxt);
1693: if (name == NULL) {
1.123 daniel 1694: ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
1.97 daniel 1695: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1696: ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
1697: ctxt->wellFormed = 0;
1.180 daniel 1698: ctxt->disableSAX = 1;
1.97 daniel 1699: ctxt->token = '&';
1700: return;
1701: }
1702: if (NXT(xmlStrlen(name)) != ';') {
1.123 daniel 1703: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.97 daniel 1704: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1705: ctxt->sax->error(ctxt->userData,
1706: "Entity reference: ';' expected\n");
1707: ctxt->wellFormed = 0;
1.180 daniel 1708: ctxt->disableSAX = 1;
1.97 daniel 1709: ctxt->token = '&';
1.119 daniel 1710: xmlFree(name);
1.97 daniel 1711: return;
1712: }
1713: SKIP(xmlStrlen(name) + 1);
1714: if (ctxt->sax != NULL) {
1715: if (ctxt->sax->getEntity != NULL)
1716: ent = ctxt->sax->getEntity(ctxt->userData, name);
1717: }
1.98 daniel 1718:
1719: /*
1720: * [ WFC: Entity Declared ]
1721: * the Name given in the entity reference must match that in an entity
1722: * declaration, except that well-formed documents need not declare any
1723: * of the following entities: amp, lt, gt, apos, quot.
1724: */
1.97 daniel 1725: if (ent == NULL)
1726: ent = xmlGetPredefinedEntity(name);
1727: if (ent == NULL) {
1.123 daniel 1728: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.97 daniel 1729: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1730: ctxt->sax->error(ctxt->userData,
1.98 daniel 1731: "Entity reference: entity %s not declared\n",
1732: name);
1.97 daniel 1733: ctxt->wellFormed = 0;
1.180 daniel 1734: ctxt->disableSAX = 1;
1.119 daniel 1735: xmlFree(name);
1.97 daniel 1736: return;
1737: }
1.98 daniel 1738:
1739: /*
1740: * [ WFC: Parsed Entity ]
1741: * An entity reference must not contain the name of an unparsed entity
1742: */
1.159 daniel 1743: if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.123 daniel 1744: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 1745: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1746: ctxt->sax->error(ctxt->userData,
1747: "Entity reference to unparsed entity %s\n", name);
1748: ctxt->wellFormed = 0;
1.180 daniel 1749: ctxt->disableSAX = 1;
1.98 daniel 1750: }
1751:
1.159 daniel 1752: if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
1.97 daniel 1753: ctxt->token = ent->content[0];
1.119 daniel 1754: xmlFree(name);
1.97 daniel 1755: return;
1756: }
1757: input = xmlNewEntityInputStream(ctxt, ent);
1758: xmlPushInput(ctxt, input);
1.119 daniel 1759: xmlFree(name);
1.96 daniel 1760: return;
1761: }
1762:
1763: /**
1764: * xmlParserHandlePEReference:
1765: * @ctxt: the parser context
1766: *
1767: * [69] PEReference ::= '%' Name ';'
1768: *
1.98 daniel 1769: * [ WFC: No Recursion ]
1770: * TODO A parsed entity must not contain a recursive
1771: * reference to itself, either directly or indirectly.
1772: *
1773: * [ WFC: Entity Declared ]
1774: * In a document without any DTD, a document with only an internal DTD
1775: * subset which contains no parameter entity references, or a document
1776: * with "standalone='yes'", ... ... The declaration of a parameter
1777: * entity must precede any reference to it...
1778: *
1779: * [ VC: Entity Declared ]
1780: * In a document with an external subset or external parameter entities
1781: * with "standalone='no'", ... ... The declaration of a parameter entity
1782: * must precede any reference to it...
1783: *
1784: * [ WFC: In DTD ]
1785: * Parameter-entity references may only appear in the DTD.
1786: * NOTE: misleading but this is handled.
1787: *
1788: * A PEReference may have been detected in the current input stream
1.96 daniel 1789: * the handling is done accordingly to
1790: * http://www.w3.org/TR/REC-xml#entproc
1791: * i.e.
1792: * - Included in literal in entity values
1793: * - Included as Paraemeter Entity reference within DTDs
1794: */
1795: void
1796: xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 1797: xmlChar *name;
1.96 daniel 1798: xmlEntityPtr entity = NULL;
1799: xmlParserInputPtr input;
1800:
1.126 daniel 1801: if (ctxt->token != 0) {
1802: return;
1803: }
1.152 daniel 1804: if (RAW != '%') return;
1.96 daniel 1805: switch(ctxt->instate) {
1.109 daniel 1806: case XML_PARSER_CDATA_SECTION:
1807: return;
1.97 daniel 1808: case XML_PARSER_COMMENT:
1809: return;
1.140 daniel 1810: case XML_PARSER_START_TAG:
1811: return;
1812: case XML_PARSER_END_TAG:
1813: return;
1.96 daniel 1814: case XML_PARSER_EOF:
1.123 daniel 1815: ctxt->errNo = XML_ERR_PEREF_AT_EOF;
1.96 daniel 1816: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1817: ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
1818: ctxt->wellFormed = 0;
1.180 daniel 1819: ctxt->disableSAX = 1;
1.96 daniel 1820: return;
1821: case XML_PARSER_PROLOG:
1.140 daniel 1822: case XML_PARSER_START:
1823: case XML_PARSER_MISC:
1.123 daniel 1824: ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
1.96 daniel 1825: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1826: ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
1827: ctxt->wellFormed = 0;
1.180 daniel 1828: ctxt->disableSAX = 1;
1.96 daniel 1829: return;
1.97 daniel 1830: case XML_PARSER_ENTITY_DECL:
1.96 daniel 1831: case XML_PARSER_CONTENT:
1832: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 1833: case XML_PARSER_PI:
1.168 daniel 1834: case XML_PARSER_SYSTEM_LITERAL:
1.96 daniel 1835: /* we just ignore it there */
1836: return;
1837: case XML_PARSER_EPILOG:
1.123 daniel 1838: ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
1.96 daniel 1839: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.97 daniel 1840: ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
1.96 daniel 1841: ctxt->wellFormed = 0;
1.180 daniel 1842: ctxt->disableSAX = 1;
1.96 daniel 1843: return;
1.97 daniel 1844: case XML_PARSER_ENTITY_VALUE:
1845: /*
1846: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1847: * substitution here since we need the literal
1.97 daniel 1848: * entity value to be able to save the internal
1849: * subset of the document.
1850: * This will be handled by xmlDecodeEntities
1851: */
1852: return;
1.96 daniel 1853: case XML_PARSER_DTD:
1.98 daniel 1854: /*
1855: * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1856: * In the internal DTD subset, parameter-entity references
1857: * can occur only where markup declarations can occur, not
1858: * within markup declarations.
1859: * In that case this is handled in xmlParseMarkupDecl
1860: */
1861: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1862: return;
1.96 daniel 1863: }
1864:
1865: NEXT;
1866: name = xmlParseName(ctxt);
1867: if (name == NULL) {
1.123 daniel 1868: ctxt->errNo = XML_ERR_PEREF_NO_NAME;
1.96 daniel 1869: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1870: ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
1871: ctxt->wellFormed = 0;
1.180 daniel 1872: ctxt->disableSAX = 1;
1.96 daniel 1873: } else {
1.152 daniel 1874: if (RAW == ';') {
1.96 daniel 1875: NEXT;
1.98 daniel 1876: if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1877: entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.96 daniel 1878: if (entity == NULL) {
1.98 daniel 1879:
1880: /*
1881: * [ WFC: Entity Declared ]
1882: * In a document without any DTD, a document with only an
1883: * internal DTD subset which contains no parameter entity
1884: * references, or a document with "standalone='yes'", ...
1885: * ... The declaration of a parameter entity must precede
1886: * any reference to it...
1887: */
1888: if ((ctxt->standalone == 1) ||
1889: ((ctxt->hasExternalSubset == 0) &&
1890: (ctxt->hasPErefs == 0))) {
1891: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1892: ctxt->sax->error(ctxt->userData,
1893: "PEReference: %%%s; not found\n", name);
1894: ctxt->wellFormed = 0;
1.180 daniel 1895: ctxt->disableSAX = 1;
1.98 daniel 1896: } else {
1897: /*
1898: * [ VC: Entity Declared ]
1899: * In a document with an external subset or external
1900: * parameter entities with "standalone='no'", ...
1901: * ... The declaration of a parameter entity must precede
1902: * any reference to it...
1903: */
1904: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1905: ctxt->sax->warning(ctxt->userData,
1906: "PEReference: %%%s; not found\n", name);
1907: ctxt->valid = 0;
1908: }
1.96 daniel 1909: } else {
1.159 daniel 1910: if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1911: (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
1.96 daniel 1912: /*
1.156 daniel 1913: * TODO !!! handle the extra spaces added before and after
1.96 daniel 1914: * c.f. http://www.w3.org/TR/REC-xml#as-PE
1915: */
1916: input = xmlNewEntityInputStream(ctxt, entity);
1917: xmlPushInput(ctxt, input);
1.164 daniel 1918: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
1919: (RAW == '<') && (NXT(1) == '?') &&
1920: (NXT(2) == 'x') && (NXT(3) == 'm') &&
1921: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 1922: xmlParseTextDecl(ctxt);
1.164 daniel 1923: }
1924: if (ctxt->token == 0)
1925: ctxt->token = ' ';
1.96 daniel 1926: } else {
1927: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1928: ctxt->sax->error(ctxt->userData,
1929: "xmlHandlePEReference: %s is not a parameter entity\n",
1930: name);
1931: ctxt->wellFormed = 0;
1.180 daniel 1932: ctxt->disableSAX = 1;
1.96 daniel 1933: }
1934: }
1935: } else {
1.123 daniel 1936: ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
1.96 daniel 1937: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1938: ctxt->sax->error(ctxt->userData,
1939: "xmlHandlePEReference: expecting ';'\n");
1940: ctxt->wellFormed = 0;
1.180 daniel 1941: ctxt->disableSAX = 1;
1.96 daniel 1942: }
1.119 daniel 1943: xmlFree(name);
1.97 daniel 1944: }
1945: }
1946:
/*
 * Macro used to grow the current buffer.
 *
 * The argument names a local xmlChar pointer; the macro token-pastes
 * "_size" onto that name to find the matching capacity variable, doubles
 * it and reallocates the buffer to the new capacity.
 *
 * If the reallocation fails the old block is released and the ENCLOSING
 * function returns NULL, so this can only be used inside functions that
 * return a pointer.  The result of xmlRealloc() goes through a temporary:
 * assigning it straight to the buffer would overwrite the only reference
 * to the old block and leak it on failure.
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_grown;						\
    buffer##_size *= 2;							\
    buffer##_grown = (xmlChar *)					\
        xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));		\
    if (buffer##_grown == NULL) {					\
        perror("realloc failed");					\
        xmlFree(buffer);						\
        return(NULL);							\
    }									\
    buffer = buffer##_grown;						\
}
1.77 daniel 1959:
1960: /**
1961: * xmlDecodeEntities:
1962: * @ctxt: the parser context
1963: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1964: * @len: the len to decode (in bytes !), -1 for no size limit
1.123 daniel 1965: * @end: an end marker xmlChar, 0 if none
1966: * @end2: an end marker xmlChar, 0 if none
1967: * @end3: an end marker xmlChar, 0 if none
1.77 daniel 1968: *
1969: * [67] Reference ::= EntityRef | CharRef
1970: *
1971: * [69] PEReference ::= '%' Name ';'
1972: *
1973: * Returns A newly allocated string with the substitution done. The caller
1974: * must deallocate it !
1975: */
1.123 daniel 1976: xmlChar *
1.77 daniel 1977: xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
1.123 daniel 1978: xmlChar end, xmlChar end2, xmlChar end3) {
1979: xmlChar *buffer = NULL;
1.78 daniel 1980: int buffer_size = 0;
1.161 daniel 1981: int nbchars = 0;
1.78 daniel 1982:
1.123 daniel 1983: xmlChar *current = NULL;
1.77 daniel 1984: xmlEntityPtr ent;
1985: unsigned int max = (unsigned int) len;
1.161 daniel 1986: int c,l;
1.77 daniel 1987:
1.185 daniel 1988: if (ctxt->depth > 40) {
1989: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1990: ctxt->sax->error(ctxt->userData,
1991: "Detected entity reference loop\n");
1992: ctxt->wellFormed = 0;
1993: ctxt->disableSAX = 1;
1994: ctxt->errNo = XML_ERR_ENTITY_LOOP;
1995: return(NULL);
1996: }
1997:
1.77 daniel 1998: /*
1999: * allocate a translation buffer.
2000: */
1.140 daniel 2001: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.123 daniel 2002: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
1.77 daniel 2003: if (buffer == NULL) {
2004: perror("xmlDecodeEntities: malloc failed");
2005: return(NULL);
2006: }
2007:
1.78 daniel 2008: /*
2009: * Ok loop until we reach one of the ending char or a size limit.
2010: */
1.161 daniel 2011: c = CUR_CHAR(l);
2012: while ((nbchars < max) && (c != end) &&
2013: (c != end2) && (c != end3)) {
1.77 daniel 2014:
1.161 daniel 2015: if (c == 0) break;
2016: if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
1.98 daniel 2017: int val = xmlParseCharRef(ctxt);
1.161 daniel 2018: COPY_BUF(0,buffer,nbchars,val);
2019: NEXTL(l);
2020: } else if ((c == '&') && (ctxt->token != '&') &&
2021: (what & XML_SUBSTITUTE_REF)) {
1.98 daniel 2022: ent = xmlParseEntityRef(ctxt);
2023: if ((ent != NULL) &&
2024: (ctxt->replaceEntities != 0)) {
2025: current = ent->content;
2026: while (*current != 0) {
1.161 daniel 2027: buffer[nbchars++] = *current++;
2028: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.98 daniel 2029: growBuffer(buffer);
1.77 daniel 2030: }
2031: }
1.98 daniel 2032: } else if (ent != NULL) {
1.123 daniel 2033: const xmlChar *cur = ent->name;
1.98 daniel 2034:
1.161 daniel 2035: buffer[nbchars++] = '&';
2036: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.98 daniel 2037: growBuffer(buffer);
2038: }
1.161 daniel 2039: while (*cur != 0) {
2040: buffer[nbchars++] = *cur++;
2041: }
2042: buffer[nbchars++] = ';';
1.77 daniel 2043: }
1.161 daniel 2044: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1.97 daniel 2045: /*
1.77 daniel 2046: * a PEReference induce to switch the entity flow,
2047: * we break here to flush the current set of chars
2048: * parsed if any. We will be called back later.
1.97 daniel 2049: */
1.91 daniel 2050: if (nbchars != 0) break;
1.77 daniel 2051:
2052: xmlParsePEReference(ctxt);
1.79 daniel 2053:
1.97 daniel 2054: /*
1.79 daniel 2055: * Pop-up of finished entities.
1.97 daniel 2056: */
1.152 daniel 2057: while ((RAW == 0) && (ctxt->inputNr > 1))
1.79 daniel 2058: xmlPopInput(ctxt);
2059:
1.98 daniel 2060: break;
1.77 daniel 2061: } else {
1.161 daniel 2062: COPY_BUF(l,buffer,nbchars,c);
2063: NEXTL(l);
2064: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.86 daniel 2065: growBuffer(buffer);
2066: }
1.77 daniel 2067: }
1.161 daniel 2068: c = CUR_CHAR(l);
1.77 daniel 2069: }
1.161 daniel 2070: buffer[nbchars++] = 0;
1.77 daniel 2071: return(buffer);
2072: }
2073:
1.135 daniel 2074: /**
2075: * xmlStringDecodeEntities:
2076: * @ctxt: the parser context
2077: * @str: the input string
2078: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2079: * @end: an end marker xmlChar, 0 if none
2080: * @end2: an end marker xmlChar, 0 if none
2081: * @end3: an end marker xmlChar, 0 if none
2082: *
2083: * [67] Reference ::= EntityRef | CharRef
2084: *
2085: * [69] PEReference ::= '%' Name ';'
2086: *
2087: * Returns A newly allocated string with the substitution done. The caller
2088: * must deallocate it !
2089: */
2090: xmlChar *
2091: xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2092: xmlChar end, xmlChar end2, xmlChar end3) {
2093: xmlChar *buffer = NULL;
2094: int buffer_size = 0;
2095:
2096: xmlChar *current = NULL;
2097: xmlEntityPtr ent;
1.176 daniel 2098: int c,l;
2099: int nbchars = 0;
1.135 daniel 2100:
1.185 daniel 2101: if (ctxt->depth > 40) {
2102: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2103: ctxt->sax->error(ctxt->userData,
2104: "Detected entity reference loop\n");
2105: ctxt->wellFormed = 0;
2106: ctxt->disableSAX = 1;
2107: ctxt->errNo = XML_ERR_ENTITY_LOOP;
2108: return(NULL);
2109: }
2110:
1.135 daniel 2111: /*
2112: * allocate a translation buffer.
2113: */
1.140 daniel 2114: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.135 daniel 2115: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2116: if (buffer == NULL) {
2117: perror("xmlDecodeEntities: malloc failed");
2118: return(NULL);
2119: }
2120:
2121: /*
2122: * Ok loop until we reach one of the ending char or a size limit.
2123: */
1.176 daniel 2124: c = CUR_SCHAR(str, l);
2125: while ((c != 0) && (c != end) && (c != end2) && (c != end3)) {
1.135 daniel 2126:
1.176 daniel 2127: if (c == 0) break;
2128: if ((c == '&') && (str[1] == '#')) {
1.135 daniel 2129: int val = xmlParseStringCharRef(ctxt, &str);
1.176 daniel 2130: if (val != 0) {
2131: COPY_BUF(0,buffer,nbchars,val);
2132: }
2133: } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1.135 daniel 2134: ent = xmlParseStringEntityRef(ctxt, &str);
1.185 daniel 2135: if ((ent != NULL) && (ent->content != NULL)) {
2136: xmlChar *rep;
2137:
2138: ctxt->depth++;
2139: rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2140: 0, 0, 0);
2141: ctxt->depth--;
2142: if (rep != NULL) {
2143: current = rep;
2144: while (*current != 0) {
2145: buffer[nbchars++] = *current++;
2146: if (nbchars >
2147: buffer_size - XML_PARSER_BUFFER_SIZE) {
2148: growBuffer(buffer);
2149: }
1.135 daniel 2150: }
1.185 daniel 2151: xmlFree(rep);
1.135 daniel 2152: }
2153: } else if (ent != NULL) {
2154: int i = xmlStrlen(ent->name);
2155: const xmlChar *cur = ent->name;
2156:
1.176 daniel 2157: buffer[nbchars++] = '&';
2158: if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 2159: growBuffer(buffer);
2160: }
2161: for (;i > 0;i--)
1.176 daniel 2162: buffer[nbchars++] = *cur++;
2163: buffer[nbchars++] = ';';
1.135 daniel 2164: }
1.176 daniel 2165: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1.135 daniel 2166: ent = xmlParseStringPEReference(ctxt, &str);
2167: if (ent != NULL) {
1.185 daniel 2168: xmlChar *rep;
2169:
2170: ctxt->depth++;
2171: rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2172: 0, 0, 0);
2173: ctxt->depth--;
2174: if (rep != NULL) {
2175: current = rep;
2176: while (*current != 0) {
2177: buffer[nbchars++] = *current++;
2178: if (nbchars >
2179: buffer_size - XML_PARSER_BUFFER_SIZE) {
2180: growBuffer(buffer);
2181: }
1.135 daniel 2182: }
1.185 daniel 2183: xmlFree(rep);
1.135 daniel 2184: }
2185: }
2186: } else {
1.176 daniel 2187: COPY_BUF(l,buffer,nbchars,c);
2188: str += l;
2189: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 2190: growBuffer(buffer);
2191: }
2192: }
1.176 daniel 2193: c = CUR_SCHAR(str, l);
1.135 daniel 2194: }
1.176 daniel 2195: buffer[nbchars++] = 0;
1.135 daniel 2196: return(buffer);
2197: }
2198:
1.1 veillard 2199:
1.28 daniel 2200: /************************************************************************
2201: * *
1.75 daniel 2202: * Commodity functions to handle encodings *
2203: * *
2204: ************************************************************************/
2205:
1.172 daniel 2206: /*
2207: * xmlCheckLanguageID
2208: * @lang: pointer to the string value
2209: *
2210: * Checks that the value conforms to the LanguageID production:
2211: *
2212: * [33] LanguageID ::= Langcode ('-' Subcode)*
2213: * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2214: * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2215: * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2216: * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2217: * [38] Subcode ::= ([a-z] | [A-Z])+
2218: *
2219: * Returns 1 if correct 0 otherwise
2220: **/
2221: int
2222: xmlCheckLanguageID(const xmlChar *lang) {
2223: const xmlChar *cur = lang;
2224:
2225: if (cur == NULL)
2226: return(0);
2227: if (((cur[0] == 'i') && (cur[1] == '-')) ||
2228: ((cur[0] == 'I') && (cur[1] == '-'))) {
2229: /*
2230: * IANA code
2231: */
2232: cur += 2;
2233: while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2234: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2235: cur++;
2236: } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2237: ((cur[0] == 'X') && (cur[1] == '-'))) {
2238: /*
2239: * User code
2240: */
2241: cur += 2;
2242: while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2243: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2244: cur++;
2245: } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2246: ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2247: /*
2248: * ISO639
2249: */
2250: cur++;
2251: if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2252: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2253: cur++;
2254: else
2255: return(0);
2256: } else
2257: return(0);
2258: while (cur[0] != 0) {
2259: if (cur[0] != '-')
2260: return(0);
2261: cur++;
2262: if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2263: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2264: cur++;
2265: else
2266: return(0);
2267: while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2268: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2269: cur++;
2270: }
2271: return(1);
2272: }
2273:
1.75 daniel 2274: /**
2275: * xmlSwitchEncoding:
2276: * @ctxt: the parser context
1.124 daniel 2277: * @enc: the encoding value (number)
1.75 daniel 2278: *
2279: * change the input functions when discovering the character encoding
2280: * of a given entity.
2281: */
2282: void
2283: xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
2284: {
1.156 daniel 2285: xmlCharEncodingHandlerPtr handler;
2286:
2287: handler = xmlGetCharEncodingHandler(enc);
2288: if (handler != NULL) {
2289: if (ctxt->input != NULL) {
2290: if (ctxt->input->buf != NULL) {
2291: if (ctxt->input->buf->encoder != NULL) {
2292: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2293: ctxt->sax->error(ctxt->userData,
2294: "xmlSwitchEncoding : encoder already regitered\n");
2295: return;
2296: }
2297: ctxt->input->buf->encoder = handler;
2298:
2299: /*
2300: * Is there already some content down the pipe to convert
2301: */
2302: if ((ctxt->input->buf->buffer != NULL) &&
2303: (ctxt->input->buf->buffer->use > 0)) {
2304: xmlChar *buf;
2305: int res, len, size;
2306: int processed;
2307:
2308: /*
2309: * Specific handling of the Byte Order Mark for
2310: * UTF-16
2311: */
2312: if ((enc == XML_CHAR_ENCODING_UTF16LE) &&
2313: (ctxt->input->cur[0] == 0xFF) &&
2314: (ctxt->input->cur[1] == 0xFE)) {
2315: SKIP(2);
2316: }
2317: if ((enc == XML_CHAR_ENCODING_UTF16BE) &&
2318: (ctxt->input->cur[0] == 0xFE) &&
2319: (ctxt->input->cur[1] == 0xFF)) {
2320: SKIP(2);
2321: }
2322:
2323: /*
2324: * convert the non processed part
2325: */
2326: processed = ctxt->input->cur - ctxt->input->base;
2327: len = ctxt->input->buf->buffer->use - processed;
2328:
2329: if (len <= 0) {
2330: return;
2331: }
2332: size = ctxt->input->buf->buffer->use * 4;
2333: if (size < 4000)
2334: size = 4000;
1.167 daniel 2335: retry_larger:
1.160 daniel 2336: buf = (xmlChar *) xmlMalloc(size + 1);
1.156 daniel 2337: if (buf == NULL) {
2338: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2339: ctxt->sax->error(ctxt->userData,
2340: "xmlSwitchEncoding : out of memory\n");
2341: return;
2342: }
1.160 daniel 2343: /* TODO !!! Handling of buf too small */
1.156 daniel 2344: res = handler->input(buf, size, ctxt->input->cur, &len);
1.167 daniel 2345: if (res == -1) {
2346: size *= 2;
2347: xmlFree(buf);
2348: goto retry_larger;
2349: }
1.156 daniel 2350: if ((res < 0) ||
2351: (len != ctxt->input->buf->buffer->use - processed)) {
2352: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2353: ctxt->sax->error(ctxt->userData,
2354: "xmlSwitchEncoding : conversion failed\n");
2355: xmlFree(buf);
2356: return;
2357: }
1.167 daniel 2358:
1.156 daniel 2359: /*
2360: * Conversion succeeded, get rid of the old buffer
2361: */
2362: xmlFree(ctxt->input->buf->buffer->content);
2363: ctxt->input->buf->buffer->content = buf;
2364: ctxt->input->base = buf;
2365: ctxt->input->cur = buf;
2366: ctxt->input->buf->buffer->size = size;
2367: ctxt->input->buf->buffer->use = res;
1.160 daniel 2368: buf[res] = 0;
1.156 daniel 2369: }
2370: return;
2371: } else {
2372: if (ctxt->input->length == 0) {
2373: /*
2374: * When parsing a static memory array one must know the
2375: * size to be able to convert the buffer.
2376: */
2377: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2378: ctxt->sax->error(ctxt->userData,
2379: "xmlSwitchEncoding : no input\n");
2380: return;
2381: } else {
2382: xmlChar *buf;
2383: int res, len;
2384: int processed = ctxt->input->cur - ctxt->input->base;
2385:
2386: /*
2387: * convert the non processed part
2388: */
2389: len = ctxt->input->length - processed;
2390: if (len <= 0) {
2391: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2392: ctxt->sax->error(ctxt->userData,
2393: "xmlSwitchEncoding : input fully consumed?\n");
2394: return;
2395: }
2396: buf = (xmlChar *) xmlMalloc(ctxt->input->length * 4);
2397: if (buf == NULL) {
2398: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2399: ctxt->sax->error(ctxt->userData,
2400: "xmlSwitchEncoding : out of memory\n");
2401: return;
2402: }
2403: res = handler->input(buf, ctxt->input->length * 4,
2404: ctxt->input->cur, &len);
2405: if ((res < 0) ||
2406: (len != ctxt->input->length - processed)) {
2407: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2408: ctxt->sax->error(ctxt->userData,
2409: "xmlSwitchEncoding : conversion failed\n");
2410: xmlFree(buf);
2411: return;
2412: }
2413: /*
2414: * Conversion succeeded, get rid of the old buffer
2415: */
2416: if ((ctxt->input->free != NULL) &&
2417: (ctxt->input->base != NULL))
2418: ctxt->input->free((xmlChar *) ctxt->input->base);
2419: ctxt->input->base = ctxt->input->cur = buf;
2420: ctxt->input->length = res;
2421: }
2422: }
2423: } else {
2424: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2425: ctxt->sax->error(ctxt->userData,
2426: "xmlSwitchEncoding : no input\n");
2427: }
2428: }
2429:
1.75 daniel 2430: switch (enc) {
2431: case XML_CHAR_ENCODING_ERROR:
1.123 daniel 2432: ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1.75 daniel 2433: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2434: ctxt->sax->error(ctxt->userData, "encoding unknown\n");
2435: ctxt->wellFormed = 0;
1.180 daniel 2436: ctxt->disableSAX = 1;
1.75 daniel 2437: break;
2438: case XML_CHAR_ENCODING_NONE:
2439: /* let's assume it's UTF-8 without the XML decl */
2440: return;
2441: case XML_CHAR_ENCODING_UTF8:
2442: /* default encoding, no conversion should be needed */
2443: return;
2444: case XML_CHAR_ENCODING_UTF16LE:
1.123 daniel 2445: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2446: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2447: ctxt->sax->error(ctxt->userData,
2448: "char encoding UTF16 little endian not supported\n");
2449: break;
2450: case XML_CHAR_ENCODING_UTF16BE:
1.123 daniel 2451: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2452: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2453: ctxt->sax->error(ctxt->userData,
2454: "char encoding UTF16 big endian not supported\n");
2455: break;
2456: case XML_CHAR_ENCODING_UCS4LE:
1.123 daniel 2457: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2458: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2459: ctxt->sax->error(ctxt->userData,
2460: "char encoding USC4 little endian not supported\n");
2461: break;
2462: case XML_CHAR_ENCODING_UCS4BE:
1.123 daniel 2463: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2464: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2465: ctxt->sax->error(ctxt->userData,
2466: "char encoding USC4 big endian not supported\n");
2467: break;
2468: case XML_CHAR_ENCODING_EBCDIC:
1.123 daniel 2469: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2470: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2471: ctxt->sax->error(ctxt->userData,
2472: "char encoding EBCDIC not supported\n");
2473: break;
2474: case XML_CHAR_ENCODING_UCS4_2143:
1.123 daniel 2475: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2476: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2477: ctxt->sax->error(ctxt->userData,
2478: "char encoding UCS4 2143 not supported\n");
2479: break;
2480: case XML_CHAR_ENCODING_UCS4_3412:
1.123 daniel 2481: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2482: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2483: ctxt->sax->error(ctxt->userData,
2484: "char encoding UCS4 3412 not supported\n");
2485: break;
2486: case XML_CHAR_ENCODING_UCS2:
1.123 daniel 2487: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2488: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2489: ctxt->sax->error(ctxt->userData,
2490: "char encoding UCS2 not supported\n");
2491: break;
2492: case XML_CHAR_ENCODING_8859_1:
1.123 daniel 2493: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2494: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2495: ctxt->sax->error(ctxt->userData,
2496: "char encoding ISO_8859_1 ISO Latin 1 not supported\n");
2497: break;
2498: case XML_CHAR_ENCODING_8859_2:
1.123 daniel 2499: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2500: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2501: ctxt->sax->error(ctxt->userData,
2502: "char encoding ISO_8859_2 ISO Latin 2 not supported\n");
2503: break;
2504: case XML_CHAR_ENCODING_8859_3:
1.123 daniel 2505: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2506: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2507: ctxt->sax->error(ctxt->userData,
2508: "char encoding ISO_8859_3 not supported\n");
2509: break;
2510: case XML_CHAR_ENCODING_8859_4:
1.123 daniel 2511: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2512: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2513: ctxt->sax->error(ctxt->userData,
2514: "char encoding ISO_8859_4 not supported\n");
2515: break;
2516: case XML_CHAR_ENCODING_8859_5:
1.123 daniel 2517: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2518: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2519: ctxt->sax->error(ctxt->userData,
2520: "char encoding ISO_8859_5 not supported\n");
2521: break;
2522: case XML_CHAR_ENCODING_8859_6:
1.123 daniel 2523: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2524: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2525: ctxt->sax->error(ctxt->userData,
2526: "char encoding ISO_8859_6 not supported\n");
2527: break;
2528: case XML_CHAR_ENCODING_8859_7:
1.123 daniel 2529: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2530: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2531: ctxt->sax->error(ctxt->userData,
2532: "char encoding ISO_8859_7 not supported\n");
2533: break;
2534: case XML_CHAR_ENCODING_8859_8:
1.123 daniel 2535: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2536: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2537: ctxt->sax->error(ctxt->userData,
2538: "char encoding ISO_8859_8 not supported\n");
2539: break;
2540: case XML_CHAR_ENCODING_8859_9:
1.123 daniel 2541: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2542: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2543: ctxt->sax->error(ctxt->userData,
2544: "char encoding ISO_8859_9 not supported\n");
2545: break;
2546: case XML_CHAR_ENCODING_2022_JP:
1.123 daniel 2547: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2548: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2549: ctxt->sax->error(ctxt->userData,
2550: "char encoding ISO-2022-JPnot supported\n");
2551: break;
2552: case XML_CHAR_ENCODING_SHIFT_JIS:
1.123 daniel 2553: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2554: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2555: ctxt->sax->error(ctxt->userData,
2556: "char encoding Shift_JISnot supported\n");
2557: break;
2558: case XML_CHAR_ENCODING_EUC_JP:
1.123 daniel 2559: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2560: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2561: ctxt->sax->error(ctxt->userData,
2562: "char encoding EUC-JPnot supported\n");
2563: break;
2564: }
2565: }
2566:
2567: /************************************************************************
2568: * *
1.123 daniel 2569: * Commodity functions to handle xmlChars *
1.28 daniel 2570: * *
2571: ************************************************************************/
2572:
1.50 daniel 2573: /**
2574: * xmlStrndup:
1.123 daniel 2575: * @cur: the input xmlChar *
1.50 daniel 2576: * @len: the len of @cur
2577: *
1.123 daniel 2578: * a strndup for array of xmlChar's
1.68 daniel 2579: *
1.123 daniel 2580: * Returns a new xmlChar * or NULL
1.1 veillard 2581: */
1.123 daniel 2582: xmlChar *
2583: xmlStrndup(const xmlChar *cur, int len) {
1.135 daniel 2584: xmlChar *ret;
2585:
2586: if ((cur == NULL) || (len < 0)) return(NULL);
2587: ret = xmlMalloc((len + 1) * sizeof(xmlChar));
1.1 veillard 2588: if (ret == NULL) {
1.86 daniel 2589: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 2590: (len + 1) * (long)sizeof(xmlChar));
1.1 veillard 2591: return(NULL);
2592: }
1.123 daniel 2593: memcpy(ret, cur, len * sizeof(xmlChar));
1.1 veillard 2594: ret[len] = 0;
2595: return(ret);
2596: }
2597:
1.50 daniel 2598: /**
2599: * xmlStrdup:
1.123 daniel 2600: * @cur: the input xmlChar *
1.50 daniel 2601: *
1.152 daniel 2602: * a strdup for array of xmlChar's. Since they are supposed to be
2603: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2604: * a termination mark of '0'.
1.68 daniel 2605: *
1.123 daniel 2606: * Returns a new xmlChar * or NULL
1.1 veillard 2607: */
1.123 daniel 2608: xmlChar *
2609: xmlStrdup(const xmlChar *cur) {
2610: const xmlChar *p = cur;
1.1 veillard 2611:
1.135 daniel 2612: if (cur == NULL) return(NULL);
1.152 daniel 2613: while (*p != 0) p++;
1.1 veillard 2614: return(xmlStrndup(cur, p - cur));
2615: }
2616:
1.50 daniel 2617: /**
2618: * xmlCharStrndup:
2619: * @cur: the input char *
2620: * @len: the len of @cur
2621: *
1.123 daniel 2622: * a strndup for char's to xmlChar's
1.68 daniel 2623: *
1.123 daniel 2624: * Returns a new xmlChar * or NULL
1.45 daniel 2625: */
2626:
1.123 daniel 2627: xmlChar *
1.55 daniel 2628: xmlCharStrndup(const char *cur, int len) {
1.45 daniel 2629: int i;
1.135 daniel 2630: xmlChar *ret;
2631:
2632: if ((cur == NULL) || (len < 0)) return(NULL);
2633: ret = xmlMalloc((len + 1) * sizeof(xmlChar));
1.45 daniel 2634: if (ret == NULL) {
1.86 daniel 2635: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 2636: (len + 1) * (long)sizeof(xmlChar));
1.45 daniel 2637: return(NULL);
2638: }
2639: for (i = 0;i < len;i++)
1.123 daniel 2640: ret[i] = (xmlChar) cur[i];
1.45 daniel 2641: ret[len] = 0;
2642: return(ret);
2643: }
2644:
1.50 daniel 2645: /**
2646: * xmlCharStrdup:
2647: * @cur: the input char *
2648: * @len: the len of @cur
2649: *
1.123 daniel 2650: * a strdup for char's to xmlChar's
1.68 daniel 2651: *
1.123 daniel 2652: * Returns a new xmlChar * or NULL
1.45 daniel 2653: */
2654:
1.123 daniel 2655: xmlChar *
1.55 daniel 2656: xmlCharStrdup(const char *cur) {
1.45 daniel 2657: const char *p = cur;
2658:
1.135 daniel 2659: if (cur == NULL) return(NULL);
1.45 daniel 2660: while (*p != '\0') p++;
2661: return(xmlCharStrndup(cur, p - cur));
2662: }
2663:
1.50 daniel 2664: /**
2665: * xmlStrcmp:
1.123 daniel 2666: * @str1: the first xmlChar *
2667: * @str2: the second xmlChar *
1.50 daniel 2668: *
1.123 daniel 2669: * a strcmp for xmlChar's
1.68 daniel 2670: *
2671: * Returns the integer result of the comparison
1.14 veillard 2672: */
2673:
1.55 daniel 2674: int
1.123 daniel 2675: xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1.14 veillard 2676: register int tmp;
2677:
1.135 daniel 2678: if ((str1 == NULL) && (str2 == NULL)) return(0);
2679: if (str1 == NULL) return(-1);
2680: if (str2 == NULL) return(1);
1.14 veillard 2681: do {
2682: tmp = *str1++ - *str2++;
2683: if (tmp != 0) return(tmp);
2684: } while ((*str1 != 0) && (*str2 != 0));
2685: return (*str1 - *str2);
2686: }
2687:
1.50 daniel 2688: /**
2689: * xmlStrncmp:
1.123 daniel 2690: * @str1: the first xmlChar *
2691: * @str2: the second xmlChar *
1.50 daniel 2692: * @len: the max comparison length
2693: *
1.123 daniel 2694: * a strncmp for xmlChar's
1.68 daniel 2695: *
2696: * Returns the integer result of the comparison
1.14 veillard 2697: */
2698:
1.55 daniel 2699: int
1.123 daniel 2700: xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1.14 veillard 2701: register int tmp;
2702:
2703: if (len <= 0) return(0);
1.135 daniel 2704: if ((str1 == NULL) && (str2 == NULL)) return(0);
2705: if (str1 == NULL) return(-1);
2706: if (str2 == NULL) return(1);
1.14 veillard 2707: do {
2708: tmp = *str1++ - *str2++;
2709: if (tmp != 0) return(tmp);
2710: len--;
2711: if (len <= 0) return(0);
2712: } while ((*str1 != 0) && (*str2 != 0));
2713: return (*str1 - *str2);
2714: }
2715:
1.50 daniel 2716: /**
2717: * xmlStrchr:
1.123 daniel 2718: * @str: the xmlChar * array
2719: * @val: the xmlChar to search
1.50 daniel 2720: *
1.123 daniel 2721: * a strchr for xmlChar's
1.68 daniel 2722: *
1.123 daniel 2723: * Returns the xmlChar * for the first occurence or NULL.
1.14 veillard 2724: */
2725:
1.123 daniel 2726: const xmlChar *
2727: xmlStrchr(const xmlChar *str, xmlChar val) {
1.135 daniel 2728: if (str == NULL) return(NULL);
1.14 veillard 2729: while (*str != 0) {
1.123 daniel 2730: if (*str == val) return((xmlChar *) str);
1.14 veillard 2731: str++;
2732: }
2733: return(NULL);
1.89 daniel 2734: }
2735:
2736: /**
2737: * xmlStrstr:
1.123 daniel 2738: * @str: the xmlChar * array (haystack)
2739: * @val: the xmlChar to search (needle)
1.89 daniel 2740: *
1.123 daniel 2741: * a strstr for xmlChar's
1.89 daniel 2742: *
1.123 daniel 2743: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 2744: */
2745:
1.123 daniel 2746: const xmlChar *
2747: xmlStrstr(const xmlChar *str, xmlChar *val) {
1.89 daniel 2748: int n;
2749:
2750: if (str == NULL) return(NULL);
2751: if (val == NULL) return(NULL);
2752: n = xmlStrlen(val);
2753:
2754: if (n == 0) return(str);
2755: while (*str != 0) {
2756: if (*str == *val) {
1.123 daniel 2757: if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1.89 daniel 2758: }
2759: str++;
2760: }
2761: return(NULL);
2762: }
2763:
2764: /**
2765: * xmlStrsub:
1.123 daniel 2766: * @str: the xmlChar * array (haystack)
1.89 daniel 2767: * @start: the index of the first char (zero based)
2768: * @len: the length of the substring
2769: *
2770: * Extract a substring of a given string
2771: *
1.123 daniel 2772: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 2773: */
2774:
1.123 daniel 2775: xmlChar *
2776: xmlStrsub(const xmlChar *str, int start, int len) {
1.89 daniel 2777: int i;
2778:
2779: if (str == NULL) return(NULL);
2780: if (start < 0) return(NULL);
1.90 daniel 2781: if (len < 0) return(NULL);
1.89 daniel 2782:
2783: for (i = 0;i < start;i++) {
2784: if (*str == 0) return(NULL);
2785: str++;
2786: }
2787: if (*str == 0) return(NULL);
2788: return(xmlStrndup(str, len));
1.14 veillard 2789: }
1.28 daniel 2790:
1.50 daniel 2791: /**
2792: * xmlStrlen:
1.123 daniel 2793: * @str: the xmlChar * array
1.50 daniel 2794: *
1.127 daniel 2795: * length of a xmlChar's string
1.68 daniel 2796: *
1.123 daniel 2797: * Returns the number of xmlChar contained in the ARRAY.
1.45 daniel 2798: */
2799:
1.55 daniel 2800: int
1.123 daniel 2801: xmlStrlen(const xmlChar *str) {
1.45 daniel 2802: int len = 0;
2803:
2804: if (str == NULL) return(0);
2805: while (*str != 0) {
2806: str++;
2807: len++;
2808: }
2809: return(len);
2810: }
2811:
1.50 daniel 2812: /**
2813: * xmlStrncat:
1.123 daniel 2814: * @cur: the original xmlChar * array
2815: * @add: the xmlChar * array added
1.50 daniel 2816: * @len: the length of @add
2817: *
1.123 daniel 2818: * a strncat for array of xmlChar's
1.68 daniel 2819: *
1.123 daniel 2820: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 2821: */
2822:
1.123 daniel 2823: xmlChar *
2824: xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1.45 daniel 2825: int size;
1.123 daniel 2826: xmlChar *ret;
1.45 daniel 2827:
2828: if ((add == NULL) || (len == 0))
2829: return(cur);
2830: if (cur == NULL)
2831: return(xmlStrndup(add, len));
2832:
2833: size = xmlStrlen(cur);
1.123 daniel 2834: ret = xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1.45 daniel 2835: if (ret == NULL) {
1.86 daniel 2836: fprintf(stderr, "xmlStrncat: realloc of %ld byte failed\n",
1.123 daniel 2837: (size + len + 1) * (long)sizeof(xmlChar));
1.45 daniel 2838: return(cur);
2839: }
1.123 daniel 2840: memcpy(&ret[size], add, len * sizeof(xmlChar));
1.45 daniel 2841: ret[size + len] = 0;
2842: return(ret);
2843: }
2844:
1.50 daniel 2845: /**
2846: * xmlStrcat:
1.123 daniel 2847: * @cur: the original xmlChar * array
2848: * @add: the xmlChar * array added
1.50 daniel 2849: *
1.152 daniel 2850: * a strcat for array of xmlChar's. Since they are supposed to be
2851: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2852: * a termination mark of '0'.
1.68 daniel 2853: *
1.123 daniel 2854: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 2855: */
1.123 daniel 2856: xmlChar *
2857: xmlStrcat(xmlChar *cur, const xmlChar *add) {
2858: const xmlChar *p = add;
1.45 daniel 2859:
2860: if (add == NULL) return(cur);
2861: if (cur == NULL)
2862: return(xmlStrdup(add));
2863:
1.152 daniel 2864: while (*p != 0) p++;
1.45 daniel 2865: return(xmlStrncat(cur, add, p - add));
2866: }
2867:
2868: /************************************************************************
2869: * *
2870: * Commodity functions, cleanup needed ? *
2871: * *
2872: ************************************************************************/
2873:
1.50 daniel 2874: /**
2875: * areBlanks:
2876: * @ctxt: an XML parser context
1.123 daniel 2877: * @str: a xmlChar *
1.50 daniel 2878: * @len: the size of @str
2879: *
1.45 daniel 2880: * Is this a sequence of blank chars that one can ignore ?
1.50 daniel 2881: *
1.68 daniel 2882: * Returns 1 if ignorable 0 otherwise.
1.45 daniel 2883: */
2884:
1.123 daniel 2885: static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1.104 daniel 2886: int i, ret;
1.45 daniel 2887: xmlNodePtr lastChild;
2888:
1.176 daniel 2889: /*
2890: * Check for xml:space value.
2891: */
2892: if (*(ctxt->space) == 1)
2893: return(0);
2894:
2895: /*
2896: * Check that the string is made of blanks
2897: */
1.45 daniel 2898: for (i = 0;i < len;i++)
2899: if (!(IS_BLANK(str[i]))) return(0);
2900:
1.176 daniel 2901: /*
2902: * Look if the element is mixed content in the Dtd if available
2903: */
1.104 daniel 2904: if (ctxt->myDoc != NULL) {
2905: ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2906: if (ret == 0) return(1);
2907: if (ret == 1) return(0);
2908: }
1.176 daniel 2909:
1.104 daniel 2910: /*
1.176 daniel 2911: * Otherwise, heuristic :-\
1.104 daniel 2912: */
1.179 daniel 2913: if (ctxt->keepBlanks)
2914: return(0);
2915: if (RAW != '<') return(0);
2916: if (ctxt->node == NULL) return(0);
2917: if ((ctxt->node->children == NULL) &&
2918: (RAW == '<') && (NXT(1) == '/')) return(0);
2919:
1.45 daniel 2920: lastChild = xmlGetLastChild(ctxt->node);
2921: if (lastChild == NULL) {
2922: if (ctxt->node->content != NULL) return(0);
2923: } else if (xmlNodeIsText(lastChild))
2924: return(0);
1.157 daniel 2925: else if ((ctxt->node->children != NULL) &&
2926: (xmlNodeIsText(ctxt->node->children)))
1.104 daniel 2927: return(0);
1.45 daniel 2928: return(1);
2929: }
2930:
1.50 daniel 2931: /**
2932: * xmlHandleEntity:
2933: * @ctxt: an XML parser context
2934: * @entity: an XML entity pointer.
2935: *
2936: * Default handling of defined entities, when should we define a new input
1.45 daniel 2937: * stream ? When do we just handle that as a set of chars ?
1.99 daniel 2938: *
2939: * OBSOLETE: to be removed at some point.
1.45 daniel 2940: */
2941:
1.55 daniel 2942: void
2943: xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 2944: int len;
1.50 daniel 2945: xmlParserInputPtr input;
1.45 daniel 2946:
2947: if (entity->content == NULL) {
1.123 daniel 2948: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 2949: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2950: ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
1.45 daniel 2951: entity->name);
1.59 daniel 2952: ctxt->wellFormed = 0;
1.180 daniel 2953: ctxt->disableSAX = 1;
1.45 daniel 2954: return;
2955: }
2956: len = xmlStrlen(entity->content);
2957: if (len <= 2) goto handle_as_char;
2958:
2959: /*
2960: * Redefine its content as an input stream.
2961: */
1.50 daniel 2962: input = xmlNewEntityInputStream(ctxt, entity);
2963: xmlPushInput(ctxt, input);
1.45 daniel 2964: return;
2965:
2966: handle_as_char:
2967: /*
2968: * Just handle the content as a set of chars.
2969: */
1.171 daniel 2970: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
2971: (ctxt->sax->characters != NULL))
1.74 daniel 2972: ctxt->sax->characters(ctxt->userData, entity->content, len);
1.45 daniel 2973:
2974: }
2975:
2976: /*
2977: * Forward declarations for recursive behaviour.
2978: */
1.77 daniel 2979: void xmlParsePEReference(xmlParserCtxtPtr ctxt);
2980: void xmlParseReference(xmlParserCtxtPtr ctxt);
1.45 daniel 2981:
1.28 daniel 2982: /************************************************************************
2983: * *
2984: * Extra stuff for namespace support *
2985: * Relates to http://www.w3.org/TR/WD-xml-names *
2986: * *
2987: ************************************************************************/
2988:
1.50 daniel 2989: /**
2990: * xmlNamespaceParseNCName:
2991: * @ctxt: an XML parser context
2992: *
2993: * parse an XML namespace name.
1.28 daniel 2994: *
2995: * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2996: *
2997: * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2998: * CombiningChar | Extender
1.68 daniel 2999: *
3000: * Returns the namespace name or NULL
1.28 daniel 3001: */
3002:
1.123 daniel 3003: xmlChar *
1.55 daniel 3004: xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
1.152 daniel 3005: xmlChar buf[XML_MAX_NAMELEN + 5];
3006: int len = 0, l;
3007: int cur = CUR_CHAR(l);
1.28 daniel 3008:
1.156 daniel 3009: /* load first the value of the char !!! */
1.152 daniel 3010: if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
1.28 daniel 3011:
1.152 daniel 3012: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
3013: (cur == '.') || (cur == '-') ||
3014: (cur == '_') ||
3015: (IS_COMBINING(cur)) ||
3016: (IS_EXTENDER(cur))) {
3017: COPY_BUF(l,buf,len,cur);
3018: NEXTL(l);
3019: cur = CUR_CHAR(l);
1.91 daniel 3020: if (len >= XML_MAX_NAMELEN) {
3021: fprintf(stderr,
3022: "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
1.152 daniel 3023: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
3024: (cur == '.') || (cur == '-') ||
3025: (cur == '_') ||
3026: (IS_COMBINING(cur)) ||
3027: (IS_EXTENDER(cur))) {
3028: NEXTL(l);
3029: cur = CUR_CHAR(l);
3030: }
1.91 daniel 3031: break;
3032: }
3033: }
3034: return(xmlStrndup(buf, len));
1.28 daniel 3035: }
3036:
1.50 daniel 3037: /**
3038: * xmlNamespaceParseQName:
3039: * @ctxt: an XML parser context
1.123 daniel 3040: * @prefix: a xmlChar **
1.50 daniel 3041: *
3042: * parse an XML qualified name
1.28 daniel 3043: *
3044: * [NS 5] QName ::= (Prefix ':')? LocalPart
3045: *
3046: * [NS 6] Prefix ::= NCName
3047: *
3048: * [NS 7] LocalPart ::= NCName
1.68 daniel 3049: *
1.127 daniel 3050: * Returns the local part, and prefix is updated
1.50 daniel 3051: * to get the Prefix if any.
1.28 daniel 3052: */
3053:
1.123 daniel 3054: xmlChar *
3055: xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, xmlChar **prefix) {
3056: xmlChar *ret = NULL;
1.28 daniel 3057:
3058: *prefix = NULL;
3059: ret = xmlNamespaceParseNCName(ctxt);
1.152 daniel 3060: if (RAW == ':') {
1.28 daniel 3061: *prefix = ret;
1.40 daniel 3062: NEXT;
1.28 daniel 3063: ret = xmlNamespaceParseNCName(ctxt);
3064: }
3065:
3066: return(ret);
3067: }
3068:
1.50 daniel 3069: /**
1.72 daniel 3070: * xmlSplitQName:
1.162 daniel 3071: * @ctxt: an XML parser context
1.72 daniel 3072: * @name: an XML parser context
1.123 daniel 3073: * @prefix: a xmlChar **
1.72 daniel 3074: *
3075: * parse an XML qualified name string
3076: *
3077: * [NS 5] QName ::= (Prefix ':')? LocalPart
3078: *
3079: * [NS 6] Prefix ::= NCName
3080: *
3081: * [NS 7] LocalPart ::= NCName
3082: *
1.127 daniel 3083: * Returns the local part, and prefix is updated
1.72 daniel 3084: * to get the Prefix if any.
3085: */
3086:
1.123 daniel 3087: xmlChar *
1.162 daniel 3088: xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
3089: xmlChar buf[XML_MAX_NAMELEN + 5];
3090: int len = 0;
1.123 daniel 3091: xmlChar *ret = NULL;
3092: const xmlChar *cur = name;
1.162 daniel 3093: int c,l;
1.72 daniel 3094:
3095: *prefix = NULL;
1.113 daniel 3096:
3097: /* xml: prefix is not really a namespace */
3098: if ((cur[0] == 'x') && (cur[1] == 'm') &&
3099: (cur[2] == 'l') && (cur[3] == ':'))
3100: return(xmlStrdup(name));
3101:
1.162 daniel 3102: /* nasty but valid */
3103: if (cur[0] == ':')
3104: return(xmlStrdup(name));
3105:
3106: c = CUR_SCHAR(cur, l);
3107: if (!IS_LETTER(c) && (c != '_')) return(NULL);
1.72 daniel 3108:
1.162 daniel 3109: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3110: (c == '.') || (c == '-') ||
3111: (c == '_') ||
3112: (IS_COMBINING(c)) ||
3113: (IS_EXTENDER(c))) {
3114: COPY_BUF(l,buf,len,c);
3115: cur += l;
3116: c = CUR_SCHAR(cur, l);
3117: }
1.72 daniel 3118:
1.162 daniel 3119: ret = xmlStrndup(buf, len);
1.72 daniel 3120:
1.162 daniel 3121: if (c == ':') {
3122: cur += l;
1.163 daniel 3123: c = CUR_SCHAR(cur, l);
1.162 daniel 3124: if (!IS_LETTER(c) && (c != '_')) return(ret);
1.72 daniel 3125: *prefix = ret;
1.162 daniel 3126: len = 0;
1.72 daniel 3127:
1.162 daniel 3128: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3129: (c == '.') || (c == '-') ||
3130: (c == '_') ||
3131: (IS_COMBINING(c)) ||
3132: (IS_EXTENDER(c))) {
3133: COPY_BUF(l,buf,len,c);
3134: cur += l;
3135: c = CUR_SCHAR(cur, l);
3136: }
1.72 daniel 3137:
1.162 daniel 3138: ret = xmlStrndup(buf, len);
1.72 daniel 3139: }
3140:
3141: return(ret);
3142: }
3143: /**
1.50 daniel 3144: * xmlNamespaceParseNSDef:
3145: * @ctxt: an XML parser context
3146: *
3147: * parse a namespace prefix declaration
1.28 daniel 3148: *
3149: * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
3150: *
3151: * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
1.68 daniel 3152: *
3153: * Returns the namespace name
1.28 daniel 3154: */
3155:
1.123 daniel 3156: xmlChar *
1.55 daniel 3157: xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
1.123 daniel 3158: xmlChar *name = NULL;
1.28 daniel 3159:
1.152 daniel 3160: if ((RAW == 'x') && (NXT(1) == 'm') &&
1.40 daniel 3161: (NXT(2) == 'l') && (NXT(3) == 'n') &&
3162: (NXT(4) == 's')) {
3163: SKIP(5);
1.152 daniel 3164: if (RAW == ':') {
1.40 daniel 3165: NEXT;
1.28 daniel 3166: name = xmlNamespaceParseNCName(ctxt);
3167: }
3168: }
1.39 daniel 3169: return(name);
1.28 daniel 3170: }
3171:
1.50 daniel 3172: /**
3173: * xmlParseQuotedString:
3174: * @ctxt: an XML parser context
3175: *
1.45 daniel 3176: * [OLD] Parse and return a string between quotes or doublequotes
1.110 daniel 3177: * To be removed at next drop of binary compatibility
1.68 daniel 3178: *
3179: * Returns the string parser or NULL.
1.45 daniel 3180: */
1.123 daniel 3181: xmlChar *
1.55 daniel 3182: xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
1.135 daniel 3183: xmlChar *buf = NULL;
1.152 daniel 3184: int len = 0,l;
1.140 daniel 3185: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3186: int c;
1.45 daniel 3187:
1.135 daniel 3188: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3189: if (buf == NULL) {
3190: fprintf(stderr, "malloc of %d byte failed\n", size);
3191: return(NULL);
3192: }
1.152 daniel 3193: if (RAW == '"') {
1.45 daniel 3194: NEXT;
1.152 daniel 3195: c = CUR_CHAR(l);
1.135 daniel 3196: while (IS_CHAR(c) && (c != '"')) {
1.152 daniel 3197: if (len + 5 >= size) {
1.135 daniel 3198: size *= 2;
3199: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3200: if (buf == NULL) {
3201: fprintf(stderr, "realloc of %d byte failed\n", size);
3202: return(NULL);
3203: }
3204: }
1.152 daniel 3205: COPY_BUF(l,buf,len,c);
3206: NEXTL(l);
3207: c = CUR_CHAR(l);
1.135 daniel 3208: }
3209: if (c != '"') {
1.123 daniel 3210: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 3211: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3212: ctxt->sax->error(ctxt->userData,
3213: "String not closed \"%.50s\"\n", buf);
1.59 daniel 3214: ctxt->wellFormed = 0;
1.180 daniel 3215: ctxt->disableSAX = 1;
1.55 daniel 3216: } else {
1.45 daniel 3217: NEXT;
3218: }
1.152 daniel 3219: } else if (RAW == '\''){
1.45 daniel 3220: NEXT;
1.135 daniel 3221: c = CUR;
3222: while (IS_CHAR(c) && (c != '\'')) {
3223: if (len + 1 >= size) {
3224: size *= 2;
3225: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3226: if (buf == NULL) {
3227: fprintf(stderr, "realloc of %d byte failed\n", size);
3228: return(NULL);
3229: }
3230: }
3231: buf[len++] = c;
3232: NEXT;
3233: c = CUR;
3234: }
1.152 daniel 3235: if (RAW != '\'') {
1.123 daniel 3236: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 3237: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3238: ctxt->sax->error(ctxt->userData,
3239: "String not closed \"%.50s\"\n", buf);
1.59 daniel 3240: ctxt->wellFormed = 0;
1.180 daniel 3241: ctxt->disableSAX = 1;
1.55 daniel 3242: } else {
1.45 daniel 3243: NEXT;
3244: }
3245: }
1.135 daniel 3246: return(buf);
1.45 daniel 3247: }
3248:
1.50 daniel 3249: /**
3250: * xmlParseNamespace:
3251: * @ctxt: an XML parser context
3252: *
1.45 daniel 3253: * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
3254: *
3255: * This is what the older xml-name Working Draft specified, a bunch of
3256: * other stuff may still rely on it, so support is still here as
1.127 daniel 3257: * if it was declared on the root of the Tree:-(
1.110 daniel 3258: *
3259: * To be removed at next drop of binary compatibility
1.45 daniel 3260: */
3261:
1.55 daniel 3262: void
3263: xmlParseNamespace(xmlParserCtxtPtr ctxt) {
1.123 daniel 3264: xmlChar *href = NULL;
3265: xmlChar *prefix = NULL;
1.45 daniel 3266: int garbage = 0;
3267:
3268: /*
3269: * We just skipped "namespace" or "xml:namespace"
3270: */
3271: SKIP_BLANKS;
3272:
1.153 daniel 3273: while (IS_CHAR(RAW) && (RAW != '>')) {
1.45 daniel 3274: /*
3275: * We can have "ns" or "prefix" attributes
3276: * Old encoding as 'href' or 'AS' attributes is still supported
3277: */
1.152 daniel 3278: if ((RAW == 'n') && (NXT(1) == 's')) {
1.45 daniel 3279: garbage = 0;
3280: SKIP(2);
3281: SKIP_BLANKS;
3282:
1.152 daniel 3283: if (RAW != '=') continue;
1.45 daniel 3284: NEXT;
3285: SKIP_BLANKS;
3286:
3287: href = xmlParseQuotedString(ctxt);
3288: SKIP_BLANKS;
1.152 daniel 3289: } else if ((RAW == 'h') && (NXT(1) == 'r') &&
1.45 daniel 3290: (NXT(2) == 'e') && (NXT(3) == 'f')) {
3291: garbage = 0;
3292: SKIP(4);
3293: SKIP_BLANKS;
3294:
1.152 daniel 3295: if (RAW != '=') continue;
1.45 daniel 3296: NEXT;
3297: SKIP_BLANKS;
3298:
3299: href = xmlParseQuotedString(ctxt);
3300: SKIP_BLANKS;
1.152 daniel 3301: } else if ((RAW == 'p') && (NXT(1) == 'r') &&
1.45 daniel 3302: (NXT(2) == 'e') && (NXT(3) == 'f') &&
3303: (NXT(4) == 'i') && (NXT(5) == 'x')) {
3304: garbage = 0;
3305: SKIP(6);
3306: SKIP_BLANKS;
3307:
1.152 daniel 3308: if (RAW != '=') continue;
1.45 daniel 3309: NEXT;
3310: SKIP_BLANKS;
3311:
3312: prefix = xmlParseQuotedString(ctxt);
3313: SKIP_BLANKS;
1.152 daniel 3314: } else if ((RAW == 'A') && (NXT(1) == 'S')) {
1.45 daniel 3315: garbage = 0;
3316: SKIP(2);
3317: SKIP_BLANKS;
3318:
1.152 daniel 3319: if (RAW != '=') continue;
1.45 daniel 3320: NEXT;
3321: SKIP_BLANKS;
3322:
3323: prefix = xmlParseQuotedString(ctxt);
3324: SKIP_BLANKS;
1.152 daniel 3325: } else if ((RAW == '?') && (NXT(1) == '>')) {
1.45 daniel 3326: garbage = 0;
1.91 daniel 3327: NEXT;
1.45 daniel 3328: } else {
3329: /*
3330: * Found garbage when parsing the namespace
3331: */
1.122 daniel 3332: if (!garbage) {
1.55 daniel 3333: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3334: ctxt->sax->error(ctxt->userData,
3335: "xmlParseNamespace found garbage\n");
3336: }
1.123 daniel 3337: ctxt->errNo = XML_ERR_NS_DECL_ERROR;
1.59 daniel 3338: ctxt->wellFormed = 0;
1.180 daniel 3339: ctxt->disableSAX = 1;
1.45 daniel 3340: NEXT;
3341: }
3342: }
3343:
3344: MOVETO_ENDTAG(CUR_PTR);
3345: NEXT;
3346:
3347: /*
3348: * Register the DTD.
1.72 daniel 3349: if (href != NULL)
3350: if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
1.74 daniel 3351: ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
1.45 daniel 3352: */
3353:
1.119 daniel 3354: if (prefix != NULL) xmlFree(prefix);
3355: if (href != NULL) xmlFree(href);
1.45 daniel 3356: }
3357:
1.28 daniel 3358: /************************************************************************
3359: * *
3360: * The parser itself *
3361: * Relates to http://www.w3.org/TR/REC-xml *
3362: * *
3363: ************************************************************************/
1.14 veillard 3364:
1.50 daniel 3365: /**
1.97 daniel 3366: * xmlScanName:
3367: * @ctxt: an XML parser context
3368: *
3369: * Trickery: parse an XML name but without consuming the input flow
3370: * Needed for rollback cases.
3371: *
3372: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3373: * CombiningChar | Extender
3374: *
3375: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3376: *
3377: * [6] Names ::= Name (S Name)*
3378: *
3379: * Returns the Name parsed or NULL
3380: */
3381:
1.123 daniel 3382: xmlChar *
1.97 daniel 3383: xmlScanName(xmlParserCtxtPtr ctxt) {
1.123 daniel 3384: xmlChar buf[XML_MAX_NAMELEN];
1.97 daniel 3385: int len = 0;
3386:
3387: GROW;
1.152 daniel 3388: if (!IS_LETTER(RAW) && (RAW != '_') &&
3389: (RAW != ':')) {
1.97 daniel 3390: return(NULL);
3391: }
3392:
3393: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
3394: (NXT(len) == '.') || (NXT(len) == '-') ||
3395: (NXT(len) == '_') || (NXT(len) == ':') ||
3396: (IS_COMBINING(NXT(len))) ||
3397: (IS_EXTENDER(NXT(len)))) {
3398: buf[len] = NXT(len);
3399: len++;
3400: if (len >= XML_MAX_NAMELEN) {
3401: fprintf(stderr,
3402: "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3403: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
3404: (NXT(len) == '.') || (NXT(len) == '-') ||
3405: (NXT(len) == '_') || (NXT(len) == ':') ||
3406: (IS_COMBINING(NXT(len))) ||
3407: (IS_EXTENDER(NXT(len))))
3408: len++;
3409: break;
3410: }
3411: }
3412: return(xmlStrndup(buf, len));
3413: }
3414:
3415: /**
1.50 daniel 3416: * xmlParseName:
3417: * @ctxt: an XML parser context
3418: *
3419: * parse an XML name.
1.22 daniel 3420: *
3421: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3422: * CombiningChar | Extender
3423: *
3424: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3425: *
3426: * [6] Names ::= Name (S Name)*
1.68 daniel 3427: *
3428: * Returns the Name parsed or NULL
1.1 veillard 3429: */
3430:
1.123 daniel 3431: xmlChar *
1.55 daniel 3432: xmlParseName(xmlParserCtxtPtr ctxt) {
1.160 daniel 3433: xmlChar buf[XML_MAX_NAMELEN + 5];
3434: int len = 0, l;
3435: int c;
1.1 veillard 3436:
1.91 daniel 3437: GROW;
1.160 daniel 3438: c = CUR_CHAR(l);
1.190 ! daniel 3439: if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
! 3440: (!IS_LETTER(c) && (c != '_') &&
! 3441: (c != ':'))) {
1.91 daniel 3442: return(NULL);
3443: }
1.40 daniel 3444:
1.190 ! daniel 3445: while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
! 3446: ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
! 3447: (c == '.') || (c == '-') ||
! 3448: (c == '_') || (c == ':') ||
! 3449: (IS_COMBINING(c)) ||
! 3450: (IS_EXTENDER(c)))) {
1.160 daniel 3451: COPY_BUF(l,buf,len,c);
3452: NEXTL(l);
3453: c = CUR_CHAR(l);
1.91 daniel 3454: if (len >= XML_MAX_NAMELEN) {
3455: fprintf(stderr,
3456: "xmlParseName: reached XML_MAX_NAMELEN limit\n");
1.160 daniel 3457: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3458: (c == '.') || (c == '-') ||
3459: (c == '_') || (c == ':') ||
3460: (IS_COMBINING(c)) ||
3461: (IS_EXTENDER(c))) {
3462: NEXTL(l);
3463: c = CUR_CHAR(l);
1.97 daniel 3464: }
1.91 daniel 3465: break;
3466: }
3467: }
3468: return(xmlStrndup(buf, len));
1.22 daniel 3469: }
3470:
1.50 daniel 3471: /**
1.135 daniel 3472: * xmlParseStringName:
3473: * @ctxt: an XML parser context
3474: * @str: a pointer to an index in the string
3475: *
3476: * parse an XML name.
3477: *
3478: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3479: * CombiningChar | Extender
3480: *
3481: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3482: *
3483: * [6] Names ::= Name (S Name)*
3484: *
3485: * Returns the Name parsed or NULL. The str pointer
3486: * is updated to the current location in the string.
3487: */
3488:
3489: xmlChar *
3490: xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1.176 daniel 3491: xmlChar buf[XML_MAX_NAMELEN + 5];
3492: const xmlChar *cur = *str;
3493: int len = 0, l;
3494: int c;
1.135 daniel 3495:
1.176 daniel 3496: c = CUR_SCHAR(cur, l);
3497: if (!IS_LETTER(c) && (c != '_') &&
3498: (c != ':')) {
1.135 daniel 3499: return(NULL);
3500: }
3501:
1.176 daniel 3502: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3503: (c == '.') || (c == '-') ||
3504: (c == '_') || (c == ':') ||
3505: (IS_COMBINING(c)) ||
3506: (IS_EXTENDER(c))) {
3507: COPY_BUF(l,buf,len,c);
3508: cur += l;
3509: c = CUR_SCHAR(cur, l);
3510: if (len >= XML_MAX_NAMELEN) {
3511: fprintf(stderr,
3512: "xmlParseName: reached XML_MAX_NAMELEN limit\n");
3513: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3514: (c == '.') || (c == '-') ||
3515: (c == '_') || (c == ':') ||
3516: (IS_COMBINING(c)) ||
3517: (IS_EXTENDER(c))) {
3518: cur += l;
3519: c = CUR_SCHAR(cur, l);
3520: }
3521: break;
3522: }
1.135 daniel 3523: }
1.176 daniel 3524: *str = cur;
3525: return(xmlStrndup(buf, len));
1.135 daniel 3526: }
3527:
3528: /**
1.50 daniel 3529: * xmlParseNmtoken:
3530: * @ctxt: an XML parser context
3531: *
3532: * parse an XML Nmtoken.
1.22 daniel 3533: *
3534: * [7] Nmtoken ::= (NameChar)+
3535: *
3536: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1.68 daniel 3537: *
3538: * Returns the Nmtoken parsed or NULL
1.22 daniel 3539: */
3540:
1.123 daniel 3541: xmlChar *
1.55 daniel 3542: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1.123 daniel 3543: xmlChar buf[XML_MAX_NAMELEN];
1.91 daniel 3544: int len = 0;
1.160 daniel 3545: int c,l;
1.22 daniel 3546:
1.91 daniel 3547: GROW;
1.160 daniel 3548: c = CUR_CHAR(l);
3549: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3550: (c == '.') || (c == '-') ||
3551: (c == '_') || (c == ':') ||
3552: (IS_COMBINING(c)) ||
3553: (IS_EXTENDER(c))) {
3554: COPY_BUF(l,buf,len,c);
3555: NEXTL(l);
3556: c = CUR_CHAR(l);
1.91 daniel 3557: if (len >= XML_MAX_NAMELEN) {
3558: fprintf(stderr,
3559: "xmlParseNmtoken: reached XML_MAX_NAMELEN limit\n");
1.160 daniel 3560: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3561: (c == '.') || (c == '-') ||
3562: (c == '_') || (c == ':') ||
3563: (IS_COMBINING(c)) ||
3564: (IS_EXTENDER(c))) {
3565: NEXTL(l);
3566: c = CUR_CHAR(l);
3567: }
1.91 daniel 3568: break;
3569: }
3570: }
1.168 daniel 3571: if (len == 0)
3572: return(NULL);
1.91 daniel 3573: return(xmlStrndup(buf, len));
1.1 veillard 3574: }
3575:
1.50 daniel 3576: /**
3577: * xmlParseEntityValue:
3578: * @ctxt: an XML parser context
1.78 daniel 3579: * @orig: if non-NULL store a copy of the original entity value
1.50 daniel 3580: *
3581: * parse a value for ENTITY decl.
1.24 daniel 3582: *
3583: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3584: * "'" ([^%&'] | PEReference | Reference)* "'"
1.68 daniel 3585: *
1.78 daniel 3586: * Returns the EntityValue parsed with reference substitued or NULL
1.24 daniel 3587: */
3588:
1.123 daniel 3589: xmlChar *
3590: xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1.135 daniel 3591: xmlChar *buf = NULL;
3592: int len = 0;
1.140 daniel 3593: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3594: int c, l;
1.135 daniel 3595: xmlChar stop;
1.123 daniel 3596: xmlChar *ret = NULL;
1.176 daniel 3597: const xmlChar *cur = NULL;
1.98 daniel 3598: xmlParserInputPtr input;
1.24 daniel 3599:
1.152 daniel 3600: if (RAW == '"') stop = '"';
3601: else if (RAW == '\'') stop = '\'';
1.135 daniel 3602: else {
3603: ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
3604: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3605: ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
3606: ctxt->wellFormed = 0;
1.180 daniel 3607: ctxt->disableSAX = 1;
1.135 daniel 3608: return(NULL);
3609: }
3610: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3611: if (buf == NULL) {
3612: fprintf(stderr, "malloc of %d byte failed\n", size);
3613: return(NULL);
3614: }
1.94 daniel 3615:
1.135 daniel 3616: /*
3617: * The content of the entity definition is copied in a buffer.
3618: */
1.94 daniel 3619:
1.135 daniel 3620: ctxt->instate = XML_PARSER_ENTITY_VALUE;
3621: input = ctxt->input;
3622: GROW;
3623: NEXT;
1.152 daniel 3624: c = CUR_CHAR(l);
1.135 daniel 3625: /*
3626: * NOTE: 4.4.5 Included in Literal
3627: * When a parameter entity reference appears in a literal entity
3628: * value, ... a single or double quote character in the replacement
3629: * text is always treated as a normal data character and will not
3630: * terminate the literal.
3631: * In practice it means we stop the loop only when back at parsing
3632: * the initial entity and the quote is found
3633: */
3634: while (IS_CHAR(c) && ((c != stop) || (ctxt->input != input))) {
1.152 daniel 3635: if (len + 5 >= size) {
1.135 daniel 3636: size *= 2;
3637: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3638: if (buf == NULL) {
3639: fprintf(stderr, "realloc of %d byte failed\n", size);
3640: return(NULL);
1.94 daniel 3641: }
1.79 daniel 3642: }
1.152 daniel 3643: COPY_BUF(l,buf,len,c);
3644: NEXTL(l);
1.98 daniel 3645: /*
1.135 daniel 3646: * Pop-up of finished entities.
1.98 daniel 3647: */
1.152 daniel 3648: while ((RAW == 0) && (ctxt->inputNr > 1))
1.135 daniel 3649: xmlPopInput(ctxt);
1.152 daniel 3650:
3651: c = CUR_CHAR(l);
1.135 daniel 3652: if (c == 0) {
1.94 daniel 3653: GROW;
1.152 daniel 3654: c = CUR_CHAR(l);
1.79 daniel 3655: }
1.135 daniel 3656: }
3657: buf[len] = 0;
3658:
3659: /*
1.176 daniel 3660: * Raise problem w.r.t. '&' and '%' being used in non-entities
3661: * reference constructs. Note Charref will be handled in
3662: * xmlStringDecodeEntities()
3663: */
3664: cur = buf;
3665: while (*cur != 0) {
3666: if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3667: xmlChar *name;
3668: xmlChar tmp = *cur;
3669:
3670: cur++;
3671: name = xmlParseStringName(ctxt, &cur);
3672: if ((name == NULL) || (*cur != ';')) {
3673: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3674: ctxt->sax->error(ctxt->userData,
3675: "EntityValue: '%c' forbidden except for entities references\n",
3676: tmp);
3677: ctxt->wellFormed = 0;
1.180 daniel 3678: ctxt->disableSAX = 1;
1.176 daniel 3679: ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
3680: }
3681: if ((ctxt->inSubset == 1) && (tmp == '%')) {
3682: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3683: ctxt->sax->error(ctxt->userData,
3684: "EntityValue: PEReferences forbidden in internal subset\n",
3685: tmp);
3686: ctxt->wellFormed = 0;
1.180 daniel 3687: ctxt->disableSAX = 1;
1.176 daniel 3688: ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
3689: }
3690: if (name != NULL)
3691: xmlFree(name);
3692: }
3693: cur++;
3694: }
3695:
3696: /*
1.135 daniel 3697: * Then PEReference entities are substituted.
3698: */
3699: if (c != stop) {
3700: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.55 daniel 3701: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3702: ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
1.59 daniel 3703: ctxt->wellFormed = 0;
1.180 daniel 3704: ctxt->disableSAX = 1;
1.170 daniel 3705: xmlFree(buf);
1.135 daniel 3706: } else {
3707: NEXT;
3708: /*
3709: * NOTE: 4.4.7 Bypassed
3710: * When a general entity reference appears in the EntityValue in
3711: * an entity declaration, it is bypassed and left as is.
1.176 daniel 3712: * so XML_SUBSTITUTE_REF is not set here.
1.135 daniel 3713: */
3714: ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3715: 0, 0, 0);
3716: if (orig != NULL)
3717: *orig = buf;
3718: else
3719: xmlFree(buf);
1.24 daniel 3720: }
3721:
3722: return(ret);
3723: }
3724:
1.50 daniel 3725: /**
3726: * xmlParseAttValue:
3727: * @ctxt: an XML parser context
3728: *
3729: * parse a value for an attribute
1.78 daniel 3730: * Note: the parser won't do substitution of entities here, this
1.113 daniel 3731: * will be handled later in xmlStringGetNodeList
1.29 daniel 3732: *
3733: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3734: * "'" ([^<&'] | Reference)* "'"
1.68 daniel 3735: *
1.129 daniel 3736: * 3.3.3 Attribute-Value Normalization:
3737: * Before the value of an attribute is passed to the application or
3738: * checked for validity, the XML processor must normalize it as follows:
3739: * - a character reference is processed by appending the referenced
3740: * character to the attribute value
3741: * - an entity reference is processed by recursively processing the
3742: * replacement text of the entity
3743: * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3744: * appending #x20 to the normalized value, except that only a single
3745: * #x20 is appended for a "#xD#xA" sequence that is part of an external
3746: * parsed entity or the literal entity value of an internal parsed entity
3747: * - other characters are processed by appending them to the normalized value
1.130 daniel 3748: * If the declared value is not CDATA, then the XML processor must further
3749: * process the normalized attribute value by discarding any leading and
3750: * trailing space (#x20) characters, and by replacing sequences of space
3751: * (#x20) characters by a single space (#x20) character.
3752: * All attributes for which no declaration has been read should be treated
3753: * by a non-validating parser as if declared CDATA.
1.129 daniel 3754: *
3755: * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
1.29 daniel 3756: */
3757:
1.123 daniel 3758: xmlChar *
1.55 daniel 3759: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.129 daniel 3760: xmlChar limit = 0;
3761: xmlChar *buffer = NULL;
3762: int buffer_size = 0;
3763: xmlChar *out = NULL;
3764:
3765: xmlChar *current = NULL;
3766: xmlEntityPtr ent;
3767: xmlChar cur;
3768:
1.29 daniel 3769:
1.91 daniel 3770: SHRINK;
1.151 daniel 3771: if (NXT(0) == '"') {
1.96 daniel 3772: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.129 daniel 3773: limit = '"';
1.40 daniel 3774: NEXT;
1.151 daniel 3775: } else if (NXT(0) == '\'') {
1.129 daniel 3776: limit = '\'';
1.96 daniel 3777: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.40 daniel 3778: NEXT;
1.29 daniel 3779: } else {
1.123 daniel 3780: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
1.55 daniel 3781: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3782: ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
1.59 daniel 3783: ctxt->wellFormed = 0;
1.180 daniel 3784: ctxt->disableSAX = 1;
1.129 daniel 3785: return(NULL);
1.29 daniel 3786: }
3787:
1.129 daniel 3788: /*
3789: * allocate a translation buffer.
3790: */
1.140 daniel 3791: buffer_size = XML_PARSER_BUFFER_SIZE;
1.129 daniel 3792: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
3793: if (buffer == NULL) {
3794: perror("xmlParseAttValue: malloc failed");
3795: return(NULL);
3796: }
3797: out = buffer;
3798:
3799: /*
3800: * Ok loop until we reach one of the ending char or a size limit.
3801: */
3802: cur = CUR;
1.156 daniel 3803: while (((NXT(0) != limit) && (cur != '<')) || (ctxt->token != 0)) {
1.129 daniel 3804: if (cur == 0) break;
3805: if ((cur == '&') && (NXT(1) == '#')) {
3806: int val = xmlParseCharRef(ctxt);
3807: *out++ = val;
3808: } else if (cur == '&') {
3809: ent = xmlParseEntityRef(ctxt);
3810: if ((ent != NULL) &&
3811: (ctxt->replaceEntities != 0)) {
1.185 daniel 3812: xmlChar *rep;
3813:
1.186 daniel 3814: if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3815: rep = xmlStringDecodeEntities(ctxt, ent->content,
1.185 daniel 3816: XML_SUBSTITUTE_REF, 0, 0, 0);
1.186 daniel 3817: if (rep != NULL) {
3818: current = rep;
3819: while (*current != 0) {
3820: *out++ = *current++;
3821: if (out - buffer > buffer_size - 10) {
3822: int index = out - buffer;
1.129 daniel 3823:
1.186 daniel 3824: growBuffer(buffer);
3825: out = &buffer[index];
3826: }
1.185 daniel 3827: }
1.186 daniel 3828: xmlFree(rep);
1.129 daniel 3829: }
1.186 daniel 3830: } else {
3831: if (ent->content != NULL)
3832: *out++ = ent->content[0];
1.129 daniel 3833: }
3834: } else if (ent != NULL) {
3835: int i = xmlStrlen(ent->name);
3836: const xmlChar *cur = ent->name;
3837:
1.186 daniel 3838: /*
3839: * This may look absurd but is needed to detect
3840: * entities problems
3841: */
3842: if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3843: xmlChar *rep;
3844: rep = xmlStringDecodeEntities(ctxt, ent->content,
3845: XML_SUBSTITUTE_REF, 0, 0, 0);
3846: if (rep != NULL)
3847: xmlFree(rep);
3848: }
3849:
3850: /*
3851: * Just output the reference
3852: */
1.129 daniel 3853: *out++ = '&';
3854: if (out - buffer > buffer_size - i - 10) {
3855: int index = out - buffer;
3856:
3857: growBuffer(buffer);
3858: out = &buffer[index];
3859: }
3860: for (;i > 0;i--)
3861: *out++ = *cur++;
3862: *out++ = ';';
3863: }
3864: } else {
1.156 daniel 3865: /* invalid for UTF-8 , use COPY(out); !!! */
1.129 daniel 3866: if ((cur == 0x20) || (cur == 0xD) || (cur == 0xA) || (cur == 0x9)) {
1.130 daniel 3867: *out++ = 0x20;
3868: if (out - buffer > buffer_size - 10) {
3869: int index = out - buffer;
3870:
3871: growBuffer(buffer);
3872: out = &buffer[index];
1.129 daniel 3873: }
3874: } else {
3875: *out++ = cur;
3876: if (out - buffer > buffer_size - 10) {
3877: int index = out - buffer;
3878:
3879: growBuffer(buffer);
3880: out = &buffer[index];
3881: }
3882: }
3883: NEXT;
3884: }
3885: cur = CUR;
3886: }
3887: *out++ = 0;
1.152 daniel 3888: if (RAW == '<') {
1.129 daniel 3889: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3890: ctxt->sax->error(ctxt->userData,
3891: "Unescaped '<' not allowed in attributes values\n");
3892: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
3893: ctxt->wellFormed = 0;
1.180 daniel 3894: ctxt->disableSAX = 1;
1.152 daniel 3895: } else if (RAW != limit) {
1.129 daniel 3896: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3897: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
3898: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
3899: ctxt->wellFormed = 0;
1.180 daniel 3900: ctxt->disableSAX = 1;
1.129 daniel 3901: } else
3902: NEXT;
3903: return(buffer);
1.29 daniel 3904: }
3905:
1.50 daniel 3906: /**
3907: * xmlParseSystemLiteral:
3908: * @ctxt: an XML parser context
3909: *
3910: * parse an XML Literal
1.21 daniel 3911: *
1.22 daniel 3912: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.68 daniel 3913: *
3914: * Returns the SystemLiteral parsed or NULL
1.21 daniel 3915: */
3916:
1.123 daniel 3917: xmlChar *
1.55 daniel 3918: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 3919: xmlChar *buf = NULL;
3920: int len = 0;
1.140 daniel 3921: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3922: int cur, l;
1.135 daniel 3923: xmlChar stop;
1.168 daniel 3924: int state = ctxt->instate;
1.21 daniel 3925:
1.91 daniel 3926: SHRINK;
1.152 daniel 3927: if (RAW == '"') {
1.40 daniel 3928: NEXT;
1.135 daniel 3929: stop = '"';
1.152 daniel 3930: } else if (RAW == '\'') {
1.40 daniel 3931: NEXT;
1.135 daniel 3932: stop = '\'';
1.21 daniel 3933: } else {
1.55 daniel 3934: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3935: ctxt->sax->error(ctxt->userData,
3936: "SystemLiteral \" or ' expected\n");
1.123 daniel 3937: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.59 daniel 3938: ctxt->wellFormed = 0;
1.180 daniel 3939: ctxt->disableSAX = 1;
1.135 daniel 3940: return(NULL);
1.21 daniel 3941: }
3942:
1.135 daniel 3943: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3944: if (buf == NULL) {
3945: fprintf(stderr, "malloc of %d byte failed\n", size);
3946: return(NULL);
3947: }
1.168 daniel 3948: ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
1.152 daniel 3949: cur = CUR_CHAR(l);
1.135 daniel 3950: while ((IS_CHAR(cur)) && (cur != stop)) {
1.152 daniel 3951: if (len + 5 >= size) {
1.135 daniel 3952: size *= 2;
3953: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3954: if (buf == NULL) {
3955: fprintf(stderr, "realloc of %d byte failed\n", size);
1.168 daniel 3956: ctxt->instate = state;
1.135 daniel 3957: return(NULL);
3958: }
3959: }
1.152 daniel 3960: COPY_BUF(l,buf,len,cur);
3961: NEXTL(l);
3962: cur = CUR_CHAR(l);
1.135 daniel 3963: if (cur == 0) {
3964: GROW;
3965: SHRINK;
1.152 daniel 3966: cur = CUR_CHAR(l);
1.135 daniel 3967: }
3968: }
3969: buf[len] = 0;
1.168 daniel 3970: ctxt->instate = state;
1.135 daniel 3971: if (!IS_CHAR(cur)) {
3972: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3973: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
3974: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
3975: ctxt->wellFormed = 0;
1.180 daniel 3976: ctxt->disableSAX = 1;
1.135 daniel 3977: } else {
3978: NEXT;
3979: }
3980: return(buf);
1.21 daniel 3981: }
3982:
1.50 daniel 3983: /**
3984: * xmlParsePubidLiteral:
3985: * @ctxt: an XML parser context
1.21 daniel 3986: *
1.50 daniel 3987: * parse an XML public literal
1.68 daniel 3988: *
3989: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3990: *
3991: * Returns the PubidLiteral parsed or NULL.
1.21 daniel 3992: */
3993:
1.123 daniel 3994: xmlChar *
1.55 daniel 3995: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 3996: xmlChar *buf = NULL;
3997: int len = 0;
1.140 daniel 3998: int size = XML_PARSER_BUFFER_SIZE;
1.135 daniel 3999: xmlChar cur;
4000: xmlChar stop;
1.125 daniel 4001:
1.91 daniel 4002: SHRINK;
1.152 daniel 4003: if (RAW == '"') {
1.40 daniel 4004: NEXT;
1.135 daniel 4005: stop = '"';
1.152 daniel 4006: } else if (RAW == '\'') {
1.40 daniel 4007: NEXT;
1.135 daniel 4008: stop = '\'';
1.21 daniel 4009: } else {
1.55 daniel 4010: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4011: ctxt->sax->error(ctxt->userData,
4012: "SystemLiteral \" or ' expected\n");
1.123 daniel 4013: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.59 daniel 4014: ctxt->wellFormed = 0;
1.180 daniel 4015: ctxt->disableSAX = 1;
1.135 daniel 4016: return(NULL);
4017: }
4018: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4019: if (buf == NULL) {
4020: fprintf(stderr, "malloc of %d byte failed\n", size);
4021: return(NULL);
4022: }
4023: cur = CUR;
4024: while ((IS_PUBIDCHAR(cur)) && (cur != stop)) {
4025: if (len + 1 >= size) {
4026: size *= 2;
4027: buf = xmlRealloc(buf, size * sizeof(xmlChar));
4028: if (buf == NULL) {
4029: fprintf(stderr, "realloc of %d byte failed\n", size);
4030: return(NULL);
4031: }
4032: }
4033: buf[len++] = cur;
4034: NEXT;
4035: cur = CUR;
4036: if (cur == 0) {
4037: GROW;
4038: SHRINK;
4039: cur = CUR;
4040: }
4041: }
4042: buf[len] = 0;
4043: if (cur != stop) {
4044: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4045: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
4046: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
4047: ctxt->wellFormed = 0;
1.180 daniel 4048: ctxt->disableSAX = 1;
1.135 daniel 4049: } else {
4050: NEXT;
1.21 daniel 4051: }
1.135 daniel 4052: return(buf);
1.21 daniel 4053: }
4054:
1.50 daniel 4055: /**
4056: * xmlParseCharData:
4057: * @ctxt: an XML parser context
4058: * @cdata: int indicating whether we are within a CDATA section
4059: *
4060: * parse a CharData section.
4061: * if we are within a CDATA section ']]>' marks an end of section.
1.27 daniel 4062: *
1.151 daniel 4063: * The right angle bracket (>) may be represented using the string ">",
4064: * and must, for compatibility, be escaped using ">" or a character
4065: * reference when it appears in the string "]]>" in content, when that
4066: * string is not marking the end of a CDATA section.
4067: *
1.27 daniel 4068: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4069: */
4070:
1.55 daniel 4071: void
4072: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1.152 daniel 4073: xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
1.91 daniel 4074: int nbchar = 0;
1.152 daniel 4075: int cur, l;
1.27 daniel 4076:
1.91 daniel 4077: SHRINK;
1.152 daniel 4078: cur = CUR_CHAR(l);
1.190 ! daniel 4079: while (((cur != '<') || (ctxt->token == '<')) &&
! 4080: ((cur != '&') || (ctxt->token == '&')) &&
! 4081: (IS_CHAR(cur))) {
1.97 daniel 4082: if ((cur == ']') && (NXT(1) == ']') &&
1.59 daniel 4083: (NXT(2) == '>')) {
4084: if (cdata) break;
4085: else {
4086: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.151 daniel 4087: ctxt->sax->error(ctxt->userData,
1.59 daniel 4088: "Sequence ']]>' not allowed in content\n");
1.123 daniel 4089: ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
1.151 daniel 4090: /* Should this be relaxed ??? I see a "must here */
4091: ctxt->wellFormed = 0;
1.180 daniel 4092: ctxt->disableSAX = 1;
1.59 daniel 4093: }
4094: }
1.152 daniel 4095: COPY_BUF(l,buf,nbchar,cur);
4096: if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
1.91 daniel 4097: /*
4098: * Ok the segment is to be consumed as chars.
4099: */
1.171 daniel 4100: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 4101: if (areBlanks(ctxt, buf, nbchar)) {
4102: if (ctxt->sax->ignorableWhitespace != NULL)
4103: ctxt->sax->ignorableWhitespace(ctxt->userData,
4104: buf, nbchar);
4105: } else {
4106: if (ctxt->sax->characters != NULL)
4107: ctxt->sax->characters(ctxt->userData, buf, nbchar);
4108: }
4109: }
4110: nbchar = 0;
4111: }
1.152 daniel 4112: NEXTL(l);
4113: cur = CUR_CHAR(l);
1.27 daniel 4114: }
1.91 daniel 4115: if (nbchar != 0) {
4116: /*
4117: * Ok the segment is to be consumed as chars.
4118: */
1.171 daniel 4119: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 4120: if (areBlanks(ctxt, buf, nbchar)) {
4121: if (ctxt->sax->ignorableWhitespace != NULL)
4122: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4123: } else {
4124: if (ctxt->sax->characters != NULL)
4125: ctxt->sax->characters(ctxt->userData, buf, nbchar);
4126: }
4127: }
1.45 daniel 4128: }
1.27 daniel 4129: }
4130:
1.50 daniel 4131: /**
4132: * xmlParseExternalID:
4133: * @ctxt: an XML parser context
1.123 daniel 4134: * @publicID: a xmlChar** receiving PubidLiteral
1.67 daniel 4135: * @strict: indicate whether we should restrict parsing to only
4136: * production [75], see NOTE below
1.50 daniel 4137: *
1.67 daniel 4138: * Parse an External ID or a Public ID
4139: *
4140: * NOTE: Productions [75] and [83] interract badly since [75] can generate
4141: * 'PUBLIC' S PubidLiteral S SystemLiteral
1.22 daniel 4142: *
4143: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4144: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1.67 daniel 4145: *
4146: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4147: *
1.68 daniel 4148: * Returns the function returns SystemLiteral and in the second
1.67 daniel 4149: * case publicID receives PubidLiteral, is strict is off
4150: * it is possible to return NULL and have publicID set.
1.22 daniel 4151: */
4152:
1.123 daniel 4153: xmlChar *
4154: xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4155: xmlChar *URI = NULL;
1.22 daniel 4156:
1.91 daniel 4157: SHRINK;
1.152 daniel 4158: if ((RAW == 'S') && (NXT(1) == 'Y') &&
1.40 daniel 4159: (NXT(2) == 'S') && (NXT(3) == 'T') &&
4160: (NXT(4) == 'E') && (NXT(5) == 'M')) {
4161: SKIP(6);
1.59 daniel 4162: if (!IS_BLANK(CUR)) {
4163: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4164: ctxt->sax->error(ctxt->userData,
1.59 daniel 4165: "Space required after 'SYSTEM'\n");
1.123 daniel 4166: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4167: ctxt->wellFormed = 0;
1.180 daniel 4168: ctxt->disableSAX = 1;
1.59 daniel 4169: }
1.42 daniel 4170: SKIP_BLANKS;
1.39 daniel 4171: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 4172: if (URI == NULL) {
1.55 daniel 4173: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4174: ctxt->sax->error(ctxt->userData,
1.39 daniel 4175: "xmlParseExternalID: SYSTEM, no URI\n");
1.123 daniel 4176: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.59 daniel 4177: ctxt->wellFormed = 0;
1.180 daniel 4178: ctxt->disableSAX = 1;
1.59 daniel 4179: }
1.152 daniel 4180: } else if ((RAW == 'P') && (NXT(1) == 'U') &&
1.40 daniel 4181: (NXT(2) == 'B') && (NXT(3) == 'L') &&
4182: (NXT(4) == 'I') && (NXT(5) == 'C')) {
4183: SKIP(6);
1.59 daniel 4184: if (!IS_BLANK(CUR)) {
4185: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4186: ctxt->sax->error(ctxt->userData,
1.59 daniel 4187: "Space required after 'PUBLIC'\n");
1.123 daniel 4188: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4189: ctxt->wellFormed = 0;
1.180 daniel 4190: ctxt->disableSAX = 1;
1.59 daniel 4191: }
1.42 daniel 4192: SKIP_BLANKS;
1.39 daniel 4193: *publicID = xmlParsePubidLiteral(ctxt);
1.59 daniel 4194: if (*publicID == NULL) {
1.55 daniel 4195: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4196: ctxt->sax->error(ctxt->userData,
1.39 daniel 4197: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.123 daniel 4198: ctxt->errNo = XML_ERR_PUBID_REQUIRED;
1.59 daniel 4199: ctxt->wellFormed = 0;
1.180 daniel 4200: ctxt->disableSAX = 1;
1.59 daniel 4201: }
1.67 daniel 4202: if (strict) {
4203: /*
4204: * We don't handle [83] so "S SystemLiteral" is required.
4205: */
4206: if (!IS_BLANK(CUR)) {
4207: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4208: ctxt->sax->error(ctxt->userData,
1.67 daniel 4209: "Space required after the Public Identifier\n");
1.123 daniel 4210: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 4211: ctxt->wellFormed = 0;
1.180 daniel 4212: ctxt->disableSAX = 1;
1.67 daniel 4213: }
4214: } else {
4215: /*
4216: * We handle [83] so we return immediately, if
4217: * "S SystemLiteral" is not detected. From a purely parsing
4218: * point of view that's a nice mess.
4219: */
1.135 daniel 4220: const xmlChar *ptr;
4221: GROW;
4222:
4223: ptr = CUR_PTR;
1.67 daniel 4224: if (!IS_BLANK(*ptr)) return(NULL);
4225:
4226: while (IS_BLANK(*ptr)) ptr++;
1.173 daniel 4227: if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
1.59 daniel 4228: }
1.42 daniel 4229: SKIP_BLANKS;
1.39 daniel 4230: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 4231: if (URI == NULL) {
1.55 daniel 4232: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4233: ctxt->sax->error(ctxt->userData,
1.39 daniel 4234: "xmlParseExternalID: PUBLIC, no URI\n");
1.123 daniel 4235: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.59 daniel 4236: ctxt->wellFormed = 0;
1.180 daniel 4237: ctxt->disableSAX = 1;
1.59 daniel 4238: }
1.22 daniel 4239: }
1.39 daniel 4240: return(URI);
1.22 daniel 4241: }
4242:
1.50 daniel 4243: /**
4244: * xmlParseComment:
1.69 daniel 4245: * @ctxt: an XML parser context
1.50 daniel 4246: *
1.3 veillard 4247: * Skip an XML (SGML) comment <!-- .... -->
1.38 daniel 4248: * The spec says that "For compatibility, the string "--" (double-hyphen)
4249: * must not occur within comments. "
1.22 daniel 4250: *
4251: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 4252: */
1.72 daniel 4253: void
1.114 daniel 4254: xmlParseComment(xmlParserCtxtPtr ctxt) {
1.135 daniel 4255: xmlChar *buf = NULL;
4256: int len = 0;
1.140 daniel 4257: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 4258: int q, ql;
4259: int r, rl;
4260: int cur, l;
1.140 daniel 4261: xmlParserInputState state;
1.187 daniel 4262: xmlParserInputPtr input = ctxt->input;
1.3 veillard 4263:
4264: /*
1.22 daniel 4265: * Check that there is a comment right here.
1.3 veillard 4266: */
1.152 daniel 4267: if ((RAW != '<') || (NXT(1) != '!') ||
1.72 daniel 4268: (NXT(2) != '-') || (NXT(3) != '-')) return;
1.3 veillard 4269:
1.140 daniel 4270: state = ctxt->instate;
1.97 daniel 4271: ctxt->instate = XML_PARSER_COMMENT;
1.91 daniel 4272: SHRINK;
1.40 daniel 4273: SKIP(4);
1.135 daniel 4274: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4275: if (buf == NULL) {
4276: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 4277: ctxt->instate = state;
1.135 daniel 4278: return;
4279: }
1.152 daniel 4280: q = CUR_CHAR(ql);
4281: NEXTL(ql);
4282: r = CUR_CHAR(rl);
4283: NEXTL(rl);
4284: cur = CUR_CHAR(l);
1.135 daniel 4285: while (IS_CHAR(cur) &&
4286: ((cur != '>') ||
4287: (r != '-') || (q != '-'))) {
4288: if ((r == '-') && (q == '-')) {
1.55 daniel 4289: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4290: ctxt->sax->error(ctxt->userData,
1.38 daniel 4291: "Comment must not contain '--' (double-hyphen)`\n");
1.123 daniel 4292: ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
1.59 daniel 4293: ctxt->wellFormed = 0;
1.180 daniel 4294: ctxt->disableSAX = 1;
1.59 daniel 4295: }
1.152 daniel 4296: if (len + 5 >= size) {
1.135 daniel 4297: size *= 2;
4298: buf = xmlRealloc(buf, size * sizeof(xmlChar));
4299: if (buf == NULL) {
4300: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 4301: ctxt->instate = state;
1.135 daniel 4302: return;
4303: }
4304: }
1.152 daniel 4305: COPY_BUF(ql,buf,len,q);
1.135 daniel 4306: q = r;
1.152 daniel 4307: ql = rl;
1.135 daniel 4308: r = cur;
1.152 daniel 4309: rl = l;
4310: NEXTL(l);
4311: cur = CUR_CHAR(l);
1.135 daniel 4312: if (cur == 0) {
4313: SHRINK;
4314: GROW;
1.152 daniel 4315: cur = CUR_CHAR(l);
1.135 daniel 4316: }
1.3 veillard 4317: }
1.135 daniel 4318: buf[len] = 0;
4319: if (!IS_CHAR(cur)) {
1.55 daniel 4320: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4321: ctxt->sax->error(ctxt->userData,
1.135 daniel 4322: "Comment not terminated \n<!--%.50s\n", buf);
1.123 daniel 4323: ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
1.59 daniel 4324: ctxt->wellFormed = 0;
1.180 daniel 4325: ctxt->disableSAX = 1;
1.178 daniel 4326: xmlFree(buf);
1.3 veillard 4327: } else {
1.187 daniel 4328: if (input != ctxt->input) {
4329: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4330: ctxt->sax->error(ctxt->userData,
4331: "Comment doesn't start and stop in the same entity\n");
4332: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4333: ctxt->wellFormed = 0;
4334: ctxt->disableSAX = 1;
4335: }
1.40 daniel 4336: NEXT;
1.171 daniel 4337: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4338: (!ctxt->disableSAX))
1.135 daniel 4339: ctxt->sax->comment(ctxt->userData, buf);
4340: xmlFree(buf);
1.3 veillard 4341: }
1.140 daniel 4342: ctxt->instate = state;
1.3 veillard 4343: }
4344:
1.50 daniel 4345: /**
4346: * xmlParsePITarget:
4347: * @ctxt: an XML parser context
4348: *
4349: * parse the name of a PI
1.22 daniel 4350: *
4351: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1.68 daniel 4352: *
4353: * Returns the PITarget name or NULL
1.22 daniel 4354: */
4355:
1.123 daniel 4356: xmlChar *
1.55 daniel 4357: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1.123 daniel 4358: xmlChar *name;
1.22 daniel 4359:
4360: name = xmlParseName(ctxt);
1.139 daniel 4361: if ((name != NULL) &&
1.22 daniel 4362: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 4363: ((name[1] == 'm') || (name[1] == 'M')) &&
4364: ((name[2] == 'l') || (name[2] == 'L'))) {
1.139 daniel 4365: int i;
1.177 daniel 4366: if ((name[0] == 'x') && (name[1] == 'm') &&
4367: (name[2] == 'l') && (name[3] == 0)) {
1.151 daniel 4368: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4369: ctxt->sax->error(ctxt->userData,
4370: "XML declaration allowed only at the start of the document\n");
4371: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
4372: ctxt->wellFormed = 0;
1.180 daniel 4373: ctxt->disableSAX = 1;
1.151 daniel 4374: return(name);
4375: } else if (name[3] == 0) {
4376: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4377: ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
4378: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
4379: ctxt->wellFormed = 0;
1.180 daniel 4380: ctxt->disableSAX = 1;
1.151 daniel 4381: return(name);
4382: }
1.139 daniel 4383: for (i = 0;;i++) {
4384: if (xmlW3CPIs[i] == NULL) break;
4385: if (!xmlStrcmp(name, (const xmlChar *)xmlW3CPIs[i]))
4386: return(name);
4387: }
4388: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
4389: ctxt->sax->warning(ctxt->userData,
1.122 daniel 4390: "xmlParsePItarget: invalid name prefix 'xml'\n");
1.123 daniel 4391: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
1.122 daniel 4392: }
1.22 daniel 4393: }
4394: return(name);
4395: }
4396:
1.50 daniel 4397: /**
4398: * xmlParsePI:
4399: * @ctxt: an XML parser context
4400: *
4401: * parse an XML Processing Instruction.
1.22 daniel 4402: *
4403: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.68 daniel 4404: *
1.69 daniel 4405: * The processing is transfered to SAX once parsed.
1.3 veillard 4406: */
4407:
1.55 daniel 4408: void
4409: xmlParsePI(xmlParserCtxtPtr ctxt) {
1.135 daniel 4410: xmlChar *buf = NULL;
4411: int len = 0;
1.140 daniel 4412: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 4413: int cur, l;
1.123 daniel 4414: xmlChar *target;
1.140 daniel 4415: xmlParserInputState state;
1.22 daniel 4416:
1.152 daniel 4417: if ((RAW == '<') && (NXT(1) == '?')) {
1.187 daniel 4418: xmlParserInputPtr input = ctxt->input;
1.140 daniel 4419: state = ctxt->instate;
4420: ctxt->instate = XML_PARSER_PI;
1.3 veillard 4421: /*
4422: * this is a Processing Instruction.
4423: */
1.40 daniel 4424: SKIP(2);
1.91 daniel 4425: SHRINK;
1.3 veillard 4426:
4427: /*
1.22 daniel 4428: * Parse the target name and check for special support like
4429: * namespace.
1.3 veillard 4430: */
1.22 daniel 4431: target = xmlParsePITarget(ctxt);
4432: if (target != NULL) {
1.156 daniel 4433: if ((RAW == '?') && (NXT(1) == '>')) {
1.187 daniel 4434: if (input != ctxt->input) {
4435: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4436: ctxt->sax->error(ctxt->userData,
4437: "PI declaration doesn't start and stop in the same entity\n");
4438: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4439: ctxt->wellFormed = 0;
4440: ctxt->disableSAX = 1;
4441: }
1.156 daniel 4442: SKIP(2);
4443:
4444: /*
4445: * SAX: PI detected.
4446: */
1.171 daniel 4447: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.156 daniel 4448: (ctxt->sax->processingInstruction != NULL))
4449: ctxt->sax->processingInstruction(ctxt->userData,
4450: target, NULL);
4451: ctxt->instate = state;
1.170 daniel 4452: xmlFree(target);
1.156 daniel 4453: return;
4454: }
1.135 daniel 4455: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4456: if (buf == NULL) {
4457: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 4458: ctxt->instate = state;
1.135 daniel 4459: return;
4460: }
4461: cur = CUR;
4462: if (!IS_BLANK(cur)) {
1.114 daniel 4463: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4464: ctxt->sax->error(ctxt->userData,
4465: "xmlParsePI: PI %s space expected\n", target);
1.123 daniel 4466: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.114 daniel 4467: ctxt->wellFormed = 0;
1.180 daniel 4468: ctxt->disableSAX = 1;
1.114 daniel 4469: }
4470: SKIP_BLANKS;
1.152 daniel 4471: cur = CUR_CHAR(l);
1.135 daniel 4472: while (IS_CHAR(cur) &&
4473: ((cur != '?') || (NXT(1) != '>'))) {
1.152 daniel 4474: if (len + 5 >= size) {
1.135 daniel 4475: size *= 2;
4476: buf = xmlRealloc(buf, size * sizeof(xmlChar));
4477: if (buf == NULL) {
4478: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 4479: ctxt->instate = state;
1.135 daniel 4480: return;
4481: }
4482: }
1.152 daniel 4483: COPY_BUF(l,buf,len,cur);
4484: NEXTL(l);
4485: cur = CUR_CHAR(l);
1.135 daniel 4486: if (cur == 0) {
4487: SHRINK;
4488: GROW;
1.152 daniel 4489: cur = CUR_CHAR(l);
1.135 daniel 4490: }
4491: }
4492: buf[len] = 0;
1.152 daniel 4493: if (cur != '?') {
1.72 daniel 4494: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4495: ctxt->sax->error(ctxt->userData,
1.72 daniel 4496: "xmlParsePI: PI %s never end ...\n", target);
1.123 daniel 4497: ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
1.72 daniel 4498: ctxt->wellFormed = 0;
1.180 daniel 4499: ctxt->disableSAX = 1;
1.22 daniel 4500: } else {
1.187 daniel 4501: if (input != ctxt->input) {
4502: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4503: ctxt->sax->error(ctxt->userData,
4504: "PI declaration doesn't start and stop in the same entity\n");
4505: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4506: ctxt->wellFormed = 0;
4507: ctxt->disableSAX = 1;
4508: }
1.72 daniel 4509: SKIP(2);
1.44 daniel 4510:
1.72 daniel 4511: /*
4512: * SAX: PI detected.
4513: */
1.171 daniel 4514: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.72 daniel 4515: (ctxt->sax->processingInstruction != NULL))
1.99 daniel 4516: ctxt->sax->processingInstruction(ctxt->userData,
1.135 daniel 4517: target, buf);
1.22 daniel 4518: }
1.135 daniel 4519: xmlFree(buf);
1.119 daniel 4520: xmlFree(target);
1.3 veillard 4521: } else {
1.55 daniel 4522: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.99 daniel 4523: ctxt->sax->error(ctxt->userData,
4524: "xmlParsePI : no target name\n");
1.123 daniel 4525: ctxt->errNo = XML_ERR_PI_NOT_STARTED;
1.59 daniel 4526: ctxt->wellFormed = 0;
1.180 daniel 4527: ctxt->disableSAX = 1;
1.22 daniel 4528: }
1.140 daniel 4529: ctxt->instate = state;
1.22 daniel 4530: }
4531: }
4532:
1.50 daniel 4533: /**
4534: * xmlParseNotationDecl:
4535: * @ctxt: an XML parser context
4536: *
4537: * parse a notation declaration
1.22 daniel 4538: *
4539: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4540: *
4541: * Hence there is actually 3 choices:
4542: * 'PUBLIC' S PubidLiteral
4543: * 'PUBLIC' S PubidLiteral S SystemLiteral
4544: * and 'SYSTEM' S SystemLiteral
1.50 daniel 4545: *
1.67 daniel 4546: * See the NOTE on xmlParseExternalID().
1.22 daniel 4547: */
4548:
1.55 daniel 4549: void
4550: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4551: xmlChar *name;
4552: xmlChar *Pubid;
4553: xmlChar *Systemid;
1.22 daniel 4554:
1.152 daniel 4555: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4556: (NXT(2) == 'N') && (NXT(3) == 'O') &&
4557: (NXT(4) == 'T') && (NXT(5) == 'A') &&
4558: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1.67 daniel 4559: (NXT(8) == 'O') && (NXT(9) == 'N')) {
1.187 daniel 4560: xmlParserInputPtr input = ctxt->input;
1.91 daniel 4561: SHRINK;
1.40 daniel 4562: SKIP(10);
1.67 daniel 4563: if (!IS_BLANK(CUR)) {
4564: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4565: ctxt->sax->error(ctxt->userData,
4566: "Space required after '<!NOTATION'\n");
1.123 daniel 4567: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 4568: ctxt->wellFormed = 0;
1.180 daniel 4569: ctxt->disableSAX = 1;
1.67 daniel 4570: return;
4571: }
4572: SKIP_BLANKS;
1.22 daniel 4573:
4574: name = xmlParseName(ctxt);
4575: if (name == NULL) {
1.55 daniel 4576: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4577: ctxt->sax->error(ctxt->userData,
4578: "NOTATION: Name expected here\n");
1.123 daniel 4579: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.67 daniel 4580: ctxt->wellFormed = 0;
1.180 daniel 4581: ctxt->disableSAX = 1;
1.67 daniel 4582: return;
4583: }
4584: if (!IS_BLANK(CUR)) {
4585: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4586: ctxt->sax->error(ctxt->userData,
1.67 daniel 4587: "Space required after the NOTATION name'\n");
1.123 daniel 4588: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4589: ctxt->wellFormed = 0;
1.180 daniel 4590: ctxt->disableSAX = 1;
1.22 daniel 4591: return;
4592: }
1.42 daniel 4593: SKIP_BLANKS;
1.67 daniel 4594:
1.22 daniel 4595: /*
1.67 daniel 4596: * Parse the IDs.
1.22 daniel 4597: */
1.160 daniel 4598: Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
1.67 daniel 4599: SKIP_BLANKS;
4600:
1.152 daniel 4601: if (RAW == '>') {
1.187 daniel 4602: if (input != ctxt->input) {
4603: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4604: ctxt->sax->error(ctxt->userData,
4605: "Notation declaration doesn't start and stop in the same entity\n");
4606: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4607: ctxt->wellFormed = 0;
4608: ctxt->disableSAX = 1;
4609: }
1.40 daniel 4610: NEXT;
1.171 daniel 4611: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4612: (ctxt->sax->notationDecl != NULL))
1.74 daniel 4613: ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
1.67 daniel 4614: } else {
4615: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4616: ctxt->sax->error(ctxt->userData,
1.67 daniel 4617: "'>' required to close NOTATION declaration\n");
1.123 daniel 4618: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.67 daniel 4619: ctxt->wellFormed = 0;
1.180 daniel 4620: ctxt->disableSAX = 1;
1.67 daniel 4621: }
1.119 daniel 4622: xmlFree(name);
4623: if (Systemid != NULL) xmlFree(Systemid);
4624: if (Pubid != NULL) xmlFree(Pubid);
1.22 daniel 4625: }
4626: }
4627:
1.50 daniel 4628: /**
4629: * xmlParseEntityDecl:
4630: * @ctxt: an XML parser context
4631: *
4632: * parse <!ENTITY declarations
1.22 daniel 4633: *
4634: * [70] EntityDecl ::= GEDecl | PEDecl
4635: *
4636: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4637: *
4638: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4639: *
4640: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4641: *
4642: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 4643: *
4644: * [76] NDataDecl ::= S 'NDATA' S Name
1.99 daniel 4645: *
4646: * [ VC: Notation Declared ]
1.116 daniel 4647: * The Name must match the declared name of a notation.
1.22 daniel 4648: */
4649:
1.55 daniel 4650: void
4651: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4652: xmlChar *name = NULL;
4653: xmlChar *value = NULL;
4654: xmlChar *URI = NULL, *literal = NULL;
4655: xmlChar *ndata = NULL;
1.39 daniel 4656: int isParameter = 0;
1.123 daniel 4657: xmlChar *orig = NULL;
1.22 daniel 4658:
1.94 daniel 4659: GROW;
1.152 daniel 4660: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4661: (NXT(2) == 'E') && (NXT(3) == 'N') &&
4662: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1.59 daniel 4663: (NXT(6) == 'T') && (NXT(7) == 'Y')) {
1.187 daniel 4664: xmlParserInputPtr input = ctxt->input;
1.96 daniel 4665: ctxt->instate = XML_PARSER_ENTITY_DECL;
1.91 daniel 4666: SHRINK;
1.40 daniel 4667: SKIP(8);
1.59 daniel 4668: if (!IS_BLANK(CUR)) {
4669: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4670: ctxt->sax->error(ctxt->userData,
4671: "Space required after '<!ENTITY'\n");
1.123 daniel 4672: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4673: ctxt->wellFormed = 0;
1.180 daniel 4674: ctxt->disableSAX = 1;
1.59 daniel 4675: }
4676: SKIP_BLANKS;
1.40 daniel 4677:
1.152 daniel 4678: if (RAW == '%') {
1.40 daniel 4679: NEXT;
1.59 daniel 4680: if (!IS_BLANK(CUR)) {
4681: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4682: ctxt->sax->error(ctxt->userData,
4683: "Space required after '%'\n");
1.123 daniel 4684: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4685: ctxt->wellFormed = 0;
1.180 daniel 4686: ctxt->disableSAX = 1;
1.59 daniel 4687: }
1.42 daniel 4688: SKIP_BLANKS;
1.39 daniel 4689: isParameter = 1;
1.22 daniel 4690: }
4691:
4692: name = xmlParseName(ctxt);
1.24 daniel 4693: if (name == NULL) {
1.55 daniel 4694: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4695: ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
1.123 daniel 4696: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 4697: ctxt->wellFormed = 0;
1.180 daniel 4698: ctxt->disableSAX = 1;
1.24 daniel 4699: return;
4700: }
1.59 daniel 4701: if (!IS_BLANK(CUR)) {
4702: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4703: ctxt->sax->error(ctxt->userData,
1.59 daniel 4704: "Space required after the entity name\n");
1.123 daniel 4705: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4706: ctxt->wellFormed = 0;
1.180 daniel 4707: ctxt->disableSAX = 1;
1.59 daniel 4708: }
1.42 daniel 4709: SKIP_BLANKS;
1.24 daniel 4710:
1.22 daniel 4711: /*
1.68 daniel 4712: * handle the various case of definitions...
1.22 daniel 4713: */
1.39 daniel 4714: if (isParameter) {
1.152 daniel 4715: if ((RAW == '"') || (RAW == '\''))
1.78 daniel 4716: value = xmlParseEntityValue(ctxt, &orig);
1.39 daniel 4717: if (value) {
1.171 daniel 4718: if ((ctxt->sax != NULL) &&
4719: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4720: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4721: XML_INTERNAL_PARAMETER_ENTITY,
4722: NULL, NULL, value);
4723: }
1.24 daniel 4724: else {
1.67 daniel 4725: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 4726: if ((URI == NULL) && (literal == NULL)) {
4727: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4728: ctxt->sax->error(ctxt->userData,
4729: "Entity value required\n");
4730: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
4731: ctxt->wellFormed = 0;
1.180 daniel 4732: ctxt->disableSAX = 1;
1.169 daniel 4733: }
1.39 daniel 4734: if (URI) {
1.171 daniel 4735: if ((ctxt->sax != NULL) &&
4736: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4737: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4738: XML_EXTERNAL_PARAMETER_ENTITY,
4739: literal, URI, NULL);
4740: }
1.24 daniel 4741: }
4742: } else {
1.152 daniel 4743: if ((RAW == '"') || (RAW == '\'')) {
1.78 daniel 4744: value = xmlParseEntityValue(ctxt, &orig);
1.171 daniel 4745: if ((ctxt->sax != NULL) &&
4746: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4747: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4748: XML_INTERNAL_GENERAL_ENTITY,
4749: NULL, NULL, value);
4750: } else {
1.67 daniel 4751: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 4752: if ((URI == NULL) && (literal == NULL)) {
4753: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4754: ctxt->sax->error(ctxt->userData,
4755: "Entity value required\n");
4756: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
4757: ctxt->wellFormed = 0;
1.180 daniel 4758: ctxt->disableSAX = 1;
1.169 daniel 4759: }
1.152 daniel 4760: if ((RAW != '>') && (!IS_BLANK(CUR))) {
1.59 daniel 4761: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4762: ctxt->sax->error(ctxt->userData,
1.59 daniel 4763: "Space required before 'NDATA'\n");
1.123 daniel 4764: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4765: ctxt->wellFormed = 0;
1.180 daniel 4766: ctxt->disableSAX = 1;
1.59 daniel 4767: }
1.42 daniel 4768: SKIP_BLANKS;
1.152 daniel 4769: if ((RAW == 'N') && (NXT(1) == 'D') &&
1.40 daniel 4770: (NXT(2) == 'A') && (NXT(3) == 'T') &&
4771: (NXT(4) == 'A')) {
4772: SKIP(5);
1.59 daniel 4773: if (!IS_BLANK(CUR)) {
4774: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4775: ctxt->sax->error(ctxt->userData,
1.59 daniel 4776: "Space required after 'NDATA'\n");
1.123 daniel 4777: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4778: ctxt->wellFormed = 0;
1.180 daniel 4779: ctxt->disableSAX = 1;
1.59 daniel 4780: }
1.42 daniel 4781: SKIP_BLANKS;
1.24 daniel 4782: ndata = xmlParseName(ctxt);
1.171 daniel 4783: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.116 daniel 4784: (ctxt->sax->unparsedEntityDecl != NULL))
4785: ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
1.39 daniel 4786: literal, URI, ndata);
4787: } else {
1.171 daniel 4788: if ((ctxt->sax != NULL) &&
4789: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4790: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4791: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4792: literal, URI, NULL);
1.24 daniel 4793: }
4794: }
4795: }
1.42 daniel 4796: SKIP_BLANKS;
1.152 daniel 4797: if (RAW != '>') {
1.55 daniel 4798: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4799: ctxt->sax->error(ctxt->userData,
1.31 daniel 4800: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.123 daniel 4801: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.59 daniel 4802: ctxt->wellFormed = 0;
1.180 daniel 4803: ctxt->disableSAX = 1;
1.187 daniel 4804: } else {
4805: if (input != ctxt->input) {
4806: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4807: ctxt->sax->error(ctxt->userData,
4808: "Entity declaration doesn't start and stop in the same entity\n");
4809: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4810: ctxt->wellFormed = 0;
4811: ctxt->disableSAX = 1;
4812: }
1.40 daniel 4813: NEXT;
1.187 daniel 4814: }
1.78 daniel 4815: if (orig != NULL) {
4816: /*
1.98 daniel 4817: * Ugly mechanism to save the raw entity value.
1.78 daniel 4818: */
4819: xmlEntityPtr cur = NULL;
4820:
1.98 daniel 4821: if (isParameter) {
4822: if ((ctxt->sax != NULL) &&
4823: (ctxt->sax->getParameterEntity != NULL))
1.120 daniel 4824: cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.98 daniel 4825: } else {
4826: if ((ctxt->sax != NULL) &&
4827: (ctxt->sax->getEntity != NULL))
1.120 daniel 4828: cur = ctxt->sax->getEntity(ctxt->userData, name);
1.98 daniel 4829: }
4830: if (cur != NULL) {
4831: if (cur->orig != NULL)
1.119 daniel 4832: xmlFree(orig);
1.98 daniel 4833: else
4834: cur->orig = orig;
4835: } else
1.119 daniel 4836: xmlFree(orig);
1.78 daniel 4837: }
1.119 daniel 4838: if (name != NULL) xmlFree(name);
4839: if (value != NULL) xmlFree(value);
4840: if (URI != NULL) xmlFree(URI);
4841: if (literal != NULL) xmlFree(literal);
4842: if (ndata != NULL) xmlFree(ndata);
1.22 daniel 4843: }
4844: }
4845:
1.50 daniel 4846: /**
1.59 daniel 4847: * xmlParseDefaultDecl:
4848: * @ctxt: an XML parser context
4849: * @value: Receive a possible fixed default value for the attribute
4850: *
4851: * Parse an attribute default declaration
4852: *
4853: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4854: *
1.99 daniel 4855: * [ VC: Required Attribute ]
1.117 daniel 4856: * if the default declaration is the keyword #REQUIRED, then the
4857: * attribute must be specified for all elements of the type in the
4858: * attribute-list declaration.
1.99 daniel 4859: *
4860: * [ VC: Attribute Default Legal ]
1.102 daniel 4861: * The declared default value must meet the lexical constraints of
4862: * the declared attribute type c.f. xmlValidateAttributeDecl()
1.99 daniel 4863: *
4864: * [ VC: Fixed Attribute Default ]
1.117 daniel 4865: * if an attribute has a default value declared with the #FIXED
4866: * keyword, instances of that attribute must match the default value.
1.99 daniel 4867: *
4868: * [ WFC: No < in Attribute Values ]
4869: * handled in xmlParseAttValue()
4870: *
1.59 daniel 4871: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4872: * or XML_ATTRIBUTE_FIXED.
4873: */
4874:
4875: int
1.123 daniel 4876: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
1.59 daniel 4877: int val;
1.123 daniel 4878: xmlChar *ret;
1.59 daniel 4879:
4880: *value = NULL;
1.152 daniel 4881: if ((RAW == '#') && (NXT(1) == 'R') &&
1.59 daniel 4882: (NXT(2) == 'E') && (NXT(3) == 'Q') &&
4883: (NXT(4) == 'U') && (NXT(5) == 'I') &&
4884: (NXT(6) == 'R') && (NXT(7) == 'E') &&
4885: (NXT(8) == 'D')) {
4886: SKIP(9);
4887: return(XML_ATTRIBUTE_REQUIRED);
4888: }
1.152 daniel 4889: if ((RAW == '#') && (NXT(1) == 'I') &&
1.59 daniel 4890: (NXT(2) == 'M') && (NXT(3) == 'P') &&
4891: (NXT(4) == 'L') && (NXT(5) == 'I') &&
4892: (NXT(6) == 'E') && (NXT(7) == 'D')) {
4893: SKIP(8);
4894: return(XML_ATTRIBUTE_IMPLIED);
4895: }
4896: val = XML_ATTRIBUTE_NONE;
1.152 daniel 4897: if ((RAW == '#') && (NXT(1) == 'F') &&
1.59 daniel 4898: (NXT(2) == 'I') && (NXT(3) == 'X') &&
4899: (NXT(4) == 'E') && (NXT(5) == 'D')) {
4900: SKIP(6);
4901: val = XML_ATTRIBUTE_FIXED;
4902: if (!IS_BLANK(CUR)) {
4903: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4904: ctxt->sax->error(ctxt->userData,
4905: "Space required after '#FIXED'\n");
1.123 daniel 4906: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4907: ctxt->wellFormed = 0;
1.180 daniel 4908: ctxt->disableSAX = 1;
1.59 daniel 4909: }
4910: SKIP_BLANKS;
4911: }
4912: ret = xmlParseAttValue(ctxt);
1.96 daniel 4913: ctxt->instate = XML_PARSER_DTD;
1.59 daniel 4914: if (ret == NULL) {
4915: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4916: ctxt->sax->error(ctxt->userData,
1.59 daniel 4917: "Attribute default value declaration error\n");
4918: ctxt->wellFormed = 0;
1.180 daniel 4919: ctxt->disableSAX = 1;
1.59 daniel 4920: } else
4921: *value = ret;
4922: return(val);
4923: }
4924:
4925: /**
1.66 daniel 4926: * xmlParseNotationType:
4927: * @ctxt: an XML parser context
4928: *
4929: * parse an Notation attribute type.
4930: *
1.99 daniel 4931: * Note: the leading 'NOTATION' S part has already being parsed...
4932: *
1.66 daniel 4933: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4934: *
1.99 daniel 4935: * [ VC: Notation Attributes ]
1.117 daniel 4936: * Values of this type must match one of the notation names included
1.99 daniel 4937: * in the declaration; all notation names in the declaration must be declared.
1.66 daniel 4938: *
4939: * Returns: the notation attribute tree built while parsing
4940: */
4941:
4942: xmlEnumerationPtr
4943: xmlParseNotationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 4944: xmlChar *name;
1.66 daniel 4945: xmlEnumerationPtr ret = NULL, last = NULL, cur;
4946:
1.152 daniel 4947: if (RAW != '(') {
1.66 daniel 4948: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4949: ctxt->sax->error(ctxt->userData,
4950: "'(' required to start 'NOTATION'\n");
1.123 daniel 4951: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.66 daniel 4952: ctxt->wellFormed = 0;
1.180 daniel 4953: ctxt->disableSAX = 1;
1.66 daniel 4954: return(NULL);
4955: }
1.91 daniel 4956: SHRINK;
1.66 daniel 4957: do {
4958: NEXT;
4959: SKIP_BLANKS;
4960: name = xmlParseName(ctxt);
4961: if (name == NULL) {
4962: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4963: ctxt->sax->error(ctxt->userData,
1.66 daniel 4964: "Name expected in NOTATION declaration\n");
1.123 daniel 4965: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.66 daniel 4966: ctxt->wellFormed = 0;
1.180 daniel 4967: ctxt->disableSAX = 1;
1.66 daniel 4968: return(ret);
4969: }
4970: cur = xmlCreateEnumeration(name);
1.119 daniel 4971: xmlFree(name);
1.66 daniel 4972: if (cur == NULL) return(ret);
4973: if (last == NULL) ret = last = cur;
4974: else {
4975: last->next = cur;
4976: last = cur;
4977: }
4978: SKIP_BLANKS;
1.152 daniel 4979: } while (RAW == '|');
4980: if (RAW != ')') {
1.66 daniel 4981: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4982: ctxt->sax->error(ctxt->userData,
1.66 daniel 4983: "')' required to finish NOTATION declaration\n");
1.123 daniel 4984: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.66 daniel 4985: ctxt->wellFormed = 0;
1.180 daniel 4986: ctxt->disableSAX = 1;
1.170 daniel 4987: if ((last != NULL) && (last != ret))
4988: xmlFreeEnumeration(last);
1.66 daniel 4989: return(ret);
4990: }
4991: NEXT;
4992: return(ret);
4993: }
4994:
4995: /**
4996: * xmlParseEnumerationType:
4997: * @ctxt: an XML parser context
4998: *
4999: * parse an Enumeration attribute type.
5000: *
5001: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5002: *
1.99 daniel 5003: * [ VC: Enumeration ]
1.117 daniel 5004: * Values of this type must match one of the Nmtoken tokens in
1.99 daniel 5005: * the declaration
5006: *
1.66 daniel 5007: * Returns: the enumeration attribute tree built while parsing
5008: */
5009:
5010: xmlEnumerationPtr
5011: xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 5012: xmlChar *name;
1.66 daniel 5013: xmlEnumerationPtr ret = NULL, last = NULL, cur;
5014:
1.152 daniel 5015: if (RAW != '(') {
1.66 daniel 5016: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5017: ctxt->sax->error(ctxt->userData,
1.66 daniel 5018: "'(' required to start ATTLIST enumeration\n");
1.123 daniel 5019: ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
1.66 daniel 5020: ctxt->wellFormed = 0;
1.180 daniel 5021: ctxt->disableSAX = 1;
1.66 daniel 5022: return(NULL);
5023: }
1.91 daniel 5024: SHRINK;
1.66 daniel 5025: do {
5026: NEXT;
5027: SKIP_BLANKS;
5028: name = xmlParseNmtoken(ctxt);
5029: if (name == NULL) {
5030: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5031: ctxt->sax->error(ctxt->userData,
1.66 daniel 5032: "NmToken expected in ATTLIST enumeration\n");
1.123 daniel 5033: ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
1.66 daniel 5034: ctxt->wellFormed = 0;
1.180 daniel 5035: ctxt->disableSAX = 1;
1.66 daniel 5036: return(ret);
5037: }
5038: cur = xmlCreateEnumeration(name);
1.119 daniel 5039: xmlFree(name);
1.66 daniel 5040: if (cur == NULL) return(ret);
5041: if (last == NULL) ret = last = cur;
5042: else {
5043: last->next = cur;
5044: last = cur;
5045: }
5046: SKIP_BLANKS;
1.152 daniel 5047: } while (RAW == '|');
5048: if (RAW != ')') {
1.66 daniel 5049: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5050: ctxt->sax->error(ctxt->userData,
1.66 daniel 5051: "')' required to finish ATTLIST enumeration\n");
1.123 daniel 5052: ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
1.66 daniel 5053: ctxt->wellFormed = 0;
1.180 daniel 5054: ctxt->disableSAX = 1;
1.66 daniel 5055: return(ret);
5056: }
5057: NEXT;
5058: return(ret);
5059: }
5060:
5061: /**
1.50 daniel 5062: * xmlParseEnumeratedType:
5063: * @ctxt: an XML parser context
1.66 daniel 5064: * @tree: the enumeration tree built while parsing
1.50 daniel 5065: *
1.66 daniel 5066: * parse an Enumerated attribute type.
1.22 daniel 5067: *
5068: * [57] EnumeratedType ::= NotationType | Enumeration
5069: *
5070: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5071: *
1.50 daniel 5072: *
1.66 daniel 5073: * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
1.22 daniel 5074: */
5075:
1.66 daniel 5076: int
5077: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.152 daniel 5078: if ((RAW == 'N') && (NXT(1) == 'O') &&
1.66 daniel 5079: (NXT(2) == 'T') && (NXT(3) == 'A') &&
5080: (NXT(4) == 'T') && (NXT(5) == 'I') &&
5081: (NXT(6) == 'O') && (NXT(7) == 'N')) {
5082: SKIP(8);
5083: if (!IS_BLANK(CUR)) {
5084: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5085: ctxt->sax->error(ctxt->userData,
5086: "Space required after 'NOTATION'\n");
1.123 daniel 5087: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.66 daniel 5088: ctxt->wellFormed = 0;
1.180 daniel 5089: ctxt->disableSAX = 1;
1.66 daniel 5090: return(0);
5091: }
5092: SKIP_BLANKS;
5093: *tree = xmlParseNotationType(ctxt);
5094: if (*tree == NULL) return(0);
5095: return(XML_ATTRIBUTE_NOTATION);
5096: }
5097: *tree = xmlParseEnumerationType(ctxt);
5098: if (*tree == NULL) return(0);
5099: return(XML_ATTRIBUTE_ENUMERATION);
1.22 daniel 5100: }
5101:
1.50 daniel 5102: /**
5103: * xmlParseAttributeType:
5104: * @ctxt: an XML parser context
1.66 daniel 5105: * @tree: the enumeration tree built while parsing
1.50 daniel 5106: *
1.59 daniel 5107: * parse the Attribute list def for an element
1.22 daniel 5108: *
5109: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5110: *
5111: * [55] StringType ::= 'CDATA'
5112: *
5113: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5114: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1.50 daniel 5115: *
1.102 daniel 5116: * Validity constraints for attribute values syntax are checked in
5117: * xmlValidateAttributeValue()
5118: *
1.99 daniel 5119: * [ VC: ID ]
1.117 daniel 5120: * Values of type ID must match the Name production. A name must not
1.99 daniel 5121: * appear more than once in an XML document as a value of this type;
5122: * i.e., ID values must uniquely identify the elements which bear them.
5123: *
5124: * [ VC: One ID per Element Type ]
1.117 daniel 5125: * No element type may have more than one ID attribute specified.
1.99 daniel 5126: *
5127: * [ VC: ID Attribute Default ]
1.117 daniel 5128: * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
1.99 daniel 5129: *
5130: * [ VC: IDREF ]
1.102 daniel 5131: * Values of type IDREF must match the Name production, and values
1.140 daniel 5132: * of type IDREFS must match Names; each IDREF Name must match the value
1.117 daniel 5133: * of an ID attribute on some element in the XML document; i.e. IDREF
1.99 daniel 5134: * values must match the value of some ID attribute.
5135: *
5136: * [ VC: Entity Name ]
1.102 daniel 5137: * Values of type ENTITY must match the Name production, values
1.140 daniel 5138: * of type ENTITIES must match Names; each Entity Name must match the
1.117 daniel 5139: * name of an unparsed entity declared in the DTD.
1.99 daniel 5140: *
5141: * [ VC: Name Token ]
1.102 daniel 5142: * Values of type NMTOKEN must match the Nmtoken production; values
1.99 daniel 5143: * of type NMTOKENS must match Nmtokens.
5144: *
1.69 daniel 5145: * Returns the attribute type
1.22 daniel 5146: */
1.59 daniel 5147: int
1.66 daniel 5148: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.91 daniel 5149: SHRINK;
1.152 daniel 5150: if ((RAW == 'C') && (NXT(1) == 'D') &&
1.40 daniel 5151: (NXT(2) == 'A') && (NXT(3) == 'T') &&
5152: (NXT(4) == 'A')) {
5153: SKIP(5);
1.66 daniel 5154: return(XML_ATTRIBUTE_CDATA);
1.152 daniel 5155: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.40 daniel 5156: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.97 daniel 5157: (NXT(4) == 'F') && (NXT(5) == 'S')) {
5158: SKIP(6);
5159: return(XML_ATTRIBUTE_IDREFS);
1.152 daniel 5160: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.97 daniel 5161: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.40 daniel 5162: (NXT(4) == 'F')) {
5163: SKIP(5);
1.59 daniel 5164: return(XML_ATTRIBUTE_IDREF);
1.152 daniel 5165: } else if ((RAW == 'I') && (NXT(1) == 'D')) {
1.66 daniel 5166: SKIP(2);
5167: return(XML_ATTRIBUTE_ID);
1.152 daniel 5168: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 5169: (NXT(2) == 'T') && (NXT(3) == 'I') &&
5170: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
5171: SKIP(6);
1.59 daniel 5172: return(XML_ATTRIBUTE_ENTITY);
1.152 daniel 5173: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 5174: (NXT(2) == 'T') && (NXT(3) == 'I') &&
5175: (NXT(4) == 'T') && (NXT(5) == 'I') &&
5176: (NXT(6) == 'E') && (NXT(7) == 'S')) {
5177: SKIP(8);
1.59 daniel 5178: return(XML_ATTRIBUTE_ENTITIES);
1.152 daniel 5179: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.40 daniel 5180: (NXT(2) == 'T') && (NXT(3) == 'O') &&
5181: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.66 daniel 5182: (NXT(6) == 'N') && (NXT(7) == 'S')) {
5183: SKIP(8);
5184: return(XML_ATTRIBUTE_NMTOKENS);
1.152 daniel 5185: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.66 daniel 5186: (NXT(2) == 'T') && (NXT(3) == 'O') &&
5187: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.40 daniel 5188: (NXT(6) == 'N')) {
5189: SKIP(7);
1.59 daniel 5190: return(XML_ATTRIBUTE_NMTOKEN);
1.22 daniel 5191: }
1.66 daniel 5192: return(xmlParseEnumeratedType(ctxt, tree));
1.22 daniel 5193: }
5194:
1.50 daniel 5195: /**
5196: * xmlParseAttributeListDecl:
5197: * @ctxt: an XML parser context
5198: *
5199: * : parse the Attribute list def for an element
1.22 daniel 5200: *
5201: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5202: *
5203: * [53] AttDef ::= S Name S AttType S DefaultDecl
1.50 daniel 5204: *
1.22 daniel 5205: */
1.55 daniel 5206: void
5207: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 5208: xmlChar *elemName;
5209: xmlChar *attrName;
1.103 daniel 5210: xmlEnumerationPtr tree;
1.22 daniel 5211:
1.152 daniel 5212: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 5213: (NXT(2) == 'A') && (NXT(3) == 'T') &&
5214: (NXT(4) == 'T') && (NXT(5) == 'L') &&
5215: (NXT(6) == 'I') && (NXT(7) == 'S') &&
1.59 daniel 5216: (NXT(8) == 'T')) {
1.187 daniel 5217: xmlParserInputPtr input = ctxt->input;
5218:
1.40 daniel 5219: SKIP(9);
1.59 daniel 5220: if (!IS_BLANK(CUR)) {
5221: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5222: ctxt->sax->error(ctxt->userData,
5223: "Space required after '<!ATTLIST'\n");
1.123 daniel 5224: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5225: ctxt->wellFormed = 0;
1.180 daniel 5226: ctxt->disableSAX = 1;
1.59 daniel 5227: }
1.42 daniel 5228: SKIP_BLANKS;
1.59 daniel 5229: elemName = xmlParseName(ctxt);
5230: if (elemName == NULL) {
1.55 daniel 5231: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5232: ctxt->sax->error(ctxt->userData,
5233: "ATTLIST: no name for Element\n");
1.123 daniel 5234: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5235: ctxt->wellFormed = 0;
1.180 daniel 5236: ctxt->disableSAX = 1;
1.22 daniel 5237: return;
5238: }
1.42 daniel 5239: SKIP_BLANKS;
1.152 daniel 5240: while (RAW != '>') {
1.123 daniel 5241: const xmlChar *check = CUR_PTR;
1.59 daniel 5242: int type;
5243: int def;
1.123 daniel 5244: xmlChar *defaultValue = NULL;
1.59 daniel 5245:
1.103 daniel 5246: tree = NULL;
1.59 daniel 5247: attrName = xmlParseName(ctxt);
5248: if (attrName == NULL) {
5249: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5250: ctxt->sax->error(ctxt->userData,
5251: "ATTLIST: no name for Attribute\n");
1.123 daniel 5252: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5253: ctxt->wellFormed = 0;
1.180 daniel 5254: ctxt->disableSAX = 1;
1.59 daniel 5255: break;
5256: }
1.97 daniel 5257: GROW;
1.59 daniel 5258: if (!IS_BLANK(CUR)) {
5259: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5260: ctxt->sax->error(ctxt->userData,
1.59 daniel 5261: "Space required after the attribute name\n");
1.123 daniel 5262: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5263: ctxt->wellFormed = 0;
1.180 daniel 5264: ctxt->disableSAX = 1;
1.170 daniel 5265: if (attrName != NULL)
5266: xmlFree(attrName);
5267: if (defaultValue != NULL)
5268: xmlFree(defaultValue);
1.59 daniel 5269: break;
5270: }
5271: SKIP_BLANKS;
5272:
1.66 daniel 5273: type = xmlParseAttributeType(ctxt, &tree);
1.170 daniel 5274: if (type <= 0) {
5275: if (attrName != NULL)
5276: xmlFree(attrName);
5277: if (defaultValue != NULL)
5278: xmlFree(defaultValue);
5279: break;
5280: }
1.22 daniel 5281:
1.97 daniel 5282: GROW;
1.59 daniel 5283: if (!IS_BLANK(CUR)) {
5284: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5285: ctxt->sax->error(ctxt->userData,
1.59 daniel 5286: "Space required after the attribute type\n");
1.123 daniel 5287: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5288: ctxt->wellFormed = 0;
1.180 daniel 5289: ctxt->disableSAX = 1;
1.170 daniel 5290: if (attrName != NULL)
5291: xmlFree(attrName);
5292: if (defaultValue != NULL)
5293: xmlFree(defaultValue);
5294: if (tree != NULL)
5295: xmlFreeEnumeration(tree);
1.59 daniel 5296: break;
5297: }
1.42 daniel 5298: SKIP_BLANKS;
1.59 daniel 5299:
5300: def = xmlParseDefaultDecl(ctxt, &defaultValue);
1.170 daniel 5301: if (def <= 0) {
5302: if (attrName != NULL)
5303: xmlFree(attrName);
5304: if (defaultValue != NULL)
5305: xmlFree(defaultValue);
5306: if (tree != NULL)
5307: xmlFreeEnumeration(tree);
5308: break;
5309: }
1.59 daniel 5310:
1.97 daniel 5311: GROW;
1.152 daniel 5312: if (RAW != '>') {
1.59 daniel 5313: if (!IS_BLANK(CUR)) {
5314: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5315: ctxt->sax->error(ctxt->userData,
1.59 daniel 5316: "Space required after the attribute default value\n");
1.123 daniel 5317: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5318: ctxt->wellFormed = 0;
1.180 daniel 5319: ctxt->disableSAX = 1;
1.170 daniel 5320: if (attrName != NULL)
5321: xmlFree(attrName);
5322: if (defaultValue != NULL)
5323: xmlFree(defaultValue);
5324: if (tree != NULL)
5325: xmlFreeEnumeration(tree);
1.59 daniel 5326: break;
5327: }
5328: SKIP_BLANKS;
5329: }
1.40 daniel 5330: if (check == CUR_PTR) {
1.55 daniel 5331: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5332: ctxt->sax->error(ctxt->userData,
1.59 daniel 5333: "xmlParseAttributeListDecl: detected internal error\n");
1.123 daniel 5334: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.170 daniel 5335: if (attrName != NULL)
5336: xmlFree(attrName);
5337: if (defaultValue != NULL)
5338: xmlFree(defaultValue);
5339: if (tree != NULL)
5340: xmlFreeEnumeration(tree);
1.22 daniel 5341: break;
5342: }
1.171 daniel 5343: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5344: (ctxt->sax->attributeDecl != NULL))
1.74 daniel 5345: ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
1.66 daniel 5346: type, def, defaultValue, tree);
1.59 daniel 5347: if (attrName != NULL)
1.119 daniel 5348: xmlFree(attrName);
1.59 daniel 5349: if (defaultValue != NULL)
1.119 daniel 5350: xmlFree(defaultValue);
1.97 daniel 5351: GROW;
1.22 daniel 5352: }
1.187 daniel 5353: if (RAW == '>') {
5354: if (input != ctxt->input) {
5355: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5356: ctxt->sax->error(ctxt->userData,
5357: "Attribute list declaration doesn't start and stop in the same entity\n");
5358: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5359: ctxt->wellFormed = 0;
5360: ctxt->disableSAX = 1;
5361: }
1.40 daniel 5362: NEXT;
1.187 daniel 5363: }
1.22 daniel 5364:
1.119 daniel 5365: xmlFree(elemName);
1.22 daniel 5366: }
5367: }
5368:
1.50 daniel 5369: /**
1.61 daniel 5370: * xmlParseElementMixedContentDecl:
5371: * @ctxt: an XML parser context
5372: *
5373: * parse the declaration for a Mixed Element content
5374: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5375: *
5376: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5377: * '(' S? '#PCDATA' S? ')'
5378: *
1.99 daniel 5379: * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5380: *
5381: * [ VC: No Duplicate Types ]
1.117 daniel 5382: * The same name must not appear more than once in a single
5383: * mixed-content declaration.
1.99 daniel 5384: *
1.61 daniel 5385: * returns: the list of the xmlElementContentPtr describing the element choices
5386: */
5387: xmlElementContentPtr
1.62 daniel 5388: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
1.64 daniel 5389: xmlElementContentPtr ret = NULL, cur = NULL, n;
1.123 daniel 5390: xmlChar *elem = NULL;
1.61 daniel 5391:
1.97 daniel 5392: GROW;
1.152 daniel 5393: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 5394: (NXT(2) == 'C') && (NXT(3) == 'D') &&
5395: (NXT(4) == 'A') && (NXT(5) == 'T') &&
5396: (NXT(6) == 'A')) {
5397: SKIP(7);
5398: SKIP_BLANKS;
1.91 daniel 5399: SHRINK;
1.152 daniel 5400: if (RAW == ')') {
1.187 daniel 5401: ctxt->entity = ctxt->input;
1.63 daniel 5402: NEXT;
5403: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
1.152 daniel 5404: if (RAW == '*') {
1.136 daniel 5405: ret->ocur = XML_ELEMENT_CONTENT_MULT;
5406: NEXT;
5407: }
1.63 daniel 5408: return(ret);
5409: }
1.152 daniel 5410: if ((RAW == '(') || (RAW == '|')) {
1.61 daniel 5411: ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
5412: if (ret == NULL) return(NULL);
1.99 daniel 5413: }
1.152 daniel 5414: while (RAW == '|') {
1.64 daniel 5415: NEXT;
1.61 daniel 5416: if (elem == NULL) {
5417: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5418: if (ret == NULL) return(NULL);
5419: ret->c1 = cur;
1.64 daniel 5420: cur = ret;
1.61 daniel 5421: } else {
1.64 daniel 5422: n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5423: if (n == NULL) return(NULL);
5424: n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
5425: cur->c2 = n;
5426: cur = n;
1.119 daniel 5427: xmlFree(elem);
1.61 daniel 5428: }
5429: SKIP_BLANKS;
5430: elem = xmlParseName(ctxt);
5431: if (elem == NULL) {
5432: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5433: ctxt->sax->error(ctxt->userData,
1.61 daniel 5434: "xmlParseElementMixedContentDecl : Name expected\n");
1.123 daniel 5435: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.61 daniel 5436: ctxt->wellFormed = 0;
1.180 daniel 5437: ctxt->disableSAX = 1;
1.61 daniel 5438: xmlFreeElementContent(cur);
5439: return(NULL);
5440: }
5441: SKIP_BLANKS;
1.97 daniel 5442: GROW;
1.61 daniel 5443: }
1.152 daniel 5444: if ((RAW == ')') && (NXT(1) == '*')) {
1.66 daniel 5445: if (elem != NULL) {
1.61 daniel 5446: cur->c2 = xmlNewElementContent(elem,
5447: XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 5448: xmlFree(elem);
1.66 daniel 5449: }
1.65 daniel 5450: ret->ocur = XML_ELEMENT_CONTENT_MULT;
1.187 daniel 5451: ctxt->entity = ctxt->input;
1.64 daniel 5452: SKIP(2);
1.61 daniel 5453: } else {
1.119 daniel 5454: if (elem != NULL) xmlFree(elem);
1.61 daniel 5455: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5456: ctxt->sax->error(ctxt->userData,
1.63 daniel 5457: "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
1.123 daniel 5458: ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
1.61 daniel 5459: ctxt->wellFormed = 0;
1.180 daniel 5460: ctxt->disableSAX = 1;
1.61 daniel 5461: xmlFreeElementContent(ret);
5462: return(NULL);
5463: }
5464:
5465: } else {
5466: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5467: ctxt->sax->error(ctxt->userData,
1.61 daniel 5468: "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
1.123 daniel 5469: ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
1.61 daniel 5470: ctxt->wellFormed = 0;
1.180 daniel 5471: ctxt->disableSAX = 1;
1.61 daniel 5472: }
5473: return(ret);
5474: }
5475:
5476: /**
5477: * xmlParseElementChildrenContentDecl:
1.50 daniel 5478: * @ctxt: an XML parser context
5479: *
1.61 daniel 5480: * parse the declaration for a Mixed Element content
5481: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
1.22 daniel 5482: *
1.61 daniel 5483: *
1.22 daniel 5484: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5485: *
5486: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5487: *
5488: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5489: *
5490: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5491: *
1.99 daniel 5492: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5493: * TODO Parameter-entity replacement text must be properly nested
5494: * with parenthetized groups. That is to say, if either of the
5495: * opening or closing parentheses in a choice, seq, or Mixed
5496: * construct is contained in the replacement text for a parameter
5497: * entity, both must be contained in the same replacement text. For
5498: * interoperability, if a parameter-entity reference appears in a
5499: * choice, seq, or Mixed construct, its replacement text should not
5500: * be empty, and neither the first nor last non-blank character of
5501: * the replacement text should be a connector (| or ,).
5502: *
1.62 daniel 5503: * returns: the tree of xmlElementContentPtr describing the element
1.61 daniel 5504: * hierarchy.
5505: */
5506: xmlElementContentPtr
1.62 daniel 5507: xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
1.63 daniel 5508: xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
1.123 daniel 5509: xmlChar *elem;
5510: xmlChar type = 0;
1.62 daniel 5511:
5512: SKIP_BLANKS;
1.94 daniel 5513: GROW;
1.152 daniel 5514: if (RAW == '(') {
1.63 daniel 5515: /* Recurse on first child */
1.62 daniel 5516: NEXT;
5517: SKIP_BLANKS;
5518: cur = ret = xmlParseElementChildrenContentDecl(ctxt);
5519: SKIP_BLANKS;
1.101 daniel 5520: GROW;
1.62 daniel 5521: } else {
5522: elem = xmlParseName(ctxt);
5523: if (elem == NULL) {
5524: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5525: ctxt->sax->error(ctxt->userData,
1.62 daniel 5526: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.123 daniel 5527: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 5528: ctxt->wellFormed = 0;
1.180 daniel 5529: ctxt->disableSAX = 1;
1.62 daniel 5530: return(NULL);
5531: }
5532: cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.101 daniel 5533: GROW;
1.152 daniel 5534: if (RAW == '?') {
1.104 daniel 5535: cur->ocur = XML_ELEMENT_CONTENT_OPT;
1.62 daniel 5536: NEXT;
1.152 daniel 5537: } else if (RAW == '*') {
1.104 daniel 5538: cur->ocur = XML_ELEMENT_CONTENT_MULT;
1.62 daniel 5539: NEXT;
1.152 daniel 5540: } else if (RAW == '+') {
1.104 daniel 5541: cur->ocur = XML_ELEMENT_CONTENT_PLUS;
1.62 daniel 5542: NEXT;
5543: } else {
1.104 daniel 5544: cur->ocur = XML_ELEMENT_CONTENT_ONCE;
1.62 daniel 5545: }
1.119 daniel 5546: xmlFree(elem);
1.101 daniel 5547: GROW;
1.62 daniel 5548: }
5549: SKIP_BLANKS;
1.91 daniel 5550: SHRINK;
1.152 daniel 5551: while (RAW != ')') {
1.63 daniel 5552: /*
5553: * Each loop we parse one separator and one element.
5554: */
1.152 daniel 5555: if (RAW == ',') {
1.62 daniel 5556: if (type == 0) type = CUR;
5557:
5558: /*
5559: * Detect "Name | Name , Name" error
5560: */
5561: else if (type != CUR) {
5562: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5563: ctxt->sax->error(ctxt->userData,
1.62 daniel 5564: "xmlParseElementChildrenContentDecl : '%c' expected\n",
5565: type);
1.123 daniel 5566: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 5567: ctxt->wellFormed = 0;
1.180 daniel 5568: ctxt->disableSAX = 1;
1.170 daniel 5569: if ((op != NULL) && (op != ret))
5570: xmlFreeElementContent(op);
5571: if ((last != NULL) && (last != ret))
5572: xmlFreeElementContent(last);
5573: if (ret != NULL)
5574: xmlFreeElementContent(ret);
1.62 daniel 5575: return(NULL);
5576: }
1.64 daniel 5577: NEXT;
1.62 daniel 5578:
1.63 daniel 5579: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
5580: if (op == NULL) {
5581: xmlFreeElementContent(ret);
5582: return(NULL);
5583: }
5584: if (last == NULL) {
5585: op->c1 = ret;
1.65 daniel 5586: ret = cur = op;
1.63 daniel 5587: } else {
5588: cur->c2 = op;
5589: op->c1 = last;
5590: cur =op;
1.65 daniel 5591: last = NULL;
1.63 daniel 5592: }
1.152 daniel 5593: } else if (RAW == '|') {
1.62 daniel 5594: if (type == 0) type = CUR;
5595:
5596: /*
1.63 daniel 5597: * Detect "Name , Name | Name" error
1.62 daniel 5598: */
5599: else if (type != CUR) {
5600: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5601: ctxt->sax->error(ctxt->userData,
1.62 daniel 5602: "xmlParseElementChildrenContentDecl : '%c' expected\n",
5603: type);
1.123 daniel 5604: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 5605: ctxt->wellFormed = 0;
1.180 daniel 5606: ctxt->disableSAX = 1;
1.170 daniel 5607: if ((op != NULL) && (op != ret))
5608: xmlFreeElementContent(op);
5609: if ((last != NULL) && (last != ret))
5610: xmlFreeElementContent(last);
5611: if (ret != NULL)
5612: xmlFreeElementContent(ret);
1.62 daniel 5613: return(NULL);
5614: }
1.64 daniel 5615: NEXT;
1.62 daniel 5616:
1.63 daniel 5617: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5618: if (op == NULL) {
1.170 daniel 5619: if ((op != NULL) && (op != ret))
5620: xmlFreeElementContent(op);
5621: if ((last != NULL) && (last != ret))
5622: xmlFreeElementContent(last);
5623: if (ret != NULL)
5624: xmlFreeElementContent(ret);
1.63 daniel 5625: return(NULL);
5626: }
5627: if (last == NULL) {
5628: op->c1 = ret;
1.65 daniel 5629: ret = cur = op;
1.63 daniel 5630: } else {
5631: cur->c2 = op;
5632: op->c1 = last;
5633: cur =op;
1.65 daniel 5634: last = NULL;
1.63 daniel 5635: }
1.62 daniel 5636: } else {
5637: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5638: ctxt->sax->error(ctxt->userData,
1.62 daniel 5639: "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
5640: ctxt->wellFormed = 0;
1.180 daniel 5641: ctxt->disableSAX = 1;
1.123 daniel 5642: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
1.170 daniel 5643: if ((op != NULL) && (op != ret))
5644: xmlFreeElementContent(op);
5645: if ((last != NULL) && (last != ret))
5646: xmlFreeElementContent(last);
5647: if (ret != NULL)
5648: xmlFreeElementContent(ret);
1.62 daniel 5649: return(NULL);
5650: }
1.101 daniel 5651: GROW;
1.62 daniel 5652: SKIP_BLANKS;
1.101 daniel 5653: GROW;
1.152 daniel 5654: if (RAW == '(') {
1.63 daniel 5655: /* Recurse on second child */
1.62 daniel 5656: NEXT;
5657: SKIP_BLANKS;
1.65 daniel 5658: last = xmlParseElementChildrenContentDecl(ctxt);
1.62 daniel 5659: SKIP_BLANKS;
5660: } else {
5661: elem = xmlParseName(ctxt);
5662: if (elem == NULL) {
5663: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5664: ctxt->sax->error(ctxt->userData,
1.122 daniel 5665: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.123 daniel 5666: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 5667: ctxt->wellFormed = 0;
1.180 daniel 5668: ctxt->disableSAX = 1;
1.170 daniel 5669: if ((op != NULL) && (op != ret))
5670: xmlFreeElementContent(op);
5671: if ((last != NULL) && (last != ret))
5672: xmlFreeElementContent(last);
5673: if (ret != NULL)
5674: xmlFreeElementContent(ret);
1.62 daniel 5675: return(NULL);
5676: }
1.65 daniel 5677: last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 5678: xmlFree(elem);
1.152 daniel 5679: if (RAW == '?') {
1.105 daniel 5680: last->ocur = XML_ELEMENT_CONTENT_OPT;
5681: NEXT;
1.152 daniel 5682: } else if (RAW == '*') {
1.105 daniel 5683: last->ocur = XML_ELEMENT_CONTENT_MULT;
5684: NEXT;
1.152 daniel 5685: } else if (RAW == '+') {
1.105 daniel 5686: last->ocur = XML_ELEMENT_CONTENT_PLUS;
5687: NEXT;
5688: } else {
5689: last->ocur = XML_ELEMENT_CONTENT_ONCE;
5690: }
1.63 daniel 5691: }
5692: SKIP_BLANKS;
1.97 daniel 5693: GROW;
1.64 daniel 5694: }
1.65 daniel 5695: if ((cur != NULL) && (last != NULL)) {
5696: cur->c2 = last;
1.62 daniel 5697: }
1.187 daniel 5698: ctxt->entity = ctxt->input;
1.62 daniel 5699: NEXT;
1.152 daniel 5700: if (RAW == '?') {
1.62 daniel 5701: ret->ocur = XML_ELEMENT_CONTENT_OPT;
5702: NEXT;
1.152 daniel 5703: } else if (RAW == '*') {
1.62 daniel 5704: ret->ocur = XML_ELEMENT_CONTENT_MULT;
5705: NEXT;
1.152 daniel 5706: } else if (RAW == '+') {
1.62 daniel 5707: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
5708: NEXT;
5709: }
5710: return(ret);
1.61 daniel 5711: }
5712:
5713: /**
5714: * xmlParseElementContentDecl:
5715: * @ctxt: an XML parser context
5716: * @name: the name of the element being defined.
5717: * @result: the Element Content pointer will be stored here if any
1.22 daniel 5718: *
1.61 daniel 5719: * parse the declaration for an Element content either Mixed or Children,
5720: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5721: *
5722: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1.50 daniel 5723: *
1.61 daniel 5724: * returns: the type of element content XML_ELEMENT_TYPE_xxx
1.22 daniel 5725: */
5726:
1.61 daniel 5727: int
1.123 daniel 5728: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
1.61 daniel 5729: xmlElementContentPtr *result) {
5730:
5731: xmlElementContentPtr tree = NULL;
1.187 daniel 5732: xmlParserInputPtr input = ctxt->input;
1.61 daniel 5733: int res;
5734:
5735: *result = NULL;
5736:
1.152 daniel 5737: if (RAW != '(') {
1.61 daniel 5738: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5739: ctxt->sax->error(ctxt->userData,
1.61 daniel 5740: "xmlParseElementContentDecl : '(' expected\n");
1.123 daniel 5741: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.61 daniel 5742: ctxt->wellFormed = 0;
1.180 daniel 5743: ctxt->disableSAX = 1;
1.61 daniel 5744: return(-1);
5745: }
5746: NEXT;
1.97 daniel 5747: GROW;
1.61 daniel 5748: SKIP_BLANKS;
1.152 daniel 5749: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 5750: (NXT(2) == 'C') && (NXT(3) == 'D') &&
5751: (NXT(4) == 'A') && (NXT(5) == 'T') &&
5752: (NXT(6) == 'A')) {
1.62 daniel 5753: tree = xmlParseElementMixedContentDecl(ctxt);
1.61 daniel 5754: res = XML_ELEMENT_TYPE_MIXED;
5755: } else {
1.62 daniel 5756: tree = xmlParseElementChildrenContentDecl(ctxt);
1.61 daniel 5757: res = XML_ELEMENT_TYPE_ELEMENT;
5758: }
1.187 daniel 5759: if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
5760: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5761: ctxt->sax->error(ctxt->userData,
5762: "Element content declaration doesn't start and stop in the same entity\n");
5763: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5764: ctxt->wellFormed = 0;
5765: ctxt->disableSAX = 1;
5766: }
1.61 daniel 5767: SKIP_BLANKS;
1.63 daniel 5768: /****************************
1.152 daniel 5769: if (RAW != ')') {
1.61 daniel 5770: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5771: ctxt->sax->error(ctxt->userData,
1.61 daniel 5772: "xmlParseElementContentDecl : ')' expected\n");
5773: ctxt->wellFormed = 0;
1.180 daniel 5774: ctxt->disableSAX = 1;
1.61 daniel 5775: return(-1);
5776: }
1.63 daniel 5777: ****************************/
5778: *result = tree;
1.61 daniel 5779: return(res);
1.22 daniel 5780: }
5781:
1.50 daniel 5782: /**
5783: * xmlParseElementDecl:
5784: * @ctxt: an XML parser context
5785: *
5786: * parse an Element declaration.
1.22 daniel 5787: *
5788: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5789: *
1.99 daniel 5790: * [ VC: Unique Element Type Declaration ]
1.117 daniel 5791: * No element type may be declared more than once
1.69 daniel 5792: *
5793: * Returns the type of the element, or -1 in case of error
1.22 daniel 5794: */
1.59 daniel 5795: int
1.55 daniel 5796: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 5797: xmlChar *name;
1.59 daniel 5798: int ret = -1;
1.61 daniel 5799: xmlElementContentPtr content = NULL;
1.22 daniel 5800:
1.97 daniel 5801: GROW;
1.152 daniel 5802: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 5803: (NXT(2) == 'E') && (NXT(3) == 'L') &&
5804: (NXT(4) == 'E') && (NXT(5) == 'M') &&
5805: (NXT(6) == 'E') && (NXT(7) == 'N') &&
1.59 daniel 5806: (NXT(8) == 'T')) {
1.187 daniel 5807: xmlParserInputPtr input = ctxt->input;
5808:
1.40 daniel 5809: SKIP(9);
1.59 daniel 5810: if (!IS_BLANK(CUR)) {
5811: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5812: ctxt->sax->error(ctxt->userData,
1.59 daniel 5813: "Space required after 'ELEMENT'\n");
1.123 daniel 5814: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5815: ctxt->wellFormed = 0;
1.180 daniel 5816: ctxt->disableSAX = 1;
1.59 daniel 5817: }
1.42 daniel 5818: SKIP_BLANKS;
1.22 daniel 5819: name = xmlParseName(ctxt);
5820: if (name == NULL) {
1.55 daniel 5821: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5822: ctxt->sax->error(ctxt->userData,
1.59 daniel 5823: "xmlParseElementDecl: no name for Element\n");
1.123 daniel 5824: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5825: ctxt->wellFormed = 0;
1.180 daniel 5826: ctxt->disableSAX = 1;
1.59 daniel 5827: return(-1);
5828: }
5829: if (!IS_BLANK(CUR)) {
5830: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5831: ctxt->sax->error(ctxt->userData,
1.59 daniel 5832: "Space required after the element name\n");
1.123 daniel 5833: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5834: ctxt->wellFormed = 0;
1.180 daniel 5835: ctxt->disableSAX = 1;
1.22 daniel 5836: }
1.42 daniel 5837: SKIP_BLANKS;
1.152 daniel 5838: if ((RAW == 'E') && (NXT(1) == 'M') &&
1.40 daniel 5839: (NXT(2) == 'P') && (NXT(3) == 'T') &&
5840: (NXT(4) == 'Y')) {
5841: SKIP(5);
1.22 daniel 5842: /*
5843: * Element must always be empty.
5844: */
1.59 daniel 5845: ret = XML_ELEMENT_TYPE_EMPTY;
1.152 daniel 5846: } else if ((RAW == 'A') && (NXT(1) == 'N') &&
1.40 daniel 5847: (NXT(2) == 'Y')) {
5848: SKIP(3);
1.22 daniel 5849: /*
5850: * Element is a generic container.
5851: */
1.59 daniel 5852: ret = XML_ELEMENT_TYPE_ANY;
1.152 daniel 5853: } else if (RAW == '(') {
1.61 daniel 5854: ret = xmlParseElementContentDecl(ctxt, name, &content);
1.22 daniel 5855: } else {
1.98 daniel 5856: /*
5857: * [ WFC: PEs in Internal Subset ] error handling.
5858: */
1.152 daniel 5859: if ((RAW == '%') && (ctxt->external == 0) &&
1.98 daniel 5860: (ctxt->inputNr == 1)) {
5861: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5862: ctxt->sax->error(ctxt->userData,
5863: "PEReference: forbidden within markup decl in internal subset\n");
1.123 daniel 5864: ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
1.98 daniel 5865: } else {
5866: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5867: ctxt->sax->error(ctxt->userData,
5868: "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
1.123 daniel 5869: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.98 daniel 5870: }
1.61 daniel 5871: ctxt->wellFormed = 0;
1.180 daniel 5872: ctxt->disableSAX = 1;
1.119 daniel 5873: if (name != NULL) xmlFree(name);
1.61 daniel 5874: return(-1);
1.22 daniel 5875: }
1.142 daniel 5876:
5877: SKIP_BLANKS;
5878: /*
5879: * Pop-up of finished entities.
5880: */
1.152 daniel 5881: while ((RAW == 0) && (ctxt->inputNr > 1))
1.142 daniel 5882: xmlPopInput(ctxt);
1.42 daniel 5883: SKIP_BLANKS;
1.142 daniel 5884:
1.152 daniel 5885: if (RAW != '>') {
1.55 daniel 5886: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5887: ctxt->sax->error(ctxt->userData,
1.31 daniel 5888: "xmlParseElementDecl: expected '>' at the end\n");
1.123 daniel 5889: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.59 daniel 5890: ctxt->wellFormed = 0;
1.180 daniel 5891: ctxt->disableSAX = 1;
1.61 daniel 5892: } else {
1.187 daniel 5893: if (input != ctxt->input) {
5894: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5895: ctxt->sax->error(ctxt->userData,
5896: "Element declaration doesn't start and stop in the same entity\n");
5897: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5898: ctxt->wellFormed = 0;
5899: ctxt->disableSAX = 1;
5900: }
5901:
1.40 daniel 5902: NEXT;
1.171 daniel 5903: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5904: (ctxt->sax->elementDecl != NULL))
1.76 daniel 5905: ctxt->sax->elementDecl(ctxt->userData, name, ret,
5906: content);
1.61 daniel 5907: }
1.84 daniel 5908: if (content != NULL) {
5909: xmlFreeElementContent(content);
5910: }
1.61 daniel 5911: if (name != NULL) {
1.119 daniel 5912: xmlFree(name);
1.61 daniel 5913: }
1.22 daniel 5914: }
1.59 daniel 5915: return(ret);
1.22 daniel 5916: }
5917:
1.50 daniel 5918: /**
5919: * xmlParseMarkupDecl:
5920: * @ctxt: an XML parser context
5921: *
5922: * parse Markup declarations
1.22 daniel 5923: *
5924: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5925: * NotationDecl | PI | Comment
5926: *
1.98 daniel 5927: * [ VC: Proper Declaration/PE Nesting ]
5928: * TODO Parameter-entity replacement text must be properly nested with
5929: * markup declarations. That is to say, if either the first character
5930: * or the last character of a markup declaration (markupdecl above) is
5931: * contained in the replacement text for a parameter-entity reference,
5932: * both must be contained in the same replacement text.
5933: *
5934: * [ WFC: PEs in Internal Subset ]
5935: * In the internal DTD subset, parameter-entity references can occur
5936: * only where markup declarations can occur, not within markup declarations.
5937: * (This does not apply to references that occur in external parameter
5938: * entities or to the external subset.)
1.22 daniel 5939: */
1.55 daniel 5940: void
5941: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1.94 daniel 5942: GROW;
1.22 daniel 5943: xmlParseElementDecl(ctxt);
5944: xmlParseAttributeListDecl(ctxt);
5945: xmlParseEntityDecl(ctxt);
5946: xmlParseNotationDecl(ctxt);
5947: xmlParsePI(ctxt);
1.114 daniel 5948: xmlParseComment(ctxt);
1.98 daniel 5949: /*
5950: * This is only for internal subset. On external entities,
5951: * the replacement is done before parsing stage
5952: */
5953: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5954: xmlParsePEReference(ctxt);
1.97 daniel 5955: ctxt->instate = XML_PARSER_DTD;
1.22 daniel 5956: }
5957:
1.50 daniel 5958: /**
1.76 daniel 5959: * xmlParseTextDecl:
5960: * @ctxt: an XML parser context
5961: *
5962: * parse an XML declaration header for external entities
5963: *
5964: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
1.176 daniel 5965: *
5966: * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
1.76 daniel 5967: */
5968:
1.172 daniel 5969: void
1.76 daniel 5970: xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 5971: xmlChar *version;
1.76 daniel 5972:
5973: /*
5974: * We know that '<?xml' is here.
5975: */
5976: SKIP(5);
5977:
5978: if (!IS_BLANK(CUR)) {
5979: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5980: ctxt->sax->error(ctxt->userData,
5981: "Space needed after '<?xml'\n");
1.123 daniel 5982: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 5983: ctxt->wellFormed = 0;
1.180 daniel 5984: ctxt->disableSAX = 1;
1.76 daniel 5985: }
5986: SKIP_BLANKS;
5987:
5988: /*
5989: * We may have the VersionInfo here.
5990: */
5991: version = xmlParseVersionInfo(ctxt);
5992: if (version == NULL)
5993: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.172 daniel 5994: ctxt->input->version = version;
1.76 daniel 5995:
5996: /*
5997: * We must have the encoding declaration
5998: */
5999: if (!IS_BLANK(CUR)) {
6000: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6001: ctxt->sax->error(ctxt->userData, "Space needed here\n");
1.123 daniel 6002: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 6003: ctxt->wellFormed = 0;
1.180 daniel 6004: ctxt->disableSAX = 1;
1.76 daniel 6005: }
1.172 daniel 6006: ctxt->input->encoding = xmlParseEncodingDecl(ctxt);
1.76 daniel 6007:
6008: SKIP_BLANKS;
1.152 daniel 6009: if ((RAW == '?') && (NXT(1) == '>')) {
1.76 daniel 6010: SKIP(2);
1.152 daniel 6011: } else if (RAW == '>') {
1.76 daniel 6012: /* Deprecated old WD ... */
6013: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6014: ctxt->sax->error(ctxt->userData,
6015: "XML declaration must end-up with '?>'\n");
1.123 daniel 6016: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 6017: ctxt->wellFormed = 0;
1.180 daniel 6018: ctxt->disableSAX = 1;
1.76 daniel 6019: NEXT;
6020: } else {
6021: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6022: ctxt->sax->error(ctxt->userData,
6023: "parsing XML declaration: '?>' expected\n");
1.123 daniel 6024: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 6025: ctxt->wellFormed = 0;
1.180 daniel 6026: ctxt->disableSAX = 1;
1.76 daniel 6027: MOVETO_ENDTAG(CUR_PTR);
6028: NEXT;
6029: }
6030: }
6031:
6032: /*
6033: * xmlParseConditionalSections
6034: * @ctxt: an XML parser context
6035: *
6036: * TODO : Conditionnal section are not yet supported !
6037: *
6038: * [61] conditionalSect ::= includeSect | ignoreSect
6039: * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6040: * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6041: * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6042: * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6043: */
6044:
6045: void
6046: xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
1.165 daniel 6047: SKIP(3);
6048: SKIP_BLANKS;
1.168 daniel 6049: if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
6050: (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
6051: (NXT(6) == 'E')) {
1.165 daniel 6052: SKIP(7);
1.168 daniel 6053: SKIP_BLANKS;
6054: if (RAW != '[') {
6055: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6056: ctxt->sax->error(ctxt->userData,
6057: "XML conditional section '[' expected\n");
6058: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
6059: ctxt->wellFormed = 0;
1.180 daniel 6060: ctxt->disableSAX = 1;
1.168 daniel 6061: } else {
6062: NEXT;
6063: }
1.165 daniel 6064: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6065: (NXT(2) != '>'))) {
6066: const xmlChar *check = CUR_PTR;
6067: int cons = ctxt->input->consumed;
6068: int tok = ctxt->token;
6069:
6070: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6071: xmlParseConditionalSections(ctxt);
6072: } else if (IS_BLANK(CUR)) {
6073: NEXT;
6074: } else if (RAW == '%') {
6075: xmlParsePEReference(ctxt);
6076: } else
6077: xmlParseMarkupDecl(ctxt);
6078:
6079: /*
6080: * Pop-up of finished entities.
6081: */
6082: while ((RAW == 0) && (ctxt->inputNr > 1))
6083: xmlPopInput(ctxt);
6084:
6085: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
6086: (tok == ctxt->token)) {
6087: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6088: ctxt->sax->error(ctxt->userData,
6089: "Content error in the external subset\n");
6090: ctxt->wellFormed = 0;
1.180 daniel 6091: ctxt->disableSAX = 1;
1.165 daniel 6092: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
6093: break;
6094: }
6095: }
1.168 daniel 6096: } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
6097: (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
1.171 daniel 6098: int state;
6099:
1.168 daniel 6100: SKIP(6);
6101: SKIP_BLANKS;
6102: if (RAW != '[') {
6103: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6104: ctxt->sax->error(ctxt->userData,
6105: "XML conditional section '[' expected\n");
6106: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
6107: ctxt->wellFormed = 0;
1.180 daniel 6108: ctxt->disableSAX = 1;
1.168 daniel 6109: } else {
6110: NEXT;
6111: }
1.171 daniel 6112:
1.143 daniel 6113: /*
1.171 daniel 6114: * Parse up to the end of the conditionnal section
6115: * But disable SAX event generating DTD building in the meantime
1.143 daniel 6116: */
1.171 daniel 6117: state = ctxt->disableSAX;
1.165 daniel 6118: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6119: (NXT(2) != '>'))) {
1.171 daniel 6120: const xmlChar *check = CUR_PTR;
6121: int cons = ctxt->input->consumed;
6122: int tok = ctxt->token;
6123:
6124: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6125: xmlParseConditionalSections(ctxt);
6126: } else if (IS_BLANK(CUR)) {
6127: NEXT;
6128: } else if (RAW == '%') {
6129: xmlParsePEReference(ctxt);
6130: } else
6131: xmlParseMarkupDecl(ctxt);
6132:
1.165 daniel 6133: /*
6134: * Pop-up of finished entities.
6135: */
6136: while ((RAW == 0) && (ctxt->inputNr > 1))
6137: xmlPopInput(ctxt);
1.143 daniel 6138:
1.171 daniel 6139: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
6140: (tok == ctxt->token)) {
6141: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6142: ctxt->sax->error(ctxt->userData,
6143: "Content error in the external subset\n");
6144: ctxt->wellFormed = 0;
1.180 daniel 6145: ctxt->disableSAX = 1;
1.171 daniel 6146: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
6147: break;
6148: }
1.165 daniel 6149: }
1.171 daniel 6150: ctxt->disableSAX = state;
1.168 daniel 6151: } else {
6152: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6153: ctxt->sax->error(ctxt->userData,
6154: "XML conditional section INCLUDE or IGNORE keyword expected\n");
6155: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
6156: ctxt->wellFormed = 0;
1.180 daniel 6157: ctxt->disableSAX = 1;
1.143 daniel 6158: }
6159:
1.152 daniel 6160: if (RAW == 0)
1.143 daniel 6161: SHRINK;
6162:
1.152 daniel 6163: if (RAW == 0) {
1.76 daniel 6164: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6165: ctxt->sax->error(ctxt->userData,
6166: "XML conditional section not closed\n");
1.123 daniel 6167: ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
1.76 daniel 6168: ctxt->wellFormed = 0;
1.180 daniel 6169: ctxt->disableSAX = 1;
1.143 daniel 6170: } else {
6171: SKIP(3);
1.76 daniel 6172: }
6173: }
6174:
6175: /**
1.124 daniel 6176: * xmlParseExternalSubset:
1.76 daniel 6177: * @ctxt: an XML parser context
1.124 daniel 6178: * @ExternalID: the external identifier
6179: * @SystemID: the system identifier (or URL)
1.76 daniel 6180: *
6181: * parse Markup declarations from an external subset
6182: *
6183: * [30] extSubset ::= textDecl? extSubsetDecl
6184: *
6185: * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6186: */
6187: void
1.123 daniel 6188: xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6189: const xmlChar *SystemID) {
1.132 daniel 6190: GROW;
1.152 daniel 6191: if ((RAW == '<') && (NXT(1) == '?') &&
1.76 daniel 6192: (NXT(2) == 'x') && (NXT(3) == 'm') &&
6193: (NXT(4) == 'l')) {
1.172 daniel 6194: xmlParseTextDecl(ctxt);
1.76 daniel 6195: }
1.79 daniel 6196: if (ctxt->myDoc == NULL) {
1.116 daniel 6197: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
1.79 daniel 6198: }
6199: if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6200: xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6201:
1.96 daniel 6202: ctxt->instate = XML_PARSER_DTD;
1.101 daniel 6203: ctxt->external = 1;
1.152 daniel 6204: while (((RAW == '<') && (NXT(1) == '?')) ||
6205: ((RAW == '<') && (NXT(1) == '!')) ||
1.164 daniel 6206: IS_BLANK(CUR)) {
1.123 daniel 6207: const xmlChar *check = CUR_PTR;
1.115 daniel 6208: int cons = ctxt->input->consumed;
1.164 daniel 6209: int tok = ctxt->token;
1.115 daniel 6210:
1.152 daniel 6211: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
1.76 daniel 6212: xmlParseConditionalSections(ctxt);
6213: } else if (IS_BLANK(CUR)) {
6214: NEXT;
1.152 daniel 6215: } else if (RAW == '%') {
1.76 daniel 6216: xmlParsePEReference(ctxt);
6217: } else
6218: xmlParseMarkupDecl(ctxt);
1.77 daniel 6219:
6220: /*
6221: * Pop-up of finished entities.
6222: */
1.166 daniel 6223: while ((RAW == 0) && (ctxt->inputNr > 1))
1.77 daniel 6224: xmlPopInput(ctxt);
6225:
1.164 daniel 6226: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
6227: (tok == ctxt->token)) {
1.115 daniel 6228: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6229: ctxt->sax->error(ctxt->userData,
6230: "Content error in the external subset\n");
6231: ctxt->wellFormed = 0;
1.180 daniel 6232: ctxt->disableSAX = 1;
1.123 daniel 6233: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.115 daniel 6234: break;
6235: }
1.76 daniel 6236: }
6237:
1.152 daniel 6238: if (RAW != 0) {
1.76 daniel 6239: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6240: ctxt->sax->error(ctxt->userData,
6241: "Extra content at the end of the document\n");
1.123 daniel 6242: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.76 daniel 6243: ctxt->wellFormed = 0;
1.180 daniel 6244: ctxt->disableSAX = 1;
1.76 daniel 6245: }
6246:
6247: }
6248:
6249: /**
1.77 daniel 6250: * xmlParseReference:
6251: * @ctxt: an XML parser context
6252: *
6253: * parse and handle entity references in content, depending on the SAX
6254: * interface, this may end-up in a call to character() if this is a
1.79 daniel 6255: * CharRef, a predefined entity, if there is no reference() callback.
6256: * or if the parser was asked to switch to that mode.
1.77 daniel 6257: *
6258: * [67] Reference ::= EntityRef | CharRef
6259: */
6260: void
6261: xmlParseReference(xmlParserCtxtPtr ctxt) {
6262: xmlEntityPtr ent;
1.123 daniel 6263: xmlChar *val;
1.152 daniel 6264: if (RAW != '&') return;
1.77 daniel 6265:
1.113 daniel 6266: if (ctxt->inputNr > 1) {
1.123 daniel 6267: xmlChar cur[2] = { '&' , 0 } ;
1.113 daniel 6268:
1.171 daniel 6269: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6270: (!ctxt->disableSAX))
1.113 daniel 6271: ctxt->sax->characters(ctxt->userData, cur, 1);
6272: if (ctxt->token == '&')
6273: ctxt->token = 0;
6274: else {
6275: SKIP(1);
6276: }
6277: return;
6278: }
1.77 daniel 6279: if (NXT(1) == '#') {
1.152 daniel 6280: int i = 0;
1.153 daniel 6281: xmlChar out[10];
6282: int hex = NXT(2);
1.77 daniel 6283: int val = xmlParseCharRef(ctxt);
1.152 daniel 6284:
1.153 daniel 6285: if (ctxt->encoding != NULL) {
6286: /*
6287: * So we are using non-UTF-8 buffers
6288: * Check that the char fit on 8bits, if not
6289: * generate a CharRef.
6290: */
6291: if (val <= 0xFF) {
6292: out[0] = val;
6293: out[1] = 0;
1.171 daniel 6294: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6295: (!ctxt->disableSAX))
1.153 daniel 6296: ctxt->sax->characters(ctxt->userData, out, 1);
6297: } else {
6298: if ((hex == 'x') || (hex == 'X'))
6299: sprintf((char *)out, "#x%X", val);
6300: else
6301: sprintf((char *)out, "#%d", val);
1.171 daniel 6302: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6303: (!ctxt->disableSAX))
1.153 daniel 6304: ctxt->sax->reference(ctxt->userData, out);
6305: }
6306: } else {
6307: /*
6308: * Just encode the value in UTF-8
6309: */
6310: COPY_BUF(0 ,out, i, val);
6311: out[i] = 0;
1.171 daniel 6312: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6313: (!ctxt->disableSAX))
1.153 daniel 6314: ctxt->sax->characters(ctxt->userData, out, i);
6315: }
1.77 daniel 6316: } else {
6317: ent = xmlParseEntityRef(ctxt);
6318: if (ent == NULL) return;
6319: if ((ent->name != NULL) &&
1.159 daniel 6320: (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
1.180 daniel 6321: xmlNodePtr list = NULL;
6322: int ret;
6323:
6324:
6325: /*
6326: * The first reference to the entity trigger a parsing phase
6327: * where the ent->children is filled with the result from
6328: * the parsing.
6329: */
6330: if (ent->children == NULL) {
6331: xmlChar *value;
6332: value = ent->content;
6333:
6334: /*
6335: * Check that this entity is well formed
6336: */
6337: if ((value != NULL) &&
6338: (value[1] == 0) && (value[0] == '<') &&
6339: (!xmlStrcmp(ent->name, BAD_CAST "lt"))) {
6340: /*
6341: * TODO: get definite answer on this !!!
6342: * Lots of entity decls are used to declare a single
6343: * char
6344: * <!ENTITY lt "<">
6345: * Which seems to be valid since
6346: * 2.4: The ampersand character (&) and the left angle
6347: * bracket (<) may appear in their literal form only
6348: * when used ... They are also legal within the literal
6349: * entity value of an internal entity declaration;i
6350: * see "4.3.2 Well-Formed Parsed Entities".
6351: * IMHO 2.4 and 4.3.2 are directly in contradiction.
6352: * Looking at the OASIS test suite and James Clark
6353: * tests, this is broken. However the XML REC uses
6354: * it. Is the XML REC not well-formed ????
6355: * This is a hack to avoid this problem
6356: */
6357: list = xmlNewDocText(ctxt->myDoc, value);
6358: if (list != NULL) {
6359: if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6360: (ent->children == NULL)) {
6361: ent->children = list;
6362: ent->last = list;
6363: list->parent = (xmlNodePtr) ent;
6364: } else {
6365: xmlFreeNodeList(list);
6366: }
6367: } else if (list != NULL) {
6368: xmlFreeNodeList(list);
6369: }
1.181 daniel 6370: } else {
1.180 daniel 6371: /*
6372: * 4.3.2: An internal general parsed entity is well-formed
6373: * if its replacement text matches the production labeled
6374: * content.
6375: */
1.185 daniel 6376: if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6377: ctxt->depth++;
1.180 daniel 6378: ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
1.185 daniel 6379: ctxt->sax, NULL, ctxt->depth,
6380: value, &list);
6381: ctxt->depth--;
6382: } else if (ent->etype ==
6383: XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6384: ctxt->depth++;
1.180 daniel 6385: ret = xmlParseExternalEntity(ctxt->myDoc,
1.185 daniel 6386: ctxt->sax, NULL, ctxt->depth,
6387: ent->SystemID, ent->ExternalID, &list);
6388: ctxt->depth--;
6389: } else {
1.180 daniel 6390: ret = -1;
6391: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6392: ctxt->sax->error(ctxt->userData,
6393: "Internal: invalid entity type\n");
6394: }
1.185 daniel 6395: if (ret == XML_ERR_ENTITY_LOOP) {
6396: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6397: ctxt->sax->error(ctxt->userData,
6398: "Detected entity reference loop\n");
6399: ctxt->wellFormed = 0;
6400: ctxt->disableSAX = 1;
6401: ctxt->errNo = XML_ERR_ENTITY_LOOP;
6402: } else if ((ret == 0) && (list != NULL)) {
1.180 daniel 6403: if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6404: (ent->children == NULL)) {
6405: ent->children = list;
6406: while (list != NULL) {
6407: list->parent = (xmlNodePtr) ent;
6408: if (list->next == NULL)
6409: ent->last = list;
6410: list = list->next;
6411: }
6412: } else {
6413: xmlFreeNodeList(list);
6414: }
6415: } else if (ret > 0) {
6416: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6417: ctxt->sax->error(ctxt->userData,
6418: "Entity value required\n");
6419: ctxt->errNo = ret;
6420: ctxt->wellFormed = 0;
6421: ctxt->disableSAX = 1;
6422: } else if (list != NULL) {
6423: xmlFreeNodeList(list);
6424: }
6425: }
6426: }
1.113 daniel 6427: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
1.171 daniel 6428: (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
1.113 daniel 6429: /*
6430: * Create a node.
6431: */
6432: ctxt->sax->reference(ctxt->userData, ent->name);
6433: return;
6434: } else if (ctxt->replaceEntities) {
6435: xmlParserInputPtr input;
1.79 daniel 6436:
1.113 daniel 6437: input = xmlNewEntityInputStream(ctxt, ent);
6438: xmlPushInput(ctxt, input);
1.167 daniel 6439: if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
6440: (RAW == '<') && (NXT(1) == '?') &&
6441: (NXT(2) == 'x') && (NXT(3) == 'm') &&
6442: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 6443: xmlParseTextDecl(ctxt);
1.167 daniel 6444: if (input->standalone) {
6445: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6446: ctxt->sax->error(ctxt->userData,
6447: "external parsed entities cannot be standalone\n");
6448: ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
6449: ctxt->wellFormed = 0;
1.180 daniel 6450: ctxt->disableSAX = 1;
1.167 daniel 6451: }
6452: }
1.179 daniel 6453: /*
6454: * !!! TODO: build the tree under the entity first
6455: * 1234
6456: */
1.113 daniel 6457: return;
6458: }
1.77 daniel 6459: }
6460: val = ent->content;
6461: if (val == NULL) return;
6462: /*
6463: * inline the entity.
6464: */
1.171 daniel 6465: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6466: (!ctxt->disableSAX))
1.77 daniel 6467: ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6468: }
1.24 daniel 6469: }
6470:
1.50 daniel 6471: /**
6472: * xmlParseEntityRef:
6473: * @ctxt: an XML parser context
6474: *
6475: * parse ENTITY references declarations
1.24 daniel 6476: *
6477: * [68] EntityRef ::= '&' Name ';'
1.68 daniel 6478: *
1.98 daniel 6479: * [ WFC: Entity Declared ]
6480: * In a document without any DTD, a document with only an internal DTD
6481: * subset which contains no parameter entity references, or a document
6482: * with "standalone='yes'", the Name given in the entity reference
6483: * must match that in an entity declaration, except that well-formed
6484: * documents need not declare any of the following entities: amp, lt,
6485: * gt, apos, quot. The declaration of a parameter entity must precede
6486: * any reference to it. Similarly, the declaration of a general entity
6487: * must precede any reference to it which appears in a default value in an
6488: * attribute-list declaration. Note that if entities are declared in the
6489: * external subset or in external parameter entities, a non-validating
6490: * processor is not obligated to read and process their declarations;
6491: * for such documents, the rule that an entity must be declared is a
6492: * well-formedness constraint only if standalone='yes'.
6493: *
6494: * [ WFC: Parsed Entity ]
6495: * An entity reference must not contain the name of an unparsed entity
6496: *
1.77 daniel 6497: * Returns the xmlEntityPtr if found, or NULL otherwise.
1.24 daniel 6498: */
1.77 daniel 6499: xmlEntityPtr
1.55 daniel 6500: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1.123 daniel 6501: xmlChar *name;
1.72 daniel 6502: xmlEntityPtr ent = NULL;
1.24 daniel 6503:
1.91 daniel 6504: GROW;
1.111 daniel 6505:
1.152 daniel 6506: if (RAW == '&') {
1.40 daniel 6507: NEXT;
1.24 daniel 6508: name = xmlParseName(ctxt);
6509: if (name == NULL) {
1.55 daniel 6510: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 6511: ctxt->sax->error(ctxt->userData,
6512: "xmlParseEntityRef: no name\n");
1.123 daniel 6513: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 6514: ctxt->wellFormed = 0;
1.180 daniel 6515: ctxt->disableSAX = 1;
1.24 daniel 6516: } else {
1.152 daniel 6517: if (RAW == ';') {
1.40 daniel 6518: NEXT;
1.24 daniel 6519: /*
1.77 daniel 6520: * Ask first SAX for entity resolution, otherwise try the
6521: * predefined set.
6522: */
6523: if (ctxt->sax != NULL) {
6524: if (ctxt->sax->getEntity != NULL)
6525: ent = ctxt->sax->getEntity(ctxt->userData, name);
6526: if (ent == NULL)
6527: ent = xmlGetPredefinedEntity(name);
6528: }
6529: /*
1.98 daniel 6530: * [ WFC: Entity Declared ]
6531: * In a document without any DTD, a document with only an
6532: * internal DTD subset which contains no parameter entity
6533: * references, or a document with "standalone='yes'", the
6534: * Name given in the entity reference must match that in an
6535: * entity declaration, except that well-formed documents
6536: * need not declare any of the following entities: amp, lt,
6537: * gt, apos, quot.
6538: * The declaration of a parameter entity must precede any
6539: * reference to it.
6540: * Similarly, the declaration of a general entity must
6541: * precede any reference to it which appears in a default
6542: * value in an attribute-list declaration. Note that if
6543: * entities are declared in the external subset or in
6544: * external parameter entities, a non-validating processor
6545: * is not obligated to read and process their declarations;
6546: * for such documents, the rule that an entity must be
6547: * declared is a well-formedness constraint only if
6548: * standalone='yes'.
1.59 daniel 6549: */
1.77 daniel 6550: if (ent == NULL) {
1.98 daniel 6551: if ((ctxt->standalone == 1) ||
6552: ((ctxt->hasExternalSubset == 0) &&
6553: (ctxt->hasPErefs == 0))) {
6554: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 6555: ctxt->sax->error(ctxt->userData,
6556: "Entity '%s' not defined\n", name);
1.123 daniel 6557: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.77 daniel 6558: ctxt->wellFormed = 0;
1.180 daniel 6559: ctxt->disableSAX = 1;
1.77 daniel 6560: } else {
1.98 daniel 6561: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6562: ctxt->sax->warning(ctxt->userData,
6563: "Entity '%s' not defined\n", name);
1.123 daniel 6564: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
1.59 daniel 6565: }
1.77 daniel 6566: }
1.59 daniel 6567:
6568: /*
1.98 daniel 6569: * [ WFC: Parsed Entity ]
6570: * An entity reference must not contain the name of an
6571: * unparsed entity
6572: */
1.159 daniel 6573: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.98 daniel 6574: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6575: ctxt->sax->error(ctxt->userData,
6576: "Entity reference to unparsed entity %s\n", name);
1.123 daniel 6577: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 6578: ctxt->wellFormed = 0;
1.180 daniel 6579: ctxt->disableSAX = 1;
1.98 daniel 6580: }
6581:
6582: /*
6583: * [ WFC: No External Entity References ]
6584: * Attribute values cannot contain direct or indirect
6585: * entity references to external entities.
6586: */
6587: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 6588: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.98 daniel 6589: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6590: ctxt->sax->error(ctxt->userData,
6591: "Attribute references external entity '%s'\n", name);
1.123 daniel 6592: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
1.98 daniel 6593: ctxt->wellFormed = 0;
1.180 daniel 6594: ctxt->disableSAX = 1;
1.98 daniel 6595: }
6596: /*
6597: * [ WFC: No < in Attribute Values ]
6598: * The replacement text of any entity referred to directly or
6599: * indirectly in an attribute value (other than "<") must
6600: * not contain a <.
1.59 daniel 6601: */
1.98 daniel 6602: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.116 daniel 6603: (ent != NULL) &&
6604: (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
1.98 daniel 6605: (ent->content != NULL) &&
6606: (xmlStrchr(ent->content, '<'))) {
6607: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6608: ctxt->sax->error(ctxt->userData,
6609: "'<' in entity '%s' is not allowed in attributes values\n", name);
1.123 daniel 6610: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.98 daniel 6611: ctxt->wellFormed = 0;
1.180 daniel 6612: ctxt->disableSAX = 1;
1.98 daniel 6613: }
6614:
6615: /*
6616: * Internal check, no parameter entities here ...
6617: */
6618: else {
1.159 daniel 6619: switch (ent->etype) {
1.59 daniel 6620: case XML_INTERNAL_PARAMETER_ENTITY:
6621: case XML_EXTERNAL_PARAMETER_ENTITY:
6622: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6623: ctxt->sax->error(ctxt->userData,
1.59 daniel 6624: "Attempt to reference the parameter entity '%s'\n", name);
1.123 daniel 6625: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
1.59 daniel 6626: ctxt->wellFormed = 0;
1.180 daniel 6627: ctxt->disableSAX = 1;
6628: break;
6629: default:
1.59 daniel 6630: break;
6631: }
6632: }
6633:
6634: /*
1.98 daniel 6635: * [ WFC: No Recursion ]
1.117 daniel 6636: * TODO A parsed entity must not contain a recursive reference
6637: * to itself, either directly or indirectly.
1.59 daniel 6638: */
1.77 daniel 6639:
1.24 daniel 6640: } else {
1.55 daniel 6641: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6642: ctxt->sax->error(ctxt->userData,
1.59 daniel 6643: "xmlParseEntityRef: expecting ';'\n");
1.123 daniel 6644: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.59 daniel 6645: ctxt->wellFormed = 0;
1.180 daniel 6646: ctxt->disableSAX = 1;
1.24 daniel 6647: }
1.119 daniel 6648: xmlFree(name);
1.24 daniel 6649: }
6650: }
1.77 daniel 6651: return(ent);
1.24 daniel 6652: }
1.135 daniel 6653: /**
6654: * xmlParseStringEntityRef:
6655: * @ctxt: an XML parser context
6656: * @str: a pointer to an index in the string
6657: *
6658: * parse ENTITY references declarations, but this version parses it from
6659: * a string value.
6660: *
6661: * [68] EntityRef ::= '&' Name ';'
6662: *
6663: * [ WFC: Entity Declared ]
6664: * In a document without any DTD, a document with only an internal DTD
6665: * subset which contains no parameter entity references, or a document
6666: * with "standalone='yes'", the Name given in the entity reference
6667: * must match that in an entity declaration, except that well-formed
6668: * documents need not declare any of the following entities: amp, lt,
6669: * gt, apos, quot. The declaration of a parameter entity must precede
6670: * any reference to it. Similarly, the declaration of a general entity
6671: * must precede any reference to it which appears in a default value in an
6672: * attribute-list declaration. Note that if entities are declared in the
6673: * external subset or in external parameter entities, a non-validating
6674: * processor is not obligated to read and process their declarations;
6675: * for such documents, the rule that an entity must be declared is a
6676: * well-formedness constraint only if standalone='yes'.
6677: *
6678: * [ WFC: Parsed Entity ]
6679: * An entity reference must not contain the name of an unparsed entity
6680: *
6681: * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6682: * is updated to the current location in the string.
6683: */
6684: xmlEntityPtr
6685: xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6686: xmlChar *name;
6687: const xmlChar *ptr;
6688: xmlChar cur;
6689: xmlEntityPtr ent = NULL;
6690:
1.156 daniel 6691: if ((str == NULL) || (*str == NULL))
6692: return(NULL);
1.135 daniel 6693: ptr = *str;
6694: cur = *ptr;
6695: if (cur == '&') {
6696: ptr++;
6697: cur = *ptr;
6698: name = xmlParseStringName(ctxt, &ptr);
6699: if (name == NULL) {
6700: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6701: ctxt->sax->error(ctxt->userData,
6702: "xmlParseEntityRef: no name\n");
6703: ctxt->errNo = XML_ERR_NAME_REQUIRED;
6704: ctxt->wellFormed = 0;
1.180 daniel 6705: ctxt->disableSAX = 1;
1.135 daniel 6706: } else {
1.185 daniel 6707: if (*ptr == ';') {
6708: ptr++;
1.135 daniel 6709: /*
6710: * Ask first SAX for entity resolution, otherwise try the
6711: * predefined set.
6712: */
6713: if (ctxt->sax != NULL) {
6714: if (ctxt->sax->getEntity != NULL)
6715: ent = ctxt->sax->getEntity(ctxt->userData, name);
6716: if (ent == NULL)
6717: ent = xmlGetPredefinedEntity(name);
6718: }
6719: /*
6720: * [ WFC: Entity Declared ]
6721: * In a document without any DTD, a document with only an
6722: * internal DTD subset which contains no parameter entity
6723: * references, or a document with "standalone='yes'", the
6724: * Name given in the entity reference must match that in an
6725: * entity declaration, except that well-formed documents
6726: * need not declare any of the following entities: amp, lt,
6727: * gt, apos, quot.
6728: * The declaration of a parameter entity must precede any
6729: * reference to it.
6730: * Similarly, the declaration of a general entity must
6731: * precede any reference to it which appears in a default
6732: * value in an attribute-list declaration. Note that if
6733: * entities are declared in the external subset or in
6734: * external parameter entities, a non-validating processor
6735: * is not obligated to read and process their declarations;
6736: * for such documents, the rule that an entity must be
6737: * declared is a well-formedness constraint only if
6738: * standalone='yes'.
6739: */
6740: if (ent == NULL) {
6741: if ((ctxt->standalone == 1) ||
6742: ((ctxt->hasExternalSubset == 0) &&
6743: (ctxt->hasPErefs == 0))) {
6744: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6745: ctxt->sax->error(ctxt->userData,
6746: "Entity '%s' not defined\n", name);
6747: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6748: ctxt->wellFormed = 0;
1.180 daniel 6749: ctxt->disableSAX = 1;
1.135 daniel 6750: } else {
6751: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6752: ctxt->sax->warning(ctxt->userData,
6753: "Entity '%s' not defined\n", name);
6754: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
6755: }
6756: }
6757:
6758: /*
6759: * [ WFC: Parsed Entity ]
6760: * An entity reference must not contain the name of an
6761: * unparsed entity
6762: */
1.159 daniel 6763: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.135 daniel 6764: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6765: ctxt->sax->error(ctxt->userData,
6766: "Entity reference to unparsed entity %s\n", name);
6767: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
6768: ctxt->wellFormed = 0;
1.180 daniel 6769: ctxt->disableSAX = 1;
1.135 daniel 6770: }
6771:
6772: /*
6773: * [ WFC: No External Entity References ]
6774: * Attribute values cannot contain direct or indirect
6775: * entity references to external entities.
6776: */
6777: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 6778: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.135 daniel 6779: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6780: ctxt->sax->error(ctxt->userData,
6781: "Attribute references external entity '%s'\n", name);
6782: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
6783: ctxt->wellFormed = 0;
1.180 daniel 6784: ctxt->disableSAX = 1;
1.135 daniel 6785: }
6786: /*
6787: * [ WFC: No < in Attribute Values ]
6788: * The replacement text of any entity referred to directly or
6789: * indirectly in an attribute value (other than "<") must
6790: * not contain a <.
6791: */
6792: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6793: (ent != NULL) &&
6794: (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
6795: (ent->content != NULL) &&
6796: (xmlStrchr(ent->content, '<'))) {
6797: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6798: ctxt->sax->error(ctxt->userData,
6799: "'<' in entity '%s' is not allowed in attributes values\n", name);
6800: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
6801: ctxt->wellFormed = 0;
1.180 daniel 6802: ctxt->disableSAX = 1;
1.135 daniel 6803: }
6804:
6805: /*
6806: * Internal check, no parameter entities here ...
6807: */
6808: else {
1.159 daniel 6809: switch (ent->etype) {
1.135 daniel 6810: case XML_INTERNAL_PARAMETER_ENTITY:
6811: case XML_EXTERNAL_PARAMETER_ENTITY:
6812: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6813: ctxt->sax->error(ctxt->userData,
6814: "Attempt to reference the parameter entity '%s'\n", name);
6815: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
6816: ctxt->wellFormed = 0;
1.180 daniel 6817: ctxt->disableSAX = 1;
6818: break;
6819: default:
1.135 daniel 6820: break;
6821: }
6822: }
6823:
6824: /*
6825: * [ WFC: No Recursion ]
6826: * TODO A parsed entity must not contain a recursive reference
6827: * to itself, either directly or indirectly.
6828: */
6829:
6830: } else {
6831: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6832: ctxt->sax->error(ctxt->userData,
6833: "xmlParseEntityRef: expecting ';'\n");
6834: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6835: ctxt->wellFormed = 0;
1.180 daniel 6836: ctxt->disableSAX = 1;
1.135 daniel 6837: }
6838: xmlFree(name);
6839: }
6840: }
1.185 daniel 6841: *str = ptr;
1.135 daniel 6842: return(ent);
6843: }
1.24 daniel 6844:
1.50 daniel 6845: /**
6846: * xmlParsePEReference:
6847: * @ctxt: an XML parser context
6848: *
6849: * parse PEReference declarations
1.77 daniel 6850: * The entity content is handled directly by pushing it's content as
6851: * a new input stream.
1.22 daniel 6852: *
6853: * [69] PEReference ::= '%' Name ';'
1.68 daniel 6854: *
1.98 daniel 6855: * [ WFC: No Recursion ]
6856: * TODO A parsed entity must not contain a recursive
6857: * reference to itself, either directly or indirectly.
6858: *
6859: * [ WFC: Entity Declared ]
6860: * In a document without any DTD, a document with only an internal DTD
6861: * subset which contains no parameter entity references, or a document
6862: * with "standalone='yes'", ... ... The declaration of a parameter
6863: * entity must precede any reference to it...
6864: *
6865: * [ VC: Entity Declared ]
6866: * In a document with an external subset or external parameter entities
6867: * with "standalone='no'", ... ... The declaration of a parameter entity
6868: * must precede any reference to it...
6869: *
6870: * [ WFC: In DTD ]
6871: * Parameter-entity references may only appear in the DTD.
6872: * NOTE: misleading but this is handled.
1.22 daniel 6873: */
1.77 daniel 6874: void
1.55 daniel 6875: xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 6876: xmlChar *name;
1.72 daniel 6877: xmlEntityPtr entity = NULL;
1.50 daniel 6878: xmlParserInputPtr input;
1.22 daniel 6879:
1.152 daniel 6880: if (RAW == '%') {
1.40 daniel 6881: NEXT;
1.22 daniel 6882: name = xmlParseName(ctxt);
6883: if (name == NULL) {
1.55 daniel 6884: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6885: ctxt->sax->error(ctxt->userData,
6886: "xmlParsePEReference: no name\n");
1.123 daniel 6887: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 6888: ctxt->wellFormed = 0;
1.180 daniel 6889: ctxt->disableSAX = 1;
1.22 daniel 6890: } else {
1.152 daniel 6891: if (RAW == ';') {
1.40 daniel 6892: NEXT;
1.98 daniel 6893: if ((ctxt->sax != NULL) &&
6894: (ctxt->sax->getParameterEntity != NULL))
6895: entity = ctxt->sax->getParameterEntity(ctxt->userData,
6896: name);
1.45 daniel 6897: if (entity == NULL) {
1.98 daniel 6898: /*
6899: * [ WFC: Entity Declared ]
6900: * In a document without any DTD, a document with only an
6901: * internal DTD subset which contains no parameter entity
6902: * references, or a document with "standalone='yes'", ...
6903: * ... The declaration of a parameter entity must precede
6904: * any reference to it...
6905: */
6906: if ((ctxt->standalone == 1) ||
6907: ((ctxt->hasExternalSubset == 0) &&
6908: (ctxt->hasPErefs == 0))) {
6909: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6910: ctxt->sax->error(ctxt->userData,
6911: "PEReference: %%%s; not found\n", name);
1.123 daniel 6912: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.98 daniel 6913: ctxt->wellFormed = 0;
1.180 daniel 6914: ctxt->disableSAX = 1;
1.98 daniel 6915: } else {
6916: /*
6917: * [ VC: Entity Declared ]
6918: * In a document with an external subset or external
6919: * parameter entities with "standalone='no'", ...
6920: * ... The declaration of a parameter entity must precede
6921: * any reference to it...
6922: */
6923: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6924: ctxt->sax->warning(ctxt->userData,
6925: "PEReference: %%%s; not found\n", name);
6926: ctxt->valid = 0;
6927: }
1.50 daniel 6928: } else {
1.98 daniel 6929: /*
6930: * Internal checking in case the entity quest barfed
6931: */
1.159 daniel 6932: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6933: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.98 daniel 6934: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6935: ctxt->sax->warning(ctxt->userData,
6936: "Internal: %%%s; is not a parameter entity\n", name);
6937: } else {
1.164 daniel 6938: /*
6939: * TODO !!!
6940: * handle the extra spaces added before and after
6941: * c.f. http://www.w3.org/TR/REC-xml#as-PE
6942: */
1.98 daniel 6943: input = xmlNewEntityInputStream(ctxt, entity);
6944: xmlPushInput(ctxt, input);
1.164 daniel 6945: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6946: (RAW == '<') && (NXT(1) == '?') &&
6947: (NXT(2) == 'x') && (NXT(3) == 'm') &&
6948: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 6949: xmlParseTextDecl(ctxt);
1.164 daniel 6950: }
6951: if (ctxt->token == 0)
6952: ctxt->token = ' ';
1.98 daniel 6953: }
1.45 daniel 6954: }
1.98 daniel 6955: ctxt->hasPErefs = 1;
1.22 daniel 6956: } else {
1.55 daniel 6957: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6958: ctxt->sax->error(ctxt->userData,
1.59 daniel 6959: "xmlParsePEReference: expecting ';'\n");
1.123 daniel 6960: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.59 daniel 6961: ctxt->wellFormed = 0;
1.180 daniel 6962: ctxt->disableSAX = 1;
1.22 daniel 6963: }
1.119 daniel 6964: xmlFree(name);
1.3 veillard 6965: }
6966: }
6967: }
6968:
1.50 daniel 6969: /**
1.135 daniel 6970: * xmlParseStringPEReference:
6971: * @ctxt: an XML parser context
6972: * @str: a pointer to an index in the string
6973: *
6974: * parse PEReference declarations
6975: *
6976: * [69] PEReference ::= '%' Name ';'
6977: *
6978: * [ WFC: No Recursion ]
6979: * TODO A parsed entity must not contain a recursive
6980: * reference to itself, either directly or indirectly.
6981: *
6982: * [ WFC: Entity Declared ]
6983: * In a document without any DTD, a document with only an internal DTD
6984: * subset which contains no parameter entity references, or a document
6985: * with "standalone='yes'", ... ... The declaration of a parameter
6986: * entity must precede any reference to it...
6987: *
6988: * [ VC: Entity Declared ]
6989: * In a document with an external subset or external parameter entities
6990: * with "standalone='no'", ... ... The declaration of a parameter entity
6991: * must precede any reference to it...
6992: *
6993: * [ WFC: In DTD ]
6994: * Parameter-entity references may only appear in the DTD.
6995: * NOTE: misleading but this is handled.
6996: *
6997: * Returns the string of the entity content.
6998: * str is updated to the current value of the index
6999: */
7000: xmlEntityPtr
7001: xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7002: const xmlChar *ptr;
7003: xmlChar cur;
7004: xmlChar *name;
7005: xmlEntityPtr entity = NULL;
7006:
7007: if ((str == NULL) || (*str == NULL)) return(NULL);
7008: ptr = *str;
7009: cur = *ptr;
7010: if (cur == '%') {
7011: ptr++;
7012: cur = *ptr;
7013: name = xmlParseStringName(ctxt, &ptr);
7014: if (name == NULL) {
7015: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7016: ctxt->sax->error(ctxt->userData,
7017: "xmlParseStringPEReference: no name\n");
7018: ctxt->errNo = XML_ERR_NAME_REQUIRED;
7019: ctxt->wellFormed = 0;
1.180 daniel 7020: ctxt->disableSAX = 1;
1.135 daniel 7021: } else {
7022: cur = *ptr;
7023: if (cur == ';') {
7024: ptr++;
7025: cur = *ptr;
7026: if ((ctxt->sax != NULL) &&
7027: (ctxt->sax->getParameterEntity != NULL))
7028: entity = ctxt->sax->getParameterEntity(ctxt->userData,
7029: name);
7030: if (entity == NULL) {
7031: /*
7032: * [ WFC: Entity Declared ]
7033: * In a document without any DTD, a document with only an
7034: * internal DTD subset which contains no parameter entity
7035: * references, or a document with "standalone='yes'", ...
7036: * ... The declaration of a parameter entity must precede
7037: * any reference to it...
7038: */
7039: if ((ctxt->standalone == 1) ||
7040: ((ctxt->hasExternalSubset == 0) &&
7041: (ctxt->hasPErefs == 0))) {
7042: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7043: ctxt->sax->error(ctxt->userData,
7044: "PEReference: %%%s; not found\n", name);
7045: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
7046: ctxt->wellFormed = 0;
1.180 daniel 7047: ctxt->disableSAX = 1;
1.135 daniel 7048: } else {
7049: /*
7050: * [ VC: Entity Declared ]
7051: * In a document with an external subset or external
7052: * parameter entities with "standalone='no'", ...
7053: * ... The declaration of a parameter entity must
7054: * precede any reference to it...
7055: */
7056: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7057: ctxt->sax->warning(ctxt->userData,
7058: "PEReference: %%%s; not found\n", name);
7059: ctxt->valid = 0;
7060: }
7061: } else {
7062: /*
7063: * Internal checking in case the entity quest barfed
7064: */
1.159 daniel 7065: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7066: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.135 daniel 7067: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7068: ctxt->sax->warning(ctxt->userData,
7069: "Internal: %%%s; is not a parameter entity\n", name);
7070: }
7071: }
7072: ctxt->hasPErefs = 1;
7073: } else {
7074: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7075: ctxt->sax->error(ctxt->userData,
7076: "xmlParseStringPEReference: expecting ';'\n");
7077: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
7078: ctxt->wellFormed = 0;
1.180 daniel 7079: ctxt->disableSAX = 1;
1.135 daniel 7080: }
7081: xmlFree(name);
7082: }
7083: }
7084: *str = ptr;
7085: return(entity);
7086: }
7087:
7088: /**
1.181 daniel 7089: * xmlParseDocTypeDecl:
1.50 daniel 7090: * @ctxt: an XML parser context
7091: *
7092: * parse a DOCTYPE declaration
1.21 daniel 7093: *
1.22 daniel 7094: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7095: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.98 daniel 7096: *
7097: * [ VC: Root Element Type ]
1.99 daniel 7098: * The Name in the document type declaration must match the element
1.98 daniel 7099: * type of the root element.
1.21 daniel 7100: */
7101:
1.55 daniel 7102: void
7103: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.165 daniel 7104: xmlChar *name = NULL;
1.123 daniel 7105: xmlChar *ExternalID = NULL;
7106: xmlChar *URI = NULL;
1.21 daniel 7107:
7108: /*
7109: * We know that '<!DOCTYPE' has been detected.
7110: */
1.40 daniel 7111: SKIP(9);
1.21 daniel 7112:
1.42 daniel 7113: SKIP_BLANKS;
1.21 daniel 7114:
7115: /*
7116: * Parse the DOCTYPE name.
7117: */
7118: name = xmlParseName(ctxt);
7119: if (name == NULL) {
1.55 daniel 7120: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7121: ctxt->sax->error(ctxt->userData,
7122: "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.59 daniel 7123: ctxt->wellFormed = 0;
1.180 daniel 7124: ctxt->disableSAX = 1;
1.123 daniel 7125: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.21 daniel 7126: }
1.165 daniel 7127: ctxt->intSubName = name;
1.21 daniel 7128:
1.42 daniel 7129: SKIP_BLANKS;
1.21 daniel 7130:
7131: /*
1.22 daniel 7132: * Check for SystemID and ExternalID
7133: */
1.67 daniel 7134: URI = xmlParseExternalID(ctxt, &ExternalID, 1);
1.98 daniel 7135:
7136: if ((URI != NULL) || (ExternalID != NULL)) {
7137: ctxt->hasExternalSubset = 1;
7138: }
1.165 daniel 7139: ctxt->extSubURI = URI;
7140: ctxt->extSubSystem = ExternalID;
1.98 daniel 7141:
1.42 daniel 7142: SKIP_BLANKS;
1.36 daniel 7143:
1.76 daniel 7144: /*
1.165 daniel 7145: * Create and update the internal subset.
1.76 daniel 7146: */
1.171 daniel 7147: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7148: (!ctxt->disableSAX))
1.74 daniel 7149: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
1.22 daniel 7150:
7151: /*
1.140 daniel 7152: * Is there any internal subset declarations ?
7153: * they are handled separately in xmlParseInternalSubset()
7154: */
1.152 daniel 7155: if (RAW == '[')
1.140 daniel 7156: return;
7157:
7158: /*
7159: * We should be at the end of the DOCTYPE declaration.
7160: */
1.152 daniel 7161: if (RAW != '>') {
1.140 daniel 7162: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7163: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
7164: ctxt->wellFormed = 0;
1.180 daniel 7165: ctxt->disableSAX = 1;
1.140 daniel 7166: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
7167: }
7168: NEXT;
7169: }
7170:
7171: /**
1.181 daniel 7172: * xmlParseInternalsubset:
1.140 daniel 7173: * @ctxt: an XML parser context
7174: *
7175: * parse the internal subset declaration
7176: *
7177: * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7178: */
7179:
7180: void
7181: xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7182: /*
1.22 daniel 7183: * Is there any DTD definition ?
7184: */
1.152 daniel 7185: if (RAW == '[') {
1.96 daniel 7186: ctxt->instate = XML_PARSER_DTD;
1.40 daniel 7187: NEXT;
1.22 daniel 7188: /*
7189: * Parse the succession of Markup declarations and
7190: * PEReferences.
7191: * Subsequence (markupdecl | PEReference | S)*
7192: */
1.152 daniel 7193: while (RAW != ']') {
1.123 daniel 7194: const xmlChar *check = CUR_PTR;
1.115 daniel 7195: int cons = ctxt->input->consumed;
1.22 daniel 7196:
1.42 daniel 7197: SKIP_BLANKS;
1.22 daniel 7198: xmlParseMarkupDecl(ctxt);
1.50 daniel 7199: xmlParsePEReference(ctxt);
1.22 daniel 7200:
1.115 daniel 7201: /*
7202: * Pop-up of finished entities.
7203: */
1.152 daniel 7204: while ((RAW == 0) && (ctxt->inputNr > 1))
1.115 daniel 7205: xmlPopInput(ctxt);
7206:
1.118 daniel 7207: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
1.55 daniel 7208: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7209: ctxt->sax->error(ctxt->userData,
1.140 daniel 7210: "xmlParseInternalSubset: error detected in Markup declaration\n");
1.59 daniel 7211: ctxt->wellFormed = 0;
1.180 daniel 7212: ctxt->disableSAX = 1;
1.123 daniel 7213: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.22 daniel 7214: break;
7215: }
7216: }
1.152 daniel 7217: if (RAW == ']') NEXT;
1.22 daniel 7218: }
7219:
7220: /*
7221: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 7222: */
1.152 daniel 7223: if (RAW != '>') {
1.55 daniel 7224: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7225: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
1.59 daniel 7226: ctxt->wellFormed = 0;
1.180 daniel 7227: ctxt->disableSAX = 1;
1.123 daniel 7228: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
1.21 daniel 7229: }
1.40 daniel 7230: NEXT;
1.21 daniel 7231: }
7232:
1.50 daniel 7233: /**
7234: * xmlParseAttribute:
7235: * @ctxt: an XML parser context
1.123 daniel 7236: * @value: a xmlChar ** used to store the value of the attribute
1.50 daniel 7237: *
7238: * parse an attribute
1.3 veillard 7239: *
1.22 daniel 7240: * [41] Attribute ::= Name Eq AttValue
7241: *
1.98 daniel 7242: * [ WFC: No External Entity References ]
7243: * Attribute values cannot contain direct or indirect entity references
7244: * to external entities.
7245: *
7246: * [ WFC: No < in Attribute Values ]
7247: * The replacement text of any entity referred to directly or indirectly in
7248: * an attribute value (other than "<") must not contain a <.
7249: *
7250: * [ VC: Attribute Value Type ]
1.117 daniel 7251: * The attribute must have been declared; the value must be of the type
1.99 daniel 7252: * declared for it.
1.98 daniel 7253: *
1.22 daniel 7254: * [25] Eq ::= S? '=' S?
7255: *
1.29 daniel 7256: * With namespace:
7257: *
7258: * [NS 11] Attribute ::= QName Eq AttValue
1.43 daniel 7259: *
7260: * Also the case QName == xmlns:??? is handled independently as a namespace
7261: * definition.
1.69 daniel 7262: *
1.72 daniel 7263: * Returns the attribute name, and the value in *value.
1.3 veillard 7264: */
7265:
1.123 daniel 7266: xmlChar *
7267: xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
7268: xmlChar *name, *val;
1.3 veillard 7269:
1.72 daniel 7270: *value = NULL;
7271: name = xmlParseName(ctxt);
1.22 daniel 7272: if (name == NULL) {
1.55 daniel 7273: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7274: ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
1.59 daniel 7275: ctxt->wellFormed = 0;
1.180 daniel 7276: ctxt->disableSAX = 1;
1.123 daniel 7277: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.52 daniel 7278: return(NULL);
1.3 veillard 7279: }
7280:
7281: /*
1.29 daniel 7282: * read the value
1.3 veillard 7283: */
1.42 daniel 7284: SKIP_BLANKS;
1.152 daniel 7285: if (RAW == '=') {
1.40 daniel 7286: NEXT;
1.42 daniel 7287: SKIP_BLANKS;
1.72 daniel 7288: val = xmlParseAttValue(ctxt);
1.96 daniel 7289: ctxt->instate = XML_PARSER_CONTENT;
1.29 daniel 7290: } else {
1.55 daniel 7291: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7292: ctxt->sax->error(ctxt->userData,
1.59 daniel 7293: "Specification mandate value for attribute %s\n", name);
1.123 daniel 7294: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
1.59 daniel 7295: ctxt->wellFormed = 0;
1.180 daniel 7296: ctxt->disableSAX = 1;
1.170 daniel 7297: xmlFree(name);
1.52 daniel 7298: return(NULL);
1.43 daniel 7299: }
7300:
1.172 daniel 7301: /*
7302: * Check that xml:lang conforms to the specification
7303: */
7304: if (!xmlStrcmp(name, BAD_CAST "xml:lang")) {
7305: if (!xmlCheckLanguageID(val)) {
7306: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7307: ctxt->sax->error(ctxt->userData,
7308: "Invalid value for xml:lang : %s\n", val);
7309: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
7310: ctxt->wellFormed = 0;
1.180 daniel 7311: ctxt->disableSAX = 1;
1.172 daniel 7312: }
7313: }
7314:
1.176 daniel 7315: /*
7316: * Check that xml:space conforms to the specification
7317: */
7318: if (!xmlStrcmp(name, BAD_CAST "xml:space")) {
7319: if (!xmlStrcmp(val, BAD_CAST "default"))
7320: *(ctxt->space) = 0;
7321: else if (!xmlStrcmp(val, BAD_CAST "preserve"))
7322: *(ctxt->space) = 1;
7323: else {
7324: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7325: ctxt->sax->error(ctxt->userData,
7326: "Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
7327: val);
7328: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
7329: ctxt->wellFormed = 0;
1.180 daniel 7330: ctxt->disableSAX = 1;
1.176 daniel 7331: }
7332: }
7333:
1.72 daniel 7334: *value = val;
7335: return(name);
1.3 veillard 7336: }
7337:
1.50 daniel 7338: /**
7339: * xmlParseStartTag:
7340: * @ctxt: an XML parser context
7341: *
7342: * parse a start of tag either for rule element or
7343: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 7344: *
7345: * [40] STag ::= '<' Name (S Attribute)* S? '>'
7346: *
1.98 daniel 7347: * [ WFC: Unique Att Spec ]
7348: * No attribute name may appear more than once in the same start-tag or
7349: * empty-element tag.
7350: *
1.29 daniel 7351: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7352: *
1.98 daniel 7353: * [ WFC: Unique Att Spec ]
7354: * No attribute name may appear more than once in the same start-tag or
7355: * empty-element tag.
7356: *
1.29 daniel 7357: * With namespace:
7358: *
7359: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7360: *
7361: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.83 daniel 7362: *
1.129 daniel 7363: * Returne the element name parsed
1.2 veillard 7364: */
7365:
1.123 daniel 7366: xmlChar *
1.69 daniel 7367: xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 7368: xmlChar *name;
7369: xmlChar *attname;
7370: xmlChar *attvalue;
7371: const xmlChar **atts = NULL;
1.72 daniel 7372: int nbatts = 0;
7373: int maxatts = 0;
7374: int i;
1.2 veillard 7375:
1.152 daniel 7376: if (RAW != '<') return(NULL);
1.40 daniel 7377: NEXT;
1.3 veillard 7378:
1.72 daniel 7379: name = xmlParseName(ctxt);
1.59 daniel 7380: if (name == NULL) {
7381: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7382: ctxt->sax->error(ctxt->userData,
1.59 daniel 7383: "xmlParseStartTag: invalid element name\n");
1.123 daniel 7384: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 7385: ctxt->wellFormed = 0;
1.180 daniel 7386: ctxt->disableSAX = 1;
1.83 daniel 7387: return(NULL);
1.50 daniel 7388: }
7389:
7390: /*
1.3 veillard 7391: * Now parse the attributes, it ends up with the ending
7392: *
7393: * (S Attribute)* S?
7394: */
1.42 daniel 7395: SKIP_BLANKS;
1.91 daniel 7396: GROW;
1.168 daniel 7397:
1.153 daniel 7398: while ((IS_CHAR(RAW)) &&
1.152 daniel 7399: (RAW != '>') &&
7400: ((RAW != '/') || (NXT(1) != '>'))) {
1.123 daniel 7401: const xmlChar *q = CUR_PTR;
1.91 daniel 7402: int cons = ctxt->input->consumed;
1.29 daniel 7403:
1.72 daniel 7404: attname = xmlParseAttribute(ctxt, &attvalue);
7405: if ((attname != NULL) && (attvalue != NULL)) {
7406: /*
1.98 daniel 7407: * [ WFC: Unique Att Spec ]
7408: * No attribute name may appear more than once in the same
7409: * start-tag or empty-element tag.
1.72 daniel 7410: */
7411: for (i = 0; i < nbatts;i += 2) {
7412: if (!xmlStrcmp(atts[i], attname)) {
7413: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 7414: ctxt->sax->error(ctxt->userData,
7415: "Attribute %s redefined\n",
7416: attname);
1.72 daniel 7417: ctxt->wellFormed = 0;
1.180 daniel 7418: ctxt->disableSAX = 1;
1.123 daniel 7419: ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
1.119 daniel 7420: xmlFree(attname);
7421: xmlFree(attvalue);
1.98 daniel 7422: goto failed;
1.72 daniel 7423: }
7424: }
7425:
7426: /*
7427: * Add the pair to atts
7428: */
7429: if (atts == NULL) {
7430: maxatts = 10;
1.123 daniel 7431: atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
1.72 daniel 7432: if (atts == NULL) {
1.86 daniel 7433: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 7434: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 7435: return(NULL);
1.72 daniel 7436: }
1.127 daniel 7437: } else if (nbatts + 4 > maxatts) {
1.72 daniel 7438: maxatts *= 2;
1.123 daniel 7439: atts = (const xmlChar **) xmlRealloc(atts,
7440: maxatts * sizeof(xmlChar *));
1.72 daniel 7441: if (atts == NULL) {
1.86 daniel 7442: fprintf(stderr, "realloc of %ld byte failed\n",
1.123 daniel 7443: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 7444: return(NULL);
1.72 daniel 7445: }
7446: }
7447: atts[nbatts++] = attname;
7448: atts[nbatts++] = attvalue;
7449: atts[nbatts] = NULL;
7450: atts[nbatts + 1] = NULL;
1.176 daniel 7451: } else {
7452: if (attname != NULL)
7453: xmlFree(attname);
7454: if (attvalue != NULL)
7455: xmlFree(attvalue);
1.72 daniel 7456: }
7457:
1.116 daniel 7458: failed:
1.168 daniel 7459:
7460: if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7461: break;
7462: if (!IS_BLANK(RAW)) {
7463: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7464: ctxt->sax->error(ctxt->userData,
7465: "attributes construct error\n");
7466: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7467: ctxt->wellFormed = 0;
1.180 daniel 7468: ctxt->disableSAX = 1;
1.168 daniel 7469: }
1.42 daniel 7470: SKIP_BLANKS;
1.91 daniel 7471: if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
1.55 daniel 7472: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7473: ctxt->sax->error(ctxt->userData,
1.31 daniel 7474: "xmlParseStartTag: problem parsing attributes\n");
1.123 daniel 7475: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 7476: ctxt->wellFormed = 0;
1.180 daniel 7477: ctxt->disableSAX = 1;
1.29 daniel 7478: break;
1.3 veillard 7479: }
1.91 daniel 7480: GROW;
1.3 veillard 7481: }
7482:
1.43 daniel 7483: /*
1.72 daniel 7484: * SAX: Start of Element !
1.43 daniel 7485: */
1.171 daniel 7486: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
7487: (!ctxt->disableSAX))
1.74 daniel 7488: ctxt->sax->startElement(ctxt->userData, name, atts);
1.43 daniel 7489:
1.72 daniel 7490: if (atts != NULL) {
1.123 daniel 7491: for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
1.119 daniel 7492: xmlFree(atts);
1.72 daniel 7493: }
1.83 daniel 7494: return(name);
1.3 veillard 7495: }
7496:
1.50 daniel 7497: /**
7498: * xmlParseEndTag:
7499: * @ctxt: an XML parser context
7500: *
7501: * parse an end of tag
1.27 daniel 7502: *
7503: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 7504: *
7505: * With namespace
7506: *
1.72 daniel 7507: * [NS 9] ETag ::= '</' QName S? '>'
1.7 veillard 7508: */
7509:
1.55 daniel 7510: void
1.140 daniel 7511: xmlParseEndTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 7512: xmlChar *name;
1.140 daniel 7513: xmlChar *oldname;
1.7 veillard 7514:
1.91 daniel 7515: GROW;
1.152 daniel 7516: if ((RAW != '<') || (NXT(1) != '/')) {
1.55 daniel 7517: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7518: ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
1.59 daniel 7519: ctxt->wellFormed = 0;
1.180 daniel 7520: ctxt->disableSAX = 1;
1.123 daniel 7521: ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
1.27 daniel 7522: return;
7523: }
1.40 daniel 7524: SKIP(2);
1.7 veillard 7525:
1.72 daniel 7526: name = xmlParseName(ctxt);
1.7 veillard 7527:
7528: /*
7529: * We should definitely be at the ending "S? '>'" part
7530: */
1.91 daniel 7531: GROW;
1.42 daniel 7532: SKIP_BLANKS;
1.153 daniel 7533: if ((!IS_CHAR(RAW)) || (RAW != '>')) {
1.55 daniel 7534: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7535: ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
1.123 daniel 7536: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.59 daniel 7537: ctxt->wellFormed = 0;
1.180 daniel 7538: ctxt->disableSAX = 1;
1.7 veillard 7539: } else
1.40 daniel 7540: NEXT;
1.7 veillard 7541:
1.72 daniel 7542: /*
1.98 daniel 7543: * [ WFC: Element Type Match ]
7544: * The Name in an element's end-tag must match the element type in the
7545: * start-tag.
7546: *
1.83 daniel 7547: */
1.147 daniel 7548: if ((name == NULL) || (ctxt->name == NULL) ||
7549: (xmlStrcmp(name, ctxt->name))) {
7550: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
7551: if ((name != NULL) && (ctxt->name != NULL)) {
7552: ctxt->sax->error(ctxt->userData,
7553: "Opening and ending tag mismatch: %s and %s\n",
7554: ctxt->name, name);
7555: } else if (ctxt->name != NULL) {
7556: ctxt->sax->error(ctxt->userData,
7557: "Ending tag eror for: %s\n", ctxt->name);
7558: } else {
7559: ctxt->sax->error(ctxt->userData,
7560: "Ending tag error: internal error ???\n");
7561: }
1.122 daniel 7562:
1.147 daniel 7563: }
1.123 daniel 7564: ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
1.83 daniel 7565: ctxt->wellFormed = 0;
1.180 daniel 7566: ctxt->disableSAX = 1;
1.83 daniel 7567: }
7568:
7569: /*
1.72 daniel 7570: * SAX: End of Tag
7571: */
1.171 daniel 7572: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7573: (!ctxt->disableSAX))
1.74 daniel 7574: ctxt->sax->endElement(ctxt->userData, name);
1.72 daniel 7575:
7576: if (name != NULL)
1.119 daniel 7577: xmlFree(name);
1.140 daniel 7578: oldname = namePop(ctxt);
1.176 daniel 7579: spacePop(ctxt);
1.140 daniel 7580: if (oldname != NULL) {
7581: #ifdef DEBUG_STACK
7582: fprintf(stderr,"Close: popped %s\n", oldname);
7583: #endif
7584: xmlFree(oldname);
7585: }
1.7 veillard 7586: return;
7587: }
7588:
1.50 daniel 7589: /**
7590: * xmlParseCDSect:
7591: * @ctxt: an XML parser context
7592: *
7593: * Parse escaped pure raw content.
1.29 daniel 7594: *
7595: * [18] CDSect ::= CDStart CData CDEnd
7596: *
7597: * [19] CDStart ::= '<![CDATA['
7598: *
7599: * [20] Data ::= (Char* - (Char* ']]>' Char*))
7600: *
7601: * [21] CDEnd ::= ']]>'
1.3 veillard 7602: */
1.55 daniel 7603: void
7604: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.135 daniel 7605: xmlChar *buf = NULL;
7606: int len = 0;
1.140 daniel 7607: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 7608: int r, rl;
7609: int s, sl;
7610: int cur, l;
1.3 veillard 7611:
1.106 daniel 7612: if ((NXT(0) == '<') && (NXT(1) == '!') &&
1.40 daniel 7613: (NXT(2) == '[') && (NXT(3) == 'C') &&
7614: (NXT(4) == 'D') && (NXT(5) == 'A') &&
7615: (NXT(6) == 'T') && (NXT(7) == 'A') &&
7616: (NXT(8) == '[')) {
7617: SKIP(9);
1.29 daniel 7618: } else
1.45 daniel 7619: return;
1.109 daniel 7620:
7621: ctxt->instate = XML_PARSER_CDATA_SECTION;
1.152 daniel 7622: r = CUR_CHAR(rl);
7623: if (!IS_CHAR(r)) {
1.55 daniel 7624: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7625: ctxt->sax->error(ctxt->userData,
1.135 daniel 7626: "CData section not finished\n");
1.59 daniel 7627: ctxt->wellFormed = 0;
1.180 daniel 7628: ctxt->disableSAX = 1;
1.123 daniel 7629: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.109 daniel 7630: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 7631: return;
1.3 veillard 7632: }
1.152 daniel 7633: NEXTL(rl);
7634: s = CUR_CHAR(sl);
7635: if (!IS_CHAR(s)) {
1.55 daniel 7636: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7637: ctxt->sax->error(ctxt->userData,
1.135 daniel 7638: "CData section not finished\n");
1.123 daniel 7639: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.59 daniel 7640: ctxt->wellFormed = 0;
1.180 daniel 7641: ctxt->disableSAX = 1;
1.109 daniel 7642: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 7643: return;
1.3 veillard 7644: }
1.152 daniel 7645: NEXTL(sl);
7646: cur = CUR_CHAR(l);
1.135 daniel 7647: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7648: if (buf == NULL) {
7649: fprintf(stderr, "malloc of %d byte failed\n", size);
7650: return;
7651: }
1.108 veillard 7652: while (IS_CHAR(cur) &&
1.110 daniel 7653: ((r != ']') || (s != ']') || (cur != '>'))) {
1.152 daniel 7654: if (len + 5 >= size) {
1.135 daniel 7655: size *= 2;
7656: buf = xmlRealloc(buf, size * sizeof(xmlChar));
7657: if (buf == NULL) {
7658: fprintf(stderr, "realloc of %d byte failed\n", size);
7659: return;
7660: }
7661: }
1.152 daniel 7662: COPY_BUF(rl,buf,len,r);
1.110 daniel 7663: r = s;
1.152 daniel 7664: rl = sl;
1.110 daniel 7665: s = cur;
1.152 daniel 7666: sl = l;
7667: NEXTL(l);
7668: cur = CUR_CHAR(l);
1.3 veillard 7669: }
1.135 daniel 7670: buf[len] = 0;
1.109 daniel 7671: ctxt->instate = XML_PARSER_CONTENT;
1.152 daniel 7672: if (cur != '>') {
1.55 daniel 7673: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7674: ctxt->sax->error(ctxt->userData,
1.135 daniel 7675: "CData section not finished\n%.50s\n", buf);
1.123 daniel 7676: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.59 daniel 7677: ctxt->wellFormed = 0;
1.180 daniel 7678: ctxt->disableSAX = 1;
1.135 daniel 7679: xmlFree(buf);
1.45 daniel 7680: return;
1.3 veillard 7681: }
1.152 daniel 7682: NEXTL(l);
1.16 daniel 7683:
1.45 daniel 7684: /*
1.135 daniel 7685: * Ok the buffer is to be consumed as cdata.
1.45 daniel 7686: */
1.171 daniel 7687: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.107 daniel 7688: if (ctxt->sax->cdataBlock != NULL)
1.135 daniel 7689: ctxt->sax->cdataBlock(ctxt->userData, buf, len);
1.45 daniel 7690: }
1.135 daniel 7691: xmlFree(buf);
1.2 veillard 7692: }
7693:
1.50 daniel 7694: /**
7695: * xmlParseContent:
7696: * @ctxt: an XML parser context
7697: *
7698: * Parse a content:
1.2 veillard 7699: *
1.27 daniel 7700: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 7701: */
7702:
1.55 daniel 7703: void
7704: xmlParseContent(xmlParserCtxtPtr ctxt) {
1.97 daniel 7705: GROW;
1.176 daniel 7706: while (((RAW != 0) || (ctxt->token != 0)) &&
7707: ((RAW != '<') || (NXT(1) != '/'))) {
1.123 daniel 7708: const xmlChar *test = CUR_PTR;
1.91 daniel 7709: int cons = ctxt->input->consumed;
1.123 daniel 7710: xmlChar tok = ctxt->token;
1.27 daniel 7711:
7712: /*
1.152 daniel 7713: * Handle possible processed charrefs.
7714: */
7715: if (ctxt->token != 0) {
7716: xmlParseCharData(ctxt, 0);
7717: }
7718: /*
1.27 daniel 7719: * First case : a Processing Instruction.
7720: */
1.152 daniel 7721: else if ((RAW == '<') && (NXT(1) == '?')) {
1.27 daniel 7722: xmlParsePI(ctxt);
7723: }
1.72 daniel 7724:
1.27 daniel 7725: /*
7726: * Second case : a CDSection
7727: */
1.152 daniel 7728: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7729: (NXT(2) == '[') && (NXT(3) == 'C') &&
7730: (NXT(4) == 'D') && (NXT(5) == 'A') &&
7731: (NXT(6) == 'T') && (NXT(7) == 'A') &&
7732: (NXT(8) == '[')) {
1.45 daniel 7733: xmlParseCDSect(ctxt);
1.27 daniel 7734: }
1.72 daniel 7735:
1.27 daniel 7736: /*
7737: * Third case : a comment
7738: */
1.152 daniel 7739: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7740: (NXT(2) == '-') && (NXT(3) == '-')) {
1.114 daniel 7741: xmlParseComment(ctxt);
1.97 daniel 7742: ctxt->instate = XML_PARSER_CONTENT;
1.27 daniel 7743: }
1.72 daniel 7744:
1.27 daniel 7745: /*
7746: * Fourth case : a sub-element.
7747: */
1.152 daniel 7748: else if (RAW == '<') {
1.72 daniel 7749: xmlParseElement(ctxt);
1.45 daniel 7750: }
1.72 daniel 7751:
1.45 daniel 7752: /*
1.50 daniel 7753: * Fifth case : a reference. If if has not been resolved,
7754: * parsing returns it's Name, create the node
1.45 daniel 7755: */
1.97 daniel 7756:
1.152 daniel 7757: else if (RAW == '&') {
1.77 daniel 7758: xmlParseReference(ctxt);
1.27 daniel 7759: }
1.72 daniel 7760:
1.27 daniel 7761: /*
7762: * Last case, text. Note that References are handled directly.
7763: */
7764: else {
1.45 daniel 7765: xmlParseCharData(ctxt, 0);
1.3 veillard 7766: }
1.14 veillard 7767:
1.91 daniel 7768: GROW;
1.14 veillard 7769: /*
1.45 daniel 7770: * Pop-up of finished entities.
1.14 veillard 7771: */
1.152 daniel 7772: while ((RAW == 0) && (ctxt->inputNr > 1))
1.69 daniel 7773: xmlPopInput(ctxt);
1.135 daniel 7774: SHRINK;
1.45 daniel 7775:
1.113 daniel 7776: if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
7777: (tok == ctxt->token)) {
1.55 daniel 7778: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7779: ctxt->sax->error(ctxt->userData,
1.59 daniel 7780: "detected an error in element content\n");
1.123 daniel 7781: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 7782: ctxt->wellFormed = 0;
1.180 daniel 7783: ctxt->disableSAX = 1;
1.29 daniel 7784: break;
7785: }
1.3 veillard 7786: }
1.2 veillard 7787: }
7788:
1.50 daniel 7789: /**
7790: * xmlParseElement:
7791: * @ctxt: an XML parser context
7792: *
7793: * parse an XML element, this is highly recursive
1.26 daniel 7794: *
7795: * [39] element ::= EmptyElemTag | STag content ETag
7796: *
1.98 daniel 7797: * [ WFC: Element Type Match ]
7798: * The Name in an element's end-tag must match the element type in the
7799: * start-tag.
7800: *
7801: * [ VC: Element Valid ]
1.117 daniel 7802: * An element is valid if there is a declaration matching elementdecl
1.99 daniel 7803: * where the Name matches the element type and one of the following holds:
7804: * - The declaration matches EMPTY and the element has no content.
7805: * - The declaration matches children and the sequence of child elements
7806: * belongs to the language generated by the regular expression in the
7807: * content model, with optional white space (characters matching the
7808: * nonterminal S) between each pair of child elements.
7809: * - The declaration matches Mixed and the content consists of character
7810: * data and child elements whose types match names in the content model.
7811: * - The declaration matches ANY, and the types of any child elements have
7812: * been declared.
1.2 veillard 7813: */
1.26 daniel 7814:
1.72 daniel 7815: void
1.69 daniel 7816: xmlParseElement(xmlParserCtxtPtr ctxt) {
1.123 daniel 7817: const xmlChar *openTag = CUR_PTR;
7818: xmlChar *name;
1.140 daniel 7819: xmlChar *oldname;
1.32 daniel 7820: xmlParserNodeInfo node_info;
1.118 daniel 7821: xmlNodePtr ret;
1.2 veillard 7822:
1.32 daniel 7823: /* Capture start position */
1.118 daniel 7824: if (ctxt->record_info) {
7825: node_info.begin_pos = ctxt->input->consumed +
7826: (CUR_PTR - ctxt->input->base);
7827: node_info.begin_line = ctxt->input->line;
7828: }
1.32 daniel 7829:
1.176 daniel 7830: if (ctxt->spaceNr == 0)
7831: spacePush(ctxt, -1);
7832: else
7833: spacePush(ctxt, *ctxt->space);
7834:
1.83 daniel 7835: name = xmlParseStartTag(ctxt);
7836: if (name == NULL) {
1.176 daniel 7837: spacePop(ctxt);
1.83 daniel 7838: return;
7839: }
1.140 daniel 7840: namePush(ctxt, name);
1.118 daniel 7841: ret = ctxt->node;
1.2 veillard 7842:
7843: /*
1.99 daniel 7844: * [ VC: Root Element Type ]
7845: * The Name in the document type declaration must match the element
7846: * type of the root element.
7847: */
1.105 daniel 7848: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 7849: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.102 daniel 7850: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
1.99 daniel 7851:
7852: /*
1.2 veillard 7853: * Check for an Empty Element.
7854: */
1.152 daniel 7855: if ((RAW == '/') && (NXT(1) == '>')) {
1.40 daniel 7856: SKIP(2);
1.171 daniel 7857: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7858: (!ctxt->disableSAX))
1.83 daniel 7859: ctxt->sax->endElement(ctxt->userData, name);
1.140 daniel 7860: oldname = namePop(ctxt);
1.176 daniel 7861: spacePop(ctxt);
1.140 daniel 7862: if (oldname != NULL) {
7863: #ifdef DEBUG_STACK
7864: fprintf(stderr,"Close: popped %s\n", oldname);
7865: #endif
7866: xmlFree(oldname);
7867: }
1.72 daniel 7868: return;
1.2 veillard 7869: }
1.152 daniel 7870: if (RAW == '>') {
1.91 daniel 7871: NEXT;
7872: } else {
1.55 daniel 7873: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7874: ctxt->sax->error(ctxt->userData,
7875: "Couldn't find end of Start Tag\n%.30s\n",
1.57 daniel 7876: openTag);
1.59 daniel 7877: ctxt->wellFormed = 0;
1.180 daniel 7878: ctxt->disableSAX = 1;
1.123 daniel 7879: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.45 daniel 7880:
7881: /*
7882: * end of parsing of this node.
7883: */
7884: nodePop(ctxt);
1.140 daniel 7885: oldname = namePop(ctxt);
1.176 daniel 7886: spacePop(ctxt);
1.140 daniel 7887: if (oldname != NULL) {
7888: #ifdef DEBUG_STACK
7889: fprintf(stderr,"Close: popped %s\n", oldname);
7890: #endif
7891: xmlFree(oldname);
7892: }
1.118 daniel 7893:
7894: /*
7895: * Capture end position and add node
7896: */
7897: if ( ret != NULL && ctxt->record_info ) {
7898: node_info.end_pos = ctxt->input->consumed +
7899: (CUR_PTR - ctxt->input->base);
7900: node_info.end_line = ctxt->input->line;
7901: node_info.node = ret;
7902: xmlParserAddNodeInfo(ctxt, &node_info);
7903: }
1.72 daniel 7904: return;
1.2 veillard 7905: }
7906:
7907: /*
7908: * Parse the content of the element:
7909: */
1.45 daniel 7910: xmlParseContent(ctxt);
1.153 daniel 7911: if (!IS_CHAR(RAW)) {
1.55 daniel 7912: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7913: ctxt->sax->error(ctxt->userData,
1.57 daniel 7914: "Premature end of data in tag %.30s\n", openTag);
1.59 daniel 7915: ctxt->wellFormed = 0;
1.180 daniel 7916: ctxt->disableSAX = 1;
1.123 daniel 7917: ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
1.45 daniel 7918:
7919: /*
7920: * end of parsing of this node.
7921: */
7922: nodePop(ctxt);
1.140 daniel 7923: oldname = namePop(ctxt);
1.176 daniel 7924: spacePop(ctxt);
1.140 daniel 7925: if (oldname != NULL) {
7926: #ifdef DEBUG_STACK
7927: fprintf(stderr,"Close: popped %s\n", oldname);
7928: #endif
7929: xmlFree(oldname);
7930: }
1.72 daniel 7931: return;
1.2 veillard 7932: }
7933:
7934: /*
1.27 daniel 7935: * parse the end of tag: '</' should be here.
1.2 veillard 7936: */
1.140 daniel 7937: xmlParseEndTag(ctxt);
1.118 daniel 7938:
7939: /*
7940: * Capture end position and add node
7941: */
7942: if ( ret != NULL && ctxt->record_info ) {
7943: node_info.end_pos = ctxt->input->consumed +
7944: (CUR_PTR - ctxt->input->base);
7945: node_info.end_line = ctxt->input->line;
7946: node_info.node = ret;
7947: xmlParserAddNodeInfo(ctxt, &node_info);
7948: }
1.2 veillard 7949: }
7950:
1.50 daniel 7951: /**
7952: * xmlParseVersionNum:
7953: * @ctxt: an XML parser context
7954: *
7955: * parse the XML version value.
1.29 daniel 7956: *
7957: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
1.68 daniel 7958: *
7959: * Returns the string giving the XML version number, or NULL
1.29 daniel 7960: */
1.123 daniel 7961: xmlChar *
1.55 daniel 7962: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.135 daniel 7963: xmlChar *buf = NULL;
7964: int len = 0;
7965: int size = 10;
7966: xmlChar cur;
1.29 daniel 7967:
1.135 daniel 7968: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7969: if (buf == NULL) {
7970: fprintf(stderr, "malloc of %d byte failed\n", size);
7971: return(NULL);
7972: }
7973: cur = CUR;
1.152 daniel 7974: while (((cur >= 'a') && (cur <= 'z')) ||
7975: ((cur >= 'A') && (cur <= 'Z')) ||
7976: ((cur >= '0') && (cur <= '9')) ||
7977: (cur == '_') || (cur == '.') ||
7978: (cur == ':') || (cur == '-')) {
1.135 daniel 7979: if (len + 1 >= size) {
7980: size *= 2;
7981: buf = xmlRealloc(buf, size * sizeof(xmlChar));
7982: if (buf == NULL) {
7983: fprintf(stderr, "realloc of %d byte failed\n", size);
7984: return(NULL);
7985: }
7986: }
7987: buf[len++] = cur;
7988: NEXT;
7989: cur=CUR;
7990: }
7991: buf[len] = 0;
7992: return(buf);
1.29 daniel 7993: }
7994:
1.50 daniel 7995: /**
7996: * xmlParseVersionInfo:
7997: * @ctxt: an XML parser context
7998: *
7999: * parse the XML version.
1.29 daniel 8000: *
8001: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8002: *
8003: * [25] Eq ::= S? '=' S?
1.50 daniel 8004: *
1.68 daniel 8005: * Returns the version string, e.g. "1.0"
1.29 daniel 8006: */
8007:
1.123 daniel 8008: xmlChar *
1.55 daniel 8009: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
1.123 daniel 8010: xmlChar *version = NULL;
8011: const xmlChar *q;
1.29 daniel 8012:
1.152 daniel 8013: if ((RAW == 'v') && (NXT(1) == 'e') &&
1.40 daniel 8014: (NXT(2) == 'r') && (NXT(3) == 's') &&
8015: (NXT(4) == 'i') && (NXT(5) == 'o') &&
8016: (NXT(6) == 'n')) {
8017: SKIP(7);
1.42 daniel 8018: SKIP_BLANKS;
1.152 daniel 8019: if (RAW != '=') {
1.55 daniel 8020: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8021: ctxt->sax->error(ctxt->userData,
8022: "xmlParseVersionInfo : expected '='\n");
1.59 daniel 8023: ctxt->wellFormed = 0;
1.180 daniel 8024: ctxt->disableSAX = 1;
1.123 daniel 8025: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.31 daniel 8026: return(NULL);
8027: }
1.40 daniel 8028: NEXT;
1.42 daniel 8029: SKIP_BLANKS;
1.152 daniel 8030: if (RAW == '"') {
1.40 daniel 8031: NEXT;
8032: q = CUR_PTR;
1.29 daniel 8033: version = xmlParseVersionNum(ctxt);
1.152 daniel 8034: if (RAW != '"') {
1.55 daniel 8035: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8036: ctxt->sax->error(ctxt->userData,
8037: "String not closed\n%.50s\n", q);
1.59 daniel 8038: ctxt->wellFormed = 0;
1.180 daniel 8039: ctxt->disableSAX = 1;
1.123 daniel 8040: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8041: } else
1.40 daniel 8042: NEXT;
1.152 daniel 8043: } else if (RAW == '\''){
1.40 daniel 8044: NEXT;
8045: q = CUR_PTR;
1.29 daniel 8046: version = xmlParseVersionNum(ctxt);
1.152 daniel 8047: if (RAW != '\'') {
1.55 daniel 8048: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8049: ctxt->sax->error(ctxt->userData,
8050: "String not closed\n%.50s\n", q);
1.123 daniel 8051: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.59 daniel 8052: ctxt->wellFormed = 0;
1.180 daniel 8053: ctxt->disableSAX = 1;
1.55 daniel 8054: } else
1.40 daniel 8055: NEXT;
1.31 daniel 8056: } else {
1.55 daniel 8057: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8058: ctxt->sax->error(ctxt->userData,
1.59 daniel 8059: "xmlParseVersionInfo : expected ' or \"\n");
1.122 daniel 8060: ctxt->wellFormed = 0;
1.180 daniel 8061: ctxt->disableSAX = 1;
1.123 daniel 8062: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.29 daniel 8063: }
8064: }
8065: return(version);
8066: }
8067:
1.50 daniel 8068: /**
8069: * xmlParseEncName:
8070: * @ctxt: an XML parser context
8071: *
8072: * parse the XML encoding name
1.29 daniel 8073: *
8074: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1.50 daniel 8075: *
1.68 daniel 8076: * Returns the encoding name value or NULL
1.29 daniel 8077: */
1.123 daniel 8078: xmlChar *
1.55 daniel 8079: xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.135 daniel 8080: xmlChar *buf = NULL;
8081: int len = 0;
8082: int size = 10;
8083: xmlChar cur;
1.29 daniel 8084:
1.135 daniel 8085: cur = CUR;
8086: if (((cur >= 'a') && (cur <= 'z')) ||
8087: ((cur >= 'A') && (cur <= 'Z'))) {
8088: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
8089: if (buf == NULL) {
8090: fprintf(stderr, "malloc of %d byte failed\n", size);
8091: return(NULL);
8092: }
8093:
8094: buf[len++] = cur;
1.40 daniel 8095: NEXT;
1.135 daniel 8096: cur = CUR;
1.152 daniel 8097: while (((cur >= 'a') && (cur <= 'z')) ||
8098: ((cur >= 'A') && (cur <= 'Z')) ||
8099: ((cur >= '0') && (cur <= '9')) ||
8100: (cur == '.') || (cur == '_') ||
8101: (cur == '-')) {
1.135 daniel 8102: if (len + 1 >= size) {
8103: size *= 2;
8104: buf = xmlRealloc(buf, size * sizeof(xmlChar));
8105: if (buf == NULL) {
8106: fprintf(stderr, "realloc of %d byte failed\n", size);
8107: return(NULL);
8108: }
8109: }
8110: buf[len++] = cur;
8111: NEXT;
8112: cur = CUR;
8113: if (cur == 0) {
8114: SHRINK;
8115: GROW;
8116: cur = CUR;
8117: }
8118: }
8119: buf[len] = 0;
1.29 daniel 8120: } else {
1.55 daniel 8121: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8122: ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
1.59 daniel 8123: ctxt->wellFormed = 0;
1.180 daniel 8124: ctxt->disableSAX = 1;
1.123 daniel 8125: ctxt->errNo = XML_ERR_ENCODING_NAME;
1.29 daniel 8126: }
1.135 daniel 8127: return(buf);
1.29 daniel 8128: }
8129:
1.50 daniel 8130: /**
8131: * xmlParseEncodingDecl:
8132: * @ctxt: an XML parser context
8133: *
8134: * parse the XML encoding declaration
1.29 daniel 8135: *
8136: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
1.50 daniel 8137: *
8138: * TODO: this should setup the conversion filters.
8139: *
1.68 daniel 8140: * Returns the encoding value or NULL
1.29 daniel 8141: */
8142:
1.123 daniel 8143: xmlChar *
1.55 daniel 8144: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 8145: xmlChar *encoding = NULL;
8146: const xmlChar *q;
1.29 daniel 8147:
1.42 daniel 8148: SKIP_BLANKS;
1.152 daniel 8149: if ((RAW == 'e') && (NXT(1) == 'n') &&
1.40 daniel 8150: (NXT(2) == 'c') && (NXT(3) == 'o') &&
8151: (NXT(4) == 'd') && (NXT(5) == 'i') &&
8152: (NXT(6) == 'n') && (NXT(7) == 'g')) {
8153: SKIP(8);
1.42 daniel 8154: SKIP_BLANKS;
1.152 daniel 8155: if (RAW != '=') {
1.55 daniel 8156: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8157: ctxt->sax->error(ctxt->userData,
8158: "xmlParseEncodingDecl : expected '='\n");
1.59 daniel 8159: ctxt->wellFormed = 0;
1.180 daniel 8160: ctxt->disableSAX = 1;
1.123 daniel 8161: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.31 daniel 8162: return(NULL);
8163: }
1.40 daniel 8164: NEXT;
1.42 daniel 8165: SKIP_BLANKS;
1.152 daniel 8166: if (RAW == '"') {
1.40 daniel 8167: NEXT;
8168: q = CUR_PTR;
1.29 daniel 8169: encoding = xmlParseEncName(ctxt);
1.152 daniel 8170: if (RAW != '"') {
1.55 daniel 8171: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8172: ctxt->sax->error(ctxt->userData,
8173: "String not closed\n%.50s\n", q);
1.59 daniel 8174: ctxt->wellFormed = 0;
1.180 daniel 8175: ctxt->disableSAX = 1;
1.123 daniel 8176: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8177: } else
1.40 daniel 8178: NEXT;
1.152 daniel 8179: } else if (RAW == '\''){
1.40 daniel 8180: NEXT;
8181: q = CUR_PTR;
1.29 daniel 8182: encoding = xmlParseEncName(ctxt);
1.152 daniel 8183: if (RAW != '\'') {
1.55 daniel 8184: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8185: ctxt->sax->error(ctxt->userData,
8186: "String not closed\n%.50s\n", q);
1.59 daniel 8187: ctxt->wellFormed = 0;
1.180 daniel 8188: ctxt->disableSAX = 1;
1.123 daniel 8189: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8190: } else
1.40 daniel 8191: NEXT;
1.152 daniel 8192: } else if (RAW == '"'){
1.55 daniel 8193: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8194: ctxt->sax->error(ctxt->userData,
1.59 daniel 8195: "xmlParseEncodingDecl : expected ' or \"\n");
8196: ctxt->wellFormed = 0;
1.180 daniel 8197: ctxt->disableSAX = 1;
1.123 daniel 8198: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.29 daniel 8199: }
8200: }
8201: return(encoding);
8202: }
8203:
1.50 daniel 8204: /**
8205: * xmlParseSDDecl:
8206: * @ctxt: an XML parser context
8207: *
8208: * parse the XML standalone declaration
1.29 daniel 8209: *
8210: * [32] SDDecl ::= S 'standalone' Eq
8211: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
1.99 daniel 8212: *
8213: * [ VC: Standalone Document Declaration ]
8214: * TODO The standalone document declaration must have the value "no"
8215: * if any external markup declarations contain declarations of:
8216: * - attributes with default values, if elements to which these
8217: * attributes apply appear in the document without specifications
8218: * of values for these attributes, or
8219: * - entities (other than amp, lt, gt, apos, quot), if references
8220: * to those entities appear in the document, or
8221: * - attributes with values subject to normalization, where the
8222: * attribute appears in the document with a value which will change
8223: * as a result of normalization, or
8224: * - element types with element content, if white space occurs directly
8225: * within any instance of those types.
1.68 daniel 8226: *
8227: * Returns 1 if standalone, 0 otherwise
1.29 daniel 8228: */
8229:
1.55 daniel 8230: int
8231: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 8232: int standalone = -1;
8233:
1.42 daniel 8234: SKIP_BLANKS;
1.152 daniel 8235: if ((RAW == 's') && (NXT(1) == 't') &&
1.40 daniel 8236: (NXT(2) == 'a') && (NXT(3) == 'n') &&
8237: (NXT(4) == 'd') && (NXT(5) == 'a') &&
8238: (NXT(6) == 'l') && (NXT(7) == 'o') &&
8239: (NXT(8) == 'n') && (NXT(9) == 'e')) {
8240: SKIP(10);
1.81 daniel 8241: SKIP_BLANKS;
1.152 daniel 8242: if (RAW != '=') {
1.55 daniel 8243: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8244: ctxt->sax->error(ctxt->userData,
1.59 daniel 8245: "XML standalone declaration : expected '='\n");
1.123 daniel 8246: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.59 daniel 8247: ctxt->wellFormed = 0;
1.180 daniel 8248: ctxt->disableSAX = 1;
1.32 daniel 8249: return(standalone);
8250: }
1.40 daniel 8251: NEXT;
1.42 daniel 8252: SKIP_BLANKS;
1.152 daniel 8253: if (RAW == '\''){
1.40 daniel 8254: NEXT;
1.152 daniel 8255: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 8256: standalone = 0;
1.40 daniel 8257: SKIP(2);
1.152 daniel 8258: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 8259: (NXT(2) == 's')) {
1.29 daniel 8260: standalone = 1;
1.40 daniel 8261: SKIP(3);
1.29 daniel 8262: } else {
1.55 daniel 8263: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8264: ctxt->sax->error(ctxt->userData,
8265: "standalone accepts only 'yes' or 'no'\n");
1.123 daniel 8266: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.59 daniel 8267: ctxt->wellFormed = 0;
1.180 daniel 8268: ctxt->disableSAX = 1;
1.29 daniel 8269: }
1.152 daniel 8270: if (RAW != '\'') {
1.55 daniel 8271: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8272: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.123 daniel 8273: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.59 daniel 8274: ctxt->wellFormed = 0;
1.180 daniel 8275: ctxt->disableSAX = 1;
1.55 daniel 8276: } else
1.40 daniel 8277: NEXT;
1.152 daniel 8278: } else if (RAW == '"'){
1.40 daniel 8279: NEXT;
1.152 daniel 8280: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 8281: standalone = 0;
1.40 daniel 8282: SKIP(2);
1.152 daniel 8283: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 8284: (NXT(2) == 's')) {
1.29 daniel 8285: standalone = 1;
1.40 daniel 8286: SKIP(3);
1.29 daniel 8287: } else {
1.55 daniel 8288: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8289: ctxt->sax->error(ctxt->userData,
1.59 daniel 8290: "standalone accepts only 'yes' or 'no'\n");
1.123 daniel 8291: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.59 daniel 8292: ctxt->wellFormed = 0;
1.180 daniel 8293: ctxt->disableSAX = 1;
1.29 daniel 8294: }
1.152 daniel 8295: if (RAW != '"') {
1.55 daniel 8296: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8297: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 8298: ctxt->wellFormed = 0;
1.180 daniel 8299: ctxt->disableSAX = 1;
1.123 daniel 8300: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8301: } else
1.40 daniel 8302: NEXT;
1.37 daniel 8303: } else {
1.55 daniel 8304: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8305: ctxt->sax->error(ctxt->userData,
8306: "Standalone value not found\n");
1.59 daniel 8307: ctxt->wellFormed = 0;
1.180 daniel 8308: ctxt->disableSAX = 1;
1.123 daniel 8309: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.37 daniel 8310: }
1.29 daniel 8311: }
8312: return(standalone);
8313: }
8314:
1.50 daniel 8315: /**
8316: * xmlParseXMLDecl:
8317: * @ctxt: an XML parser context
8318: *
8319: * parse an XML declaration header
1.29 daniel 8320: *
8321: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 8322: */
8323:
1.55 daniel 8324: void
8325: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 8326: xmlChar *version;
1.1 veillard 8327:
8328: /*
1.19 daniel 8329: * We know that '<?xml' is here.
1.1 veillard 8330: */
1.40 daniel 8331: SKIP(5);
1.1 veillard 8332:
1.153 daniel 8333: if (!IS_BLANK(RAW)) {
1.59 daniel 8334: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8335: ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
1.123 daniel 8336: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 8337: ctxt->wellFormed = 0;
1.180 daniel 8338: ctxt->disableSAX = 1;
1.59 daniel 8339: }
1.42 daniel 8340: SKIP_BLANKS;
1.1 veillard 8341:
8342: /*
1.29 daniel 8343: * We should have the VersionInfo here.
1.1 veillard 8344: */
1.29 daniel 8345: version = xmlParseVersionInfo(ctxt);
8346: if (version == NULL)
1.45 daniel 8347: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.72 daniel 8348: ctxt->version = xmlStrdup(version);
1.119 daniel 8349: xmlFree(version);
1.29 daniel 8350:
8351: /*
8352: * We may have the encoding declaration
8353: */
1.153 daniel 8354: if (!IS_BLANK(RAW)) {
1.152 daniel 8355: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 8356: SKIP(2);
8357: return;
8358: }
8359: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8360: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.123 daniel 8361: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 8362: ctxt->wellFormed = 0;
1.180 daniel 8363: ctxt->disableSAX = 1;
1.59 daniel 8364: }
1.164 daniel 8365: ctxt->input->encoding = xmlParseEncodingDecl(ctxt);
1.1 veillard 8366:
8367: /*
1.29 daniel 8368: * We may have the standalone status.
1.1 veillard 8369: */
1.164 daniel 8370: if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
1.152 daniel 8371: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 8372: SKIP(2);
8373: return;
8374: }
8375: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8376: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 8377: ctxt->wellFormed = 0;
1.180 daniel 8378: ctxt->disableSAX = 1;
1.123 daniel 8379: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 8380: }
8381: SKIP_BLANKS;
1.167 daniel 8382: ctxt->input->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 8383:
1.42 daniel 8384: SKIP_BLANKS;
1.152 daniel 8385: if ((RAW == '?') && (NXT(1) == '>')) {
1.40 daniel 8386: SKIP(2);
1.152 daniel 8387: } else if (RAW == '>') {
1.31 daniel 8388: /* Deprecated old WD ... */
1.55 daniel 8389: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8390: ctxt->sax->error(ctxt->userData,
8391: "XML declaration must end-up with '?>'\n");
1.59 daniel 8392: ctxt->wellFormed = 0;
1.180 daniel 8393: ctxt->disableSAX = 1;
1.123 daniel 8394: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.40 daniel 8395: NEXT;
1.29 daniel 8396: } else {
1.55 daniel 8397: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8398: ctxt->sax->error(ctxt->userData,
8399: "parsing XML declaration: '?>' expected\n");
1.59 daniel 8400: ctxt->wellFormed = 0;
1.180 daniel 8401: ctxt->disableSAX = 1;
1.123 daniel 8402: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.40 daniel 8403: MOVETO_ENDTAG(CUR_PTR);
8404: NEXT;
1.29 daniel 8405: }
1.1 veillard 8406: }
8407:
1.50 daniel 8408: /**
8409: * xmlParseMisc:
8410: * @ctxt: an XML parser context
8411: *
8412: * parse an XML Misc* optionnal field.
1.21 daniel 8413: *
1.22 daniel 8414: * [27] Misc ::= Comment | PI | S
1.1 veillard 8415: */
8416:
1.55 daniel 8417: void
8418: xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.152 daniel 8419: while (((RAW == '<') && (NXT(1) == '?')) ||
8420: ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 8421: (NXT(2) == '-') && (NXT(3) == '-')) ||
8422: IS_BLANK(CUR)) {
1.152 daniel 8423: if ((RAW == '<') && (NXT(1) == '?')) {
1.16 daniel 8424: xmlParsePI(ctxt);
1.40 daniel 8425: } else if (IS_BLANK(CUR)) {
8426: NEXT;
1.1 veillard 8427: } else
1.114 daniel 8428: xmlParseComment(ctxt);
1.1 veillard 8429: }
8430: }
8431:
1.50 daniel 8432: /**
1.181 daniel 8433: * xmlParseDocument:
1.50 daniel 8434: * @ctxt: an XML parser context
8435: *
8436: * parse an XML document (and build a tree if using the standard SAX
8437: * interface).
1.21 daniel 8438: *
1.22 daniel 8439: * [1] document ::= prolog element Misc*
1.29 daniel 8440: *
8441: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.50 daniel 8442: *
1.68 daniel 8443: * Returns 0, -1 in case of error. the parser context is augmented
1.50 daniel 8444: * as a result of the parsing.
1.1 veillard 8445: */
8446:
1.55 daniel 8447: int
8448: xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.156 daniel 8449: xmlChar start[4];
8450: xmlCharEncoding enc;
8451:
1.45 daniel 8452: xmlDefaultSAXHandlerInit();
8453:
1.91 daniel 8454: GROW;
8455:
1.14 veillard 8456: /*
1.44 daniel 8457: * SAX: beginning of the document processing.
8458: */
1.72 daniel 8459: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
1.74 daniel 8460: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
1.44 daniel 8461:
1.156 daniel 8462: /*
8463: * Get the 4 first bytes and decode the charset
8464: * if enc != XML_CHAR_ENCODING_NONE
8465: * plug some encoding conversion routines.
8466: */
8467: start[0] = RAW;
8468: start[1] = NXT(1);
8469: start[2] = NXT(2);
8470: start[3] = NXT(3);
8471: enc = xmlDetectCharEncoding(start, 4);
8472: if (enc != XML_CHAR_ENCODING_NONE) {
8473: xmlSwitchEncoding(ctxt, enc);
8474: }
8475:
1.1 veillard 8476:
1.59 daniel 8477: if (CUR == 0) {
8478: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8479: ctxt->sax->error(ctxt->userData, "Document is empty\n");
1.123 daniel 8480: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 8481: ctxt->wellFormed = 0;
1.180 daniel 8482: ctxt->disableSAX = 1;
1.59 daniel 8483: }
1.1 veillard 8484:
8485: /*
8486: * Check for the XMLDecl in the Prolog.
8487: */
1.91 daniel 8488: GROW;
1.152 daniel 8489: if ((RAW == '<') && (NXT(1) == '?') &&
1.40 daniel 8490: (NXT(2) == 'x') && (NXT(3) == 'm') &&
1.142 daniel 8491: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.19 daniel 8492: xmlParseXMLDecl(ctxt);
1.167 daniel 8493: ctxt->standalone = ctxt->input->standalone;
1.42 daniel 8494: SKIP_BLANKS;
1.164 daniel 8495: if ((ctxt->encoding == NULL) && (ctxt->input->encoding != NULL))
8496: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8497:
1.1 veillard 8498: } else {
1.72 daniel 8499: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.1 veillard 8500: }
1.171 daniel 8501: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
1.74 daniel 8502: ctxt->sax->startDocument(ctxt->userData);
1.1 veillard 8503:
8504: /*
8505: * The Misc part of the Prolog
8506: */
1.91 daniel 8507: GROW;
1.16 daniel 8508: xmlParseMisc(ctxt);
1.1 veillard 8509:
8510: /*
1.29 daniel 8511: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 8512: * (doctypedecl Misc*)?
8513: */
1.91 daniel 8514: GROW;
1.152 daniel 8515: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 8516: (NXT(2) == 'D') && (NXT(3) == 'O') &&
8517: (NXT(4) == 'C') && (NXT(5) == 'T') &&
8518: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
8519: (NXT(8) == 'E')) {
1.165 daniel 8520:
1.166 daniel 8521: ctxt->inSubset = 1;
1.22 daniel 8522: xmlParseDocTypeDecl(ctxt);
1.152 daniel 8523: if (RAW == '[') {
1.140 daniel 8524: ctxt->instate = XML_PARSER_DTD;
8525: xmlParseInternalSubset(ctxt);
8526: }
1.165 daniel 8527:
8528: /*
8529: * Create and update the external subset.
8530: */
1.166 daniel 8531: ctxt->inSubset = 2;
1.171 daniel 8532: if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8533: (!ctxt->disableSAX))
1.165 daniel 8534: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8535: ctxt->extSubSystem, ctxt->extSubURI);
1.166 daniel 8536: ctxt->inSubset = 0;
1.165 daniel 8537:
8538:
1.96 daniel 8539: ctxt->instate = XML_PARSER_PROLOG;
1.22 daniel 8540: xmlParseMisc(ctxt);
1.21 daniel 8541: }
8542:
8543: /*
8544: * Time to start parsing the tree itself
1.1 veillard 8545: */
1.91 daniel 8546: GROW;
1.152 daniel 8547: if (RAW != '<') {
1.59 daniel 8548: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8549: ctxt->sax->error(ctxt->userData,
1.151 daniel 8550: "Start tag expected, '<' not found\n");
1.140 daniel 8551: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 8552: ctxt->wellFormed = 0;
1.180 daniel 8553: ctxt->disableSAX = 1;
1.140 daniel 8554: ctxt->instate = XML_PARSER_EOF;
8555: } else {
8556: ctxt->instate = XML_PARSER_CONTENT;
8557: xmlParseElement(ctxt);
8558: ctxt->instate = XML_PARSER_EPILOG;
8559:
8560:
8561: /*
8562: * The Misc part at the end
8563: */
8564: xmlParseMisc(ctxt);
8565:
1.152 daniel 8566: if (RAW != 0) {
1.140 daniel 8567: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8568: ctxt->sax->error(ctxt->userData,
8569: "Extra content at the end of the document\n");
8570: ctxt->wellFormed = 0;
1.180 daniel 8571: ctxt->disableSAX = 1;
1.140 daniel 8572: ctxt->errNo = XML_ERR_DOCUMENT_END;
8573: }
8574: ctxt->instate = XML_PARSER_EOF;
1.59 daniel 8575: }
8576:
1.44 daniel 8577: /*
8578: * SAX: end of the document processing.
8579: */
1.171 daniel 8580: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8581: (!ctxt->disableSAX))
1.74 daniel 8582: ctxt->sax->endDocument(ctxt->userData);
1.151 daniel 8583:
8584: /*
8585: * Grab the encoding if it was added on-the-fly
8586: */
8587: if ((ctxt->encoding != NULL) && (ctxt->myDoc != NULL) &&
8588: (ctxt->myDoc->encoding == NULL)) {
8589: ctxt->myDoc->encoding = ctxt->encoding;
8590: ctxt->encoding = NULL;
8591: }
1.59 daniel 8592: if (! ctxt->wellFormed) return(-1);
1.16 daniel 8593: return(0);
8594: }
8595:
1.98 daniel 8596: /************************************************************************
8597: * *
1.128 daniel 8598: * Progressive parsing interfaces *
8599: * *
8600: ************************************************************************/
8601:
8602: /**
8603: * xmlParseLookupSequence:
8604: * @ctxt: an XML parser context
8605: * @first: the first char to lookup
1.140 daniel 8606: * @next: the next char to lookup or zero
8607: * @third: the next char to lookup or zero
1.128 daniel 8608: *
1.140 daniel 8609: * Try to find if a sequence (first, next, third) or just (first next) or
8610: * (first) is available in the input stream.
8611: * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8612: * to avoid rescanning sequences of bytes, it DOES change the state of the
8613: * parser, do not use liberally.
1.128 daniel 8614: *
1.140 daniel 8615: * Returns the index to the current parsing point if the full sequence
8616: * is available, -1 otherwise.
1.128 daniel 8617: */
8618: int
1.140 daniel 8619: xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8620: xmlChar next, xmlChar third) {
8621: int base, len;
8622: xmlParserInputPtr in;
8623: const xmlChar *buf;
8624:
8625: in = ctxt->input;
8626: if (in == NULL) return(-1);
8627: base = in->cur - in->base;
8628: if (base < 0) return(-1);
8629: if (ctxt->checkIndex > base)
8630: base = ctxt->checkIndex;
8631: if (in->buf == NULL) {
8632: buf = in->base;
8633: len = in->length;
8634: } else {
8635: buf = in->buf->buffer->content;
8636: len = in->buf->buffer->use;
8637: }
8638: /* take into account the sequence length */
8639: if (third) len -= 2;
8640: else if (next) len --;
8641: for (;base < len;base++) {
8642: if (buf[base] == first) {
8643: if (third != 0) {
8644: if ((buf[base + 1] != next) ||
8645: (buf[base + 2] != third)) continue;
8646: } else if (next != 0) {
8647: if (buf[base + 1] != next) continue;
8648: }
8649: ctxt->checkIndex = 0;
8650: #ifdef DEBUG_PUSH
8651: if (next == 0)
8652: fprintf(stderr, "PP: lookup '%c' found at %d\n",
8653: first, base);
8654: else if (third == 0)
8655: fprintf(stderr, "PP: lookup '%c%c' found at %d\n",
8656: first, next, base);
8657: else
8658: fprintf(stderr, "PP: lookup '%c%c%c' found at %d\n",
8659: first, next, third, base);
8660: #endif
8661: return(base - (in->cur - in->base));
8662: }
8663: }
8664: ctxt->checkIndex = base;
8665: #ifdef DEBUG_PUSH
8666: if (next == 0)
8667: fprintf(stderr, "PP: lookup '%c' failed\n", first);
8668: else if (third == 0)
8669: fprintf(stderr, "PP: lookup '%c%c' failed\n", first, next);
8670: else
8671: fprintf(stderr, "PP: lookup '%c%c%c' failed\n", first, next, third);
8672: #endif
8673: return(-1);
1.128 daniel 8674: }
8675:
8676: /**
1.143 daniel 8677: * xmlParseTryOrFinish:
1.128 daniel 8678: * @ctxt: an XML parser context
1.143 daniel 8679: * @terminate: last chunk indicator
1.128 daniel 8680: *
8681: * Try to progress on parsing
8682: *
8683: * Returns zero if no parsing was possible
8684: */
8685: int
1.143 daniel 8686: xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
1.128 daniel 8687: int ret = 0;
1.140 daniel 8688: int avail;
8689: xmlChar cur, next;
8690:
8691: #ifdef DEBUG_PUSH
8692: switch (ctxt->instate) {
8693: case XML_PARSER_EOF:
8694: fprintf(stderr, "PP: try EOF\n"); break;
8695: case XML_PARSER_START:
8696: fprintf(stderr, "PP: try START\n"); break;
8697: case XML_PARSER_MISC:
8698: fprintf(stderr, "PP: try MISC\n");break;
8699: case XML_PARSER_COMMENT:
8700: fprintf(stderr, "PP: try COMMENT\n");break;
8701: case XML_PARSER_PROLOG:
8702: fprintf(stderr, "PP: try PROLOG\n");break;
8703: case XML_PARSER_START_TAG:
8704: fprintf(stderr, "PP: try START_TAG\n");break;
8705: case XML_PARSER_CONTENT:
8706: fprintf(stderr, "PP: try CONTENT\n");break;
8707: case XML_PARSER_CDATA_SECTION:
8708: fprintf(stderr, "PP: try CDATA_SECTION\n");break;
8709: case XML_PARSER_END_TAG:
8710: fprintf(stderr, "PP: try END_TAG\n");break;
8711: case XML_PARSER_ENTITY_DECL:
8712: fprintf(stderr, "PP: try ENTITY_DECL\n");break;
8713: case XML_PARSER_ENTITY_VALUE:
8714: fprintf(stderr, "PP: try ENTITY_VALUE\n");break;
8715: case XML_PARSER_ATTRIBUTE_VALUE:
8716: fprintf(stderr, "PP: try ATTRIBUTE_VALUE\n");break;
8717: case XML_PARSER_DTD:
8718: fprintf(stderr, "PP: try DTD\n");break;
8719: case XML_PARSER_EPILOG:
8720: fprintf(stderr, "PP: try EPILOG\n");break;
8721: case XML_PARSER_PI:
8722: fprintf(stderr, "PP: try PI\n");break;
8723: }
8724: #endif
1.128 daniel 8725:
8726: while (1) {
1.140 daniel 8727: /*
8728: * Pop-up of finished entities.
8729: */
1.152 daniel 8730: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 8731: xmlPopInput(ctxt);
8732:
1.184 daniel 8733: if (ctxt->input ==NULL) break;
8734: if (ctxt->input->buf == NULL)
8735: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8736: else
1.184 daniel 8737: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8738: if (avail < 1)
8739: goto done;
1.128 daniel 8740: switch (ctxt->instate) {
8741: case XML_PARSER_EOF:
1.140 daniel 8742: /*
8743: * Document parsing is done !
8744: */
8745: goto done;
8746: case XML_PARSER_START:
8747: /*
8748: * Very first chars read from the document flow.
8749: */
1.184 daniel 8750: cur = ctxt->input->cur[0];
1.140 daniel 8751: if (IS_BLANK(cur)) {
8752: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8753: ctxt->sax->setDocumentLocator(ctxt->userData,
8754: &xmlDefaultSAXLocator);
8755: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8756: ctxt->sax->error(ctxt->userData,
8757: "Extra spaces at the beginning of the document are not allowed\n");
8758: ctxt->errNo = XML_ERR_DOCUMENT_START;
8759: ctxt->wellFormed = 0;
1.180 daniel 8760: ctxt->disableSAX = 1;
1.140 daniel 8761: SKIP_BLANKS;
8762: ret++;
1.184 daniel 8763: if (ctxt->input->buf == NULL)
8764: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8765: else
1.184 daniel 8766: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8767: }
8768: if (avail < 2)
8769: goto done;
8770:
1.184 daniel 8771: cur = ctxt->input->cur[0];
8772: next = ctxt->input->cur[1];
1.140 daniel 8773: if (cur == 0) {
8774: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8775: ctxt->sax->setDocumentLocator(ctxt->userData,
8776: &xmlDefaultSAXLocator);
8777: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8778: ctxt->sax->error(ctxt->userData, "Document is empty\n");
8779: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8780: ctxt->wellFormed = 0;
1.180 daniel 8781: ctxt->disableSAX = 1;
1.140 daniel 8782: ctxt->instate = XML_PARSER_EOF;
8783: #ifdef DEBUG_PUSH
8784: fprintf(stderr, "PP: entering EOF\n");
8785: #endif
8786: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8787: ctxt->sax->endDocument(ctxt->userData);
8788: goto done;
8789: }
8790: if ((cur == '<') && (next == '?')) {
8791: /* PI or XML decl */
8792: if (avail < 5) return(ret);
1.143 daniel 8793: if ((!terminate) &&
8794: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8795: return(ret);
8796: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8797: ctxt->sax->setDocumentLocator(ctxt->userData,
8798: &xmlDefaultSAXLocator);
1.184 daniel 8799: if ((ctxt->input->cur[2] == 'x') &&
8800: (ctxt->input->cur[3] == 'm') &&
8801: (ctxt->input->cur[4] == 'l') &&
8802: (IS_BLANK(ctxt->input->cur[5]))) {
1.140 daniel 8803: ret += 5;
8804: #ifdef DEBUG_PUSH
8805: fprintf(stderr, "PP: Parsing XML Decl\n");
8806: #endif
8807: xmlParseXMLDecl(ctxt);
1.167 daniel 8808: ctxt->standalone = ctxt->input->standalone;
1.164 daniel 8809: if ((ctxt->encoding == NULL) &&
8810: (ctxt->input->encoding != NULL))
8811: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1.171 daniel 8812: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8813: (!ctxt->disableSAX))
1.140 daniel 8814: ctxt->sax->startDocument(ctxt->userData);
8815: ctxt->instate = XML_PARSER_MISC;
8816: #ifdef DEBUG_PUSH
8817: fprintf(stderr, "PP: entering MISC\n");
8818: #endif
8819: } else {
8820: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 daniel 8821: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8822: (!ctxt->disableSAX))
1.140 daniel 8823: ctxt->sax->startDocument(ctxt->userData);
8824: ctxt->instate = XML_PARSER_MISC;
8825: #ifdef DEBUG_PUSH
8826: fprintf(stderr, "PP: entering MISC\n");
8827: #endif
8828: }
8829: } else {
8830: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8831: ctxt->sax->setDocumentLocator(ctxt->userData,
8832: &xmlDefaultSAXLocator);
8833: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 daniel 8834: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8835: (!ctxt->disableSAX))
1.140 daniel 8836: ctxt->sax->startDocument(ctxt->userData);
8837: ctxt->instate = XML_PARSER_MISC;
8838: #ifdef DEBUG_PUSH
8839: fprintf(stderr, "PP: entering MISC\n");
8840: #endif
8841: }
8842: break;
8843: case XML_PARSER_MISC:
8844: SKIP_BLANKS;
1.184 daniel 8845: if (ctxt->input->buf == NULL)
8846: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8847: else
1.184 daniel 8848: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8849: if (avail < 2)
8850: goto done;
1.184 daniel 8851: cur = ctxt->input->cur[0];
8852: next = ctxt->input->cur[1];
1.140 daniel 8853: if ((cur == '<') && (next == '?')) {
1.143 daniel 8854: if ((!terminate) &&
8855: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8856: goto done;
8857: #ifdef DEBUG_PUSH
8858: fprintf(stderr, "PP: Parsing PI\n");
8859: #endif
8860: xmlParsePI(ctxt);
8861: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 8862: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 8863: if ((!terminate) &&
8864: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 8865: goto done;
8866: #ifdef DEBUG_PUSH
8867: fprintf(stderr, "PP: Parsing Comment\n");
8868: #endif
8869: xmlParseComment(ctxt);
8870: ctxt->instate = XML_PARSER_MISC;
8871: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 8872: (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
8873: (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
8874: (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
8875: (ctxt->input->cur[8] == 'E')) {
1.143 daniel 8876: if ((!terminate) &&
8877: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 8878: goto done;
8879: #ifdef DEBUG_PUSH
8880: fprintf(stderr, "PP: Parsing internal subset\n");
8881: #endif
1.166 daniel 8882: ctxt->inSubset = 1;
1.140 daniel 8883: xmlParseDocTypeDecl(ctxt);
1.152 daniel 8884: if (RAW == '[') {
1.140 daniel 8885: ctxt->instate = XML_PARSER_DTD;
8886: #ifdef DEBUG_PUSH
8887: fprintf(stderr, "PP: entering DTD\n");
8888: #endif
8889: } else {
1.166 daniel 8890: /*
8891: * Create and update the external subset.
8892: */
8893: ctxt->inSubset = 2;
1.171 daniel 8894: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 8895: (ctxt->sax->externalSubset != NULL))
8896: ctxt->sax->externalSubset(ctxt->userData,
8897: ctxt->intSubName, ctxt->extSubSystem,
8898: ctxt->extSubURI);
8899: ctxt->inSubset = 0;
1.140 daniel 8900: ctxt->instate = XML_PARSER_PROLOG;
8901: #ifdef DEBUG_PUSH
8902: fprintf(stderr, "PP: entering PROLOG\n");
8903: #endif
8904: }
8905: } else if ((cur == '<') && (next == '!') &&
8906: (avail < 9)) {
8907: goto done;
8908: } else {
8909: ctxt->instate = XML_PARSER_START_TAG;
8910: #ifdef DEBUG_PUSH
8911: fprintf(stderr, "PP: entering START_TAG\n");
8912: #endif
8913: }
8914: break;
1.128 daniel 8915: case XML_PARSER_PROLOG:
1.140 daniel 8916: SKIP_BLANKS;
1.184 daniel 8917: if (ctxt->input->buf == NULL)
8918: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8919: else
1.184 daniel 8920: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8921: if (avail < 2)
8922: goto done;
1.184 daniel 8923: cur = ctxt->input->cur[0];
8924: next = ctxt->input->cur[1];
1.140 daniel 8925: if ((cur == '<') && (next == '?')) {
1.143 daniel 8926: if ((!terminate) &&
8927: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8928: goto done;
8929: #ifdef DEBUG_PUSH
8930: fprintf(stderr, "PP: Parsing PI\n");
8931: #endif
8932: xmlParsePI(ctxt);
8933: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 8934: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 8935: if ((!terminate) &&
8936: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 8937: goto done;
8938: #ifdef DEBUG_PUSH
8939: fprintf(stderr, "PP: Parsing Comment\n");
8940: #endif
8941: xmlParseComment(ctxt);
8942: ctxt->instate = XML_PARSER_PROLOG;
8943: } else if ((cur == '<') && (next == '!') &&
8944: (avail < 4)) {
8945: goto done;
8946: } else {
8947: ctxt->instate = XML_PARSER_START_TAG;
8948: #ifdef DEBUG_PUSH
8949: fprintf(stderr, "PP: entering START_TAG\n");
8950: #endif
8951: }
8952: break;
8953: case XML_PARSER_EPILOG:
8954: SKIP_BLANKS;
1.184 daniel 8955: if (ctxt->input->buf == NULL)
8956: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8957: else
1.184 daniel 8958: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8959: if (avail < 2)
8960: goto done;
1.184 daniel 8961: cur = ctxt->input->cur[0];
8962: next = ctxt->input->cur[1];
1.140 daniel 8963: if ((cur == '<') && (next == '?')) {
1.143 daniel 8964: if ((!terminate) &&
8965: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8966: goto done;
8967: #ifdef DEBUG_PUSH
8968: fprintf(stderr, "PP: Parsing PI\n");
8969: #endif
8970: xmlParsePI(ctxt);
8971: ctxt->instate = XML_PARSER_EPILOG;
8972: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 8973: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 8974: if ((!terminate) &&
8975: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 8976: goto done;
8977: #ifdef DEBUG_PUSH
8978: fprintf(stderr, "PP: Parsing Comment\n");
8979: #endif
8980: xmlParseComment(ctxt);
8981: ctxt->instate = XML_PARSER_EPILOG;
8982: } else if ((cur == '<') && (next == '!') &&
8983: (avail < 4)) {
8984: goto done;
8985: } else {
8986: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8987: ctxt->sax->error(ctxt->userData,
8988: "Extra content at the end of the document\n");
8989: ctxt->wellFormed = 0;
1.180 daniel 8990: ctxt->disableSAX = 1;
1.140 daniel 8991: ctxt->errNo = XML_ERR_DOCUMENT_END;
8992: ctxt->instate = XML_PARSER_EOF;
8993: #ifdef DEBUG_PUSH
8994: fprintf(stderr, "PP: entering EOF\n");
8995: #endif
1.171 daniel 8996: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8997: (!ctxt->disableSAX))
1.140 daniel 8998: ctxt->sax->endDocument(ctxt->userData);
8999: goto done;
9000: }
9001: break;
9002: case XML_PARSER_START_TAG: {
9003: xmlChar *name, *oldname;
9004:
1.184 daniel 9005: if ((avail < 2) && (ctxt->inputNr == 1))
1.140 daniel 9006: goto done;
1.184 daniel 9007: cur = ctxt->input->cur[0];
1.140 daniel 9008: if (cur != '<') {
9009: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9010: ctxt->sax->error(ctxt->userData,
9011: "Start tag expect, '<' not found\n");
9012: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
9013: ctxt->wellFormed = 0;
1.180 daniel 9014: ctxt->disableSAX = 1;
1.140 daniel 9015: ctxt->instate = XML_PARSER_EOF;
9016: #ifdef DEBUG_PUSH
9017: fprintf(stderr, "PP: entering EOF\n");
9018: #endif
1.171 daniel 9019: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9020: (!ctxt->disableSAX))
1.140 daniel 9021: ctxt->sax->endDocument(ctxt->userData);
9022: goto done;
9023: }
1.143 daniel 9024: if ((!terminate) &&
9025: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 9026: goto done;
1.176 daniel 9027: if (ctxt->spaceNr == 0)
9028: spacePush(ctxt, -1);
9029: else
9030: spacePush(ctxt, *ctxt->space);
1.140 daniel 9031: name = xmlParseStartTag(ctxt);
9032: if (name == NULL) {
1.176 daniel 9033: spacePop(ctxt);
1.140 daniel 9034: ctxt->instate = XML_PARSER_EOF;
9035: #ifdef DEBUG_PUSH
9036: fprintf(stderr, "PP: entering EOF\n");
9037: #endif
1.171 daniel 9038: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9039: (!ctxt->disableSAX))
1.140 daniel 9040: ctxt->sax->endDocument(ctxt->userData);
9041: goto done;
9042: }
9043: namePush(ctxt, xmlStrdup(name));
9044:
9045: /*
9046: * [ VC: Root Element Type ]
9047: * The Name in the document type declaration must match
9048: * the element type of the root element.
9049: */
9050: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 9051: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.140 daniel 9052: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9053:
9054: /*
9055: * Check for an Empty Element.
9056: */
1.152 daniel 9057: if ((RAW == '/') && (NXT(1) == '>')) {
1.140 daniel 9058: SKIP(2);
1.171 daniel 9059: if ((ctxt->sax != NULL) &&
9060: (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
1.140 daniel 9061: ctxt->sax->endElement(ctxt->userData, name);
9062: xmlFree(name);
9063: oldname = namePop(ctxt);
1.176 daniel 9064: spacePop(ctxt);
1.140 daniel 9065: if (oldname != NULL) {
9066: #ifdef DEBUG_STACK
9067: fprintf(stderr,"Close: popped %s\n", oldname);
9068: #endif
9069: xmlFree(oldname);
9070: }
9071: if (ctxt->name == NULL) {
9072: ctxt->instate = XML_PARSER_EPILOG;
9073: #ifdef DEBUG_PUSH
9074: fprintf(stderr, "PP: entering EPILOG\n");
9075: #endif
9076: } else {
9077: ctxt->instate = XML_PARSER_CONTENT;
9078: #ifdef DEBUG_PUSH
9079: fprintf(stderr, "PP: entering CONTENT\n");
9080: #endif
9081: }
9082: break;
9083: }
1.152 daniel 9084: if (RAW == '>') {
1.140 daniel 9085: NEXT;
9086: } else {
9087: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9088: ctxt->sax->error(ctxt->userData,
9089: "Couldn't find end of Start Tag %s\n",
9090: name);
9091: ctxt->wellFormed = 0;
1.180 daniel 9092: ctxt->disableSAX = 1;
1.140 daniel 9093: ctxt->errNo = XML_ERR_GT_REQUIRED;
9094:
9095: /*
9096: * end of parsing of this node.
9097: */
9098: nodePop(ctxt);
9099: oldname = namePop(ctxt);
1.176 daniel 9100: spacePop(ctxt);
1.140 daniel 9101: if (oldname != NULL) {
9102: #ifdef DEBUG_STACK
9103: fprintf(stderr,"Close: popped %s\n", oldname);
9104: #endif
9105: xmlFree(oldname);
9106: }
9107: }
9108: xmlFree(name);
9109: ctxt->instate = XML_PARSER_CONTENT;
9110: #ifdef DEBUG_PUSH
9111: fprintf(stderr, "PP: entering CONTENT\n");
9112: #endif
9113: break;
9114: }
1.128 daniel 9115: case XML_PARSER_CONTENT:
1.140 daniel 9116: /*
9117: * Handle preparsed entities and charRef
9118: */
9119: if (ctxt->token != 0) {
9120: xmlChar cur[2] = { 0 , 0 } ;
9121:
9122: cur[0] = (xmlChar) ctxt->token;
1.171 daniel 9123: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9124: (ctxt->sax->characters != NULL))
1.140 daniel 9125: ctxt->sax->characters(ctxt->userData, cur, 1);
9126: ctxt->token = 0;
9127: }
1.184 daniel 9128: if ((avail < 2) && (ctxt->inputNr == 1))
1.140 daniel 9129: goto done;
1.184 daniel 9130: cur = ctxt->input->cur[0];
9131: next = ctxt->input->cur[1];
1.140 daniel 9132: if ((cur == '<') && (next == '?')) {
1.143 daniel 9133: if ((!terminate) &&
9134: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 9135: goto done;
9136: #ifdef DEBUG_PUSH
9137: fprintf(stderr, "PP: Parsing PI\n");
9138: #endif
9139: xmlParsePI(ctxt);
9140: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 9141: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 9142: if ((!terminate) &&
9143: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 9144: goto done;
9145: #ifdef DEBUG_PUSH
9146: fprintf(stderr, "PP: Parsing Comment\n");
9147: #endif
9148: xmlParseComment(ctxt);
9149: ctxt->instate = XML_PARSER_CONTENT;
1.184 daniel 9150: } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9151: (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
9152: (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
9153: (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
9154: (ctxt->input->cur[8] == '[')) {
1.140 daniel 9155: SKIP(9);
9156: ctxt->instate = XML_PARSER_CDATA_SECTION;
9157: #ifdef DEBUG_PUSH
9158: fprintf(stderr, "PP: entering CDATA_SECTION\n");
9159: #endif
9160: break;
9161: } else if ((cur == '<') && (next == '!') &&
9162: (avail < 9)) {
9163: goto done;
9164: } else if ((cur == '<') && (next == '/')) {
9165: ctxt->instate = XML_PARSER_END_TAG;
9166: #ifdef DEBUG_PUSH
9167: fprintf(stderr, "PP: entering END_TAG\n");
9168: #endif
9169: break;
9170: } else if (cur == '<') {
9171: ctxt->instate = XML_PARSER_START_TAG;
9172: #ifdef DEBUG_PUSH
9173: fprintf(stderr, "PP: entering START_TAG\n");
9174: #endif
9175: break;
9176: } else if (cur == '&') {
1.143 daniel 9177: if ((!terminate) &&
9178: (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
1.140 daniel 9179: goto done;
9180: #ifdef DEBUG_PUSH
9181: fprintf(stderr, "PP: Parsing Reference\n");
9182: #endif
9183: /* TODO: check generation of subtrees if noent !!! */
9184: xmlParseReference(ctxt);
9185: } else {
1.156 daniel 9186: /* TODO Avoid the extra copy, handle directly !!! */
1.140 daniel 9187: /*
1.181 daniel 9188: * Goal of the following test is:
1.140 daniel 9189: * - minimize calls to the SAX 'character' callback
9190: * when they are mergeable
9191: * - handle a problem for isBlank when we only parse
9192: * a sequence of blank chars and the next one is
9193: * not available to check against '<' presence.
9194: * - tries to homogenize the differences in SAX
9195: * callbacks between the push and pull versions
9196: * of the parser.
9197: */
9198: if ((ctxt->inputNr == 1) &&
9199: (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
1.143 daniel 9200: if ((!terminate) &&
9201: (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
1.140 daniel 9202: goto done;
9203: }
9204: ctxt->checkIndex = 0;
9205: #ifdef DEBUG_PUSH
9206: fprintf(stderr, "PP: Parsing char data\n");
9207: #endif
9208: xmlParseCharData(ctxt, 0);
9209: }
9210: /*
9211: * Pop-up of finished entities.
9212: */
1.152 daniel 9213: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 9214: xmlPopInput(ctxt);
9215: break;
9216: case XML_PARSER_CDATA_SECTION: {
9217: /*
9218: * The Push mode need to have the SAX callback for
9219: * cdataBlock merge back contiguous callbacks.
9220: */
9221: int base;
9222:
9223: base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9224: if (base < 0) {
9225: if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
1.171 daniel 9226: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.140 daniel 9227: if (ctxt->sax->cdataBlock != NULL)
1.184 daniel 9228: ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
1.140 daniel 9229: XML_PARSER_BIG_BUFFER_SIZE);
9230: }
9231: SKIP(XML_PARSER_BIG_BUFFER_SIZE);
9232: ctxt->checkIndex = 0;
9233: }
9234: goto done;
9235: } else {
1.171 daniel 9236: if ((ctxt->sax != NULL) && (base > 0) &&
9237: (!ctxt->disableSAX)) {
1.140 daniel 9238: if (ctxt->sax->cdataBlock != NULL)
9239: ctxt->sax->cdataBlock(ctxt->userData,
1.184 daniel 9240: ctxt->input->cur, base);
1.140 daniel 9241: }
9242: SKIP(base + 3);
9243: ctxt->checkIndex = 0;
9244: ctxt->instate = XML_PARSER_CONTENT;
9245: #ifdef DEBUG_PUSH
9246: fprintf(stderr, "PP: entering CONTENT\n");
9247: #endif
9248: }
9249: break;
9250: }
1.141 daniel 9251: case XML_PARSER_END_TAG:
1.140 daniel 9252: if (avail < 2)
9253: goto done;
1.143 daniel 9254: if ((!terminate) &&
9255: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 9256: goto done;
9257: xmlParseEndTag(ctxt);
9258: if (ctxt->name == NULL) {
9259: ctxt->instate = XML_PARSER_EPILOG;
9260: #ifdef DEBUG_PUSH
9261: fprintf(stderr, "PP: entering EPILOG\n");
9262: #endif
9263: } else {
9264: ctxt->instate = XML_PARSER_CONTENT;
9265: #ifdef DEBUG_PUSH
9266: fprintf(stderr, "PP: entering CONTENT\n");
9267: #endif
9268: }
9269: break;
9270: case XML_PARSER_DTD: {
9271: /*
9272: * Sorry but progressive parsing of the internal subset
9273: * is not expected to be supported. We first check that
9274: * the full content of the internal subset is available and
9275: * the parsing is launched only at that point.
9276: * Internal subset ends up with "']' S? '>'" in an unescaped
9277: * section and not in a ']]>' sequence which are conditional
9278: * sections (whoever argued to keep that crap in XML deserve
9279: * a place in hell !).
9280: */
9281: int base, i;
9282: xmlChar *buf;
9283: xmlChar quote = 0;
9284:
1.184 daniel 9285: base = ctxt->input->cur - ctxt->input->base;
1.140 daniel 9286: if (base < 0) return(0);
9287: if (ctxt->checkIndex > base)
9288: base = ctxt->checkIndex;
1.184 daniel 9289: buf = ctxt->input->buf->buffer->content;
9290: for (;base < ctxt->input->buf->buffer->use;base++) {
1.140 daniel 9291: if (quote != 0) {
9292: if (buf[base] == quote)
9293: quote = 0;
9294: continue;
9295: }
9296: if (buf[base] == '"') {
9297: quote = '"';
9298: continue;
9299: }
9300: if (buf[base] == '\'') {
9301: quote = '\'';
9302: continue;
9303: }
9304: if (buf[base] == ']') {
1.184 daniel 9305: if (base +1 >= ctxt->input->buf->buffer->use)
1.140 daniel 9306: break;
9307: if (buf[base + 1] == ']') {
9308: /* conditional crap, skip both ']' ! */
9309: base++;
9310: continue;
9311: }
1.184 daniel 9312: for (i = 0;base + i < ctxt->input->buf->buffer->use;i++) {
1.140 daniel 9313: if (buf[base + i] == '>')
9314: goto found_end_int_subset;
9315: }
9316: break;
9317: }
9318: }
9319: /*
9320: * We didn't find the end of the internal subset
9321: */
9322: if (quote == 0)
9323: ctxt->checkIndex = base;
9324: #ifdef DEBUG_PUSH
9325: if (next == 0)
9326: fprintf(stderr, "PP: lookup of int subset end filed\n");
9327: #endif
9328: goto done;
9329:
9330: found_end_int_subset:
9331: xmlParseInternalSubset(ctxt);
1.166 daniel 9332: ctxt->inSubset = 2;
1.171 daniel 9333: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 9334: (ctxt->sax->externalSubset != NULL))
9335: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9336: ctxt->extSubSystem, ctxt->extSubURI);
9337: ctxt->inSubset = 0;
1.140 daniel 9338: ctxt->instate = XML_PARSER_PROLOG;
9339: ctxt->checkIndex = 0;
9340: #ifdef DEBUG_PUSH
9341: fprintf(stderr, "PP: entering PROLOG\n");
9342: #endif
9343: break;
9344: }
9345: case XML_PARSER_COMMENT:
9346: fprintf(stderr, "PP: internal error, state == COMMENT\n");
9347: ctxt->instate = XML_PARSER_CONTENT;
9348: #ifdef DEBUG_PUSH
9349: fprintf(stderr, "PP: entering CONTENT\n");
9350: #endif
9351: break;
9352: case XML_PARSER_PI:
9353: fprintf(stderr, "PP: internal error, state == PI\n");
9354: ctxt->instate = XML_PARSER_CONTENT;
9355: #ifdef DEBUG_PUSH
9356: fprintf(stderr, "PP: entering CONTENT\n");
9357: #endif
9358: break;
1.128 daniel 9359: case XML_PARSER_ENTITY_DECL:
1.140 daniel 9360: fprintf(stderr, "PP: internal error, state == ENTITY_DECL\n");
9361: ctxt->instate = XML_PARSER_DTD;
9362: #ifdef DEBUG_PUSH
9363: fprintf(stderr, "PP: entering DTD\n");
9364: #endif
9365: break;
1.128 daniel 9366: case XML_PARSER_ENTITY_VALUE:
1.140 daniel 9367: fprintf(stderr, "PP: internal error, state == ENTITY_VALUE\n");
9368: ctxt->instate = XML_PARSER_CONTENT;
9369: #ifdef DEBUG_PUSH
9370: fprintf(stderr, "PP: entering DTD\n");
9371: #endif
9372: break;
1.128 daniel 9373: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 9374: fprintf(stderr, "PP: internal error, state == ATTRIBUTE_VALUE\n");
1.168 daniel 9375: ctxt->instate = XML_PARSER_START_TAG;
9376: #ifdef DEBUG_PUSH
9377: fprintf(stderr, "PP: entering START_TAG\n");
9378: #endif
9379: break;
9380: case XML_PARSER_SYSTEM_LITERAL:
9381: fprintf(stderr, "PP: internal error, state == SYSTEM_LITERAL\n");
1.140 daniel 9382: ctxt->instate = XML_PARSER_START_TAG;
9383: #ifdef DEBUG_PUSH
9384: fprintf(stderr, "PP: entering START_TAG\n");
9385: #endif
9386: break;
1.128 daniel 9387: }
9388: }
1.140 daniel 9389: done:
9390: #ifdef DEBUG_PUSH
9391: fprintf(stderr, "PP: done %d\n", ret);
9392: #endif
1.128 daniel 9393: return(ret);
9394: }
9395:
9396: /**
1.143 daniel 9397: * xmlParseTry:
9398: * @ctxt: an XML parser context
9399: *
9400: * Try to progress on parsing
9401: *
9402: * Returns zero if no parsing was possible
9403: */
9404: int
9405: xmlParseTry(xmlParserCtxtPtr ctxt) {
9406: return(xmlParseTryOrFinish(ctxt, 0));
9407: }
9408:
9409: /**
1.128 daniel 9410: * xmlParseChunk:
9411: * @ctxt: an XML parser context
9412: * @chunk: an char array
9413: * @size: the size in byte of the chunk
9414: * @terminate: last chunk indicator
9415: *
9416: * Parse a Chunk of memory
9417: *
9418: * Returns zero if no error, the xmlParserErrors otherwise.
9419: */
1.140 daniel 9420: int
1.128 daniel 9421: xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
9422: int terminate) {
1.132 daniel 9423: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
1.140 daniel 9424: (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
9425: int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9426: int cur = ctxt->input->cur - ctxt->input->base;
9427:
1.132 daniel 9428: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
1.140 daniel 9429: ctxt->input->base = ctxt->input->buf->buffer->content + base;
9430: ctxt->input->cur = ctxt->input->base + cur;
9431: #ifdef DEBUG_PUSH
9432: fprintf(stderr, "PP: pushed %d\n", size);
9433: #endif
9434:
1.150 daniel 9435: if ((terminate) || (ctxt->input->buf->buffer->use > 80))
9436: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 9437: } else if (ctxt->instate != XML_PARSER_EOF)
1.143 daniel 9438: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 9439: if (terminate) {
1.151 daniel 9440: /*
9441: * Grab the encoding if it was added on-the-fly
9442: */
9443: if ((ctxt->encoding != NULL) && (ctxt->myDoc != NULL) &&
9444: (ctxt->myDoc->encoding == NULL)) {
9445: ctxt->myDoc->encoding = ctxt->encoding;
9446: ctxt->encoding = NULL;
9447: }
9448:
9449: /*
9450: * Check for termination
9451: */
1.140 daniel 9452: if ((ctxt->instate != XML_PARSER_EOF) &&
9453: (ctxt->instate != XML_PARSER_EPILOG)) {
9454: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9455: ctxt->sax->error(ctxt->userData,
9456: "Extra content at the end of the document\n");
9457: ctxt->wellFormed = 0;
1.180 daniel 9458: ctxt->disableSAX = 1;
1.140 daniel 9459: ctxt->errNo = XML_ERR_DOCUMENT_END;
9460: }
9461: if (ctxt->instate != XML_PARSER_EOF) {
1.171 daniel 9462: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9463: (!ctxt->disableSAX))
1.140 daniel 9464: ctxt->sax->endDocument(ctxt->userData);
9465: }
9466: ctxt->instate = XML_PARSER_EOF;
1.128 daniel 9467: }
9468: return((xmlParserErrors) ctxt->errNo);
9469: }
9470:
9471: /************************************************************************
9472: * *
1.98 daniel 9473: * I/O front end functions to the parser *
9474: * *
9475: ************************************************************************/
9476:
1.50 daniel 9477: /**
1.181 daniel 9478: * xmlCreatePushParserCtxt:
1.140 daniel 9479: * @sax: a SAX handler
9480: * @user_data: The user data returned on SAX callbacks
9481: * @chunk: a pointer to an array of chars
9482: * @size: number of chars in the array
9483: * @filename: an optional file name or URI
9484: *
9485: * Create a parser context for using the XML parser in push mode
9486: * To allow content encoding detection, @size should be >= 4
9487: * The value of @filename is used for fetching external entities
9488: * and error/warning reports.
9489: *
9490: * Returns the new parser context or NULL
9491: */
9492: xmlParserCtxtPtr
9493: xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9494: const char *chunk, int size, const char *filename) {
9495: xmlParserCtxtPtr ctxt;
9496: xmlParserInputPtr inputStream;
9497: xmlParserInputBufferPtr buf;
9498: xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9499:
9500: /*
1.156 daniel 9501: * plug some encoding conversion routines
1.140 daniel 9502: */
9503: if ((chunk != NULL) && (size >= 4))
1.156 daniel 9504: enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
1.140 daniel 9505:
9506: buf = xmlAllocParserInputBuffer(enc);
9507: if (buf == NULL) return(NULL);
9508:
9509: ctxt = xmlNewParserCtxt();
9510: if (ctxt == NULL) {
9511: xmlFree(buf);
9512: return(NULL);
9513: }
9514: if (sax != NULL) {
9515: if (ctxt->sax != &xmlDefaultSAXHandler)
9516: xmlFree(ctxt->sax);
9517: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9518: if (ctxt->sax == NULL) {
9519: xmlFree(buf);
9520: xmlFree(ctxt);
9521: return(NULL);
9522: }
9523: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9524: if (user_data != NULL)
9525: ctxt->userData = user_data;
9526: }
9527: if (filename == NULL) {
9528: ctxt->directory = NULL;
9529: } else {
9530: ctxt->directory = xmlParserGetDirectory(filename);
9531: }
9532:
9533: inputStream = xmlNewInputStream(ctxt);
9534: if (inputStream == NULL) {
9535: xmlFreeParserCtxt(ctxt);
9536: return(NULL);
9537: }
9538:
9539: if (filename == NULL)
9540: inputStream->filename = NULL;
9541: else
9542: inputStream->filename = xmlMemStrdup(filename);
9543: inputStream->buf = buf;
9544: inputStream->base = inputStream->buf->buffer->content;
9545: inputStream->cur = inputStream->buf->buffer->content;
1.156 daniel 9546: if (enc != XML_CHAR_ENCODING_NONE) {
9547: xmlSwitchEncoding(ctxt, enc);
9548: }
1.140 daniel 9549:
9550: inputPush(ctxt, inputStream);
9551:
9552: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9553: (ctxt->input->buf != NULL)) {
9554: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9555: #ifdef DEBUG_PUSH
9556: fprintf(stderr, "PP: pushed %d\n", size);
9557: #endif
9558: }
1.190 ! daniel 9559:
! 9560: return(ctxt);
! 9561: }
! 9562:
! 9563: /**
! 9564: * xmlCreateIOParserCtxt:
! 9565: * @sax: a SAX handler
! 9566: * @user_data: The user data returned on SAX callbacks
! 9567: * @ioread: an I/O read function
! 9568: * @ioclose: an I/O close function
! 9569: * @ioctx: an I/O handler
! 9570: * @enc: the charset encoding if known
! 9571: *
! 9572: * Create a parser context for using the XML parser with an existing
! 9573: * I/O stream
! 9574: *
! 9575: * Returns the new parser context or NULL
! 9576: */
! 9577: xmlParserCtxtPtr
! 9578: xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
! 9579: xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
! 9580: void *ioctx, xmlCharEncoding enc) {
! 9581: xmlParserCtxtPtr ctxt;
! 9582: xmlParserInputPtr inputStream;
! 9583: xmlParserInputBufferPtr buf;
! 9584:
! 9585: buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
! 9586: if (buf == NULL) return(NULL);
! 9587:
! 9588: ctxt = xmlNewParserCtxt();
! 9589: if (ctxt == NULL) {
! 9590: xmlFree(buf);
! 9591: return(NULL);
! 9592: }
! 9593: if (sax != NULL) {
! 9594: if (ctxt->sax != &xmlDefaultSAXHandler)
! 9595: xmlFree(ctxt->sax);
! 9596: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
! 9597: if (ctxt->sax == NULL) {
! 9598: xmlFree(buf);
! 9599: xmlFree(ctxt);
! 9600: return(NULL);
! 9601: }
! 9602: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
! 9603: if (user_data != NULL)
! 9604: ctxt->userData = user_data;
! 9605: }
! 9606:
! 9607: inputStream = xmlNewIOInputStream(ctxt, buf, enc);
! 9608: if (inputStream == NULL) {
! 9609: xmlFreeParserCtxt(ctxt);
! 9610: return(NULL);
! 9611: }
! 9612: inputPush(ctxt, inputStream);
1.140 daniel 9613:
9614: return(ctxt);
9615: }
9616:
9617: /**
1.181 daniel 9618: * xmlCreateDocParserCtxt:
1.123 daniel 9619: * @cur: a pointer to an array of xmlChar
1.50 daniel 9620: *
1.69 daniel 9621: * Create a parser context for an XML in-memory document.
9622: *
9623: * Returns the new parser context or NULL
1.16 daniel 9624: */
1.69 daniel 9625: xmlParserCtxtPtr
1.123 daniel 9626: xmlCreateDocParserCtxt(xmlChar *cur) {
1.16 daniel 9627: xmlParserCtxtPtr ctxt;
1.40 daniel 9628: xmlParserInputPtr input;
1.16 daniel 9629:
1.97 daniel 9630: ctxt = xmlNewParserCtxt();
1.16 daniel 9631: if (ctxt == NULL) {
9632: return(NULL);
9633: }
1.96 daniel 9634: input = xmlNewInputStream(ctxt);
1.40 daniel 9635: if (input == NULL) {
1.97 daniel 9636: xmlFreeParserCtxt(ctxt);
1.40 daniel 9637: return(NULL);
9638: }
9639:
9640: input->base = cur;
9641: input->cur = cur;
9642:
9643: inputPush(ctxt, input);
1.69 daniel 9644: return(ctxt);
9645: }
9646:
9647: /**
1.181 daniel 9648: * xmlSAXParseDoc:
1.69 daniel 9649: * @sax: the SAX handler block
1.123 daniel 9650: * @cur: a pointer to an array of xmlChar
1.69 daniel 9651: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9652: * documents
9653: *
9654: * parse an XML in-memory document and build a tree.
9655: * It use the given SAX function block to handle the parsing callback.
9656: * If sax is NULL, fallback to the default DOM tree building routines.
9657: *
9658: * Returns the resulting document tree
9659: */
9660:
9661: xmlDocPtr
1.123 daniel 9662: xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
1.69 daniel 9663: xmlDocPtr ret;
9664: xmlParserCtxtPtr ctxt;
9665:
9666: if (cur == NULL) return(NULL);
1.16 daniel 9667:
9668:
1.69 daniel 9669: ctxt = xmlCreateDocParserCtxt(cur);
9670: if (ctxt == NULL) return(NULL);
1.74 daniel 9671: if (sax != NULL) {
9672: ctxt->sax = sax;
9673: ctxt->userData = NULL;
9674: }
1.69 daniel 9675:
1.16 daniel 9676: xmlParseDocument(ctxt);
1.72 daniel 9677: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 9678: else {
9679: ret = NULL;
1.72 daniel 9680: xmlFreeDoc(ctxt->myDoc);
9681: ctxt->myDoc = NULL;
1.59 daniel 9682: }
1.86 daniel 9683: if (sax != NULL)
9684: ctxt->sax = NULL;
1.69 daniel 9685: xmlFreeParserCtxt(ctxt);
1.16 daniel 9686:
1.1 veillard 9687: return(ret);
9688: }
9689:
1.50 daniel 9690: /**
1.181 daniel 9691: * xmlParseDoc:
1.123 daniel 9692: * @cur: a pointer to an array of xmlChar
1.55 daniel 9693: *
9694: * parse an XML in-memory document and build a tree.
9695: *
1.68 daniel 9696: * Returns the resulting document tree
1.55 daniel 9697: */
9698:
1.69 daniel 9699: xmlDocPtr
1.123 daniel 9700: xmlParseDoc(xmlChar *cur) {
1.59 daniel 9701: return(xmlSAXParseDoc(NULL, cur, 0));
1.76 daniel 9702: }
9703:
9704: /**
1.181 daniel 9705: * xmlSAXParseDTD:
1.76 daniel 9706: * @sax: the SAX handler block
9707: * @ExternalID: a NAME* containing the External ID of the DTD
9708: * @SystemID: a NAME* containing the URL to the DTD
9709: *
9710: * Load and parse an external subset.
9711: *
9712: * Returns the resulting xmlDtdPtr or NULL in case of error.
9713: */
9714:
9715: xmlDtdPtr
1.123 daniel 9716: xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9717: const xmlChar *SystemID) {
1.76 daniel 9718: xmlDtdPtr ret = NULL;
9719: xmlParserCtxtPtr ctxt;
1.83 daniel 9720: xmlParserInputPtr input = NULL;
1.76 daniel 9721: xmlCharEncoding enc;
9722:
9723: if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9724:
1.97 daniel 9725: ctxt = xmlNewParserCtxt();
1.76 daniel 9726: if (ctxt == NULL) {
9727: return(NULL);
9728: }
9729:
9730: /*
9731: * Set-up the SAX context
9732: */
9733: if (ctxt == NULL) return(NULL);
9734: if (sax != NULL) {
1.93 veillard 9735: if (ctxt->sax != NULL)
1.119 daniel 9736: xmlFree(ctxt->sax);
1.76 daniel 9737: ctxt->sax = sax;
9738: ctxt->userData = NULL;
9739: }
9740:
9741: /*
9742: * Ask the Entity resolver to load the damn thing
9743: */
9744:
9745: if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9746: input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9747: if (input == NULL) {
1.86 daniel 9748: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 9749: xmlFreeParserCtxt(ctxt);
9750: return(NULL);
9751: }
9752:
9753: /*
1.156 daniel 9754: * plug some encoding conversion routines here.
1.76 daniel 9755: */
9756: xmlPushInput(ctxt, input);
1.156 daniel 9757: enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
1.76 daniel 9758: xmlSwitchEncoding(ctxt, enc);
9759:
1.95 veillard 9760: if (input->filename == NULL)
1.156 daniel 9761: input->filename = (char *) xmlStrdup(SystemID);
1.76 daniel 9762: input->line = 1;
9763: input->col = 1;
9764: input->base = ctxt->input->cur;
9765: input->cur = ctxt->input->cur;
9766: input->free = NULL;
9767:
9768: /*
9769: * let's parse that entity knowing it's an external subset.
9770: */
1.79 daniel 9771: xmlParseExternalSubset(ctxt, ExternalID, SystemID);
1.76 daniel 9772:
9773: if (ctxt->myDoc != NULL) {
9774: if (ctxt->wellFormed) {
9775: ret = ctxt->myDoc->intSubset;
9776: ctxt->myDoc->intSubset = NULL;
9777: } else {
9778: ret = NULL;
9779: }
9780: xmlFreeDoc(ctxt->myDoc);
9781: ctxt->myDoc = NULL;
9782: }
1.86 daniel 9783: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 9784: xmlFreeParserCtxt(ctxt);
9785:
9786: return(ret);
9787: }
9788:
9789: /**
1.181 daniel 9790: * xmlParseDTD:
1.76 daniel 9791: * @ExternalID: a NAME* containing the External ID of the DTD
9792: * @SystemID: a NAME* containing the URL to the DTD
9793: *
9794: * Load and parse an external subset.
9795: *
9796: * Returns the resulting xmlDtdPtr or NULL in case of error.
9797: */
9798:
9799: xmlDtdPtr
1.123 daniel 9800: xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
1.76 daniel 9801: return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
1.59 daniel 9802: }
9803:
9804: /**
1.181 daniel 9805: * xmlSAXParseBalancedChunk:
1.144 daniel 9806: * @ctx: an XML parser context (possibly NULL)
9807: * @sax: the SAX handler bloc (possibly NULL)
9808: * @user_data: The user data returned on SAX callbacks (possibly NULL)
9809: * @input: a parser input stream
9810: * @enc: the encoding
9811: *
9812: * Parse a well-balanced chunk of an XML document
9813: * The user has to provide SAX callback block whose routines will be
9814: * called by the parser
9815: * The allowed sequence for the Well Balanced Chunk is the one defined by
9816: * the content production in the XML grammar:
9817: *
9818: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9819: *
1.176 daniel 9820: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
1.144 daniel 9821: * the error code otherwise
9822: */
9823:
9824: int
9825: xmlSAXParseBalancedChunk(xmlParserCtxtPtr ctx, xmlSAXHandlerPtr sax,
9826: void *user_data, xmlParserInputPtr input,
9827: xmlCharEncoding enc) {
9828: xmlParserCtxtPtr ctxt;
9829: int ret;
9830:
9831: if (input == NULL) return(-1);
9832:
9833: if (ctx != NULL)
9834: ctxt = ctx;
9835: else {
9836: ctxt = xmlNewParserCtxt();
9837: if (ctxt == NULL)
9838: return(-1);
9839: if (sax == NULL)
9840: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9841: }
9842:
9843: /*
9844: * Set-up the SAX context
9845: */
9846: if (sax != NULL) {
9847: if (ctxt->sax != NULL)
9848: xmlFree(ctxt->sax);
9849: ctxt->sax = sax;
9850: ctxt->userData = user_data;
9851: }
9852:
9853: /*
9854: * plug some encoding conversion routines here.
9855: */
9856: xmlPushInput(ctxt, input);
9857: if (enc != XML_CHAR_ENCODING_NONE)
9858: xmlSwitchEncoding(ctxt, enc);
9859:
9860: /*
9861: * let's parse that entity knowing it's an external subset.
9862: */
9863: xmlParseContent(ctxt);
9864: ret = ctxt->errNo;
9865:
9866: if (ctx == NULL) {
9867: if (sax != NULL)
9868: ctxt->sax = NULL;
9869: else
9870: xmlFreeDoc(ctxt->myDoc);
9871: xmlFreeParserCtxt(ctxt);
9872: }
9873: return(ret);
9874: }
9875:
9876: /**
1.181 daniel 9877: * xmlParseExternalEntity:
9878: * @doc: the document the chunk pertains to
9879: * @sax: the SAX handler bloc (possibly NULL)
9880: * @user_data: The user data returned on SAX callbacks (possibly NULL)
1.185 daniel 9881: * @depth: Used for loop detection, use 0
1.181 daniel 9882: * @URL: the URL for the entity to load
9883: * @ID: the System ID for the entity to load
9884: * @list: the return value for the set of parsed nodes
9885: *
9886: * Parse an external general entity
9887: * An external general parsed entity is well-formed if it matches the
9888: * production labeled extParsedEnt.
9889: *
9890: * [78] extParsedEnt ::= TextDecl? content
9891: *
9892: * Returns 0 if the entity is well formed, -1 in case of args problem and
9893: * the parser error code otherwise
9894: */
9895:
9896: int
9897: xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
1.185 daniel 9898: int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
1.181 daniel 9899: xmlParserCtxtPtr ctxt;
9900: xmlDocPtr newDoc;
9901: xmlSAXHandlerPtr oldsax = NULL;
9902: int ret = 0;
9903:
1.185 daniel 9904: if (depth > 40) {
9905: return(XML_ERR_ENTITY_LOOP);
9906: }
9907:
9908:
1.181 daniel 9909:
9910: if (list != NULL)
9911: *list = NULL;
9912: if ((URL == NULL) && (ID == NULL))
9913: return(-1);
9914:
9915:
9916: ctxt = xmlCreateEntityParserCtxt(URL, ID, doc->URL);
9917: if (ctxt == NULL) return(-1);
9918: ctxt->userData = ctxt;
9919: if (sax != NULL) {
9920: oldsax = ctxt->sax;
9921: ctxt->sax = sax;
9922: if (user_data != NULL)
9923: ctxt->userData = user_data;
9924: }
9925: newDoc = xmlNewDoc(BAD_CAST "1.0");
9926: if (newDoc == NULL) {
9927: xmlFreeParserCtxt(ctxt);
9928: return(-1);
9929: }
9930: if (doc != NULL) {
9931: newDoc->intSubset = doc->intSubset;
9932: newDoc->extSubset = doc->extSubset;
9933: }
9934: if (doc->URL != NULL) {
9935: newDoc->URL = xmlStrdup(doc->URL);
9936: }
9937: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9938: if (newDoc->children == NULL) {
9939: if (sax != NULL)
9940: ctxt->sax = oldsax;
9941: xmlFreeParserCtxt(ctxt);
9942: newDoc->intSubset = NULL;
9943: newDoc->extSubset = NULL;
9944: xmlFreeDoc(newDoc);
9945: return(-1);
9946: }
9947: nodePush(ctxt, newDoc->children);
9948: if (doc == NULL) {
9949: ctxt->myDoc = newDoc;
9950: } else {
9951: ctxt->myDoc = doc;
9952: newDoc->children->doc = doc;
9953: }
9954:
9955: /*
9956: * Parse a possible text declaration first
9957: */
9958: GROW;
9959: if ((RAW == '<') && (NXT(1) == '?') &&
9960: (NXT(2) == 'x') && (NXT(3) == 'm') &&
9961: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9962: xmlParseTextDecl(ctxt);
9963: }
9964:
9965: /*
9966: * Doing validity checking on chunk doesn't make sense
9967: */
9968: ctxt->instate = XML_PARSER_CONTENT;
9969: ctxt->validate = 0;
1.185 daniel 9970: ctxt->depth = depth;
1.181 daniel 9971:
9972: xmlParseContent(ctxt);
9973:
9974: if ((RAW == '<') && (NXT(1) == '/')) {
9975: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9976: ctxt->sax->error(ctxt->userData,
9977: "chunk is not well balanced\n");
9978: ctxt->wellFormed = 0;
9979: ctxt->disableSAX = 1;
9980: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9981: } else if (RAW != 0) {
9982: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9983: ctxt->sax->error(ctxt->userData,
9984: "extra content at the end of well balanced chunk\n");
9985: ctxt->wellFormed = 0;
9986: ctxt->disableSAX = 1;
9987: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9988: }
9989: if (ctxt->node != newDoc->children) {
9990: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9991: ctxt->sax->error(ctxt->userData,
9992: "chunk is not well balanced\n");
9993: ctxt->wellFormed = 0;
9994: ctxt->disableSAX = 1;
9995: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9996: }
9997:
9998: if (!ctxt->wellFormed) {
9999: if (ctxt->errNo == 0)
10000: ret = 1;
10001: else
10002: ret = ctxt->errNo;
10003: } else {
10004: if (list != NULL) {
10005: xmlNodePtr cur;
10006:
10007: /*
10008: * Return the newly created nodeset after unlinking it from
10009: * they pseudo parent.
10010: */
10011: cur = newDoc->children->children;
10012: *list = cur;
10013: while (cur != NULL) {
10014: cur->parent = NULL;
10015: cur = cur->next;
10016: }
10017: newDoc->children->children = NULL;
10018: }
10019: ret = 0;
10020: }
10021: if (sax != NULL)
10022: ctxt->sax = oldsax;
10023: xmlFreeParserCtxt(ctxt);
10024: newDoc->intSubset = NULL;
10025: newDoc->extSubset = NULL;
10026: xmlFreeDoc(newDoc);
10027:
10028: return(ret);
10029: }
10030:
10031: /**
10032: * xmlParseBalancedChunk:
1.176 daniel 10033: * @doc: the document the chunk pertains to
10034: * @sax: the SAX handler bloc (possibly NULL)
10035: * @user_data: The user data returned on SAX callbacks (possibly NULL)
1.185 daniel 10036: * @depth: Used for loop detection, use 0
1.176 daniel 10037: * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10038: * @list: the return value for the set of parsed nodes
10039: *
10040: * Parse a well-balanced chunk of an XML document
10041: * called by the parser
10042: * The allowed sequence for the Well Balanced Chunk is the one defined by
10043: * the content production in the XML grammar:
1.144 daniel 10044: *
1.175 daniel 10045: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10046: *
1.176 daniel 10047: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10048: * the parser error code otherwise
1.144 daniel 10049: */
10050:
1.175 daniel 10051: int
10052: xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
1.185 daniel 10053: void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
1.176 daniel 10054: xmlParserCtxtPtr ctxt;
1.175 daniel 10055: xmlDocPtr newDoc;
1.181 daniel 10056: xmlSAXHandlerPtr oldsax = NULL;
1.175 daniel 10057: int size;
1.176 daniel 10058: int ret = 0;
1.175 daniel 10059:
1.185 daniel 10060: if (depth > 40) {
10061: return(XML_ERR_ENTITY_LOOP);
10062: }
10063:
1.175 daniel 10064:
1.176 daniel 10065: if (list != NULL)
10066: *list = NULL;
10067: if (string == NULL)
10068: return(-1);
10069:
10070: size = xmlStrlen(string);
10071:
1.183 daniel 10072: ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
1.176 daniel 10073: if (ctxt == NULL) return(-1);
10074: ctxt->userData = ctxt;
1.175 daniel 10075: if (sax != NULL) {
1.176 daniel 10076: oldsax = ctxt->sax;
10077: ctxt->sax = sax;
10078: if (user_data != NULL)
10079: ctxt->userData = user_data;
1.175 daniel 10080: }
10081: newDoc = xmlNewDoc(BAD_CAST "1.0");
1.176 daniel 10082: if (newDoc == NULL) {
10083: xmlFreeParserCtxt(ctxt);
10084: return(-1);
10085: }
1.175 daniel 10086: if (doc != NULL) {
10087: newDoc->intSubset = doc->intSubset;
10088: newDoc->extSubset = doc->extSubset;
10089: }
1.176 daniel 10090: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10091: if (newDoc->children == NULL) {
10092: if (sax != NULL)
10093: ctxt->sax = oldsax;
10094: xmlFreeParserCtxt(ctxt);
10095: newDoc->intSubset = NULL;
10096: newDoc->extSubset = NULL;
10097: xmlFreeDoc(newDoc);
10098: return(-1);
10099: }
10100: nodePush(ctxt, newDoc->children);
10101: if (doc == NULL) {
10102: ctxt->myDoc = newDoc;
10103: } else {
10104: ctxt->myDoc = doc;
10105: newDoc->children->doc = doc;
10106: }
10107: ctxt->instate = XML_PARSER_CONTENT;
1.185 daniel 10108: ctxt->depth = depth;
1.176 daniel 10109:
10110: /*
10111: * Doing validity checking on chunk doesn't make sense
10112: */
10113: ctxt->validate = 0;
10114:
1.175 daniel 10115: xmlParseContent(ctxt);
1.176 daniel 10116:
10117: if ((RAW == '<') && (NXT(1) == '/')) {
10118: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10119: ctxt->sax->error(ctxt->userData,
10120: "chunk is not well balanced\n");
10121: ctxt->wellFormed = 0;
1.180 daniel 10122: ctxt->disableSAX = 1;
1.176 daniel 10123: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10124: } else if (RAW != 0) {
10125: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10126: ctxt->sax->error(ctxt->userData,
10127: "extra content at the end of well balanced chunk\n");
10128: ctxt->wellFormed = 0;
1.180 daniel 10129: ctxt->disableSAX = 1;
1.176 daniel 10130: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10131: }
10132: if (ctxt->node != newDoc->children) {
10133: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10134: ctxt->sax->error(ctxt->userData,
10135: "chunk is not well balanced\n");
10136: ctxt->wellFormed = 0;
1.180 daniel 10137: ctxt->disableSAX = 1;
1.176 daniel 10138: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10139: }
1.175 daniel 10140:
1.176 daniel 10141: if (!ctxt->wellFormed) {
10142: if (ctxt->errNo == 0)
10143: ret = 1;
10144: else
10145: ret = ctxt->errNo;
10146: } else {
10147: if (list != NULL) {
10148: xmlNodePtr cur;
1.175 daniel 10149:
1.176 daniel 10150: /*
10151: * Return the newly created nodeset after unlinking it from
10152: * they pseudo parent.
10153: */
10154: cur = newDoc->children->children;
10155: *list = cur;
10156: while (cur != NULL) {
10157: cur->parent = NULL;
10158: cur = cur->next;
10159: }
10160: newDoc->children->children = NULL;
10161: }
10162: ret = 0;
1.175 daniel 10163: }
1.176 daniel 10164: if (sax != NULL)
10165: ctxt->sax = oldsax;
1.175 daniel 10166: xmlFreeParserCtxt(ctxt);
10167: newDoc->intSubset = NULL;
10168: newDoc->extSubset = NULL;
1.176 daniel 10169: xmlFreeDoc(newDoc);
1.175 daniel 10170:
1.176 daniel 10171: return(ret);
1.144 daniel 10172: }
10173:
10174: /**
1.181 daniel 10175: * xmlParseBalancedChunkFile:
1.144 daniel 10176: * @doc: the document the chunk pertains to
10177: *
10178: * Parse a well-balanced chunk of an XML document contained in a file
10179: *
10180: * Returns the resulting list of nodes resulting from the parsing,
10181: * they are not added to @node
10182: */
10183:
10184: xmlNodePtr
10185: xmlParseBalancedChunkFile(xmlDocPtr doc, xmlNodePtr node) {
1.156 daniel 10186: /* TODO !!! */
10187: return(NULL);
1.144 daniel 10188: }
10189:
10190: /**
1.181 daniel 10191: * xmlRecoverDoc:
1.123 daniel 10192: * @cur: a pointer to an array of xmlChar
1.59 daniel 10193: *
10194: * parse an XML in-memory document and build a tree.
10195: * In the case the document is not Well Formed, a tree is built anyway
10196: *
1.68 daniel 10197: * Returns the resulting document tree
1.59 daniel 10198: */
10199:
1.69 daniel 10200: xmlDocPtr
1.123 daniel 10201: xmlRecoverDoc(xmlChar *cur) {
1.59 daniel 10202: return(xmlSAXParseDoc(NULL, cur, 1));
1.55 daniel 10203: }
10204:
10205: /**
1.181 daniel 10206: * xmlCreateEntityParserCtxt:
10207: * @URL: the entity URL
10208: * @ID: the entity PUBLIC ID
10209: * @base: a posible base for the target URI
10210: *
10211: * Create a parser context for an external entity
10212: * Automatic support for ZLIB/Compress compressed document is provided
10213: * by default if found at compile-time.
10214: *
10215: * Returns the new parser context or NULL
10216: */
10217: xmlParserCtxtPtr
10218: xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
10219: const xmlChar *base) {
10220: xmlParserCtxtPtr ctxt;
10221: xmlParserInputPtr inputStream;
10222: char *directory = NULL;
10223:
10224: ctxt = xmlNewParserCtxt();
10225: if (ctxt == NULL) {
10226: return(NULL);
10227: }
10228:
1.182 daniel 10229: inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
1.181 daniel 10230: if (inputStream == NULL) {
10231: xmlFreeParserCtxt(ctxt);
10232: return(NULL);
10233: }
10234:
10235: inputPush(ctxt, inputStream);
10236:
10237: if ((ctxt->directory == NULL) && (directory == NULL))
1.182 daniel 10238: directory = xmlParserGetDirectory((char *)URL);
1.181 daniel 10239: if ((ctxt->directory == NULL) && (directory != NULL))
10240: ctxt->directory = directory;
10241:
10242: return(ctxt);
10243: }
10244:
10245: /**
10246: * xmlCreateFileParserCtxt:
1.50 daniel 10247: * @filename: the filename
10248: *
1.69 daniel 10249: * Create a parser context for a file content.
10250: * Automatic support for ZLIB/Compress compressed document is provided
10251: * by default if found at compile-time.
1.50 daniel 10252: *
1.69 daniel 10253: * Returns the new parser context or NULL
1.9 httpng 10254: */
1.69 daniel 10255: xmlParserCtxtPtr
10256: xmlCreateFileParserCtxt(const char *filename)
10257: {
10258: xmlParserCtxtPtr ctxt;
1.40 daniel 10259: xmlParserInputPtr inputStream;
1.91 daniel 10260: xmlParserInputBufferPtr buf;
1.111 daniel 10261: char *directory = NULL;
1.9 httpng 10262:
1.91 daniel 10263: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
10264: if (buf == NULL) return(NULL);
1.9 httpng 10265:
1.97 daniel 10266: ctxt = xmlNewParserCtxt();
1.16 daniel 10267: if (ctxt == NULL) {
10268: return(NULL);
10269: }
1.97 daniel 10270:
1.96 daniel 10271: inputStream = xmlNewInputStream(ctxt);
1.40 daniel 10272: if (inputStream == NULL) {
1.97 daniel 10273: xmlFreeParserCtxt(ctxt);
1.40 daniel 10274: return(NULL);
10275: }
10276:
1.119 daniel 10277: inputStream->filename = xmlMemStrdup(filename);
1.91 daniel 10278: inputStream->buf = buf;
10279: inputStream->base = inputStream->buf->buffer->content;
10280: inputStream->cur = inputStream->buf->buffer->content;
1.16 daniel 10281:
1.40 daniel 10282: inputPush(ctxt, inputStream);
1.110 daniel 10283: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 10284: directory = xmlParserGetDirectory(filename);
10285: if ((ctxt->directory == NULL) && (directory != NULL))
1.110 daniel 10286: ctxt->directory = directory;
1.106 daniel 10287:
1.69 daniel 10288: return(ctxt);
10289: }
10290:
10291: /**
1.181 daniel 10292: * xmlSAXParseFile:
1.69 daniel 10293: * @sax: the SAX handler block
10294: * @filename: the filename
10295: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10296: * documents
10297: *
10298: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10299: * compressed document is provided by default if found at compile-time.
10300: * It use the given SAX function block to handle the parsing callback.
10301: * If sax is NULL, fallback to the default DOM tree building routines.
10302: *
10303: * Returns the resulting document tree
10304: */
10305:
1.79 daniel 10306: xmlDocPtr
10307: xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
1.69 daniel 10308: int recovery) {
10309: xmlDocPtr ret;
10310: xmlParserCtxtPtr ctxt;
1.111 daniel 10311: char *directory = NULL;
1.69 daniel 10312:
10313: ctxt = xmlCreateFileParserCtxt(filename);
10314: if (ctxt == NULL) return(NULL);
1.74 daniel 10315: if (sax != NULL) {
1.93 veillard 10316: if (ctxt->sax != NULL)
1.119 daniel 10317: xmlFree(ctxt->sax);
1.74 daniel 10318: ctxt->sax = sax;
10319: ctxt->userData = NULL;
10320: }
1.106 daniel 10321:
1.110 daniel 10322: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 10323: directory = xmlParserGetDirectory(filename);
10324: if ((ctxt->directory == NULL) && (directory != NULL))
1.156 daniel 10325: ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
1.16 daniel 10326:
10327: xmlParseDocument(ctxt);
1.40 daniel 10328:
1.72 daniel 10329: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 10330: else {
10331: ret = NULL;
1.72 daniel 10332: xmlFreeDoc(ctxt->myDoc);
10333: ctxt->myDoc = NULL;
1.59 daniel 10334: }
1.86 daniel 10335: if (sax != NULL)
10336: ctxt->sax = NULL;
1.69 daniel 10337: xmlFreeParserCtxt(ctxt);
1.20 daniel 10338:
10339: return(ret);
10340: }
10341:
1.55 daniel 10342: /**
1.181 daniel 10343: * xmlParseFile:
1.55 daniel 10344: * @filename: the filename
10345: *
10346: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10347: * compressed document is provided by default if found at compile-time.
10348: *
1.68 daniel 10349: * Returns the resulting document tree
1.55 daniel 10350: */
10351:
1.79 daniel 10352: xmlDocPtr
10353: xmlParseFile(const char *filename) {
1.59 daniel 10354: return(xmlSAXParseFile(NULL, filename, 0));
10355: }
10356:
10357: /**
1.181 daniel 10358: * xmlRecoverFile:
1.59 daniel 10359: * @filename: the filename
10360: *
10361: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10362: * compressed document is provided by default if found at compile-time.
10363: * In the case the document is not Well Formed, a tree is built anyway
10364: *
1.68 daniel 10365: * Returns the resulting document tree
1.59 daniel 10366: */
10367:
1.79 daniel 10368: xmlDocPtr
10369: xmlRecoverFile(const char *filename) {
1.59 daniel 10370: return(xmlSAXParseFile(NULL, filename, 1));
1.55 daniel 10371: }
1.32 daniel 10372:
1.50 daniel 10373: /**
1.181 daniel 10374: * xmlCreateMemoryParserCtxt:
10375: * @buffer: a pointer to a zero terminated char array
10376: * @size: the size of the array (without the trailing 0)
1.50 daniel 10377: *
1.69 daniel 10378: * Create a parser context for an XML in-memory document.
1.50 daniel 10379: *
1.69 daniel 10380: * Returns the new parser context or NULL
1.20 daniel 10381: */
1.69 daniel 10382: xmlParserCtxtPtr
10383: xmlCreateMemoryParserCtxt(char *buffer, int size) {
1.20 daniel 10384: xmlParserCtxtPtr ctxt;
1.40 daniel 10385: xmlParserInputPtr input;
10386:
1.179 daniel 10387: if (buffer[size] != 0)
1.181 daniel 10388: return(NULL);
1.40 daniel 10389:
1.97 daniel 10390: ctxt = xmlNewParserCtxt();
1.181 daniel 10391: if (ctxt == NULL)
1.20 daniel 10392: return(NULL);
1.97 daniel 10393:
1.96 daniel 10394: input = xmlNewInputStream(ctxt);
1.40 daniel 10395: if (input == NULL) {
1.97 daniel 10396: xmlFreeParserCtxt(ctxt);
1.40 daniel 10397: return(NULL);
10398: }
1.20 daniel 10399:
1.40 daniel 10400: input->filename = NULL;
10401: input->line = 1;
10402: input->col = 1;
1.96 daniel 10403: input->buf = NULL;
1.91 daniel 10404: input->consumed = 0;
1.75 daniel 10405:
1.116 daniel 10406: input->base = BAD_CAST buffer;
10407: input->cur = BAD_CAST buffer;
1.69 daniel 10408: input->free = NULL;
1.20 daniel 10409:
1.40 daniel 10410: inputPush(ctxt, input);
1.69 daniel 10411: return(ctxt);
10412: }
10413:
10414: /**
1.181 daniel 10415: * xmlSAXParseMemory:
1.69 daniel 10416: * @sax: the SAX handler block
10417: * @buffer: an pointer to a char array
1.127 daniel 10418: * @size: the size of the array
10419: * @recovery: work in recovery mode, i.e. tries to read not Well Formed
1.69 daniel 10420: * documents
10421: *
10422: * parse an XML in-memory block and use the given SAX function block
10423: * to handle the parsing callback. If sax is NULL, fallback to the default
10424: * DOM tree building routines.
10425: *
10426: * Returns the resulting document tree
10427: */
10428: xmlDocPtr
10429: xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
10430: xmlDocPtr ret;
10431: xmlParserCtxtPtr ctxt;
10432:
10433: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10434: if (ctxt == NULL) return(NULL);
1.74 daniel 10435: if (sax != NULL) {
10436: ctxt->sax = sax;
10437: ctxt->userData = NULL;
10438: }
1.20 daniel 10439:
10440: xmlParseDocument(ctxt);
1.40 daniel 10441:
1.72 daniel 10442: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 10443: else {
10444: ret = NULL;
1.72 daniel 10445: xmlFreeDoc(ctxt->myDoc);
10446: ctxt->myDoc = NULL;
1.59 daniel 10447: }
1.86 daniel 10448: if (sax != NULL)
10449: ctxt->sax = NULL;
1.69 daniel 10450: xmlFreeParserCtxt(ctxt);
1.16 daniel 10451:
1.9 httpng 10452: return(ret);
1.17 daniel 10453: }
10454:
1.55 daniel 10455: /**
1.181 daniel 10456: * xmlParseMemory:
1.68 daniel 10457: * @buffer: an pointer to a char array
1.55 daniel 10458: * @size: the size of the array
10459: *
10460: * parse an XML in-memory block and build a tree.
10461: *
1.68 daniel 10462: * Returns the resulting document tree
1.55 daniel 10463: */
10464:
10465: xmlDocPtr xmlParseMemory(char *buffer, int size) {
1.59 daniel 10466: return(xmlSAXParseMemory(NULL, buffer, size, 0));
10467: }
10468:
10469: /**
1.181 daniel 10470: * xmlRecoverMemory:
1.68 daniel 10471: * @buffer: an pointer to a char array
1.59 daniel 10472: * @size: the size of the array
10473: *
10474: * parse an XML in-memory block and build a tree.
10475: * In the case the document is not Well Formed, a tree is built anyway
10476: *
1.68 daniel 10477: * Returns the resulting document tree
1.59 daniel 10478: */
10479:
10480: xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
10481: return(xmlSAXParseMemory(NULL, buffer, size, 1));
1.17 daniel 10482: }
10483:
10484:
1.50 daniel 10485: /**
10486: * xmlSetupParserForBuffer:
10487: * @ctxt: an XML parser context
1.123 daniel 10488: * @buffer: a xmlChar * buffer
1.50 daniel 10489: * @filename: a file name
10490: *
1.19 daniel 10491: * Setup the parser context to parse a new buffer; Clears any prior
10492: * contents from the parser context. The buffer parameter must not be
10493: * NULL, but the filename parameter can be
10494: */
1.55 daniel 10495: void
1.123 daniel 10496: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
1.17 daniel 10497: const char* filename)
10498: {
1.96 daniel 10499: xmlParserInputPtr input;
1.40 daniel 10500:
1.96 daniel 10501: input = xmlNewInputStream(ctxt);
10502: if (input == NULL) {
10503: perror("malloc");
1.119 daniel 10504: xmlFree(ctxt);
1.145 daniel 10505: return;
1.96 daniel 10506: }
10507:
10508: xmlClearParserCtxt(ctxt);
10509: if (filename != NULL)
1.119 daniel 10510: input->filename = xmlMemStrdup(filename);
1.96 daniel 10511: input->base = buffer;
10512: input->cur = buffer;
10513: inputPush(ctxt, input);
1.17 daniel 10514: }
10515:
1.123 daniel 10516: /**
10517: * xmlSAXUserParseFile:
10518: * @sax: a SAX handler
10519: * @user_data: The user data returned on SAX callbacks
10520: * @filename: a file name
10521: *
10522: * parse an XML file and call the given SAX handler routines.
10523: * Automatic support for ZLIB/Compress compressed document is provided
10524: *
10525: * Returns 0 in case of success or a error number otherwise
10526: */
1.131 daniel 10527: int
10528: xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10529: const char *filename) {
1.123 daniel 10530: int ret = 0;
10531: xmlParserCtxtPtr ctxt;
10532:
10533: ctxt = xmlCreateFileParserCtxt(filename);
10534: if (ctxt == NULL) return -1;
1.134 daniel 10535: if (ctxt->sax != &xmlDefaultSAXHandler)
10536: xmlFree(ctxt->sax);
1.123 daniel 10537: ctxt->sax = sax;
1.140 daniel 10538: if (user_data != NULL)
10539: ctxt->userData = user_data;
1.123 daniel 10540:
10541: xmlParseDocument(ctxt);
10542:
10543: if (ctxt->wellFormed)
10544: ret = 0;
10545: else {
10546: if (ctxt->errNo != 0)
10547: ret = ctxt->errNo;
10548: else
10549: ret = -1;
10550: }
10551: if (sax != NULL)
10552: ctxt->sax = NULL;
10553: xmlFreeParserCtxt(ctxt);
10554:
10555: return ret;
10556: }
10557:
10558: /**
10559: * xmlSAXUserParseMemory:
10560: * @sax: a SAX handler
10561: * @user_data: The user data returned on SAX callbacks
10562: * @buffer: an in-memory XML document input
1.127 daniel 10563: * @size: the length of the XML document in bytes
1.123 daniel 10564: *
10565: * A better SAX parsing routine.
10566: * parse an XML in-memory buffer and call the given SAX handler routines.
10567: *
10568: * Returns 0 in case of success or a error number otherwise
10569: */
10570: int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
10571: char *buffer, int size) {
10572: int ret = 0;
10573: xmlParserCtxtPtr ctxt;
10574:
10575: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10576: if (ctxt == NULL) return -1;
10577: ctxt->sax = sax;
10578: ctxt->userData = user_data;
10579:
10580: xmlParseDocument(ctxt);
10581:
10582: if (ctxt->wellFormed)
10583: ret = 0;
10584: else {
10585: if (ctxt->errNo != 0)
10586: ret = ctxt->errNo;
10587: else
10588: ret = -1;
10589: }
10590: if (sax != NULL)
10591: ctxt->sax = NULL;
10592: xmlFreeParserCtxt(ctxt);
10593:
10594: return ret;
10595: }
10596:
1.32 daniel 10597:
1.98 daniel 10598: /************************************************************************
10599: * *
1.127 daniel 10600: * Miscellaneous *
1.98 daniel 10601: * *
10602: ************************************************************************/
10603:
/**
 * xmlCleanupParser:
 *
 * Cleanup function for the XML parser. It tries to reclaim all
 * parsing related global memory allocated for the parser processing:
 * the character encoding handlers first, then the predefined entities.
 * It doesn't deallocate any document related memory. Calling this
 * function should not prevent reusing the parser.
 */

void
xmlCleanupParser(void)
{
    xmlCleanupCharEncodingHandlers();
    xmlCleanupPredefinedEntities();
}
1.98 daniel 10618:
1.50 daniel 10619: /**
10620: * xmlParserFindNodeInfo:
10621: * @ctxt: an XML parser context
10622: * @node: an XML node within the tree
10623: *
10624: * Find the parser node info struct for a given node
10625: *
1.68 daniel 10626: * Returns an xmlParserNodeInfo block pointer or NULL
1.32 daniel 10627: */
10628: const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
10629: const xmlNode* node)
10630: {
10631: unsigned long pos;
10632:
10633: /* Find position where node should be at */
10634: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
10635: if ( ctx->node_seq.buffer[pos].node == node )
10636: return &ctx->node_seq.buffer[pos];
10637: else
10638: return NULL;
10639: }
10640:
10641:
1.50 daniel 10642: /**
1.181 daniel 10643: * xmlInitNodeInfoSeq:
1.50 daniel 10644: * @seq: a node info sequence pointer
10645: *
10646: * -- Initialize (set to initial state) node info sequence
1.32 daniel 10647: */
1.55 daniel 10648: void
10649: xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 10650: {
10651: seq->length = 0;
10652: seq->maximum = 0;
10653: seq->buffer = NULL;
10654: }
10655:
1.50 daniel 10656: /**
1.181 daniel 10657: * xmlClearNodeInfoSeq:
1.50 daniel 10658: * @seq: a node info sequence pointer
10659: *
10660: * -- Clear (release memory and reinitialize) node
1.32 daniel 10661: * info sequence
10662: */
1.55 daniel 10663: void
10664: xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 10665: {
10666: if ( seq->buffer != NULL )
1.119 daniel 10667: xmlFree(seq->buffer);
1.32 daniel 10668: xmlInitNodeInfoSeq(seq);
10669: }
10670:
10671:
1.50 daniel 10672: /**
10673: * xmlParserFindNodeInfoIndex:
10674: * @seq: a node info sequence pointer
10675: * @node: an XML node pointer
10676: *
10677: *
1.32 daniel 10678: * xmlParserFindNodeInfoIndex : Find the index that the info record for
10679: * the given node is or should be at in a sorted sequence
1.68 daniel 10680: *
10681: * Returns a long indicating the position of the record
1.32 daniel 10682: */
10683: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
10684: const xmlNode* node)
10685: {
10686: unsigned long upper, lower, middle;
10687: int found = 0;
10688:
10689: /* Do a binary search for the key */
10690: lower = 1;
10691: upper = seq->length;
10692: middle = 0;
10693: while ( lower <= upper && !found) {
10694: middle = lower + (upper - lower) / 2;
10695: if ( node == seq->buffer[middle - 1].node )
10696: found = 1;
10697: else if ( node < seq->buffer[middle - 1].node )
10698: upper = middle - 1;
10699: else
10700: lower = middle + 1;
10701: }
10702:
10703: /* Return position */
10704: if ( middle == 0 || seq->buffer[middle - 1].node < node )
10705: return middle;
10706: else
10707: return middle - 1;
10708: }
10709:
10710:
1.50 daniel 10711: /**
10712: * xmlParserAddNodeInfo:
10713: * @ctxt: an XML parser context
1.68 daniel 10714: * @info: a node info sequence pointer
1.50 daniel 10715: *
10716: * Insert node info record into the sorted sequence
1.32 daniel 10717: */
1.55 daniel 10718: void
10719: xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
1.68 daniel 10720: const xmlParserNodeInfo* info)
1.32 daniel 10721: {
10722: unsigned long pos;
10723: static unsigned int block_size = 5;
10724:
10725: /* Find pos and check to see if node is already in the sequence */
1.55 daniel 10726: pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
10727: if ( pos < ctxt->node_seq.length
10728: && ctxt->node_seq.buffer[pos].node == info->node ) {
10729: ctxt->node_seq.buffer[pos] = *info;
1.32 daniel 10730: }
10731:
10732: /* Otherwise, we need to add new node to buffer */
10733: else {
10734: /* Expand buffer by 5 if needed */
1.55 daniel 10735: if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
1.32 daniel 10736: xmlParserNodeInfo* tmp_buffer;
1.55 daniel 10737: unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
10738: *(ctxt->node_seq.maximum + block_size));
1.32 daniel 10739:
1.55 daniel 10740: if ( ctxt->node_seq.buffer == NULL )
1.119 daniel 10741: tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
1.32 daniel 10742: else
1.119 daniel 10743: tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
1.32 daniel 10744:
10745: if ( tmp_buffer == NULL ) {
1.55 daniel 10746: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 10747: ctxt->sax->error(ctxt->userData, "Out of memory\n");
1.123 daniel 10748: ctxt->errNo = XML_ERR_NO_MEMORY;
1.32 daniel 10749: return;
10750: }
1.55 daniel 10751: ctxt->node_seq.buffer = tmp_buffer;
10752: ctxt->node_seq.maximum += block_size;
1.32 daniel 10753: }
10754:
10755: /* If position is not at end, move elements out of the way */
1.55 daniel 10756: if ( pos != ctxt->node_seq.length ) {
1.32 daniel 10757: unsigned long i;
10758:
1.55 daniel 10759: for ( i = ctxt->node_seq.length; i > pos; i-- )
10760: ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
1.32 daniel 10761: }
10762:
10763: /* Copy element and increase length */
1.55 daniel 10764: ctxt->node_seq.buffer[pos] = *info;
10765: ctxt->node_seq.length++;
1.32 daniel 10766: }
10767: }
1.77 daniel 10768:
1.98 daniel 10769:
10770: /**
1.181 daniel 10771: * xmlSubstituteEntitiesDefault:
1.98 daniel 10772: * @val: int 0 or 1
10773: *
10774: * Set and return the previous value for default entity support.
10775: * Initially the parser always keep entity references instead of substituting
10776: * entity values in the output. This function has to be used to change the
10777: * default parser behaviour
10778: * SAX::subtituteEntities() has to be used for changing that on a file by
10779: * file basis.
10780: *
10781: * Returns the last value for 0 for no substitution, 1 for substitution.
10782: */
10783:
10784: int
10785: xmlSubstituteEntitiesDefault(int val) {
10786: int old = xmlSubstituteEntitiesDefaultValue;
10787:
10788: xmlSubstituteEntitiesDefaultValue = val;
1.180 daniel 10789: return(old);
10790: }
10791:
10792: /**
10793: * xmlKeepBlanksDefault:
10794: * @val: int 0 or 1
10795: *
10796: * Set and return the previous value for default blanks text nodes support.
10797: * The 1.x version of the parser used an heuristic to try to detect
10798: * ignorable white spaces. As a result the SAX callback was generating
10799: * ignorableWhitespace() callbacks instead of characters() one, and when
10800: * using the DOM output text nodes containing those blanks were not generated.
10801: * The 2.x and later version will switch to the XML standard way and
10802: * ignorableWhitespace() are only generated when running the parser in
10803: * validating mode and when the current element doesn't allow CDATA or
10804: * mixed content.
10805: * This function is provided as a way to force the standard behaviour
10806: * on 1.X libs and to switch back to the old mode for compatibility when
10807: * running 1.X client code on 2.X . Upgrade of 1.X code should be done
10808: * by using xmlIsBlankNode() commodity function to detect the "empty"
10809: * nodes generated.
10810: * This value also affect autogeneration of indentation when saving code
10811: * if blanks sections are kept, indentation is not generated.
10812: *
10813: * Returns the last value for 0 for no substitution, 1 for substitution.
10814: */
10815:
10816: int
10817: xmlKeepBlanksDefault(int val) {
10818: int old = xmlKeepBlanksDefaultValue;
10819:
10820: xmlKeepBlanksDefaultValue = val;
10821: xmlIndentTreeOutput = !val;
1.98 daniel 10822: return(old);
10823: }
1.77 daniel 10824:
Webmaster