Annotation of XML/parser.c, revision 1.189
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.15 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.60 daniel 6: * Daniel.Veillard@w3.org
1.1 veillard 7: */
8:
1.26 daniel 9: #ifdef WIN32
1.138 daniel 10: #include "win32config.h"
1.26 daniel 11: #else
1.121 daniel 12: #include "config.h"
1.26 daniel 13: #endif
1.121 daniel 14:
1.1 veillard 15: #include <stdio.h>
1.121 daniel 16: #include <string.h> /* for memset() only */
17: #ifdef HAVE_CTYPE_H
1.1 veillard 18: #include <ctype.h>
1.121 daniel 19: #endif
20: #ifdef HAVE_STDLIB_H
1.50 daniel 21: #include <stdlib.h>
1.121 daniel 22: #endif
23: #ifdef HAVE_SYS_STAT_H
1.9 httpng 24: #include <sys/stat.h>
1.121 daniel 25: #endif
1.9 httpng 26: #ifdef HAVE_FCNTL_H
27: #include <fcntl.h>
28: #endif
1.10 httpng 29: #ifdef HAVE_UNISTD_H
30: #include <unistd.h>
31: #endif
1.20 daniel 32: #ifdef HAVE_ZLIB_H
33: #include <zlib.h>
34: #endif
1.1 veillard 35:
1.188 daniel 36: #include <libxml/xmlmemory.h>
37: #include <libxml/tree.h>
38: #include <libxml/parser.h>
39: #include <libxml/entities.h>
40: #include <libxml/encoding.h>
41: #include <libxml/valid.h>
42: #include <libxml/parserInternals.h>
43: #include <libxml/xmlIO.h>
1.122 daniel 44: #include "xml-error.h"
1.1 veillard 45:
/*
 * Buffer sizes; both names are only defined here, their users are not
 * visible in this part of the file.
 */
1.140 daniel 46: #define XML_PARSER_BIG_BUFFER_SIZE 1000
47: #define XML_PARSER_BUFFER_SIZE 100
48:
/* Version string of the library, taken from LIBXML_VERSION_STRING. */
1.188 daniel 49: const char *xmlParserVersion = LIBXML_VERSION_STRING;
/* NOTE(review): presumably the default for reporting warnings - confirm. */
1.160 daniel 50: int xmlGetWarningsDefaultValue = 1;
1.86 daniel 51:
1.139 daniel 52: /*
53: * List of XML prefixed PI allowed by W3C specs
54: */
55:
/* NULL-terminated array of processing-instruction target names. */
56: const char *xmlW3CPIs[] = {
57: "xml-stylesheet",
58: NULL
59: };
1.91 daniel 60:
/*
 * Forward declarations: the first two are invoked by the SKIP/NEXTL macros
 * and by xmlNextChar()/xmlSkipBlankChars() below when the current byte is
 * '&' or '%'.
 */
1.151 daniel 61: void xmlParserHandleReference(xmlParserCtxtPtr ctxt);
62: void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
63: xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
64: const xmlChar **str);
1.91 daniel 65: /************************************************************************
66: * *
67: * Input handling functions for progressive parsing *
68: * *
69: ************************************************************************/
70:
71: /* #define DEBUG_INPUT */
1.140 daniel 72: /* #define DEBUG_STACK */
73: /* #define DEBUG_PUSH */
74:
1.91 daniel 75:
/*
 * INPUT_CHUNK: lookahead threshold tested by xmlParserInputGrow() and
 * xmlParserInputShrink(), and the size requested by the GROW-style macros.
 */
1.110 daniel 76: #define INPUT_CHUNK 250
77: /* we need to keep enough input to show errors in context */
/* LINE_LEN: consumed bytes xmlParserInputShrink() leaves in the buffer. */
78: #define LINE_LEN 80
1.91 daniel 79:
#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/**
 * check_buffer:
 * @in:  an XML parser input
 *
 * Debug-only consistency check (compiled when DEBUG_INPUT is defined).
 * Reports on stderr when the input's base pointer does not match the
 * underlying buffer content, or when the cursor lies outside
 * [base, base + use], then dumps the buffer state.
 */
void check_buffer(xmlParserInputPtr in) {
    if (in->base != in->buf->buffer->content) {
        fprintf(stderr, "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        fprintf(stderr, "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + in->buf->buffer->use) {
        fprintf(stderr, "xmlParserInput: cur > base + use problem\n");
    }
    /*
     * Pointers are printed with %p (casting them to int truncates on
     * 64-bit targets) and every integer argument is cast to int so that
     * it matches its %d conversion.
     */
    fprintf(stderr, "buffer %p : content %p, cur %d, use %d, size %d\n",
            (void *) in, (void *) in->buf->buffer->content,
            (int) (in->cur - in->base),
            (int) in->buf->buffer->use, (int) in->buf->buffer->size);
}

#else
#define CHECK_BUFFER(in)
#endif
101:
1.91 daniel 102:
103: /**
104: * xmlParserInputRead:
105: * @in: an XML parser input
106: * @len: an indicative size for the lookahead
107: *
108: * This function refresh the input for the parser. It doesn't try to
109: * preserve pointers to the input buffer, and discard already read data
110: *
1.123 daniel 111: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
1.91 daniel 112: * end of this entity
113: */
114: int
115: xmlParserInputRead(xmlParserInputPtr in, int len) {
116: int ret;
117: int used;
118: int index;
119:
120: #ifdef DEBUG_INPUT
121: fprintf(stderr, "Read\n");
122: #endif
123: if (in->buf == NULL) return(-1);
124: if (in->base == NULL) return(-1);
125: if (in->cur == NULL) return(-1);
126: if (in->buf->buffer == NULL) return(-1);
127:
128: CHECK_BUFFER(in);
129:
130: used = in->cur - in->buf->buffer->content;
131: ret = xmlBufferShrink(in->buf->buffer, used);
132: if (ret > 0) {
133: in->cur -= ret;
134: in->consumed += ret;
135: }
136: ret = xmlParserInputBufferRead(in->buf, len);
137: if (in->base != in->buf->buffer->content) {
138: /*
139: * the buffer has been realloced
140: */
141: index = in->cur - in->base;
142: in->base = in->buf->buffer->content;
143: in->cur = &in->buf->buffer->content[index];
144: }
145:
146: CHECK_BUFFER(in);
147:
148: return(ret);
149: }
150:
151: /**
152: * xmlParserInputGrow:
153: * @in: an XML parser input
154: * @len: an indicative size for the lookahead
155: *
156: * This function increase the input for the parser. It tries to
157: * preserve pointers to the input buffer, and keep already read data
158: *
1.123 daniel 159: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
1.91 daniel 160: * end of this entity
161: */
162: int
163: xmlParserInputGrow(xmlParserInputPtr in, int len) {
164: int ret;
165: int index;
166:
167: #ifdef DEBUG_INPUT
168: fprintf(stderr, "Grow\n");
169: #endif
170: if (in->buf == NULL) return(-1);
171: if (in->base == NULL) return(-1);
172: if (in->cur == NULL) return(-1);
173: if (in->buf->buffer == NULL) return(-1);
174:
175: CHECK_BUFFER(in);
176:
177: index = in->cur - in->base;
178: if (in->buf->buffer->use > index + INPUT_CHUNK) {
179:
180: CHECK_BUFFER(in);
181:
182: return(0);
183: }
1.189 ! daniel 184: if (in->buf->readcallback != NULL)
1.140 daniel 185: ret = xmlParserInputBufferGrow(in->buf, len);
186: else
187: return(0);
1.135 daniel 188:
189: /*
190: * NOTE : in->base may be a "dandling" i.e. freed pointer in this
191: * block, but we use it really as an integer to do some
192: * pointer arithmetic. Insure will raise it as a bug but in
193: * that specific case, that's not !
194: */
1.91 daniel 195: if (in->base != in->buf->buffer->content) {
196: /*
197: * the buffer has been realloced
198: */
199: index = in->cur - in->base;
200: in->base = in->buf->buffer->content;
201: in->cur = &in->buf->buffer->content[index];
202: }
203:
204: CHECK_BUFFER(in);
205:
206: return(ret);
207: }
208:
209: /**
210: * xmlParserInputShrink:
211: * @in: an XML parser input
212: *
213: * This function removes used input for the parser.
214: */
215: void
216: xmlParserInputShrink(xmlParserInputPtr in) {
217: int used;
218: int ret;
219: int index;
220:
221: #ifdef DEBUG_INPUT
222: fprintf(stderr, "Shrink\n");
223: #endif
224: if (in->buf == NULL) return;
225: if (in->base == NULL) return;
226: if (in->cur == NULL) return;
227: if (in->buf->buffer == NULL) return;
228:
229: CHECK_BUFFER(in);
230:
231: used = in->cur - in->buf->buffer->content;
232: if (used > INPUT_CHUNK) {
1.110 daniel 233: ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1.91 daniel 234: if (ret > 0) {
235: in->cur -= ret;
236: in->consumed += ret;
237: }
238: }
239:
240: CHECK_BUFFER(in);
241:
242: if (in->buf->buffer->use > INPUT_CHUNK) {
243: return;
244: }
245: xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
246: if (in->base != in->buf->buffer->content) {
247: /*
248: * the buffer has been realloced
249: */
250: index = in->cur - in->base;
251: in->base = in->buf->buffer->content;
252: in->cur = &in->buf->buffer->content[index];
253: }
254:
255: CHECK_BUFFER(in);
256: }
257:
1.45 daniel 258: /************************************************************************
259: * *
260: * Parser stacks related functions and macros *
261: * *
262: ************************************************************************/
1.79 daniel 263:
/*
 * Global default values. NOTE(review): presumably copied into new parser
 * contexts; the code consuming them is not visible here - confirm.
 */
264: int xmlSubstituteEntitiesDefaultValue = 0;
1.100 daniel 265: int xmlDoValidityCheckingDefaultValue = 0;
1.180 daniel 266: int xmlKeepBlanksDefaultValue = 1;
/* Forward declaration; the definition is not in this part of the file. */
1.135 daniel 267: xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
268: const xmlChar ** str);
1.79 daniel 269:
1.1 veillard 270: /*
1.40 daniel 271: * Generic function for accessing stacks in the Parser Context
1.1 veillard 272: */
273:
1.140 daniel 274: #define PUSH_AND_POP(scope, type, name) \
275: scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
1.31 daniel 276: if (ctxt->name##Nr >= ctxt->name##Max) { \
277: ctxt->name##Max *= 2; \
1.119 daniel 278: ctxt->name##Tab = (void *) xmlRealloc(ctxt->name##Tab, \
1.40 daniel 279: ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
280: if (ctxt->name##Tab == NULL) { \
1.31 daniel 281: fprintf(stderr, "realloc failed !\n"); \
1.145 daniel 282: return(0); \
1.31 daniel 283: } \
284: } \
1.40 daniel 285: ctxt->name##Tab[ctxt->name##Nr] = value; \
286: ctxt->name = value; \
287: return(ctxt->name##Nr++); \
1.31 daniel 288: } \
1.140 daniel 289: scope type name##Pop(xmlParserCtxtPtr ctxt) { \
1.69 daniel 290: type ret; \
1.40 daniel 291: if (ctxt->name##Nr <= 0) return(0); \
292: ctxt->name##Nr--; \
1.50 daniel 293: if (ctxt->name##Nr > 0) \
294: ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
295: else \
296: ctxt->name = NULL; \
1.69 daniel 297: ret = ctxt->name##Tab[ctxt->name##Nr]; \
298: ctxt->name##Tab[ctxt->name##Nr] = 0; \
299: return(ret); \
1.31 daniel 300: } \
301:
1.140 daniel 302: PUSH_AND_POP(extern, xmlParserInputPtr, input)
303: PUSH_AND_POP(extern, xmlNodePtr, node)
304: PUSH_AND_POP(extern, xmlChar*, name)
1.40 daniel 305:
1.176 daniel 306: int spacePush(xmlParserCtxtPtr ctxt, int val) {
307: if (ctxt->spaceNr >= ctxt->spaceMax) {
308: ctxt->spaceMax *= 2;
309: ctxt->spaceTab = (void *) xmlRealloc(ctxt->spaceTab,
310: ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
311: if (ctxt->spaceTab == NULL) {
312: fprintf(stderr, "realloc failed !\n");
313: return(0);
314: }
315: }
316: ctxt->spaceTab[ctxt->spaceNr] = val;
317: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
318: return(ctxt->spaceNr++);
319: }
320:
321: int spacePop(xmlParserCtxtPtr ctxt) {
322: int ret;
323: if (ctxt->spaceNr <= 0) return(0);
324: ctxt->spaceNr--;
325: if (ctxt->spaceNr > 0)
326: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
327: else
328: ctxt->space = NULL;
329: ret = ctxt->spaceTab[ctxt->spaceNr];
330: ctxt->spaceTab[ctxt->spaceNr] = -1;
331: return(ret);
332: }
333:
1.55 daniel 334: /*
335: * Macros for accessing the content. Those should be used only by the parser,
336: * and not exported.
337: *
338: * Dirty macros, i.e. one need to make assumption on the context to use them
339: *
1.123 daniel 340: * CUR_PTR return the current pointer to the xmlChar to be parsed.
1.151 daniel 341: * To be used with extreme caution since operations consuming
342: * characters may move the input buffer to a different location !
1.123 daniel 343: * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1.152 daniel 344: * in ISO-Latin or UTF-8.
1.151 daniel 345: * This should be used internally by the parser
1.55 daniel 346: * only to compare to ASCII values otherwise it would break when
347: * running with UTF-8 encoding.
1.123 daniel 348: * NXT(n) returns the n'th next xmlChar. Same as CUR, it should be used only
1.55 daniel 349: * to compare on ASCII based substring.
1.123 daniel 350: * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1.55 daniel 351: * strings within the parser.
352: *
1.77 daniel 353: * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1.55 daniel 354: *
355: * NEXT Skip to the next character, this does the proper decoding
356: * in UTF-8 mode. It also pop-up unfinished entities on the fly.
1.77 daniel 357: * COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
1.155 daniel 358: * CUR_CHAR Return the current char as an int as well as its length.
1.55 daniel 359: */
1.45 daniel 360:
/*
 * All the macros below expect a variable named `ctxt' (the parser context)
 * to be in scope at the point of use.
 */
/* RAW: -1 while ctxt->token is non-zero, else the byte under the cursor. */
1.152 daniel 361: #define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
/* CUR: the pending ctxt->token if non-zero, else the byte under the cursor. */
1.97 daniel 362: #define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
/* NXT(val): the byte located val positions after the cursor (no bound check). */
1.55 daniel 363: #define NXT(val) ctxt->input->cur[(val)]
/* CUR_PTR: the cursor itself. */
364: #define CUR_PTR ctxt->input->cur
1.154 daniel 365:
/*
 * SKIP(val): advance nbChars and the cursor by val, then hand over to the
 * PE reference handler on '%' and to the reference handler on '&', and pop
 * the input when the cursor is on a 0 byte and growing yields nothing.
 * This expands to several statements without a do { } while (0) wrapper:
 * used as the body of an unbraced if/else/loop, only the first statement
 * is controlled by it.
 */
1.164 daniel 366: #define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val); \
367: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
1.168 daniel 368: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); \
369: if ((*ctxt->input->cur == 0) && \
370: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
371: xmlPopInput(ctxt)
1.164 daniel 372:
/*
 * SHRINK: drop consumed input, then pop the input when the cursor is on a
 * 0 byte and growing yields nothing. Multi-statement, see SKIP.
 */
1.97 daniel 373: #define SHRINK xmlParserInputShrink(ctxt->input); \
374: if ((*ctxt->input->cur == 0) && \
375: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
376: xmlPopInput(ctxt)
377:
/*
 * GROW: request INPUT_CHUNK more input, then pop the input when the cursor
 * is on a 0 byte and a second grow yields nothing. Multi-statement, see SKIP.
 */
378: #define GROW xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
379: if ((*ctxt->input->cur == 0) && \
380: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
381: xmlPopInput(ctxt)
1.55 daniel 382:
/* SKIP_BLANKS: call xmlSkipBlankChars(); the expansion already ends with ';'. */
1.155 daniel 383: #define SKIP_BLANKS xmlSkipBlankChars(ctxt);
1.154 daniel 384:
/* NEXT: call xmlNextChar(); the expansion already ends with ';'. */
1.151 daniel 385: #define NEXT xmlNextChar(ctxt);
1.154 daniel 386:
/*
 * NEXTL(l): update line/col for the byte under the cursor, clear
 * ctxt->token, advance the cursor by l bytes, then hand over to the
 * reference handlers on '%' / '&'. Unlike xmlNextChar() it neither updates
 * nbChars nor grows/pops the input. Multi-statement, see SKIP.
 */
1.153 daniel 387: #define NEXTL(l) \
388: if (*(ctxt->input->cur) == '\n') { \
389: ctxt->input->line++; ctxt->input->col = 1; \
390: } else ctxt->input->col++; \
1.154 daniel 391: ctxt->token = 0; ctxt->input->cur += l; \
392: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
393: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
394:
/* CUR_CHAR(l): xmlCurrentChar() with the length stored in l (ends with ';'). */
1.152 daniel 395: #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l);
/* CUR_SCHAR(s, l): xmlStringCurrentChar() on string s, length stored in l. */
1.162 daniel 396: #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l);
1.154 daniel 397:
/*
 * COPY_BUF(l,b,i,v): append char v to buffer b at index i; a length l of 1
 * stores the single byte directly, anything else goes through
 * xmlCopyChar(). i is advanced; no bound check is done on b.
 */
1.152 daniel 398: #define COPY_BUF(l,b,i,v) \
399: if (l == 1) b[i++] = (xmlChar) v; \
400: else i += xmlCopyChar(l,&b[i],v);
1.151 daniel 401:
402: /**
403: * xmlNextChar:
404: * @ctxt: the XML parser context
405: *
406: * Skip to the next char input char.
407: */
1.55 daniel 408:
1.151 daniel 409: void
410: xmlNextChar(xmlParserCtxtPtr ctxt) {
1.176 daniel 411: /*
412: * TODO: 2.11 End-of-Line Handling
413: * the literal two-character sequence "#xD#xA" or a standalone
414: * literal #xD, an XML processor must pass to the application
415: * the single character #xA.
416: */
1.151 daniel 417: if (ctxt->token != 0) ctxt->token = 0;
418: else {
419: if ((*ctxt->input->cur == 0) &&
420: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
421: (ctxt->instate != XML_PARSER_COMMENT)) {
422: /*
423: * If we are at the end of the current entity and
424: * the context allows it, we pop consumed entities
425: * automatically.
426: * TODO: the auto closing should be blocked in other cases
427: */
428: xmlPopInput(ctxt);
429: } else {
430: if (*(ctxt->input->cur) == '\n') {
431: ctxt->input->line++; ctxt->input->col = 1;
432: } else ctxt->input->col++;
433: if (ctxt->encoding == NULL) {
434: /*
435: * We are supposed to handle UTF8, check it's valid
436: * From rfc2044: encoding of the Unicode values on UTF-8:
437: *
438: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
439: * 0000 0000-0000 007F 0xxxxxxx
440: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
441: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
442: *
1.160 daniel 443: * Check for the 0x110000 limit too
1.151 daniel 444: */
445: const unsigned char *cur = ctxt->input->cur;
446: unsigned char c;
1.91 daniel 447:
1.151 daniel 448: c = *cur;
449: if (c & 0x80) {
450: if (cur[1] == 0)
451: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
452: if ((cur[1] & 0xc0) != 0x80)
453: goto encoding_error;
454: if ((c & 0xe0) == 0xe0) {
455: unsigned int val;
456:
457: if (cur[2] == 0)
458: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
459: if ((cur[2] & 0xc0) != 0x80)
460: goto encoding_error;
461: if ((c & 0xf0) == 0xf0) {
462: if (cur[3] == 0)
463: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
464: if (((c & 0xf8) != 0xf0) ||
465: ((cur[3] & 0xc0) != 0x80))
466: goto encoding_error;
467: /* 4-byte code */
468: ctxt->input->cur += 4;
469: val = (cur[0] & 0x7) << 18;
470: val |= (cur[1] & 0x3f) << 12;
471: val |= (cur[2] & 0x3f) << 6;
472: val |= cur[3] & 0x3f;
473: } else {
474: /* 3-byte code */
475: ctxt->input->cur += 3;
476: val = (cur[0] & 0xf) << 12;
477: val |= (cur[1] & 0x3f) << 6;
478: val |= cur[2] & 0x3f;
479: }
480: if (((val > 0xd7ff) && (val < 0xe000)) ||
481: ((val > 0xfffd) && (val < 0x10000)) ||
1.160 daniel 482: (val >= 0x110000)) {
1.151 daniel 483: if ((ctxt->sax != NULL) &&
484: (ctxt->sax->error != NULL))
485: ctxt->sax->error(ctxt->userData,
486: "Char out of allowed range\n");
487: ctxt->errNo = XML_ERR_INVALID_ENCODING;
488: ctxt->wellFormed = 0;
1.180 daniel 489: ctxt->disableSAX = 1;
1.151 daniel 490: }
491: } else
492: /* 2-byte code */
493: ctxt->input->cur += 2;
494: } else
495: /* 1-byte code */
496: ctxt->input->cur++;
497: } else {
498: /*
499: * Assume it's a fixed lenght encoding (1) with
500: * a compatibke encoding for the ASCII set, since
501: * XML constructs only use < 128 chars
502: */
503: ctxt->input->cur++;
504: }
505: ctxt->nbChars++;
506: if (*ctxt->input->cur == 0)
507: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
508: }
509: }
1.154 daniel 510: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
511: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
1.168 daniel 512: if ((*ctxt->input->cur == 0) &&
513: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
514: xmlPopInput(ctxt);
1.151 daniel 515: return;
516: encoding_error:
517: /*
518: * If we detect an UTF8 error that probably mean that the
519: * input encoding didn't get properly advertized in the
520: * declaration header. Report the error and switch the encoding
521: * to ISO-Latin-1 (if you don't like this policy, just declare the
522: * encoding !)
523: */
524: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
525: ctxt->sax->error(ctxt->userData,
526: "Input is not proper UTF-8, indicate encoding !\n");
527: ctxt->errNo = XML_ERR_INVALID_ENCODING;
528:
529: ctxt->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
530: ctxt->input->cur++;
531: return;
532: }
1.42 daniel 533:
1.152 daniel 534: /**
535: * xmlCurrentChar:
536: * @ctxt: the XML parser context
537: * @len: pointer to the length of the char read
538: *
539: * The current char value, if using UTF-8 this may actaully span multiple
1.180 daniel 540: * bytes in the input buffer. Implement the end of line normalization:
541: * 2.11 End-of-Line Handling
542: * Wherever an external parsed entity or the literal entity value
543: * of an internal parsed entity contains either the literal two-character
544: * sequence "#xD#xA" or a standalone literal #xD, an XML processor
545: * must pass to the application the single character #xA.
546: * This behavior can conveniently be produced by normalizing all
547: * line breaks to #xA on input, before parsing.)
1.152 daniel 548: *
549: * Returns the current char value and its lenght
550: */
551:
552: int
553: xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
554: if (ctxt->token != 0) {
555: *len = 0;
556: return(ctxt->token);
557: }
558: if (ctxt->encoding == NULL) {
559: /*
560: * We are supposed to handle UTF8, check it's valid
561: * From rfc2044: encoding of the Unicode values on UTF-8:
562: *
563: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
564: * 0000 0000-0000 007F 0xxxxxxx
565: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
566: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
567: *
1.160 daniel 568: * Check for the 0x110000 limit too
1.152 daniel 569: */
570: const unsigned char *cur = ctxt->input->cur;
571: unsigned char c;
572: unsigned int val;
573:
574: c = *cur;
575: if (c & 0x80) {
576: if (cur[1] == 0)
577: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
578: if ((cur[1] & 0xc0) != 0x80)
579: goto encoding_error;
580: if ((c & 0xe0) == 0xe0) {
581:
582: if (cur[2] == 0)
583: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
584: if ((cur[2] & 0xc0) != 0x80)
585: goto encoding_error;
586: if ((c & 0xf0) == 0xf0) {
587: if (cur[3] == 0)
588: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
589: if (((c & 0xf8) != 0xf0) ||
590: ((cur[3] & 0xc0) != 0x80))
591: goto encoding_error;
592: /* 4-byte code */
593: *len = 4;
594: val = (cur[0] & 0x7) << 18;
595: val |= (cur[1] & 0x3f) << 12;
596: val |= (cur[2] & 0x3f) << 6;
597: val |= cur[3] & 0x3f;
598: } else {
599: /* 3-byte code */
600: *len = 3;
601: val = (cur[0] & 0xf) << 12;
602: val |= (cur[1] & 0x3f) << 6;
603: val |= cur[2] & 0x3f;
604: }
605: } else {
606: /* 2-byte code */
607: *len = 2;
608: val = (cur[0] & 0x1f) << 6;
1.168 daniel 609: val |= cur[1] & 0x3f;
1.152 daniel 610: }
611: if (!IS_CHAR(val)) {
612: if ((ctxt->sax != NULL) &&
613: (ctxt->sax->error != NULL))
614: ctxt->sax->error(ctxt->userData,
615: "Char out of allowed range\n");
616: ctxt->errNo = XML_ERR_INVALID_ENCODING;
617: ctxt->wellFormed = 0;
1.180 daniel 618: ctxt->disableSAX = 1;
1.152 daniel 619: }
620: return(val);
621: } else {
622: /* 1-byte code */
623: *len = 1;
1.180 daniel 624: if (*ctxt->input->cur == 0xD) {
625: if (ctxt->input->cur[1] == 0xA) {
626: ctxt->nbChars++;
627: ctxt->input->cur++;
628: }
629: return(0xA);
630: }
1.152 daniel 631: return((int) *ctxt->input->cur);
632: }
633: }
634: /*
635: * Assume it's a fixed lenght encoding (1) with
636: * a compatibke encoding for the ASCII set, since
637: * XML constructs only use < 128 chars
638: */
639: *len = 1;
1.180 daniel 640: if (*ctxt->input->cur == 0xD) {
641: if (ctxt->input->cur[1] == 0xA) {
642: ctxt->nbChars++;
643: ctxt->input->cur++;
644: }
645: return(0xA);
646: }
1.152 daniel 647: return((int) *ctxt->input->cur);
648: encoding_error:
649: /*
650: * If we detect an UTF8 error that probably mean that the
651: * input encoding didn't get properly advertized in the
652: * declaration header. Report the error and switch the encoding
653: * to ISO-Latin-1 (if you don't like this policy, just declare the
654: * encoding !)
655: */
656: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
657: ctxt->sax->error(ctxt->userData,
658: "Input is not proper UTF-8, indicate encoding !\n");
659: ctxt->errNo = XML_ERR_INVALID_ENCODING;
660:
661: ctxt->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
662: *len = 1;
663: return((int) *ctxt->input->cur);
664: }
665:
666: /**
1.162 daniel 667: * xmlStringCurrentChar:
668: * @ctxt: the XML parser context
669: * @cur: pointer to the beginning of the char
670: * @len: pointer to the length of the char read
671: *
672: * The current char value, if using UTF-8 this may actaully span multiple
673: * bytes in the input buffer.
674: *
675: * Returns the current char value and its lenght
676: */
677:
678: int
679: xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
680: if (ctxt->encoding == NULL) {
681: /*
682: * We are supposed to handle UTF8, check it's valid
683: * From rfc2044: encoding of the Unicode values on UTF-8:
684: *
685: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
686: * 0000 0000-0000 007F 0xxxxxxx
687: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
688: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
689: *
690: * Check for the 0x110000 limit too
691: */
692: unsigned char c;
693: unsigned int val;
694:
695: c = *cur;
696: if (c & 0x80) {
697: if ((cur[1] & 0xc0) != 0x80)
698: goto encoding_error;
699: if ((c & 0xe0) == 0xe0) {
700:
701: if ((cur[2] & 0xc0) != 0x80)
702: goto encoding_error;
703: if ((c & 0xf0) == 0xf0) {
704: if (((c & 0xf8) != 0xf0) ||
705: ((cur[3] & 0xc0) != 0x80))
706: goto encoding_error;
707: /* 4-byte code */
708: *len = 4;
709: val = (cur[0] & 0x7) << 18;
710: val |= (cur[1] & 0x3f) << 12;
711: val |= (cur[2] & 0x3f) << 6;
712: val |= cur[3] & 0x3f;
713: } else {
714: /* 3-byte code */
715: *len = 3;
716: val = (cur[0] & 0xf) << 12;
717: val |= (cur[1] & 0x3f) << 6;
718: val |= cur[2] & 0x3f;
719: }
720: } else {
721: /* 2-byte code */
722: *len = 2;
723: val = (cur[0] & 0x1f) << 6;
724: val |= cur[2] & 0x3f;
725: }
726: if (!IS_CHAR(val)) {
727: if ((ctxt->sax != NULL) &&
728: (ctxt->sax->error != NULL))
729: ctxt->sax->error(ctxt->userData,
730: "Char out of allowed range\n");
731: ctxt->errNo = XML_ERR_INVALID_ENCODING;
732: ctxt->wellFormed = 0;
1.180 daniel 733: ctxt->disableSAX = 1;
1.162 daniel 734: }
735: return(val);
736: } else {
737: /* 1-byte code */
738: *len = 1;
739: return((int) *cur);
740: }
741: }
742: /*
743: * Assume it's a fixed lenght encoding (1) with
744: * a compatibke encoding for the ASCII set, since
745: * XML constructs only use < 128 chars
746: */
747: *len = 1;
748: return((int) *cur);
749: encoding_error:
750: /*
751: * If we detect an UTF8 error that probably mean that the
752: * input encoding didn't get properly advertized in the
753: * declaration header. Report the error and switch the encoding
754: * to ISO-Latin-1 (if you don't like this policy, just declare the
755: * encoding !)
756: */
757: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
758: ctxt->sax->error(ctxt->userData,
759: "Input is not proper UTF-8, indicate encoding !\n");
760: ctxt->errNo = XML_ERR_INVALID_ENCODING;
761:
762: *len = 1;
763: return((int) *cur);
764: }
765:
766: /**
1.152 daniel 767: * xmlCopyChar:
768: * @len: pointer to the length of the char read (or zero)
769: * @array: pointer to an arry of xmlChar
770: * @val: the char value
771: *
772: * append the char value in the array
773: *
774: * Returns the number of xmlChar written
775: */
776:
777: int
778: xmlCopyChar(int len, xmlChar *out, int val) {
779: /*
780: * We are supposed to handle UTF8, check it's valid
781: * From rfc2044: encoding of the Unicode values on UTF-8:
782: *
783: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
784: * 0000 0000-0000 007F 0xxxxxxx
785: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
786: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
787: */
788: if (len == 0) {
789: if (val < 0) len = 0;
1.160 daniel 790: else if (val < 0x80) len = 1;
791: else if (val < 0x800) len = 2;
792: else if (val < 0x10000) len = 3;
793: else if (val < 0x110000) len = 4;
1.152 daniel 794: if (len == 0) {
795: fprintf(stderr, "Internal error, xmlCopyChar 0x%X out of bound\n",
796: val);
797: return(0);
798: }
799: }
800: if (len > 1) {
801: int bits;
802:
803: if (val < 0x80) { *out++= val; bits= -6; }
804: else if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
805: else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6; }
806: else { *out++= (val >> 18) | 0xF0; bits= 12; }
807:
808: for ( ; bits >= 0; bits-= 6)
809: *out++= ((val >> bits) & 0x3F) | 0x80 ;
810:
811: return(len);
812: }
813: *out = (xmlChar) val;
814: return(1);
1.155 daniel 815: }
816:
817: /**
818: * xmlSkipBlankChars:
819: * @ctxt: the XML parser context
820: *
821: * skip all blanks character found at that point in the input streams.
822: * It pops up finished entities in the process if allowable at that point.
823: *
824: * Returns the number of space chars skipped
825: */
826:
827: int
828: xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
829: int cur, res = 0;
830:
831: do {
832: cur = CUR;
833: while (IS_BLANK(cur)) {
834: NEXT;
835: cur = CUR;
836: res++;
837: }
838: while ((cur == 0) && (ctxt->inputNr > 1) &&
839: (ctxt->instate != XML_PARSER_COMMENT)) {
840: xmlPopInput(ctxt);
841: cur = CUR;
842: }
843: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
844: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
845: } while (IS_BLANK(cur));
846: return(res);
1.152 daniel 847: }
848:
1.97 daniel 849: /************************************************************************
850: * *
851: * Commodity functions to handle entities processing *
852: * *
853: ************************************************************************/
1.40 daniel 854:
1.50 daniel 855: /**
856: * xmlPopInput:
857: * @ctxt: an XML parser context
858: *
1.40 daniel 859: * xmlPopInput: the current input pointed by ctxt->input came to an end
860: * pop it and return the next char.
1.45 daniel 861: *
1.123 daniel 862: * Returns the current xmlChar in the parser context
1.40 daniel 863: */
1.123 daniel 864: xmlChar
1.55 daniel 865: xmlPopInput(xmlParserCtxtPtr ctxt) {
1.40 daniel 866: if (ctxt->inputNr == 1) return(0); /* End of main Input */
1.69 daniel 867: xmlFreeInputStream(inputPop(ctxt));
1.97 daniel 868: if ((*ctxt->input->cur == 0) &&
869: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
870: return(xmlPopInput(ctxt));
1.40 daniel 871: return(CUR);
872: }
873:
1.50 daniel 874: /**
875: * xmlPushInput:
876: * @ctxt: an XML parser context
877: * @input: an XML parser input fragment (entity, XML fragment ...).
878: *
1.40 daniel 879: * xmlPushInput: switch to a new input stream which is stacked on top
880: * of the previous one(s).
881: */
1.55 daniel 882: void
883: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1.40 daniel 884: if (input == NULL) return;
885: inputPush(ctxt, input);
1.164 daniel 886: GROW;
1.40 daniel 887: }
888:
1.50 daniel 889: /**
1.69 daniel 890: * xmlFreeInputStream:
1.127 daniel 891: * @input: an xmlParserInputPtr
1.69 daniel 892: *
893: * Free up an input stream.
894: */
895: void
896: xmlFreeInputStream(xmlParserInputPtr input) {
897: if (input == NULL) return;
898:
1.119 daniel 899: if (input->filename != NULL) xmlFree((char *) input->filename);
900: if (input->directory != NULL) xmlFree((char *) input->directory);
1.164 daniel 901: if (input->encoding != NULL) xmlFree((char *) input->encoding);
1.165 daniel 902: if (input->version != NULL) xmlFree((char *) input->version);
1.69 daniel 903: if ((input->free != NULL) && (input->base != NULL))
1.123 daniel 904: input->free((xmlChar *) input->base);
1.93 veillard 905: if (input->buf != NULL)
906: xmlFreeParserInputBuffer(input->buf);
1.69 daniel 907: memset(input, -1, sizeof(xmlParserInput));
1.119 daniel 908: xmlFree(input);
1.69 daniel 909: }
910:
911: /**
1.96 daniel 912: * xmlNewInputStream:
913: * @ctxt: an XML parser context
914: *
915: * Create a new input stream structure
916: * Returns the new input stream or NULL
917: */
918: xmlParserInputPtr
919: xmlNewInputStream(xmlParserCtxtPtr ctxt) {
920: xmlParserInputPtr input;
921:
1.119 daniel 922: input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1.96 daniel 923: if (input == NULL) {
1.123 daniel 924: ctxt->errNo = XML_ERR_NO_MEMORY;
1.96 daniel 925: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 926: ctxt->sax->error(ctxt->userData,
927: "malloc: couldn't allocate a new input stream\n");
1.123 daniel 928: ctxt->errNo = XML_ERR_NO_MEMORY;
1.96 daniel 929: return(NULL);
930: }
1.165 daniel 931: memset(input, 0, sizeof(xmlParserInput));
1.96 daniel 932: input->line = 1;
933: input->col = 1;
1.167 daniel 934: input->standalone = -1;
1.96 daniel 935: return(input);
936: }
937:
938: /**
1.50 daniel 939: * xmlNewEntityInputStream:
940: * @ctxt: an XML parser context
941: * @entity: an Entity pointer
942: *
1.82 daniel 943: * Create a new input stream based on an xmlEntityPtr
1.113 daniel 944: *
945: * Returns the new input stream or NULL
1.45 daniel 946: */
1.50 daniel 947: xmlParserInputPtr
948: xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 949: xmlParserInputPtr input;
950:
951: if (entity == NULL) {
1.123 daniel 952: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 953: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 954: ctxt->sax->error(ctxt->userData,
1.45 daniel 955: "internal: xmlNewEntityInputStream entity = NULL\n");
1.123 daniel 956: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.50 daniel 957: return(NULL);
1.45 daniel 958: }
959: if (entity->content == NULL) {
1.159 daniel 960: switch (entity->etype) {
1.113 daniel 961: case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1.123 daniel 962: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.113 daniel 963: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
964: ctxt->sax->error(ctxt->userData,
965: "xmlNewEntityInputStream unparsed entity !\n");
966: break;
967: case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
968: case XML_EXTERNAL_PARAMETER_ENTITY:
1.116 daniel 969: return(xmlLoadExternalEntity((char *) entity->SystemID,
1.142 daniel 970: (char *) entity->ExternalID, ctxt));
1.113 daniel 971: case XML_INTERNAL_GENERAL_ENTITY:
972: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
973: ctxt->sax->error(ctxt->userData,
974: "Internal entity %s without content !\n", entity->name);
975: break;
976: case XML_INTERNAL_PARAMETER_ENTITY:
1.123 daniel 977: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.113 daniel 978: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
979: ctxt->sax->error(ctxt->userData,
980: "Internal parameter entity %s without content !\n", entity->name);
981: break;
982: case XML_INTERNAL_PREDEFINED_ENTITY:
1.123 daniel 983: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.113 daniel 984: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
985: ctxt->sax->error(ctxt->userData,
986: "Predefined entity %s without content !\n", entity->name);
987: break;
988: }
1.50 daniel 989: return(NULL);
1.45 daniel 990: }
1.96 daniel 991: input = xmlNewInputStream(ctxt);
1.45 daniel 992: if (input == NULL) {
1.50 daniel 993: return(NULL);
1.45 daniel 994: }
1.156 daniel 995: input->filename = (char *) entity->SystemID;
1.45 daniel 996: input->base = entity->content;
997: input->cur = entity->content;
1.140 daniel 998: input->length = entity->length;
1.50 daniel 999: return(input);
1.45 daniel 1000: }
1001:
1.59 daniel 1002: /**
1003: * xmlNewStringInputStream:
1004: * @ctxt: an XML parser context
1.96 daniel 1005: * @buffer: an memory buffer
1.59 daniel 1006: *
1007: * Create a new input stream based on a memory buffer.
1.68 daniel 1008: * Returns the new input stream
1.59 daniel 1009: */
1010: xmlParserInputPtr
1.123 daniel 1011: xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1.59 daniel 1012: xmlParserInputPtr input;
1013:
1.96 daniel 1014: if (buffer == NULL) {
1.123 daniel 1015: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 1016: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1017: ctxt->sax->error(ctxt->userData,
1.59 daniel 1018: "internal: xmlNewStringInputStream string = NULL\n");
1019: return(NULL);
1020: }
1.96 daniel 1021: input = xmlNewInputStream(ctxt);
1.59 daniel 1022: if (input == NULL) {
1023: return(NULL);
1024: }
1.96 daniel 1025: input->base = buffer;
1026: input->cur = buffer;
1.140 daniel 1027: input->length = xmlStrlen(buffer);
1.59 daniel 1028: return(input);
1029: }
1030:
1.76 daniel 1031: /**
1032: * xmlNewInputFromFile:
1033: * @ctxt: an XML parser context
1034: * @filename: the filename to use as entity
1035: *
1036: * Create a new input stream based on a file.
1037: *
1038: * Returns the new input stream or NULL in case of error
1039: */
1040: xmlParserInputPtr
1.79 daniel 1041: xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1.91 daniel 1042: xmlParserInputBufferPtr buf;
1.76 daniel 1043: xmlParserInputPtr inputStream;
1.111 daniel 1044: char *directory = NULL;
1.76 daniel 1045:
1.96 daniel 1046: if (ctxt == NULL) return(NULL);
1.91 daniel 1047: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1.94 daniel 1048: if (buf == NULL) {
1.140 daniel 1049: char name[XML_PARSER_BIG_BUFFER_SIZE];
1.106 daniel 1050:
1.94 daniel 1051: if ((ctxt->input != NULL) && (ctxt->input->directory != NULL)) {
1052: #ifdef WIN32
1053: sprintf(name, "%s\\%s", ctxt->input->directory, filename);
1054: #else
1055: sprintf(name, "%s/%s", ctxt->input->directory, filename);
1056: #endif
1057: buf = xmlParserInputBufferCreateFilename(name,
1058: XML_CHAR_ENCODING_NONE);
1.106 daniel 1059: if (buf != NULL)
1.142 daniel 1060: directory = xmlParserGetDirectory(name);
1.106 daniel 1061: }
1062: if ((buf == NULL) && (ctxt->directory != NULL)) {
1063: #ifdef WIN32
1064: sprintf(name, "%s\\%s", ctxt->directory, filename);
1065: #else
1066: sprintf(name, "%s/%s", ctxt->directory, filename);
1067: #endif
1068: buf = xmlParserInputBufferCreateFilename(name,
1069: XML_CHAR_ENCODING_NONE);
1070: if (buf != NULL)
1.142 daniel 1071: directory = xmlParserGetDirectory(name);
1.106 daniel 1072: }
1073: if (buf == NULL)
1.94 daniel 1074: return(NULL);
1075: }
1076: if (directory == NULL)
1077: directory = xmlParserGetDirectory(filename);
1.76 daniel 1078:
1.96 daniel 1079: inputStream = xmlNewInputStream(ctxt);
1.76 daniel 1080: if (inputStream == NULL) {
1.119 daniel 1081: if (directory != NULL) xmlFree((char *) directory);
1.76 daniel 1082: return(NULL);
1083: }
1084:
1.119 daniel 1085: inputStream->filename = xmlMemStrdup(filename);
1.94 daniel 1086: inputStream->directory = directory;
1.91 daniel 1087: inputStream->buf = buf;
1.76 daniel 1088:
1.91 daniel 1089: inputStream->base = inputStream->buf->buffer->content;
1090: inputStream->cur = inputStream->buf->buffer->content;
1.106 daniel 1091: if ((ctxt->directory == NULL) && (directory != NULL))
1.134 daniel 1092: ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
1.76 daniel 1093: return(inputStream);
1094: }
1095:
1.77 daniel 1096: /************************************************************************
1097: * *
1.97 daniel 1098: * Commodity functions to handle parser contexts *
1099: * *
1100: ************************************************************************/
1101:
1102: /**
1103: * xmlInitParserCtxt:
1104: * @ctxt: an XML parser context
1105: *
1106: * Initialize a parser context
1107: */
1108:
1109: void
1110: xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1111: {
1112: xmlSAXHandler *sax;
1113:
1.168 daniel 1114: xmlDefaultSAXHandlerInit();
1115:
1.119 daniel 1116: sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
1.97 daniel 1117: if (sax == NULL) {
1118: fprintf(stderr, "xmlInitParserCtxt: out of memory\n");
1119: }
1.180 daniel 1120: memset(sax, 0, sizeof(xmlSAXHandler));
1.97 daniel 1121:
1122: /* Allocate the Input stack */
1.119 daniel 1123: ctxt->inputTab = (xmlParserInputPtr *) xmlMalloc(5 * sizeof(xmlParserInputPtr));
1.97 daniel 1124: ctxt->inputNr = 0;
1125: ctxt->inputMax = 5;
1126: ctxt->input = NULL;
1.165 daniel 1127:
1.97 daniel 1128: ctxt->version = NULL;
1129: ctxt->encoding = NULL;
1130: ctxt->standalone = -1;
1.98 daniel 1131: ctxt->hasExternalSubset = 0;
1132: ctxt->hasPErefs = 0;
1.97 daniel 1133: ctxt->html = 0;
1.98 daniel 1134: ctxt->external = 0;
1.140 daniel 1135: ctxt->instate = XML_PARSER_START;
1.97 daniel 1136: ctxt->token = 0;
1.106 daniel 1137: ctxt->directory = NULL;
1.97 daniel 1138:
1139: /* Allocate the Node stack */
1.119 daniel 1140: ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
1.97 daniel 1141: ctxt->nodeNr = 0;
1142: ctxt->nodeMax = 10;
1143: ctxt->node = NULL;
1144:
1.140 daniel 1145: /* Allocate the Name stack */
1146: ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
1147: ctxt->nameNr = 0;
1148: ctxt->nameMax = 10;
1149: ctxt->name = NULL;
1150:
1.176 daniel 1151: /* Allocate the space stack */
1152: ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
1153: ctxt->spaceNr = 1;
1154: ctxt->spaceMax = 10;
1155: ctxt->spaceTab[0] = -1;
1156: ctxt->space = &ctxt->spaceTab[0];
1157:
1.160 daniel 1158: if (sax == NULL) {
1159: ctxt->sax = &xmlDefaultSAXHandler;
1160: } else {
1.97 daniel 1161: ctxt->sax = sax;
1162: memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
1163: }
1164: ctxt->userData = ctxt;
1165: ctxt->myDoc = NULL;
1166: ctxt->wellFormed = 1;
1.99 daniel 1167: ctxt->valid = 1;
1.100 daniel 1168: ctxt->validate = xmlDoValidityCheckingDefaultValue;
1.179 daniel 1169: ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
1.100 daniel 1170: ctxt->vctxt.userData = ctxt;
1.149 daniel 1171: if (ctxt->validate) {
1172: ctxt->vctxt.error = xmlParserValidityError;
1.160 daniel 1173: if (xmlGetWarningsDefaultValue == 0)
1174: ctxt->vctxt.warning = NULL;
1175: else
1176: ctxt->vctxt.warning = xmlParserValidityWarning;
1.180 daniel 1177: /* Allocate the Node stack */
1178: ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
1179: ctxt->vctxt.nodeNr = 0;
1180: ctxt->vctxt.nodeMax = 4;
1181: ctxt->vctxt.node = NULL;
1.149 daniel 1182: } else {
1183: ctxt->vctxt.error = NULL;
1184: ctxt->vctxt.warning = NULL;
1185: }
1.97 daniel 1186: ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1187: ctxt->record_info = 0;
1.135 daniel 1188: ctxt->nbChars = 0;
1.140 daniel 1189: ctxt->checkIndex = 0;
1.180 daniel 1190: ctxt->inSubset = 0;
1.140 daniel 1191: ctxt->errNo = XML_ERR_OK;
1.185 daniel 1192: ctxt->depth = 0;
1.97 daniel 1193: xmlInitNodeInfoSeq(&ctxt->node_seq);
1194: }
1195:
1196: /**
1197: * xmlFreeParserCtxt:
1198: * @ctxt: an XML parser context
1199: *
1200: * Free all the memory used by a parser context. However the parsed
1201: * document in ctxt->myDoc is not freed.
1202: */
1203:
1204: void
1205: xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
1206: {
1207: xmlParserInputPtr input;
1.140 daniel 1208: xmlChar *oldname;
1.97 daniel 1209:
1210: if (ctxt == NULL) return;
1211:
1212: while ((input = inputPop(ctxt)) != NULL) {
1213: xmlFreeInputStream(input);
1214: }
1.140 daniel 1215: while ((oldname = namePop(ctxt)) != NULL) {
1216: xmlFree(oldname);
1217: }
1.176 daniel 1218: if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
1.140 daniel 1219: if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
1.119 daniel 1220: if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
1221: if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
1222: if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
1223: if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
1.165 daniel 1224: if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
1225: if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
1226: if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
1.180 daniel 1227: if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
1.97 daniel 1228: if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
1.119 daniel 1229: xmlFree(ctxt->sax);
1230: if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
1231: xmlFree(ctxt);
1.97 daniel 1232: }
1233:
1234: /**
1235: * xmlNewParserCtxt:
1236: *
1237: * Allocate and initialize a new parser context.
1238: *
1239: * Returns the xmlParserCtxtPtr or NULL
1240: */
1241:
1242: xmlParserCtxtPtr
1243: xmlNewParserCtxt()
1244: {
1245: xmlParserCtxtPtr ctxt;
1246:
1.119 daniel 1247: ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
1.97 daniel 1248: if (ctxt == NULL) {
1249: fprintf(stderr, "xmlNewParserCtxt : cannot allocate context\n");
1250: perror("malloc");
1251: return(NULL);
1252: }
1.165 daniel 1253: memset(ctxt, 0, sizeof(xmlParserCtxt));
1.97 daniel 1254: xmlInitParserCtxt(ctxt);
1255: return(ctxt);
1256: }
1257:
1258: /**
1259: * xmlClearParserCtxt:
1260: * @ctxt: an XML parser context
1261: *
1262: * Clear (release owned resources) and reinitialize a parser context
1263: */
1264:
1265: void
1266: xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1267: {
1268: xmlClearNodeInfoSeq(&ctxt->node_seq);
1269: xmlInitParserCtxt(ctxt);
1270: }
1271:
1272: /************************************************************************
1273: * *
1.77 daniel 1274: * Commodity functions to handle entities *
1275: * *
1276: ************************************************************************/
1277:
1.174 daniel 1278: /**
1279: * xmlCheckEntity:
1280: * @ctxt: an XML parser context
1281: * @content: the entity content string
1282: *
1283: * Parse an entity content and checks the WF constraints
1284: *
1285: */
1286:
1287: void
1288: xmlCheckEntity(xmlParserCtxtPtr ctxt, const xmlChar *content) {
1289: }
1.97 daniel 1290:
1291: /**
1292: * xmlParseCharRef:
1293: * @ctxt: an XML parser context
1294: *
1295: * parse Reference declarations
1296: *
1297: * [66] CharRef ::= '&#' [0-9]+ ';' |
1298: * '&#x' [0-9a-fA-F]+ ';'
1299: *
1.98 daniel 1300: * [ WFC: Legal Character ]
1301: * Characters referred to using character references must match the
1302: * production for Char.
1303: *
1.135 daniel 1304: * Returns the value parsed (as an int), 0 in case of error
1.77 daniel 1305: */
1.97 daniel 1306: int
1307: xmlParseCharRef(xmlParserCtxtPtr ctxt) {
1308: int val = 0;
1309:
1.111 daniel 1310: if (ctxt->token != 0) {
1311: val = ctxt->token;
1312: ctxt->token = 0;
1313: return(val);
1314: }
1.152 daniel 1315: if ((RAW == '&') && (NXT(1) == '#') &&
1.97 daniel 1316: (NXT(2) == 'x')) {
1317: SKIP(3);
1.152 daniel 1318: while (RAW != ';') {
1319: if ((RAW >= '0') && (RAW <= '9'))
1.97 daniel 1320: val = val * 16 + (CUR - '0');
1.152 daniel 1321: else if ((RAW >= 'a') && (RAW <= 'f'))
1.97 daniel 1322: val = val * 16 + (CUR - 'a') + 10;
1.152 daniel 1323: else if ((RAW >= 'A') && (RAW <= 'F'))
1.97 daniel 1324: val = val * 16 + (CUR - 'A') + 10;
1325: else {
1.123 daniel 1326: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1.97 daniel 1327: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1328: ctxt->sax->error(ctxt->userData,
1329: "xmlParseCharRef: invalid hexadecimal value\n");
1330: ctxt->wellFormed = 0;
1.180 daniel 1331: ctxt->disableSAX = 1;
1.97 daniel 1332: val = 0;
1333: break;
1334: }
1335: NEXT;
1336: }
1.164 daniel 1337: if (RAW == ';') {
1338: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1339: ctxt->nbChars ++;
1340: ctxt->input->cur++;
1341: }
1.152 daniel 1342: } else if ((RAW == '&') && (NXT(1) == '#')) {
1.97 daniel 1343: SKIP(2);
1.152 daniel 1344: while (RAW != ';') {
1345: if ((RAW >= '0') && (RAW <= '9'))
1.97 daniel 1346: val = val * 10 + (CUR - '0');
1347: else {
1.123 daniel 1348: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1.97 daniel 1349: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1350: ctxt->sax->error(ctxt->userData,
1351: "xmlParseCharRef: invalid decimal value\n");
1352: ctxt->wellFormed = 0;
1.180 daniel 1353: ctxt->disableSAX = 1;
1.97 daniel 1354: val = 0;
1355: break;
1356: }
1357: NEXT;
1358: }
1.164 daniel 1359: if (RAW == ';') {
1360: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1361: ctxt->nbChars ++;
1362: ctxt->input->cur++;
1363: }
1.97 daniel 1364: } else {
1.123 daniel 1365: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1.97 daniel 1366: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 1367: ctxt->sax->error(ctxt->userData,
1368: "xmlParseCharRef: invalid value\n");
1.97 daniel 1369: ctxt->wellFormed = 0;
1.180 daniel 1370: ctxt->disableSAX = 1;
1.97 daniel 1371: }
1.98 daniel 1372:
1.97 daniel 1373: /*
1.98 daniel 1374: * [ WFC: Legal Character ]
1375: * Characters referred to using character references must match the
1376: * production for Char.
1.97 daniel 1377: */
1378: if (IS_CHAR(val)) {
1379: return(val);
1380: } else {
1.123 daniel 1381: ctxt->errNo = XML_ERR_INVALID_CHAR;
1.97 daniel 1382: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.123 daniel 1383: ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
1.97 daniel 1384: val);
1385: ctxt->wellFormed = 0;
1.180 daniel 1386: ctxt->disableSAX = 1;
1.97 daniel 1387: }
1388: return(0);
1.77 daniel 1389: }
1390:
1.96 daniel 1391: /**
1.135 daniel 1392: * xmlParseStringCharRef:
1393: * @ctxt: an XML parser context
1394: * @str: a pointer to an index in the string
1395: *
1396: * parse Reference declarations, variant parsing from a string rather
1397: * than an an input flow.
1398: *
1399: * [66] CharRef ::= '&#' [0-9]+ ';' |
1400: * '&#x' [0-9a-fA-F]+ ';'
1401: *
1402: * [ WFC: Legal Character ]
1403: * Characters referred to using character references must match the
1404: * production for Char.
1405: *
1406: * Returns the value parsed (as an int), 0 in case of error, str will be
1407: * updated to the current value of the index
1408: */
1409: int
1410: xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1411: const xmlChar *ptr;
1412: xmlChar cur;
1413: int val = 0;
1414:
1415: if ((str == NULL) || (*str == NULL)) return(0);
1416: ptr = *str;
1417: cur = *ptr;
1.137 daniel 1418: if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1.135 daniel 1419: ptr += 3;
1420: cur = *ptr;
1421: while (cur != ';') {
1422: if ((cur >= '0') && (cur <= '9'))
1423: val = val * 16 + (cur - '0');
1424: else if ((cur >= 'a') && (cur <= 'f'))
1425: val = val * 16 + (cur - 'a') + 10;
1426: else if ((cur >= 'A') && (cur <= 'F'))
1427: val = val * 16 + (cur - 'A') + 10;
1428: else {
1429: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1430: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1431: ctxt->sax->error(ctxt->userData,
1432: "xmlParseCharRef: invalid hexadecimal value\n");
1433: ctxt->wellFormed = 0;
1.180 daniel 1434: ctxt->disableSAX = 1;
1.135 daniel 1435: val = 0;
1436: break;
1437: }
1438: ptr++;
1439: cur = *ptr;
1440: }
1441: if (cur == ';')
1442: ptr++;
1.145 daniel 1443: } else if ((cur == '&') && (ptr[1] == '#')){
1.135 daniel 1444: ptr += 2;
1445: cur = *ptr;
1446: while (cur != ';') {
1447: if ((cur >= '0') && (cur <= '9'))
1448: val = val * 10 + (cur - '0');
1449: else {
1450: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1451: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1452: ctxt->sax->error(ctxt->userData,
1453: "xmlParseCharRef: invalid decimal value\n");
1454: ctxt->wellFormed = 0;
1.180 daniel 1455: ctxt->disableSAX = 1;
1.135 daniel 1456: val = 0;
1457: break;
1458: }
1459: ptr++;
1460: cur = *ptr;
1461: }
1462: if (cur == ';')
1463: ptr++;
1464: } else {
1465: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1466: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1467: ctxt->sax->error(ctxt->userData,
1468: "xmlParseCharRef: invalid value\n");
1469: ctxt->wellFormed = 0;
1.180 daniel 1470: ctxt->disableSAX = 1;
1.135 daniel 1471: return(0);
1472: }
1473: *str = ptr;
1474:
1475: /*
1476: * [ WFC: Legal Character ]
1477: * Characters referred to using character references must match the
1478: * production for Char.
1479: */
1480: if (IS_CHAR(val)) {
1481: return(val);
1482: } else {
1483: ctxt->errNo = XML_ERR_INVALID_CHAR;
1484: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1485: ctxt->sax->error(ctxt->userData,
1486: "CharRef: invalid xmlChar value %d\n", val);
1487: ctxt->wellFormed = 0;
1.180 daniel 1488: ctxt->disableSAX = 1;
1.135 daniel 1489: }
1490: return(0);
1491: }
1492:
1493: /**
1.96 daniel 1494: * xmlParserHandleReference:
1495: * @ctxt: the parser context
1496: *
1.97 daniel 1497: * [67] Reference ::= EntityRef | CharRef
1498: *
1.96 daniel 1499: * [68] EntityRef ::= '&' Name ';'
1500: *
1.98 daniel 1501: * [ WFC: Entity Declared ]
1502: * the Name given in the entity reference must match that in an entity
1503: * declaration, except that well-formed documents need not declare any
1504: * of the following entities: amp, lt, gt, apos, quot.
1505: *
1506: * [ WFC: Parsed Entity ]
1507: * An entity reference must not contain the name of an unparsed entity
1508: *
1.97 daniel 1509: * [66] CharRef ::= '&#' [0-9]+ ';' |
1510: * '&#x' [0-9a-fA-F]+ ';'
1511: *
1.96 daniel 1512: * A PEReference may have been detectect in the current input stream
1513: * the handling is done accordingly to
1514: * http://www.w3.org/TR/REC-xml#entproc
1515: */
1516: void
1517: xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
1.97 daniel 1518: xmlParserInputPtr input;
1.123 daniel 1519: xmlChar *name;
1.97 daniel 1520: xmlEntityPtr ent = NULL;
1521:
1.126 daniel 1522: if (ctxt->token != 0) {
1523: return;
1524: }
1.152 daniel 1525: if (RAW != '&') return;
1.97 daniel 1526: GROW;
1.152 daniel 1527: if ((RAW == '&') && (NXT(1) == '#')) {
1.97 daniel 1528: switch(ctxt->instate) {
1.140 daniel 1529: case XML_PARSER_ENTITY_DECL:
1530: case XML_PARSER_PI:
1.109 daniel 1531: case XML_PARSER_CDATA_SECTION:
1.140 daniel 1532: case XML_PARSER_COMMENT:
1.168 daniel 1533: case XML_PARSER_SYSTEM_LITERAL:
1.140 daniel 1534: /* we just ignore it there */
1535: return;
1536: case XML_PARSER_START_TAG:
1.109 daniel 1537: return;
1.140 daniel 1538: case XML_PARSER_END_TAG:
1.97 daniel 1539: return;
1540: case XML_PARSER_EOF:
1.123 daniel 1541: ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
1.97 daniel 1542: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1543: ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
1544: ctxt->wellFormed = 0;
1.180 daniel 1545: ctxt->disableSAX = 1;
1.97 daniel 1546: return;
1547: case XML_PARSER_PROLOG:
1.140 daniel 1548: case XML_PARSER_START:
1549: case XML_PARSER_MISC:
1.123 daniel 1550: ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
1.97 daniel 1551: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1552: ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
1553: ctxt->wellFormed = 0;
1.180 daniel 1554: ctxt->disableSAX = 1;
1.97 daniel 1555: return;
1556: case XML_PARSER_EPILOG:
1.123 daniel 1557: ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
1.97 daniel 1558: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1559: ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
1560: ctxt->wellFormed = 0;
1.180 daniel 1561: ctxt->disableSAX = 1;
1.97 daniel 1562: return;
1563: case XML_PARSER_DTD:
1.123 daniel 1564: ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
1.97 daniel 1565: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1566: ctxt->sax->error(ctxt->userData,
1567: "CharRef are forbiden in DTDs!\n");
1568: ctxt->wellFormed = 0;
1.180 daniel 1569: ctxt->disableSAX = 1;
1.97 daniel 1570: return;
1571: case XML_PARSER_ENTITY_VALUE:
1572: /*
1573: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1574: * substitution here since we need the literal
1.97 daniel 1575: * entity value to be able to save the internal
1576: * subset of the document.
1577: * This will be handled by xmlDecodeEntities
1578: */
1579: return;
1580: case XML_PARSER_CONTENT:
1581: case XML_PARSER_ATTRIBUTE_VALUE:
1582: ctxt->token = xmlParseCharRef(ctxt);
1583: return;
1584: }
1585: return;
1586: }
1587:
1588: switch(ctxt->instate) {
1.109 daniel 1589: case XML_PARSER_CDATA_SECTION:
1590: return;
1.140 daniel 1591: case XML_PARSER_PI:
1.97 daniel 1592: case XML_PARSER_COMMENT:
1.168 daniel 1593: case XML_PARSER_SYSTEM_LITERAL:
1594: case XML_PARSER_CONTENT:
1.97 daniel 1595: return;
1.140 daniel 1596: case XML_PARSER_START_TAG:
1597: return;
1598: case XML_PARSER_END_TAG:
1599: return;
1.97 daniel 1600: case XML_PARSER_EOF:
1.123 daniel 1601: ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
1.97 daniel 1602: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1603: ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
1604: ctxt->wellFormed = 0;
1.180 daniel 1605: ctxt->disableSAX = 1;
1.97 daniel 1606: return;
1607: case XML_PARSER_PROLOG:
1.140 daniel 1608: case XML_PARSER_START:
1609: case XML_PARSER_MISC:
1.123 daniel 1610: ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
1.97 daniel 1611: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1612: ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
1613: ctxt->wellFormed = 0;
1.180 daniel 1614: ctxt->disableSAX = 1;
1.97 daniel 1615: return;
1616: case XML_PARSER_EPILOG:
1.123 daniel 1617: ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
1.97 daniel 1618: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1619: ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
1620: ctxt->wellFormed = 0;
1.180 daniel 1621: ctxt->disableSAX = 1;
1.97 daniel 1622: return;
1623: case XML_PARSER_ENTITY_VALUE:
1624: /*
1625: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1626: * substitution here since we need the literal
1.97 daniel 1627: * entity value to be able to save the internal
1628: * subset of the document.
1629: * This will be handled by xmlDecodeEntities
1630: */
1631: return;
1632: case XML_PARSER_ATTRIBUTE_VALUE:
1633: /*
1634: * NOTE: in the case of attributes values, we don't do the
1635: * substitution here unless we are in a mode where
1636: * the parser is explicitely asked to substitute
1637: * entities. The SAX callback is called with values
1638: * without entity substitution.
1639: * This will then be handled by xmlDecodeEntities
1640: */
1.113 daniel 1641: return;
1.97 daniel 1642: case XML_PARSER_ENTITY_DECL:
1643: /*
1644: * we just ignore it there
1645: * the substitution will be done once the entity is referenced
1646: */
1647: return;
1648: case XML_PARSER_DTD:
1.123 daniel 1649: ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
1.97 daniel 1650: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1651: ctxt->sax->error(ctxt->userData,
1652: "Entity references are forbiden in DTDs!\n");
1653: ctxt->wellFormed = 0;
1.180 daniel 1654: ctxt->disableSAX = 1;
1.97 daniel 1655: return;
1656: }
1657:
1658: NEXT;
1659: name = xmlScanName(ctxt);
1660: if (name == NULL) {
1.123 daniel 1661: ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
1.97 daniel 1662: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1663: ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
1664: ctxt->wellFormed = 0;
1.180 daniel 1665: ctxt->disableSAX = 1;
1.97 daniel 1666: ctxt->token = '&';
1667: return;
1668: }
1669: if (NXT(xmlStrlen(name)) != ';') {
1.123 daniel 1670: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.97 daniel 1671: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1672: ctxt->sax->error(ctxt->userData,
1673: "Entity reference: ';' expected\n");
1674: ctxt->wellFormed = 0;
1.180 daniel 1675: ctxt->disableSAX = 1;
1.97 daniel 1676: ctxt->token = '&';
1.119 daniel 1677: xmlFree(name);
1.97 daniel 1678: return;
1679: }
1680: SKIP(xmlStrlen(name) + 1);
1681: if (ctxt->sax != NULL) {
1682: if (ctxt->sax->getEntity != NULL)
1683: ent = ctxt->sax->getEntity(ctxt->userData, name);
1684: }
1.98 daniel 1685:
1686: /*
1687: * [ WFC: Entity Declared ]
1688: * the Name given in the entity reference must match that in an entity
1689: * declaration, except that well-formed documents need not declare any
1690: * of the following entities: amp, lt, gt, apos, quot.
1691: */
1.97 daniel 1692: if (ent == NULL)
1693: ent = xmlGetPredefinedEntity(name);
1694: if (ent == NULL) {
1.123 daniel 1695: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.97 daniel 1696: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1697: ctxt->sax->error(ctxt->userData,
1.98 daniel 1698: "Entity reference: entity %s not declared\n",
1699: name);
1.97 daniel 1700: ctxt->wellFormed = 0;
1.180 daniel 1701: ctxt->disableSAX = 1;
1.119 daniel 1702: xmlFree(name);
1.97 daniel 1703: return;
1704: }
1.98 daniel 1705:
1706: /*
1707: * [ WFC: Parsed Entity ]
1708: * An entity reference must not contain the name of an unparsed entity
1709: */
1.159 daniel 1710: if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.123 daniel 1711: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 1712: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1713: ctxt->sax->error(ctxt->userData,
1714: "Entity reference to unparsed entity %s\n", name);
1715: ctxt->wellFormed = 0;
1.180 daniel 1716: ctxt->disableSAX = 1;
1.98 daniel 1717: }
1718:
1.159 daniel 1719: if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
1.97 daniel 1720: ctxt->token = ent->content[0];
1.119 daniel 1721: xmlFree(name);
1.97 daniel 1722: return;
1723: }
1724: input = xmlNewEntityInputStream(ctxt, ent);
1725: xmlPushInput(ctxt, input);
1.119 daniel 1726: xmlFree(name);
1.96 daniel 1727: return;
1728: }
1729:
1730: /**
1731: * xmlParserHandlePEReference:
1732: * @ctxt: the parser context
1733: *
1734: * [69] PEReference ::= '%' Name ';'
1735: *
1.98 daniel 1736: * [ WFC: No Recursion ]
1737: * TODO A parsed entity must not contain a recursive
1738: * reference to itself, either directly or indirectly.
1739: *
1740: * [ WFC: Entity Declared ]
1741: * In a document without any DTD, a document with only an internal DTD
1742: * subset which contains no parameter entity references, or a document
1743: * with "standalone='yes'", ... ... The declaration of a parameter
1744: * entity must precede any reference to it...
1745: *
1746: * [ VC: Entity Declared ]
1747: * In a document with an external subset or external parameter entities
1748: * with "standalone='no'", ... ... The declaration of a parameter entity
1749: * must precede any reference to it...
1750: *
1751: * [ WFC: In DTD ]
1752: * Parameter-entity references may only appear in the DTD.
1753: * NOTE: misleading but this is handled.
1754: *
1755: * A PEReference may have been detected in the current input stream
1.96 daniel 1756: * the handling is done accordingly to
1757: * http://www.w3.org/TR/REC-xml#entproc
1758: * i.e.
1759: * - Included in literal in entity values
1760: * - Included as Parameter Entity reference within DTDs
1761: */
1762: void
1763: xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 1764: xmlChar *name;
1.96 daniel 1765: xmlEntityPtr entity = NULL;
1766: xmlParserInputPtr input;
1767:
1.126 daniel 1768: if (ctxt->token != 0) {
1769: return;
1770: }
1.152 daniel 1771: if (RAW != '%') return;
1.96 daniel 1772: switch(ctxt->instate) {
1.109 daniel 1773: case XML_PARSER_CDATA_SECTION:
1774: return;
1.97 daniel 1775: case XML_PARSER_COMMENT:
1776: return;
1.140 daniel 1777: case XML_PARSER_START_TAG:
1778: return;
1779: case XML_PARSER_END_TAG:
1780: return;
1.96 daniel 1781: case XML_PARSER_EOF:
1.123 daniel 1782: ctxt->errNo = XML_ERR_PEREF_AT_EOF;
1.96 daniel 1783: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1784: ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
1785: ctxt->wellFormed = 0;
1.180 daniel 1786: ctxt->disableSAX = 1;
1.96 daniel 1787: return;
1788: case XML_PARSER_PROLOG:
1.140 daniel 1789: case XML_PARSER_START:
1790: case XML_PARSER_MISC:
1.123 daniel 1791: ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
1.96 daniel 1792: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1793: ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
1794: ctxt->wellFormed = 0;
1.180 daniel 1795: ctxt->disableSAX = 1;
1.96 daniel 1796: return;
1.97 daniel 1797: case XML_PARSER_ENTITY_DECL:
1.96 daniel 1798: case XML_PARSER_CONTENT:
1799: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 1800: case XML_PARSER_PI:
1.168 daniel 1801: case XML_PARSER_SYSTEM_LITERAL:
1.96 daniel 1802: /* we just ignore it there */
1803: return;
1804: case XML_PARSER_EPILOG:
1.123 daniel 1805: ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
1.96 daniel 1806: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.97 daniel 1807: ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
1.96 daniel 1808: ctxt->wellFormed = 0;
1.180 daniel 1809: ctxt->disableSAX = 1;
1.96 daniel 1810: return;
1.97 daniel 1811: case XML_PARSER_ENTITY_VALUE:
1812: /*
1813: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1814: * substitution here since we need the literal
1.97 daniel 1815: * entity value to be able to save the internal
1816: * subset of the document.
1817: * This will be handled by xmlDecodeEntities
1818: */
1819: return;
1.96 daniel 1820: case XML_PARSER_DTD:
1.98 daniel 1821: /*
1822: * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1823: * In the internal DTD subset, parameter-entity references
1824: * can occur only where markup declarations can occur, not
1825: * within markup declarations.
1826: * In that case this is handled in xmlParseMarkupDecl
1827: */
1828: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1829: return;
1.96 daniel 1830: }
1831:
1832: NEXT;
1833: name = xmlParseName(ctxt);
1834: if (name == NULL) {
1.123 daniel 1835: ctxt->errNo = XML_ERR_PEREF_NO_NAME;
1.96 daniel 1836: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1837: ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
1838: ctxt->wellFormed = 0;
1.180 daniel 1839: ctxt->disableSAX = 1;
1.96 daniel 1840: } else {
1.152 daniel 1841: if (RAW == ';') {
1.96 daniel 1842: NEXT;
1.98 daniel 1843: if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1844: entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.96 daniel 1845: if (entity == NULL) {
1.98 daniel 1846:
1847: /*
1848: * [ WFC: Entity Declared ]
1849: * In a document without any DTD, a document with only an
1850: * internal DTD subset which contains no parameter entity
1851: * references, or a document with "standalone='yes'", ...
1852: * ... The declaration of a parameter entity must precede
1853: * any reference to it...
1854: */
1855: if ((ctxt->standalone == 1) ||
1856: ((ctxt->hasExternalSubset == 0) &&
1857: (ctxt->hasPErefs == 0))) {
1858: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1859: ctxt->sax->error(ctxt->userData,
1860: "PEReference: %%%s; not found\n", name);
1861: ctxt->wellFormed = 0;
1.180 daniel 1862: ctxt->disableSAX = 1;
1.98 daniel 1863: } else {
1864: /*
1865: * [ VC: Entity Declared ]
1866: * In a document with an external subset or external
1867: * parameter entities with "standalone='no'", ...
1868: * ... The declaration of a parameter entity must precede
1869: * any reference to it...
1870: */
1871: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1872: ctxt->sax->warning(ctxt->userData,
1873: "PEReference: %%%s; not found\n", name);
1874: ctxt->valid = 0;
1875: }
1.96 daniel 1876: } else {
1.159 daniel 1877: if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1878: (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
1.96 daniel 1879: /*
1.156 daniel 1880: * TODO !!! handle the extra spaces added before and after
1.96 daniel 1881: * c.f. http://www.w3.org/TR/REC-xml#as-PE
1882: */
1883: input = xmlNewEntityInputStream(ctxt, entity);
1884: xmlPushInput(ctxt, input);
1.164 daniel 1885: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
1886: (RAW == '<') && (NXT(1) == '?') &&
1887: (NXT(2) == 'x') && (NXT(3) == 'm') &&
1888: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 1889: xmlParseTextDecl(ctxt);
1.164 daniel 1890: }
1891: if (ctxt->token == 0)
1892: ctxt->token = ' ';
1.96 daniel 1893: } else {
1894: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1895: ctxt->sax->error(ctxt->userData,
1896: "xmlHandlePEReference: %s is not a parameter entity\n",
1897: name);
1898: ctxt->wellFormed = 0;
1.180 daniel 1899: ctxt->disableSAX = 1;
1.96 daniel 1900: }
1901: }
1902: } else {
1.123 daniel 1903: ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
1.96 daniel 1904: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1905: ctxt->sax->error(ctxt->userData,
1906: "xmlHandlePEReference: expecting ';'\n");
1907: ctxt->wellFormed = 0;
1.180 daniel 1908: ctxt->disableSAX = 1;
1.96 daniel 1909: }
1.119 daniel 1910: xmlFree(name);
1.97 daniel 1911: }
1912: }
1913:
/*
 * Macro used to grow the current buffer.
 *
 * Doubles <buffer>_size and reallocates <buffer> to that many xmlChar.
 * The calling function must declare both the xmlChar pointer given as
 * argument and an integer named after it with a "_size" suffix.
 *
 * On allocation failure the previous block is released (it would
 * otherwise be leaked, since the caller gives up) and the calling
 * function returns NULL; the macro can therefore only be used inside
 * functions returning a pointer.
 */
#define growBuffer(buffer) {						\
    xmlChar *grown_buffer;						\
    buffer##_size *= 2;							\
    grown_buffer = (xmlChar *)						\
	xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));		\
    if (grown_buffer == NULL) {						\
	perror("realloc failed");					\
	xmlFree(buffer);						\
	return(NULL);							\
    }									\
    buffer = grown_buffer;						\
}
1.77 daniel 1926:
1927: /**
1928: * xmlDecodeEntities:
1929: * @ctxt: the parser context
1930: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1931: * @len: the len to decode (in bytes !), -1 for no size limit
1.123 daniel 1932: * @end: an end marker xmlChar, 0 if none
1933: * @end2: an end marker xmlChar, 0 if none
1934: * @end3: an end marker xmlChar, 0 if none
1.77 daniel 1935: *
1936: * [67] Reference ::= EntityRef | CharRef
1937: *
1938: * [69] PEReference ::= '%' Name ';'
1939: *
1940: * Returns A newly allocated string with the substitution done. The caller
1941: * must deallocate it !
1942: */
1.123 daniel 1943: xmlChar *
1.77 daniel 1944: xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
1.123 daniel 1945: xmlChar end, xmlChar end2, xmlChar end3) {
1946: xmlChar *buffer = NULL;
1.78 daniel 1947: int buffer_size = 0;
1.161 daniel 1948: int nbchars = 0;
1.78 daniel 1949:
1.123 daniel 1950: xmlChar *current = NULL;
1.77 daniel 1951: xmlEntityPtr ent;
1952: unsigned int max = (unsigned int) len;
1.161 daniel 1953: int c,l;
1.77 daniel 1954:
1.185 daniel 1955: if (ctxt->depth > 40) {
1956: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1957: ctxt->sax->error(ctxt->userData,
1958: "Detected entity reference loop\n");
1959: ctxt->wellFormed = 0;
1960: ctxt->disableSAX = 1;
1961: ctxt->errNo = XML_ERR_ENTITY_LOOP;
1962: return(NULL);
1963: }
1964:
1.77 daniel 1965: /*
1966: * allocate a translation buffer.
1967: */
1.140 daniel 1968: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.123 daniel 1969: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
1.77 daniel 1970: if (buffer == NULL) {
1971: perror("xmlDecodeEntities: malloc failed");
1972: return(NULL);
1973: }
1974:
1.78 daniel 1975: /*
1976: * Ok loop until we reach one of the ending char or a size limit.
1977: */
1.161 daniel 1978: c = CUR_CHAR(l);
1979: while ((nbchars < max) && (c != end) &&
1980: (c != end2) && (c != end3)) {
1.77 daniel 1981:
1.161 daniel 1982: if (c == 0) break;
1983: if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
1.98 daniel 1984: int val = xmlParseCharRef(ctxt);
1.161 daniel 1985: COPY_BUF(0,buffer,nbchars,val);
1986: NEXTL(l);
1987: } else if ((c == '&') && (ctxt->token != '&') &&
1988: (what & XML_SUBSTITUTE_REF)) {
1.98 daniel 1989: ent = xmlParseEntityRef(ctxt);
1990: if ((ent != NULL) &&
1991: (ctxt->replaceEntities != 0)) {
1992: current = ent->content;
1993: while (*current != 0) {
1.161 daniel 1994: buffer[nbchars++] = *current++;
1995: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.98 daniel 1996: growBuffer(buffer);
1.77 daniel 1997: }
1998: }
1.98 daniel 1999: } else if (ent != NULL) {
1.123 daniel 2000: const xmlChar *cur = ent->name;
1.98 daniel 2001:
1.161 daniel 2002: buffer[nbchars++] = '&';
2003: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.98 daniel 2004: growBuffer(buffer);
2005: }
1.161 daniel 2006: while (*cur != 0) {
2007: buffer[nbchars++] = *cur++;
2008: }
2009: buffer[nbchars++] = ';';
1.77 daniel 2010: }
1.161 daniel 2011: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1.97 daniel 2012: /*
1.77 daniel 2013: * a PEReference induce to switch the entity flow,
2014: * we break here to flush the current set of chars
2015: * parsed if any. We will be called back later.
1.97 daniel 2016: */
1.91 daniel 2017: if (nbchars != 0) break;
1.77 daniel 2018:
2019: xmlParsePEReference(ctxt);
1.79 daniel 2020:
1.97 daniel 2021: /*
1.79 daniel 2022: * Pop-up of finished entities.
1.97 daniel 2023: */
1.152 daniel 2024: while ((RAW == 0) && (ctxt->inputNr > 1))
1.79 daniel 2025: xmlPopInput(ctxt);
2026:
1.98 daniel 2027: break;
1.77 daniel 2028: } else {
1.161 daniel 2029: COPY_BUF(l,buffer,nbchars,c);
2030: NEXTL(l);
2031: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.86 daniel 2032: growBuffer(buffer);
2033: }
1.77 daniel 2034: }
1.161 daniel 2035: c = CUR_CHAR(l);
1.77 daniel 2036: }
1.161 daniel 2037: buffer[nbchars++] = 0;
1.77 daniel 2038: return(buffer);
2039: }
2040:
1.135 daniel 2041: /**
2042: * xmlStringDecodeEntities:
2043: * @ctxt: the parser context
2044: * @str: the input string
2045: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2046: * @end: an end marker xmlChar, 0 if none
2047: * @end2: an end marker xmlChar, 0 if none
2048: * @end3: an end marker xmlChar, 0 if none
2049: *
2050: * [67] Reference ::= EntityRef | CharRef
2051: *
2052: * [69] PEReference ::= '%' Name ';'
2053: *
2054: * Returns A newly allocated string with the substitution done. The caller
2055: * must deallocate it !
2056: */
2057: xmlChar *
2058: xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2059: xmlChar end, xmlChar end2, xmlChar end3) {
2060: xmlChar *buffer = NULL;
2061: int buffer_size = 0;
2062:
2063: xmlChar *current = NULL;
2064: xmlEntityPtr ent;
1.176 daniel 2065: int c,l;
2066: int nbchars = 0;
1.135 daniel 2067:
1.185 daniel 2068: if (ctxt->depth > 40) {
2069: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2070: ctxt->sax->error(ctxt->userData,
2071: "Detected entity reference loop\n");
2072: ctxt->wellFormed = 0;
2073: ctxt->disableSAX = 1;
2074: ctxt->errNo = XML_ERR_ENTITY_LOOP;
2075: return(NULL);
2076: }
2077:
1.135 daniel 2078: /*
2079: * allocate a translation buffer.
2080: */
1.140 daniel 2081: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.135 daniel 2082: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2083: if (buffer == NULL) {
2084: perror("xmlDecodeEntities: malloc failed");
2085: return(NULL);
2086: }
2087:
2088: /*
2089: * Ok loop until we reach one of the ending char or a size limit.
2090: */
1.176 daniel 2091: c = CUR_SCHAR(str, l);
2092: while ((c != 0) && (c != end) && (c != end2) && (c != end3)) {
1.135 daniel 2093:
1.176 daniel 2094: if (c == 0) break;
2095: if ((c == '&') && (str[1] == '#')) {
1.135 daniel 2096: int val = xmlParseStringCharRef(ctxt, &str);
1.176 daniel 2097: if (val != 0) {
2098: COPY_BUF(0,buffer,nbchars,val);
2099: }
2100: } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1.135 daniel 2101: ent = xmlParseStringEntityRef(ctxt, &str);
1.185 daniel 2102: if ((ent != NULL) && (ent->content != NULL)) {
2103: xmlChar *rep;
2104:
2105: ctxt->depth++;
2106: rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2107: 0, 0, 0);
2108: ctxt->depth--;
2109: if (rep != NULL) {
2110: current = rep;
2111: while (*current != 0) {
2112: buffer[nbchars++] = *current++;
2113: if (nbchars >
2114: buffer_size - XML_PARSER_BUFFER_SIZE) {
2115: growBuffer(buffer);
2116: }
1.135 daniel 2117: }
1.185 daniel 2118: xmlFree(rep);
1.135 daniel 2119: }
2120: } else if (ent != NULL) {
2121: int i = xmlStrlen(ent->name);
2122: const xmlChar *cur = ent->name;
2123:
1.176 daniel 2124: buffer[nbchars++] = '&';
2125: if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 2126: growBuffer(buffer);
2127: }
2128: for (;i > 0;i--)
1.176 daniel 2129: buffer[nbchars++] = *cur++;
2130: buffer[nbchars++] = ';';
1.135 daniel 2131: }
1.176 daniel 2132: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1.135 daniel 2133: ent = xmlParseStringPEReference(ctxt, &str);
2134: if (ent != NULL) {
1.185 daniel 2135: xmlChar *rep;
2136:
2137: ctxt->depth++;
2138: rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2139: 0, 0, 0);
2140: ctxt->depth--;
2141: if (rep != NULL) {
2142: current = rep;
2143: while (*current != 0) {
2144: buffer[nbchars++] = *current++;
2145: if (nbchars >
2146: buffer_size - XML_PARSER_BUFFER_SIZE) {
2147: growBuffer(buffer);
2148: }
1.135 daniel 2149: }
1.185 daniel 2150: xmlFree(rep);
1.135 daniel 2151: }
2152: }
2153: } else {
1.176 daniel 2154: COPY_BUF(l,buffer,nbchars,c);
2155: str += l;
2156: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 2157: growBuffer(buffer);
2158: }
2159: }
1.176 daniel 2160: c = CUR_SCHAR(str, l);
1.135 daniel 2161: }
1.176 daniel 2162: buffer[nbchars++] = 0;
1.135 daniel 2163: return(buffer);
2164: }
2165:
1.1 veillard 2166:
1.28 daniel 2167: /************************************************************************
2168: * *
1.75 daniel 2169: * Commodity functions to handle encodings *
2170: * *
2171: ************************************************************************/
2172:
/*
 * xmlLanguageIDIsLetter:
 * @c:  the character to test
 *
 * Returns 1 if @c is in [a-z] or [A-Z], 0 otherwise
 */
static int
xmlLanguageIDIsLetter(xmlChar c) {
    return(((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')));
}

/**
 * xmlCheckLanguageID:
 * @lang:  pointer to the string value
 *
 * Checks that the value conforms to the LanguageID production:
 *
 * [33] LanguageID ::= Langcode ('-' Subcode)*
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
 * [38] Subcode ::= ([a-z] | [A-Z])+
 *
 * A NULL or empty @lang is rejected.
 *
 * Returns 1 if correct 0 otherwise
 **/
int
xmlCheckLanguageID(const xmlChar *lang) {
    const xmlChar *cur = lang;

    if (cur == NULL)
        return(0);
    if (((cur[0] == 'i') || (cur[0] == 'I') ||
         (cur[0] == 'x') || (cur[0] == 'X')) && (cur[1] == '-')) {
        /*
         * IANA code or User code: the prefix must be followed by
         * at least one letter ("i-" or "x-" alone is not a Langcode).
         */
        cur += 2;
        if (!xmlLanguageIDIsLetter(cur[0]))
            return(0);
        while (xmlLanguageIDIsLetter(cur[0]))
            cur++;
    } else if (xmlLanguageIDIsLetter(cur[0])) {
        /*
         * ISO639: exactly two letters
         */
        cur++;
        if (!xmlLanguageIDIsLetter(cur[0]))
            return(0);
        cur++;
    } else
        return(0);

    /*
     * Any number of '-' Subcode, each made of one letter or more
     */
    while (cur[0] != 0) {
        if (cur[0] != '-')
            return(0);
        cur++;
        if (!xmlLanguageIDIsLetter(cur[0]))
            return(0);
        while (xmlLanguageIDIsLetter(cur[0]))
            cur++;
    }
    return(1);
}
2240:
1.75 daniel 2241: /**
2242: * xmlSwitchEncoding:
2243: * @ctxt: the parser context
1.124 daniel 2244: * @enc: the encoding value (number)
1.75 daniel 2245: *
2246: * change the input functions when discovering the character encoding
2247: * of a given entity.
2248: */
2249: void
2250: xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
2251: {
1.156 daniel 2252: xmlCharEncodingHandlerPtr handler;
2253:
2254: handler = xmlGetCharEncodingHandler(enc);
2255: if (handler != NULL) {
2256: if (ctxt->input != NULL) {
2257: if (ctxt->input->buf != NULL) {
2258: if (ctxt->input->buf->encoder != NULL) {
2259: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2260: ctxt->sax->error(ctxt->userData,
2261: "xmlSwitchEncoding : encoder already regitered\n");
2262: return;
2263: }
2264: ctxt->input->buf->encoder = handler;
2265:
2266: /*
2267: * Is there already some content down the pipe to convert
2268: */
2269: if ((ctxt->input->buf->buffer != NULL) &&
2270: (ctxt->input->buf->buffer->use > 0)) {
2271: xmlChar *buf;
2272: int res, len, size;
2273: int processed;
2274:
2275: /*
2276: * Specific handling of the Byte Order Mark for
2277: * UTF-16
2278: */
2279: if ((enc == XML_CHAR_ENCODING_UTF16LE) &&
2280: (ctxt->input->cur[0] == 0xFF) &&
2281: (ctxt->input->cur[1] == 0xFE)) {
2282: SKIP(2);
2283: }
2284: if ((enc == XML_CHAR_ENCODING_UTF16BE) &&
2285: (ctxt->input->cur[0] == 0xFE) &&
2286: (ctxt->input->cur[1] == 0xFF)) {
2287: SKIP(2);
2288: }
2289:
2290: /*
2291: * convert the non processed part
2292: */
2293: processed = ctxt->input->cur - ctxt->input->base;
2294: len = ctxt->input->buf->buffer->use - processed;
2295:
2296: if (len <= 0) {
2297: return;
2298: }
2299: size = ctxt->input->buf->buffer->use * 4;
2300: if (size < 4000)
2301: size = 4000;
1.167 daniel 2302: retry_larger:
1.160 daniel 2303: buf = (xmlChar *) xmlMalloc(size + 1);
1.156 daniel 2304: if (buf == NULL) {
2305: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2306: ctxt->sax->error(ctxt->userData,
2307: "xmlSwitchEncoding : out of memory\n");
2308: return;
2309: }
1.160 daniel 2310: /* TODO !!! Handling of buf too small */
1.156 daniel 2311: res = handler->input(buf, size, ctxt->input->cur, &len);
1.167 daniel 2312: if (res == -1) {
2313: size *= 2;
2314: xmlFree(buf);
2315: goto retry_larger;
2316: }
1.156 daniel 2317: if ((res < 0) ||
2318: (len != ctxt->input->buf->buffer->use - processed)) {
2319: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2320: ctxt->sax->error(ctxt->userData,
2321: "xmlSwitchEncoding : conversion failed\n");
2322: xmlFree(buf);
2323: return;
2324: }
1.167 daniel 2325:
1.156 daniel 2326: /*
2327: * Conversion succeeded, get rid of the old buffer
2328: */
2329: xmlFree(ctxt->input->buf->buffer->content);
2330: ctxt->input->buf->buffer->content = buf;
2331: ctxt->input->base = buf;
2332: ctxt->input->cur = buf;
2333: ctxt->input->buf->buffer->size = size;
2334: ctxt->input->buf->buffer->use = res;
1.160 daniel 2335: buf[res] = 0;
1.156 daniel 2336: }
2337: return;
2338: } else {
2339: if (ctxt->input->length == 0) {
2340: /*
2341: * When parsing a static memory array one must know the
2342: * size to be able to convert the buffer.
2343: */
2344: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2345: ctxt->sax->error(ctxt->userData,
2346: "xmlSwitchEncoding : no input\n");
2347: return;
2348: } else {
2349: xmlChar *buf;
2350: int res, len;
2351: int processed = ctxt->input->cur - ctxt->input->base;
2352:
2353: /*
2354: * convert the non processed part
2355: */
2356: len = ctxt->input->length - processed;
2357: if (len <= 0) {
2358: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2359: ctxt->sax->error(ctxt->userData,
2360: "xmlSwitchEncoding : input fully consumed?\n");
2361: return;
2362: }
2363: buf = (xmlChar *) xmlMalloc(ctxt->input->length * 4);
2364: if (buf == NULL) {
2365: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2366: ctxt->sax->error(ctxt->userData,
2367: "xmlSwitchEncoding : out of memory\n");
2368: return;
2369: }
2370: res = handler->input(buf, ctxt->input->length * 4,
2371: ctxt->input->cur, &len);
2372: if ((res < 0) ||
2373: (len != ctxt->input->length - processed)) {
2374: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2375: ctxt->sax->error(ctxt->userData,
2376: "xmlSwitchEncoding : conversion failed\n");
2377: xmlFree(buf);
2378: return;
2379: }
2380: /*
2381: * Conversion succeeded, get rid of the old buffer
2382: */
2383: if ((ctxt->input->free != NULL) &&
2384: (ctxt->input->base != NULL))
2385: ctxt->input->free((xmlChar *) ctxt->input->base);
2386: ctxt->input->base = ctxt->input->cur = buf;
2387: ctxt->input->length = res;
2388: }
2389: }
2390: } else {
2391: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2392: ctxt->sax->error(ctxt->userData,
2393: "xmlSwitchEncoding : no input\n");
2394: }
2395: }
2396:
1.75 daniel 2397: switch (enc) {
2398: case XML_CHAR_ENCODING_ERROR:
1.123 daniel 2399: ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
1.75 daniel 2400: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2401: ctxt->sax->error(ctxt->userData, "encoding unknown\n");
2402: ctxt->wellFormed = 0;
1.180 daniel 2403: ctxt->disableSAX = 1;
1.75 daniel 2404: break;
2405: case XML_CHAR_ENCODING_NONE:
2406: /* let's assume it's UTF-8 without the XML decl */
2407: return;
2408: case XML_CHAR_ENCODING_UTF8:
2409: /* default encoding, no conversion should be needed */
2410: return;
2411: case XML_CHAR_ENCODING_UTF16LE:
1.123 daniel 2412: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2413: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2414: ctxt->sax->error(ctxt->userData,
2415: "char encoding UTF16 little endian not supported\n");
2416: break;
2417: case XML_CHAR_ENCODING_UTF16BE:
1.123 daniel 2418: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2419: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2420: ctxt->sax->error(ctxt->userData,
2421: "char encoding UTF16 big endian not supported\n");
2422: break;
2423: case XML_CHAR_ENCODING_UCS4LE:
1.123 daniel 2424: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2425: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2426: ctxt->sax->error(ctxt->userData,
2427: "char encoding USC4 little endian not supported\n");
2428: break;
2429: case XML_CHAR_ENCODING_UCS4BE:
1.123 daniel 2430: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2431: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2432: ctxt->sax->error(ctxt->userData,
2433: "char encoding USC4 big endian not supported\n");
2434: break;
2435: case XML_CHAR_ENCODING_EBCDIC:
1.123 daniel 2436: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2437: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2438: ctxt->sax->error(ctxt->userData,
2439: "char encoding EBCDIC not supported\n");
2440: break;
2441: case XML_CHAR_ENCODING_UCS4_2143:
1.123 daniel 2442: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2443: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2444: ctxt->sax->error(ctxt->userData,
2445: "char encoding UCS4 2143 not supported\n");
2446: break;
2447: case XML_CHAR_ENCODING_UCS4_3412:
1.123 daniel 2448: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2449: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2450: ctxt->sax->error(ctxt->userData,
2451: "char encoding UCS4 3412 not supported\n");
2452: break;
2453: case XML_CHAR_ENCODING_UCS2:
1.123 daniel 2454: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2455: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2456: ctxt->sax->error(ctxt->userData,
2457: "char encoding UCS2 not supported\n");
2458: break;
2459: case XML_CHAR_ENCODING_8859_1:
1.123 daniel 2460: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2461: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2462: ctxt->sax->error(ctxt->userData,
2463: "char encoding ISO_8859_1 ISO Latin 1 not supported\n");
2464: break;
2465: case XML_CHAR_ENCODING_8859_2:
1.123 daniel 2466: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2467: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2468: ctxt->sax->error(ctxt->userData,
2469: "char encoding ISO_8859_2 ISO Latin 2 not supported\n");
2470: break;
2471: case XML_CHAR_ENCODING_8859_3:
1.123 daniel 2472: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2473: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2474: ctxt->sax->error(ctxt->userData,
2475: "char encoding ISO_8859_3 not supported\n");
2476: break;
2477: case XML_CHAR_ENCODING_8859_4:
1.123 daniel 2478: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2479: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2480: ctxt->sax->error(ctxt->userData,
2481: "char encoding ISO_8859_4 not supported\n");
2482: break;
2483: case XML_CHAR_ENCODING_8859_5:
1.123 daniel 2484: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2485: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2486: ctxt->sax->error(ctxt->userData,
2487: "char encoding ISO_8859_5 not supported\n");
2488: break;
2489: case XML_CHAR_ENCODING_8859_6:
1.123 daniel 2490: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2491: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2492: ctxt->sax->error(ctxt->userData,
2493: "char encoding ISO_8859_6 not supported\n");
2494: break;
2495: case XML_CHAR_ENCODING_8859_7:
1.123 daniel 2496: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2497: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2498: ctxt->sax->error(ctxt->userData,
2499: "char encoding ISO_8859_7 not supported\n");
2500: break;
2501: case XML_CHAR_ENCODING_8859_8:
1.123 daniel 2502: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2503: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2504: ctxt->sax->error(ctxt->userData,
2505: "char encoding ISO_8859_8 not supported\n");
2506: break;
2507: case XML_CHAR_ENCODING_8859_9:
1.123 daniel 2508: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2509: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2510: ctxt->sax->error(ctxt->userData,
2511: "char encoding ISO_8859_9 not supported\n");
2512: break;
2513: case XML_CHAR_ENCODING_2022_JP:
1.123 daniel 2514: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2515: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2516: ctxt->sax->error(ctxt->userData,
2517: "char encoding ISO-2022-JPnot supported\n");
2518: break;
2519: case XML_CHAR_ENCODING_SHIFT_JIS:
1.123 daniel 2520: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2521: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2522: ctxt->sax->error(ctxt->userData,
2523: "char encoding Shift_JISnot supported\n");
2524: break;
2525: case XML_CHAR_ENCODING_EUC_JP:
1.123 daniel 2526: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.75 daniel 2527: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2528: ctxt->sax->error(ctxt->userData,
2529: "char encoding EUC-JPnot supported\n");
2530: break;
2531: }
2532: }
2533:
2534: /************************************************************************
2535: * *
1.123 daniel 2536: * Commodity functions to handle xmlChars *
1.28 daniel 2537: * *
2538: ************************************************************************/
2539:
1.50 daniel 2540: /**
2541: * xmlStrndup:
1.123 daniel 2542: * @cur: the input xmlChar *
1.50 daniel 2543: * @len: the len of @cur
2544: *
1.123 daniel 2545: * a strndup for array of xmlChar's
1.68 daniel 2546: *
1.123 daniel 2547: * Returns a new xmlChar * or NULL
1.1 veillard 2548: */
1.123 daniel 2549: xmlChar *
2550: xmlStrndup(const xmlChar *cur, int len) {
1.135 daniel 2551: xmlChar *ret;
2552:
2553: if ((cur == NULL) || (len < 0)) return(NULL);
2554: ret = xmlMalloc((len + 1) * sizeof(xmlChar));
1.1 veillard 2555: if (ret == NULL) {
1.86 daniel 2556: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 2557: (len + 1) * (long)sizeof(xmlChar));
1.1 veillard 2558: return(NULL);
2559: }
1.123 daniel 2560: memcpy(ret, cur, len * sizeof(xmlChar));
1.1 veillard 2561: ret[len] = 0;
2562: return(ret);
2563: }
2564:
1.50 daniel 2565: /**
2566: * xmlStrdup:
1.123 daniel 2567: * @cur: the input xmlChar *
1.50 daniel 2568: *
1.152 daniel 2569: * a strdup for array of xmlChar's. Since they are supposed to be
2570: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2571: * a termination mark of '0'.
1.68 daniel 2572: *
1.123 daniel 2573: * Returns a new xmlChar * or NULL
1.1 veillard 2574: */
1.123 daniel 2575: xmlChar *
2576: xmlStrdup(const xmlChar *cur) {
2577: const xmlChar *p = cur;
1.1 veillard 2578:
1.135 daniel 2579: if (cur == NULL) return(NULL);
1.152 daniel 2580: while (*p != 0) p++;
1.1 veillard 2581: return(xmlStrndup(cur, p - cur));
2582: }
2583:
1.50 daniel 2584: /**
2585: * xmlCharStrndup:
2586: * @cur: the input char *
2587: * @len: the len of @cur
2588: *
1.123 daniel 2589: * a strndup for char's to xmlChar's
1.68 daniel 2590: *
1.123 daniel 2591: * Returns a new xmlChar * or NULL
1.45 daniel 2592: */
2593:
1.123 daniel 2594: xmlChar *
1.55 daniel 2595: xmlCharStrndup(const char *cur, int len) {
1.45 daniel 2596: int i;
1.135 daniel 2597: xmlChar *ret;
2598:
2599: if ((cur == NULL) || (len < 0)) return(NULL);
2600: ret = xmlMalloc((len + 1) * sizeof(xmlChar));
1.45 daniel 2601: if (ret == NULL) {
1.86 daniel 2602: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 2603: (len + 1) * (long)sizeof(xmlChar));
1.45 daniel 2604: return(NULL);
2605: }
2606: for (i = 0;i < len;i++)
1.123 daniel 2607: ret[i] = (xmlChar) cur[i];
1.45 daniel 2608: ret[len] = 0;
2609: return(ret);
2610: }
2611:
1.50 daniel 2612: /**
2613: * xmlCharStrdup:
2614: * @cur: the input char *
2615: * @len: the len of @cur
2616: *
1.123 daniel 2617: * a strdup for char's to xmlChar's
1.68 daniel 2618: *
1.123 daniel 2619: * Returns a new xmlChar * or NULL
1.45 daniel 2620: */
2621:
1.123 daniel 2622: xmlChar *
1.55 daniel 2623: xmlCharStrdup(const char *cur) {
1.45 daniel 2624: const char *p = cur;
2625:
1.135 daniel 2626: if (cur == NULL) return(NULL);
1.45 daniel 2627: while (*p != '\0') p++;
2628: return(xmlCharStrndup(cur, p - cur));
2629: }
2630:
/**
 * xmlStrcmp:
 * @str1:  the first xmlChar *
 * @str2:  the second xmlChar *
 *
 * a strcmp for xmlChar's. A NULL string sorts before any non-NULL
 * string and two NULL strings are equal.
 *
 * Returns the integer result of the comparison: 0 if the strings are
 *         equal, otherwise the difference between the first pair of
 *         differing xmlChar (-1 / 1 when only one string is NULL)
 */

int
xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
    register int tmp;

    if ((str1 == NULL) && (str2 == NULL)) return(0);
    if (str1 == NULL) return(-1);
    if (str2 == NULL) return(1);

    /*
     * The terminator is tested before moving forward so that nothing
     * is ever read past the end of either string, even when both are
     * empty.
     */
    for (;;) {
        tmp = *str1 - *str2;
        if (tmp != 0) return(tmp);
        if (*str1 == 0) return(0);
        str1++;
        str2++;
    }
}
2654:
/**
 * xmlStrncmp:
 * @str1:  the first xmlChar *
 * @str2:  the second xmlChar *
 * @len:  the max comparison length
 *
 * a strncmp for xmlChar's. At most @len xmlChar are compared; a @len
 * of 0 or less always yields 0. A NULL string sorts before any
 * non-NULL string and two NULL strings are equal.
 *
 * Returns the integer result of the comparison: 0 if the compared
 *         parts are equal, otherwise the difference between the first
 *         pair of differing xmlChar (-1 / 1 when only one string is
 *         NULL)
 */

int
xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
    register int tmp;

    if (len <= 0) return(0);
    if ((str1 == NULL) && (str2 == NULL)) return(0);
    if (str1 == NULL) return(-1);
    if (str2 == NULL) return(1);

    /*
     * The terminator is tested before moving forward so that nothing
     * is ever read past the end of either string when both end
     * together before @len is exhausted.
     */
    while (len > 0) {
        tmp = *str1 - *str2;
        if (tmp != 0) return(tmp);
        if (*str1 == 0) return(0);
        str1++;
        str2++;
        len--;
    }
    return(0);
}
2682:
/**
 * xmlStrchr:
 * @str:  the xmlChar * array
 * @val:  the xmlChar to search
 *
 * a strchr for xmlChar's. The terminating 0 itself is never matched.
 *
 * Returns the xmlChar * for the first occurence or NULL.
 */

const xmlChar *
xmlStrchr(const xmlChar *str, xmlChar val) {
    const xmlChar *scan;

    if (str == NULL)
        return(NULL);

    for (scan = str; *scan != 0; scan++) {
        if (*scan == val)
            return(scan);
    }
    return(NULL);
}
2702:
2703: /**
2704: * xmlStrstr:
1.123 daniel 2705: * @str: the xmlChar * array (haystack)
2706: * @val: the xmlChar to search (needle)
1.89 daniel 2707: *
1.123 daniel 2708: * a strstr for xmlChar's
1.89 daniel 2709: *
1.123 daniel 2710: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 2711: */
2712:
1.123 daniel 2713: const xmlChar *
2714: xmlStrstr(const xmlChar *str, xmlChar *val) {
1.89 daniel 2715: int n;
2716:
2717: if (str == NULL) return(NULL);
2718: if (val == NULL) return(NULL);
2719: n = xmlStrlen(val);
2720:
2721: if (n == 0) return(str);
2722: while (*str != 0) {
2723: if (*str == *val) {
1.123 daniel 2724: if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1.89 daniel 2725: }
2726: str++;
2727: }
2728: return(NULL);
2729: }
2730:
2731: /**
2732: * xmlStrsub:
1.123 daniel 2733: * @str: the xmlChar * array (haystack)
1.89 daniel 2734: * @start: the index of the first char (zero based)
2735: * @len: the length of the substring
2736: *
2737: * Extract a substring of a given string
2738: *
1.123 daniel 2739: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 2740: */
2741:
1.123 daniel 2742: xmlChar *
2743: xmlStrsub(const xmlChar *str, int start, int len) {
1.89 daniel 2744: int i;
2745:
2746: if (str == NULL) return(NULL);
2747: if (start < 0) return(NULL);
1.90 daniel 2748: if (len < 0) return(NULL);
1.89 daniel 2749:
2750: for (i = 0;i < start;i++) {
2751: if (*str == 0) return(NULL);
2752: str++;
2753: }
2754: if (*str == 0) return(NULL);
2755: return(xmlStrndup(str, len));
1.14 veillard 2756: }
1.28 daniel 2757:
/**
 * xmlStrlen:
 * @str:  the xmlChar * array
 *
 * length of a xmlChar's string
 *
 * Returns the number of xmlChar found before the terminating 0,
 *         0 if @str is NULL.
 */

int
xmlStrlen(const xmlChar *str) {
    int count;

    if (str == NULL)
        return(0);

    for (count = 0; str[count] != 0; count++)
        ;
    return(count);
}
2778:
1.50 daniel 2779: /**
2780: * xmlStrncat:
1.123 daniel 2781: * @cur: the original xmlChar * array
2782: * @add: the xmlChar * array added
1.50 daniel 2783: * @len: the length of @add
2784: *
1.123 daniel 2785: * a strncat for array of xmlChar's
1.68 daniel 2786: *
1.123 daniel 2787: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 2788: */
2789:
1.123 daniel 2790: xmlChar *
2791: xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1.45 daniel 2792: int size;
1.123 daniel 2793: xmlChar *ret;
1.45 daniel 2794:
2795: if ((add == NULL) || (len == 0))
2796: return(cur);
2797: if (cur == NULL)
2798: return(xmlStrndup(add, len));
2799:
2800: size = xmlStrlen(cur);
1.123 daniel 2801: ret = xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1.45 daniel 2802: if (ret == NULL) {
1.86 daniel 2803: fprintf(stderr, "xmlStrncat: realloc of %ld byte failed\n",
1.123 daniel 2804: (size + len + 1) * (long)sizeof(xmlChar));
1.45 daniel 2805: return(cur);
2806: }
1.123 daniel 2807: memcpy(&ret[size], add, len * sizeof(xmlChar));
1.45 daniel 2808: ret[size + len] = 0;
2809: return(ret);
2810: }
2811:
1.50 daniel 2812: /**
2813: * xmlStrcat:
1.123 daniel 2814: * @cur: the original xmlChar * array
2815: * @add: the xmlChar * array added
1.50 daniel 2816: *
1.152 daniel 2817: * a strcat for array of xmlChar's. Since they are supposed to be
2818: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2819: * a termination mark of '0'.
1.68 daniel 2820: *
1.123 daniel 2821: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 2822: */
1.123 daniel 2823: xmlChar *
2824: xmlStrcat(xmlChar *cur, const xmlChar *add) {
2825: const xmlChar *p = add;
1.45 daniel 2826:
2827: if (add == NULL) return(cur);
2828: if (cur == NULL)
2829: return(xmlStrdup(add));
2830:
1.152 daniel 2831: while (*p != 0) p++;
1.45 daniel 2832: return(xmlStrncat(cur, add, p - add));
2833: }
2834:
2835: /************************************************************************
2836: * *
2837: * Commodity functions, cleanup needed ? *
2838: * *
2839: ************************************************************************/
2840:
1.50 daniel 2841: /**
2842: * areBlanks:
2843: * @ctxt: an XML parser context
1.123 daniel 2844: * @str: a xmlChar *
1.50 daniel 2845: * @len: the size of @str
2846: *
1.45 daniel 2847: * Is this a sequence of blank chars that one can ignore ?
1.50 daniel 2848: *
1.68 daniel 2849: * Returns 1 if ignorable 0 otherwise.
1.45 daniel 2850: */
2851:
1.123 daniel 2852: static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1.104 daniel 2853: int i, ret;
1.45 daniel 2854: xmlNodePtr lastChild;
2855:
1.176 daniel 2856: /*
2857: * Check for xml:space value.
2858: */
2859: if (*(ctxt->space) == 1)
2860: return(0);
2861:
2862: /*
2863: * Check that the string is made of blanks
2864: */
1.45 daniel 2865: for (i = 0;i < len;i++)
2866: if (!(IS_BLANK(str[i]))) return(0);
2867:
1.176 daniel 2868: /*
2869: * Look if the element is mixed content in the Dtd if available
2870: */
1.104 daniel 2871: if (ctxt->myDoc != NULL) {
2872: ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2873: if (ret == 0) return(1);
2874: if (ret == 1) return(0);
2875: }
1.176 daniel 2876:
1.104 daniel 2877: /*
1.176 daniel 2878: * Otherwise, heuristic :-\
1.104 daniel 2879: */
1.179 daniel 2880: if (ctxt->keepBlanks)
2881: return(0);
2882: if (RAW != '<') return(0);
2883: if (ctxt->node == NULL) return(0);
2884: if ((ctxt->node->children == NULL) &&
2885: (RAW == '<') && (NXT(1) == '/')) return(0);
2886:
1.45 daniel 2887: lastChild = xmlGetLastChild(ctxt->node);
2888: if (lastChild == NULL) {
2889: if (ctxt->node->content != NULL) return(0);
2890: } else if (xmlNodeIsText(lastChild))
2891: return(0);
1.157 daniel 2892: else if ((ctxt->node->children != NULL) &&
2893: (xmlNodeIsText(ctxt->node->children)))
1.104 daniel 2894: return(0);
1.45 daniel 2895: return(1);
2896: }
2897:
1.50 daniel 2898: /**
2899: * xmlHandleEntity:
2900: * @ctxt: an XML parser context
2901: * @entity: an XML entity pointer.
2902: *
2903: * Default handling of defined entities, when should we define a new input
1.45 daniel 2904: * stream ? When do we just handle that as a set of chars ?
1.99 daniel 2905: *
2906: * OBSOLETE: to be removed at some point.
1.45 daniel 2907: */
2908:
1.55 daniel 2909: void
2910: xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 2911: int len;
1.50 daniel 2912: xmlParserInputPtr input;
1.45 daniel 2913:
2914: if (entity->content == NULL) {
1.123 daniel 2915: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 2916: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2917: ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
1.45 daniel 2918: entity->name);
1.59 daniel 2919: ctxt->wellFormed = 0;
1.180 daniel 2920: ctxt->disableSAX = 1;
1.45 daniel 2921: return;
2922: }
2923: len = xmlStrlen(entity->content);
2924: if (len <= 2) goto handle_as_char;
2925:
2926: /*
2927: * Redefine its content as an input stream.
2928: */
1.50 daniel 2929: input = xmlNewEntityInputStream(ctxt, entity);
2930: xmlPushInput(ctxt, input);
1.45 daniel 2931: return;
2932:
2933: handle_as_char:
2934: /*
2935: * Just handle the content as a set of chars.
2936: */
1.171 daniel 2937: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
2938: (ctxt->sax->characters != NULL))
1.74 daniel 2939: ctxt->sax->characters(ctxt->userData, entity->content, len);
1.45 daniel 2940:
2941: }
2942:
2943: /*
2944: * Forward definition for recursive behaviour.
2945: */
1.77 daniel 2946: void xmlParsePEReference(xmlParserCtxtPtr ctxt);
2947: void xmlParseReference(xmlParserCtxtPtr ctxt);
1.45 daniel 2948:
1.28 daniel 2949: /************************************************************************
2950: * *
2951: * Extra stuff for namespace support *
2952: * Relates to http://www.w3.org/TR/WD-xml-names *
2953: * *
2954: ************************************************************************/
2955:
1.50 daniel 2956: /**
2957: * xmlNamespaceParseNCName:
2958: * @ctxt: an XML parser context
2959: *
2960: * parse an XML namespace name.
1.28 daniel 2961: *
2962: * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2963: *
2964: * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2965: * CombiningChar | Extender
1.68 daniel 2966: *
2967: * Returns the namespace name or NULL
1.28 daniel 2968: */
2969:
1.123 daniel 2970: xmlChar *
1.55 daniel 2971: xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
1.152 daniel 2972: xmlChar buf[XML_MAX_NAMELEN + 5];
2973: int len = 0, l;
2974: int cur = CUR_CHAR(l);
1.28 daniel 2975:
1.156 daniel 2976: /* load first the value of the char !!! */
1.152 daniel 2977: if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
1.28 daniel 2978:
1.152 daniel 2979: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
2980: (cur == '.') || (cur == '-') ||
2981: (cur == '_') ||
2982: (IS_COMBINING(cur)) ||
2983: (IS_EXTENDER(cur))) {
2984: COPY_BUF(l,buf,len,cur);
2985: NEXTL(l);
2986: cur = CUR_CHAR(l);
1.91 daniel 2987: if (len >= XML_MAX_NAMELEN) {
2988: fprintf(stderr,
2989: "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
1.152 daniel 2990: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
2991: (cur == '.') || (cur == '-') ||
2992: (cur == '_') ||
2993: (IS_COMBINING(cur)) ||
2994: (IS_EXTENDER(cur))) {
2995: NEXTL(l);
2996: cur = CUR_CHAR(l);
2997: }
1.91 daniel 2998: break;
2999: }
3000: }
3001: return(xmlStrndup(buf, len));
1.28 daniel 3002: }
3003:
1.50 daniel 3004: /**
3005: * xmlNamespaceParseQName:
3006: * @ctxt: an XML parser context
1.123 daniel 3007: * @prefix: a xmlChar **
1.50 daniel 3008: *
3009: * parse an XML qualified name
1.28 daniel 3010: *
3011: * [NS 5] QName ::= (Prefix ':')? LocalPart
3012: *
3013: * [NS 6] Prefix ::= NCName
3014: *
3015: * [NS 7] LocalPart ::= NCName
1.68 daniel 3016: *
1.127 daniel 3017: * Returns the local part, and prefix is updated
1.50 daniel 3018: * to get the Prefix if any.
1.28 daniel 3019: */
3020:
1.123 daniel 3021: xmlChar *
3022: xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, xmlChar **prefix) {
3023: xmlChar *ret = NULL;
1.28 daniel 3024:
3025: *prefix = NULL;
3026: ret = xmlNamespaceParseNCName(ctxt);
1.152 daniel 3027: if (RAW == ':') {
1.28 daniel 3028: *prefix = ret;
1.40 daniel 3029: NEXT;
1.28 daniel 3030: ret = xmlNamespaceParseNCName(ctxt);
3031: }
3032:
3033: return(ret);
3034: }
3035:
1.50 daniel 3036: /**
1.72 daniel 3037: * xmlSplitQName:
1.162 daniel 3038: * @ctxt: an XML parser context
1.72 daniel 3039: * @name: an XML parser context
1.123 daniel 3040: * @prefix: a xmlChar **
1.72 daniel 3041: *
3042: * parse an XML qualified name string
3043: *
3044: * [NS 5] QName ::= (Prefix ':')? LocalPart
3045: *
3046: * [NS 6] Prefix ::= NCName
3047: *
3048: * [NS 7] LocalPart ::= NCName
3049: *
1.127 daniel 3050: * Returns the local part, and prefix is updated
1.72 daniel 3051: * to get the Prefix if any.
3052: */
3053:
1.123 daniel 3054: xmlChar *
1.162 daniel 3055: xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
3056: xmlChar buf[XML_MAX_NAMELEN + 5];
3057: int len = 0;
1.123 daniel 3058: xmlChar *ret = NULL;
3059: const xmlChar *cur = name;
1.162 daniel 3060: int c,l;
1.72 daniel 3061:
3062: *prefix = NULL;
1.113 daniel 3063:
3064: /* xml: prefix is not really a namespace */
3065: if ((cur[0] == 'x') && (cur[1] == 'm') &&
3066: (cur[2] == 'l') && (cur[3] == ':'))
3067: return(xmlStrdup(name));
3068:
1.162 daniel 3069: /* nasty but valid */
3070: if (cur[0] == ':')
3071: return(xmlStrdup(name));
3072:
3073: c = CUR_SCHAR(cur, l);
3074: if (!IS_LETTER(c) && (c != '_')) return(NULL);
1.72 daniel 3075:
1.162 daniel 3076: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3077: (c == '.') || (c == '-') ||
3078: (c == '_') ||
3079: (IS_COMBINING(c)) ||
3080: (IS_EXTENDER(c))) {
3081: COPY_BUF(l,buf,len,c);
3082: cur += l;
3083: c = CUR_SCHAR(cur, l);
3084: }
1.72 daniel 3085:
1.162 daniel 3086: ret = xmlStrndup(buf, len);
1.72 daniel 3087:
1.162 daniel 3088: if (c == ':') {
3089: cur += l;
1.163 daniel 3090: c = CUR_SCHAR(cur, l);
1.162 daniel 3091: if (!IS_LETTER(c) && (c != '_')) return(ret);
1.72 daniel 3092: *prefix = ret;
1.162 daniel 3093: len = 0;
1.72 daniel 3094:
1.162 daniel 3095: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3096: (c == '.') || (c == '-') ||
3097: (c == '_') ||
3098: (IS_COMBINING(c)) ||
3099: (IS_EXTENDER(c))) {
3100: COPY_BUF(l,buf,len,c);
3101: cur += l;
3102: c = CUR_SCHAR(cur, l);
3103: }
1.72 daniel 3104:
1.162 daniel 3105: ret = xmlStrndup(buf, len);
1.72 daniel 3106: }
3107:
3108: return(ret);
3109: }
3110: /**
1.50 daniel 3111: * xmlNamespaceParseNSDef:
3112: * @ctxt: an XML parser context
3113: *
3114: * parse a namespace prefix declaration
1.28 daniel 3115: *
3116: * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
3117: *
3118: * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
1.68 daniel 3119: *
3120: * Returns the namespace name
1.28 daniel 3121: */
3122:
1.123 daniel 3123: xmlChar *
1.55 daniel 3124: xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
1.123 daniel 3125: xmlChar *name = NULL;
1.28 daniel 3126:
1.152 daniel 3127: if ((RAW == 'x') && (NXT(1) == 'm') &&
1.40 daniel 3128: (NXT(2) == 'l') && (NXT(3) == 'n') &&
3129: (NXT(4) == 's')) {
3130: SKIP(5);
1.152 daniel 3131: if (RAW == ':') {
1.40 daniel 3132: NEXT;
1.28 daniel 3133: name = xmlNamespaceParseNCName(ctxt);
3134: }
3135: }
1.39 daniel 3136: return(name);
1.28 daniel 3137: }
3138:
1.50 daniel 3139: /**
3140: * xmlParseQuotedString:
3141: * @ctxt: an XML parser context
3142: *
1.45 daniel 3143: * [OLD] Parse and return a string between quotes or doublequotes
1.110 daniel 3144: * To be removed at next drop of binary compatibility
1.68 daniel 3145: *
3146: * Returns the string parser or NULL.
1.45 daniel 3147: */
1.123 daniel 3148: xmlChar *
1.55 daniel 3149: xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
1.135 daniel 3150: xmlChar *buf = NULL;
1.152 daniel 3151: int len = 0,l;
1.140 daniel 3152: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3153: int c;
1.45 daniel 3154:
1.135 daniel 3155: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3156: if (buf == NULL) {
3157: fprintf(stderr, "malloc of %d byte failed\n", size);
3158: return(NULL);
3159: }
1.152 daniel 3160: if (RAW == '"') {
1.45 daniel 3161: NEXT;
1.152 daniel 3162: c = CUR_CHAR(l);
1.135 daniel 3163: while (IS_CHAR(c) && (c != '"')) {
1.152 daniel 3164: if (len + 5 >= size) {
1.135 daniel 3165: size *= 2;
3166: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3167: if (buf == NULL) {
3168: fprintf(stderr, "realloc of %d byte failed\n", size);
3169: return(NULL);
3170: }
3171: }
1.152 daniel 3172: COPY_BUF(l,buf,len,c);
3173: NEXTL(l);
3174: c = CUR_CHAR(l);
1.135 daniel 3175: }
3176: if (c != '"') {
1.123 daniel 3177: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 3178: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3179: ctxt->sax->error(ctxt->userData,
3180: "String not closed \"%.50s\"\n", buf);
1.59 daniel 3181: ctxt->wellFormed = 0;
1.180 daniel 3182: ctxt->disableSAX = 1;
1.55 daniel 3183: } else {
1.45 daniel 3184: NEXT;
3185: }
1.152 daniel 3186: } else if (RAW == '\''){
1.45 daniel 3187: NEXT;
1.135 daniel 3188: c = CUR;
3189: while (IS_CHAR(c) && (c != '\'')) {
3190: if (len + 1 >= size) {
3191: size *= 2;
3192: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3193: if (buf == NULL) {
3194: fprintf(stderr, "realloc of %d byte failed\n", size);
3195: return(NULL);
3196: }
3197: }
3198: buf[len++] = c;
3199: NEXT;
3200: c = CUR;
3201: }
1.152 daniel 3202: if (RAW != '\'') {
1.123 daniel 3203: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 3204: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3205: ctxt->sax->error(ctxt->userData,
3206: "String not closed \"%.50s\"\n", buf);
1.59 daniel 3207: ctxt->wellFormed = 0;
1.180 daniel 3208: ctxt->disableSAX = 1;
1.55 daniel 3209: } else {
1.45 daniel 3210: NEXT;
3211: }
3212: }
1.135 daniel 3213: return(buf);
1.45 daniel 3214: }
3215:
1.50 daniel 3216: /**
3217: * xmlParseNamespace:
3218: * @ctxt: an XML parser context
3219: *
1.45 daniel 3220: * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
3221: *
3222: * This is what the older xml-name Working Draft specified, a bunch of
3223: * other stuff may still rely on it, so support is still here as
1.127 daniel 3224: * if it was declared on the root of the Tree:-(
1.110 daniel 3225: *
3226: * To be removed at next drop of binary compatibility
1.45 daniel 3227: */
3228:
1.55 daniel 3229: void
3230: xmlParseNamespace(xmlParserCtxtPtr ctxt) {
1.123 daniel 3231: xmlChar *href = NULL;
3232: xmlChar *prefix = NULL;
1.45 daniel 3233: int garbage = 0;
3234:
3235: /*
3236: * We just skipped "namespace" or "xml:namespace"
3237: */
3238: SKIP_BLANKS;
3239:
1.153 daniel 3240: while (IS_CHAR(RAW) && (RAW != '>')) {
1.45 daniel 3241: /*
3242: * We can have "ns" or "prefix" attributes
3243: * Old encoding as 'href' or 'AS' attributes is still supported
3244: */
1.152 daniel 3245: if ((RAW == 'n') && (NXT(1) == 's')) {
1.45 daniel 3246: garbage = 0;
3247: SKIP(2);
3248: SKIP_BLANKS;
3249:
1.152 daniel 3250: if (RAW != '=') continue;
1.45 daniel 3251: NEXT;
3252: SKIP_BLANKS;
3253:
3254: href = xmlParseQuotedString(ctxt);
3255: SKIP_BLANKS;
1.152 daniel 3256: } else if ((RAW == 'h') && (NXT(1) == 'r') &&
1.45 daniel 3257: (NXT(2) == 'e') && (NXT(3) == 'f')) {
3258: garbage = 0;
3259: SKIP(4);
3260: SKIP_BLANKS;
3261:
1.152 daniel 3262: if (RAW != '=') continue;
1.45 daniel 3263: NEXT;
3264: SKIP_BLANKS;
3265:
3266: href = xmlParseQuotedString(ctxt);
3267: SKIP_BLANKS;
1.152 daniel 3268: } else if ((RAW == 'p') && (NXT(1) == 'r') &&
1.45 daniel 3269: (NXT(2) == 'e') && (NXT(3) == 'f') &&
3270: (NXT(4) == 'i') && (NXT(5) == 'x')) {
3271: garbage = 0;
3272: SKIP(6);
3273: SKIP_BLANKS;
3274:
1.152 daniel 3275: if (RAW != '=') continue;
1.45 daniel 3276: NEXT;
3277: SKIP_BLANKS;
3278:
3279: prefix = xmlParseQuotedString(ctxt);
3280: SKIP_BLANKS;
1.152 daniel 3281: } else if ((RAW == 'A') && (NXT(1) == 'S')) {
1.45 daniel 3282: garbage = 0;
3283: SKIP(2);
3284: SKIP_BLANKS;
3285:
1.152 daniel 3286: if (RAW != '=') continue;
1.45 daniel 3287: NEXT;
3288: SKIP_BLANKS;
3289:
3290: prefix = xmlParseQuotedString(ctxt);
3291: SKIP_BLANKS;
1.152 daniel 3292: } else if ((RAW == '?') && (NXT(1) == '>')) {
1.45 daniel 3293: garbage = 0;
1.91 daniel 3294: NEXT;
1.45 daniel 3295: } else {
3296: /*
3297: * Found garbage when parsing the namespace
3298: */
1.122 daniel 3299: if (!garbage) {
1.55 daniel 3300: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3301: ctxt->sax->error(ctxt->userData,
3302: "xmlParseNamespace found garbage\n");
3303: }
1.123 daniel 3304: ctxt->errNo = XML_ERR_NS_DECL_ERROR;
1.59 daniel 3305: ctxt->wellFormed = 0;
1.180 daniel 3306: ctxt->disableSAX = 1;
1.45 daniel 3307: NEXT;
3308: }
3309: }
3310:
3311: MOVETO_ENDTAG(CUR_PTR);
3312: NEXT;
3313:
3314: /*
3315: * Register the DTD.
1.72 daniel 3316: if (href != NULL)
3317: if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
1.74 daniel 3318: ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
1.45 daniel 3319: */
3320:
1.119 daniel 3321: if (prefix != NULL) xmlFree(prefix);
3322: if (href != NULL) xmlFree(href);
1.45 daniel 3323: }
3324:
1.28 daniel 3325: /************************************************************************
3326: * *
3327: * The parser itself *
3328: * Relates to http://www.w3.org/TR/REC-xml *
3329: * *
3330: ************************************************************************/
1.14 veillard 3331:
1.50 daniel 3332: /**
1.97 daniel 3333: * xmlScanName:
3334: * @ctxt: an XML parser context
3335: *
3336: * Trickery: parse an XML name but without consuming the input flow
3337: * Needed for rollback cases.
3338: *
3339: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3340: * CombiningChar | Extender
3341: *
3342: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3343: *
3344: * [6] Names ::= Name (S Name)*
3345: *
3346: * Returns the Name parsed or NULL
3347: */
3348:
1.123 daniel 3349: xmlChar *
1.97 daniel 3350: xmlScanName(xmlParserCtxtPtr ctxt) {
1.123 daniel 3351: xmlChar buf[XML_MAX_NAMELEN];
1.97 daniel 3352: int len = 0;
3353:
3354: GROW;
1.152 daniel 3355: if (!IS_LETTER(RAW) && (RAW != '_') &&
3356: (RAW != ':')) {
1.97 daniel 3357: return(NULL);
3358: }
3359:
3360: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
3361: (NXT(len) == '.') || (NXT(len) == '-') ||
3362: (NXT(len) == '_') || (NXT(len) == ':') ||
3363: (IS_COMBINING(NXT(len))) ||
3364: (IS_EXTENDER(NXT(len)))) {
3365: buf[len] = NXT(len);
3366: len++;
3367: if (len >= XML_MAX_NAMELEN) {
3368: fprintf(stderr,
3369: "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3370: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
3371: (NXT(len) == '.') || (NXT(len) == '-') ||
3372: (NXT(len) == '_') || (NXT(len) == ':') ||
3373: (IS_COMBINING(NXT(len))) ||
3374: (IS_EXTENDER(NXT(len))))
3375: len++;
3376: break;
3377: }
3378: }
3379: return(xmlStrndup(buf, len));
3380: }
3381:
3382: /**
1.50 daniel 3383: * xmlParseName:
3384: * @ctxt: an XML parser context
3385: *
3386: * parse an XML name.
1.22 daniel 3387: *
3388: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3389: * CombiningChar | Extender
3390: *
3391: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3392: *
3393: * [6] Names ::= Name (S Name)*
1.68 daniel 3394: *
3395: * Returns the Name parsed or NULL
1.1 veillard 3396: */
3397:
1.123 daniel 3398: xmlChar *
1.55 daniel 3399: xmlParseName(xmlParserCtxtPtr ctxt) {
1.160 daniel 3400: xmlChar buf[XML_MAX_NAMELEN + 5];
3401: int len = 0, l;
3402: int c;
1.1 veillard 3403:
1.91 daniel 3404: GROW;
1.160 daniel 3405: c = CUR_CHAR(l);
3406: if (!IS_LETTER(c) && (c != '_') &&
3407: (c != ':')) {
1.91 daniel 3408: return(NULL);
3409: }
1.40 daniel 3410:
1.160 daniel 3411: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3412: (c == '.') || (c == '-') ||
3413: (c == '_') || (c == ':') ||
3414: (IS_COMBINING(c)) ||
3415: (IS_EXTENDER(c))) {
3416: COPY_BUF(l,buf,len,c);
3417: NEXTL(l);
3418: c = CUR_CHAR(l);
1.91 daniel 3419: if (len >= XML_MAX_NAMELEN) {
3420: fprintf(stderr,
3421: "xmlParseName: reached XML_MAX_NAMELEN limit\n");
1.160 daniel 3422: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3423: (c == '.') || (c == '-') ||
3424: (c == '_') || (c == ':') ||
3425: (IS_COMBINING(c)) ||
3426: (IS_EXTENDER(c))) {
3427: NEXTL(l);
3428: c = CUR_CHAR(l);
1.97 daniel 3429: }
1.91 daniel 3430: break;
3431: }
3432: }
3433: return(xmlStrndup(buf, len));
1.22 daniel 3434: }
3435:
1.50 daniel 3436: /**
1.135 daniel 3437: * xmlParseStringName:
3438: * @ctxt: an XML parser context
3439: * @str: a pointer to an index in the string
3440: *
3441: * parse an XML name.
3442: *
3443: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3444: * CombiningChar | Extender
3445: *
3446: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3447: *
3448: * [6] Names ::= Name (S Name)*
3449: *
3450: * Returns the Name parsed or NULL. The str pointer
3451: * is updated to the current location in the string.
3452: */
3453:
3454: xmlChar *
3455: xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1.176 daniel 3456: xmlChar buf[XML_MAX_NAMELEN + 5];
3457: const xmlChar *cur = *str;
3458: int len = 0, l;
3459: int c;
1.135 daniel 3460:
1.176 daniel 3461: c = CUR_SCHAR(cur, l);
3462: if (!IS_LETTER(c) && (c != '_') &&
3463: (c != ':')) {
1.135 daniel 3464: return(NULL);
3465: }
3466:
1.176 daniel 3467: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3468: (c == '.') || (c == '-') ||
3469: (c == '_') || (c == ':') ||
3470: (IS_COMBINING(c)) ||
3471: (IS_EXTENDER(c))) {
3472: COPY_BUF(l,buf,len,c);
3473: cur += l;
3474: c = CUR_SCHAR(cur, l);
3475: if (len >= XML_MAX_NAMELEN) {
3476: fprintf(stderr,
3477: "xmlParseName: reached XML_MAX_NAMELEN limit\n");
3478: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3479: (c == '.') || (c == '-') ||
3480: (c == '_') || (c == ':') ||
3481: (IS_COMBINING(c)) ||
3482: (IS_EXTENDER(c))) {
3483: cur += l;
3484: c = CUR_SCHAR(cur, l);
3485: }
3486: break;
3487: }
1.135 daniel 3488: }
1.176 daniel 3489: *str = cur;
3490: return(xmlStrndup(buf, len));
1.135 daniel 3491: }
3492:
3493: /**
1.50 daniel 3494: * xmlParseNmtoken:
3495: * @ctxt: an XML parser context
3496: *
3497: * parse an XML Nmtoken.
1.22 daniel 3498: *
3499: * [7] Nmtoken ::= (NameChar)+
3500: *
3501: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1.68 daniel 3502: *
3503: * Returns the Nmtoken parsed or NULL
1.22 daniel 3504: */
3505:
1.123 daniel 3506: xmlChar *
1.55 daniel 3507: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1.123 daniel 3508: xmlChar buf[XML_MAX_NAMELEN];
1.91 daniel 3509: int len = 0;
1.160 daniel 3510: int c,l;
1.22 daniel 3511:
1.91 daniel 3512: GROW;
1.160 daniel 3513: c = CUR_CHAR(l);
3514: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3515: (c == '.') || (c == '-') ||
3516: (c == '_') || (c == ':') ||
3517: (IS_COMBINING(c)) ||
3518: (IS_EXTENDER(c))) {
3519: COPY_BUF(l,buf,len,c);
3520: NEXTL(l);
3521: c = CUR_CHAR(l);
1.91 daniel 3522: if (len >= XML_MAX_NAMELEN) {
3523: fprintf(stderr,
3524: "xmlParseNmtoken: reached XML_MAX_NAMELEN limit\n");
1.160 daniel 3525: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3526: (c == '.') || (c == '-') ||
3527: (c == '_') || (c == ':') ||
3528: (IS_COMBINING(c)) ||
3529: (IS_EXTENDER(c))) {
3530: NEXTL(l);
3531: c = CUR_CHAR(l);
3532: }
1.91 daniel 3533: break;
3534: }
3535: }
1.168 daniel 3536: if (len == 0)
3537: return(NULL);
1.91 daniel 3538: return(xmlStrndup(buf, len));
1.1 veillard 3539: }
3540:
1.50 daniel 3541: /**
3542: * xmlParseEntityValue:
3543: * @ctxt: an XML parser context
1.78 daniel 3544: * @orig: if non-NULL store a copy of the original entity value
1.50 daniel 3545: *
3546: * parse a value for ENTITY decl.
1.24 daniel 3547: *
3548: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3549: * "'" ([^%&'] | PEReference | Reference)* "'"
1.68 daniel 3550: *
1.78 daniel 3551: * Returns the EntityValue parsed with reference substitued or NULL
1.24 daniel 3552: */
3553:
1.123 daniel 3554: xmlChar *
3555: xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1.135 daniel 3556: xmlChar *buf = NULL;
3557: int len = 0;
1.140 daniel 3558: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3559: int c, l;
1.135 daniel 3560: xmlChar stop;
1.123 daniel 3561: xmlChar *ret = NULL;
1.176 daniel 3562: const xmlChar *cur = NULL;
1.98 daniel 3563: xmlParserInputPtr input;
1.24 daniel 3564:
1.152 daniel 3565: if (RAW == '"') stop = '"';
3566: else if (RAW == '\'') stop = '\'';
1.135 daniel 3567: else {
3568: ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
3569: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3570: ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
3571: ctxt->wellFormed = 0;
1.180 daniel 3572: ctxt->disableSAX = 1;
1.135 daniel 3573: return(NULL);
3574: }
3575: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3576: if (buf == NULL) {
3577: fprintf(stderr, "malloc of %d byte failed\n", size);
3578: return(NULL);
3579: }
1.94 daniel 3580:
1.135 daniel 3581: /*
3582: * The content of the entity definition is copied in a buffer.
3583: */
1.94 daniel 3584:
1.135 daniel 3585: ctxt->instate = XML_PARSER_ENTITY_VALUE;
3586: input = ctxt->input;
3587: GROW;
3588: NEXT;
1.152 daniel 3589: c = CUR_CHAR(l);
1.135 daniel 3590: /*
3591: * NOTE: 4.4.5 Included in Literal
3592: * When a parameter entity reference appears in a literal entity
3593: * value, ... a single or double quote character in the replacement
3594: * text is always treated as a normal data character and will not
3595: * terminate the literal.
3596: * In practice it means we stop the loop only when back at parsing
3597: * the initial entity and the quote is found
3598: */
3599: while (IS_CHAR(c) && ((c != stop) || (ctxt->input != input))) {
1.152 daniel 3600: if (len + 5 >= size) {
1.135 daniel 3601: size *= 2;
3602: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3603: if (buf == NULL) {
3604: fprintf(stderr, "realloc of %d byte failed\n", size);
3605: return(NULL);
1.94 daniel 3606: }
1.79 daniel 3607: }
1.152 daniel 3608: COPY_BUF(l,buf,len,c);
3609: NEXTL(l);
1.98 daniel 3610: /*
1.135 daniel 3611: * Pop-up of finished entities.
1.98 daniel 3612: */
1.152 daniel 3613: while ((RAW == 0) && (ctxt->inputNr > 1))
1.135 daniel 3614: xmlPopInput(ctxt);
1.152 daniel 3615:
3616: c = CUR_CHAR(l);
1.135 daniel 3617: if (c == 0) {
1.94 daniel 3618: GROW;
1.152 daniel 3619: c = CUR_CHAR(l);
1.79 daniel 3620: }
1.135 daniel 3621: }
3622: buf[len] = 0;
3623:
3624: /*
1.176 daniel 3625: * Raise problem w.r.t. '&' and '%' being used in non-entities
3626: * reference constructs. Note Charref will be handled in
3627: * xmlStringDecodeEntities()
3628: */
3629: cur = buf;
3630: while (*cur != 0) {
3631: if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3632: xmlChar *name;
3633: xmlChar tmp = *cur;
3634:
3635: cur++;
3636: name = xmlParseStringName(ctxt, &cur);
3637: if ((name == NULL) || (*cur != ';')) {
3638: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3639: ctxt->sax->error(ctxt->userData,
3640: "EntityValue: '%c' forbidden except for entities references\n",
3641: tmp);
3642: ctxt->wellFormed = 0;
1.180 daniel 3643: ctxt->disableSAX = 1;
1.176 daniel 3644: ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
3645: }
3646: if ((ctxt->inSubset == 1) && (tmp == '%')) {
3647: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3648: ctxt->sax->error(ctxt->userData,
3649: "EntityValue: PEReferences forbidden in internal subset\n",
3650: tmp);
3651: ctxt->wellFormed = 0;
1.180 daniel 3652: ctxt->disableSAX = 1;
1.176 daniel 3653: ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
3654: }
3655: if (name != NULL)
3656: xmlFree(name);
3657: }
3658: cur++;
3659: }
3660:
3661: /*
1.135 daniel 3662: * Then PEReference entities are substituted.
3663: */
3664: if (c != stop) {
3665: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.55 daniel 3666: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3667: ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
1.59 daniel 3668: ctxt->wellFormed = 0;
1.180 daniel 3669: ctxt->disableSAX = 1;
1.170 daniel 3670: xmlFree(buf);
1.135 daniel 3671: } else {
3672: NEXT;
3673: /*
3674: * NOTE: 4.4.7 Bypassed
3675: * When a general entity reference appears in the EntityValue in
3676: * an entity declaration, it is bypassed and left as is.
1.176 daniel 3677: * so XML_SUBSTITUTE_REF is not set here.
1.135 daniel 3678: */
3679: ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3680: 0, 0, 0);
3681: if (orig != NULL)
3682: *orig = buf;
3683: else
3684: xmlFree(buf);
1.24 daniel 3685: }
3686:
3687: return(ret);
3688: }
3689:
1.50 daniel 3690: /**
3691: * xmlParseAttValue:
3692: * @ctxt: an XML parser context
3693: *
3694: * parse a value for an attribute
1.78 daniel 3695: * Note: the parser won't do substitution of entities here, this
1.113 daniel 3696: * will be handled later in xmlStringGetNodeList
1.29 daniel 3697: *
3698: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3699: * "'" ([^<&'] | Reference)* "'"
1.68 daniel 3700: *
1.129 daniel 3701: * 3.3.3 Attribute-Value Normalization:
3702: * Before the value of an attribute is passed to the application or
3703: * checked for validity, the XML processor must normalize it as follows:
3704: * - a character reference is processed by appending the referenced
3705: * character to the attribute value
3706: * - an entity reference is processed by recursively processing the
3707: * replacement text of the entity
3708: * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3709: * appending #x20 to the normalized value, except that only a single
3710: * #x20 is appended for a "#xD#xA" sequence that is part of an external
3711: * parsed entity or the literal entity value of an internal parsed entity
3712: * - other characters are processed by appending them to the normalized value
1.130 daniel 3713: * If the declared value is not CDATA, then the XML processor must further
3714: * process the normalized attribute value by discarding any leading and
3715: * trailing space (#x20) characters, and by replacing sequences of space
3716: * (#x20) characters by a single space (#x20) character.
3717: * All attributes for which no declaration has been read should be treated
3718: * by a non-validating parser as if declared CDATA.
1.129 daniel 3719: *
3720: * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
1.29 daniel 3721: */
3722:
1.123 daniel 3723: xmlChar *
1.55 daniel 3724: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.129 daniel 3725: xmlChar limit = 0;
3726: xmlChar *buffer = NULL;
3727: int buffer_size = 0;
3728: xmlChar *out = NULL;
3729:
3730: xmlChar *current = NULL;
3731: xmlEntityPtr ent;
3732: xmlChar cur;
3733:
1.29 daniel 3734:
1.91 daniel 3735: SHRINK;
1.151 daniel 3736: if (NXT(0) == '"') {
1.96 daniel 3737: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.129 daniel 3738: limit = '"';
1.40 daniel 3739: NEXT;
1.151 daniel 3740: } else if (NXT(0) == '\'') {
1.129 daniel 3741: limit = '\'';
1.96 daniel 3742: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.40 daniel 3743: NEXT;
1.29 daniel 3744: } else {
1.123 daniel 3745: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
1.55 daniel 3746: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3747: ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
1.59 daniel 3748: ctxt->wellFormed = 0;
1.180 daniel 3749: ctxt->disableSAX = 1;
1.129 daniel 3750: return(NULL);
1.29 daniel 3751: }
3752:
1.129 daniel 3753: /*
3754: * allocate a translation buffer.
3755: */
1.140 daniel 3756: buffer_size = XML_PARSER_BUFFER_SIZE;
1.129 daniel 3757: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
3758: if (buffer == NULL) {
3759: perror("xmlParseAttValue: malloc failed");
3760: return(NULL);
3761: }
3762: out = buffer;
3763:
3764: /*
3765: * Ok loop until we reach one of the ending char or a size limit.
3766: */
3767: cur = CUR;
1.156 daniel 3768: while (((NXT(0) != limit) && (cur != '<')) || (ctxt->token != 0)) {
1.129 daniel 3769: if (cur == 0) break;
3770: if ((cur == '&') && (NXT(1) == '#')) {
3771: int val = xmlParseCharRef(ctxt);
3772: *out++ = val;
3773: } else if (cur == '&') {
3774: ent = xmlParseEntityRef(ctxt);
3775: if ((ent != NULL) &&
3776: (ctxt->replaceEntities != 0)) {
1.185 daniel 3777: xmlChar *rep;
3778:
1.186 daniel 3779: if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3780: rep = xmlStringDecodeEntities(ctxt, ent->content,
1.185 daniel 3781: XML_SUBSTITUTE_REF, 0, 0, 0);
1.186 daniel 3782: if (rep != NULL) {
3783: current = rep;
3784: while (*current != 0) {
3785: *out++ = *current++;
3786: if (out - buffer > buffer_size - 10) {
3787: int index = out - buffer;
1.129 daniel 3788:
1.186 daniel 3789: growBuffer(buffer);
3790: out = &buffer[index];
3791: }
1.185 daniel 3792: }
1.186 daniel 3793: xmlFree(rep);
1.129 daniel 3794: }
1.186 daniel 3795: } else {
3796: if (ent->content != NULL)
3797: *out++ = ent->content[0];
1.129 daniel 3798: }
3799: } else if (ent != NULL) {
3800: int i = xmlStrlen(ent->name);
3801: const xmlChar *cur = ent->name;
3802:
1.186 daniel 3803: /*
3804: * This may look absurd but is needed to detect
3805: * entities problems
3806: */
3807: if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3808: xmlChar *rep;
3809: rep = xmlStringDecodeEntities(ctxt, ent->content,
3810: XML_SUBSTITUTE_REF, 0, 0, 0);
3811: if (rep != NULL)
3812: xmlFree(rep);
3813: }
3814:
3815: /*
3816: * Just output the reference
3817: */
1.129 daniel 3818: *out++ = '&';
3819: if (out - buffer > buffer_size - i - 10) {
3820: int index = out - buffer;
3821:
3822: growBuffer(buffer);
3823: out = &buffer[index];
3824: }
3825: for (;i > 0;i--)
3826: *out++ = *cur++;
3827: *out++ = ';';
3828: }
3829: } else {
1.156 daniel 3830: /* invalid for UTF-8 , use COPY(out); !!! */
1.129 daniel 3831: if ((cur == 0x20) || (cur == 0xD) || (cur == 0xA) || (cur == 0x9)) {
1.130 daniel 3832: *out++ = 0x20;
3833: if (out - buffer > buffer_size - 10) {
3834: int index = out - buffer;
3835:
3836: growBuffer(buffer);
3837: out = &buffer[index];
1.129 daniel 3838: }
3839: } else {
3840: *out++ = cur;
3841: if (out - buffer > buffer_size - 10) {
3842: int index = out - buffer;
3843:
3844: growBuffer(buffer);
3845: out = &buffer[index];
3846: }
3847: }
3848: NEXT;
3849: }
3850: cur = CUR;
3851: }
3852: *out++ = 0;
1.152 daniel 3853: if (RAW == '<') {
1.129 daniel 3854: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3855: ctxt->sax->error(ctxt->userData,
3856: "Unescaped '<' not allowed in attributes values\n");
3857: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
3858: ctxt->wellFormed = 0;
1.180 daniel 3859: ctxt->disableSAX = 1;
1.152 daniel 3860: } else if (RAW != limit) {
1.129 daniel 3861: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3862: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
3863: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
3864: ctxt->wellFormed = 0;
1.180 daniel 3865: ctxt->disableSAX = 1;
1.129 daniel 3866: } else
3867: NEXT;
3868: return(buffer);
1.29 daniel 3869: }
3870:
1.50 daniel 3871: /**
3872: * xmlParseSystemLiteral:
3873: * @ctxt: an XML parser context
3874: *
3875: * parse an XML Literal
1.21 daniel 3876: *
1.22 daniel 3877: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.68 daniel 3878: *
3879: * Returns the SystemLiteral parsed or NULL
1.21 daniel 3880: */
3881:
1.123 daniel 3882: xmlChar *
1.55 daniel 3883: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 3884: xmlChar *buf = NULL;
3885: int len = 0;
1.140 daniel 3886: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3887: int cur, l;
1.135 daniel 3888: xmlChar stop;
1.168 daniel 3889: int state = ctxt->instate;
1.21 daniel 3890:
1.91 daniel 3891: SHRINK;
1.152 daniel 3892: if (RAW == '"') {
1.40 daniel 3893: NEXT;
1.135 daniel 3894: stop = '"';
1.152 daniel 3895: } else if (RAW == '\'') {
1.40 daniel 3896: NEXT;
1.135 daniel 3897: stop = '\'';
1.21 daniel 3898: } else {
1.55 daniel 3899: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3900: ctxt->sax->error(ctxt->userData,
3901: "SystemLiteral \" or ' expected\n");
1.123 daniel 3902: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.59 daniel 3903: ctxt->wellFormed = 0;
1.180 daniel 3904: ctxt->disableSAX = 1;
1.135 daniel 3905: return(NULL);
1.21 daniel 3906: }
3907:
1.135 daniel 3908: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3909: if (buf == NULL) {
3910: fprintf(stderr, "malloc of %d byte failed\n", size);
3911: return(NULL);
3912: }
1.168 daniel 3913: ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
1.152 daniel 3914: cur = CUR_CHAR(l);
1.135 daniel 3915: while ((IS_CHAR(cur)) && (cur != stop)) {
1.152 daniel 3916: if (len + 5 >= size) {
1.135 daniel 3917: size *= 2;
3918: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3919: if (buf == NULL) {
3920: fprintf(stderr, "realloc of %d byte failed\n", size);
1.168 daniel 3921: ctxt->instate = state;
1.135 daniel 3922: return(NULL);
3923: }
3924: }
1.152 daniel 3925: COPY_BUF(l,buf,len,cur);
3926: NEXTL(l);
3927: cur = CUR_CHAR(l);
1.135 daniel 3928: if (cur == 0) {
3929: GROW;
3930: SHRINK;
1.152 daniel 3931: cur = CUR_CHAR(l);
1.135 daniel 3932: }
3933: }
3934: buf[len] = 0;
1.168 daniel 3935: ctxt->instate = state;
1.135 daniel 3936: if (!IS_CHAR(cur)) {
3937: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3938: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
3939: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
3940: ctxt->wellFormed = 0;
1.180 daniel 3941: ctxt->disableSAX = 1;
1.135 daniel 3942: } else {
3943: NEXT;
3944: }
3945: return(buf);
1.21 daniel 3946: }
3947:
1.50 daniel 3948: /**
3949: * xmlParsePubidLiteral:
3950: * @ctxt: an XML parser context
1.21 daniel 3951: *
1.50 daniel 3952: * parse an XML public literal
1.68 daniel 3953: *
3954: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3955: *
3956: * Returns the PubidLiteral parsed or NULL.
1.21 daniel 3957: */
3958:
1.123 daniel 3959: xmlChar *
1.55 daniel 3960: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 3961: xmlChar *buf = NULL;
3962: int len = 0;
1.140 daniel 3963: int size = XML_PARSER_BUFFER_SIZE;
1.135 daniel 3964: xmlChar cur;
3965: xmlChar stop;
1.125 daniel 3966:
1.91 daniel 3967: SHRINK;
1.152 daniel 3968: if (RAW == '"') {
1.40 daniel 3969: NEXT;
1.135 daniel 3970: stop = '"';
1.152 daniel 3971: } else if (RAW == '\'') {
1.40 daniel 3972: NEXT;
1.135 daniel 3973: stop = '\'';
1.21 daniel 3974: } else {
1.55 daniel 3975: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3976: ctxt->sax->error(ctxt->userData,
3977: "SystemLiteral \" or ' expected\n");
1.123 daniel 3978: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.59 daniel 3979: ctxt->wellFormed = 0;
1.180 daniel 3980: ctxt->disableSAX = 1;
1.135 daniel 3981: return(NULL);
3982: }
3983: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3984: if (buf == NULL) {
3985: fprintf(stderr, "malloc of %d byte failed\n", size);
3986: return(NULL);
3987: }
3988: cur = CUR;
3989: while ((IS_PUBIDCHAR(cur)) && (cur != stop)) {
3990: if (len + 1 >= size) {
3991: size *= 2;
3992: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3993: if (buf == NULL) {
3994: fprintf(stderr, "realloc of %d byte failed\n", size);
3995: return(NULL);
3996: }
3997: }
3998: buf[len++] = cur;
3999: NEXT;
4000: cur = CUR;
4001: if (cur == 0) {
4002: GROW;
4003: SHRINK;
4004: cur = CUR;
4005: }
4006: }
4007: buf[len] = 0;
4008: if (cur != stop) {
4009: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4010: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
4011: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
4012: ctxt->wellFormed = 0;
1.180 daniel 4013: ctxt->disableSAX = 1;
1.135 daniel 4014: } else {
4015: NEXT;
1.21 daniel 4016: }
1.135 daniel 4017: return(buf);
1.21 daniel 4018: }
4019:
1.50 daniel 4020: /**
4021: * xmlParseCharData:
4022: * @ctxt: an XML parser context
4023: * @cdata: int indicating whether we are within a CDATA section
4024: *
4025: * parse a CharData section.
4026: * if we are within a CDATA section ']]>' marks an end of section.
1.27 daniel 4027: *
1.151 daniel 4028: * The right angle bracket (>) may be represented using the string ">",
4029: * and must, for compatibility, be escaped using ">" or a character
4030: * reference when it appears in the string "]]>" in content, when that
4031: * string is not marking the end of a CDATA section.
4032: *
1.27 daniel 4033: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4034: */
4035:
1.55 daniel 4036: void
4037: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1.152 daniel 4038: xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
1.91 daniel 4039: int nbchar = 0;
1.152 daniel 4040: int cur, l;
1.27 daniel 4041:
1.91 daniel 4042: SHRINK;
1.152 daniel 4043: cur = CUR_CHAR(l);
1.160 daniel 4044: while ((IS_CHAR(cur)) && ((cur != '<') || (ctxt->token == '<')) &&
1.153 daniel 4045: ((cur != '&') || (ctxt->token == '&'))) {
1.97 daniel 4046: if ((cur == ']') && (NXT(1) == ']') &&
1.59 daniel 4047: (NXT(2) == '>')) {
4048: if (cdata) break;
4049: else {
4050: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.151 daniel 4051: ctxt->sax->error(ctxt->userData,
1.59 daniel 4052: "Sequence ']]>' not allowed in content\n");
1.123 daniel 4053: ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
1.151 daniel 4054: /* Should this be relaxed ??? I see a "must here */
4055: ctxt->wellFormed = 0;
1.180 daniel 4056: ctxt->disableSAX = 1;
1.59 daniel 4057: }
4058: }
1.152 daniel 4059: COPY_BUF(l,buf,nbchar,cur);
4060: if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
1.91 daniel 4061: /*
4062: * Ok the segment is to be consumed as chars.
4063: */
1.171 daniel 4064: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 4065: if (areBlanks(ctxt, buf, nbchar)) {
4066: if (ctxt->sax->ignorableWhitespace != NULL)
4067: ctxt->sax->ignorableWhitespace(ctxt->userData,
4068: buf, nbchar);
4069: } else {
4070: if (ctxt->sax->characters != NULL)
4071: ctxt->sax->characters(ctxt->userData, buf, nbchar);
4072: }
4073: }
4074: nbchar = 0;
4075: }
1.152 daniel 4076: NEXTL(l);
4077: cur = CUR_CHAR(l);
1.27 daniel 4078: }
1.91 daniel 4079: if (nbchar != 0) {
4080: /*
4081: * Ok the segment is to be consumed as chars.
4082: */
1.171 daniel 4083: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 4084: if (areBlanks(ctxt, buf, nbchar)) {
4085: if (ctxt->sax->ignorableWhitespace != NULL)
4086: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4087: } else {
4088: if (ctxt->sax->characters != NULL)
4089: ctxt->sax->characters(ctxt->userData, buf, nbchar);
4090: }
4091: }
1.45 daniel 4092: }
1.27 daniel 4093: }
4094:
1.50 daniel 4095: /**
4096: * xmlParseExternalID:
4097: * @ctxt: an XML parser context
1.123 daniel 4098: * @publicID: a xmlChar** receiving PubidLiteral
1.67 daniel 4099: * @strict: indicate whether we should restrict parsing to only
4100: * production [75], see NOTE below
1.50 daniel 4101: *
1.67 daniel 4102: * Parse an External ID or a Public ID
4103: *
4104: * NOTE: Productions [75] and [83] interract badly since [75] can generate
4105: * 'PUBLIC' S PubidLiteral S SystemLiteral
1.22 daniel 4106: *
4107: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4108: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1.67 daniel 4109: *
4110: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4111: *
1.68 daniel 4112: * Returns the function returns SystemLiteral and in the second
1.67 daniel 4113: * case publicID receives PubidLiteral, is strict is off
4114: * it is possible to return NULL and have publicID set.
1.22 daniel 4115: */
4116:
1.123 daniel 4117: xmlChar *
4118: xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4119: xmlChar *URI = NULL;
1.22 daniel 4120:
1.91 daniel 4121: SHRINK;
1.152 daniel 4122: if ((RAW == 'S') && (NXT(1) == 'Y') &&
1.40 daniel 4123: (NXT(2) == 'S') && (NXT(3) == 'T') &&
4124: (NXT(4) == 'E') && (NXT(5) == 'M')) {
4125: SKIP(6);
1.59 daniel 4126: if (!IS_BLANK(CUR)) {
4127: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4128: ctxt->sax->error(ctxt->userData,
1.59 daniel 4129: "Space required after 'SYSTEM'\n");
1.123 daniel 4130: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4131: ctxt->wellFormed = 0;
1.180 daniel 4132: ctxt->disableSAX = 1;
1.59 daniel 4133: }
1.42 daniel 4134: SKIP_BLANKS;
1.39 daniel 4135: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 4136: if (URI == NULL) {
1.55 daniel 4137: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4138: ctxt->sax->error(ctxt->userData,
1.39 daniel 4139: "xmlParseExternalID: SYSTEM, no URI\n");
1.123 daniel 4140: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.59 daniel 4141: ctxt->wellFormed = 0;
1.180 daniel 4142: ctxt->disableSAX = 1;
1.59 daniel 4143: }
1.152 daniel 4144: } else if ((RAW == 'P') && (NXT(1) == 'U') &&
1.40 daniel 4145: (NXT(2) == 'B') && (NXT(3) == 'L') &&
4146: (NXT(4) == 'I') && (NXT(5) == 'C')) {
4147: SKIP(6);
1.59 daniel 4148: if (!IS_BLANK(CUR)) {
4149: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4150: ctxt->sax->error(ctxt->userData,
1.59 daniel 4151: "Space required after 'PUBLIC'\n");
1.123 daniel 4152: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4153: ctxt->wellFormed = 0;
1.180 daniel 4154: ctxt->disableSAX = 1;
1.59 daniel 4155: }
1.42 daniel 4156: SKIP_BLANKS;
1.39 daniel 4157: *publicID = xmlParsePubidLiteral(ctxt);
1.59 daniel 4158: if (*publicID == NULL) {
1.55 daniel 4159: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4160: ctxt->sax->error(ctxt->userData,
1.39 daniel 4161: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.123 daniel 4162: ctxt->errNo = XML_ERR_PUBID_REQUIRED;
1.59 daniel 4163: ctxt->wellFormed = 0;
1.180 daniel 4164: ctxt->disableSAX = 1;
1.59 daniel 4165: }
1.67 daniel 4166: if (strict) {
4167: /*
4168: * We don't handle [83] so "S SystemLiteral" is required.
4169: */
4170: if (!IS_BLANK(CUR)) {
4171: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4172: ctxt->sax->error(ctxt->userData,
1.67 daniel 4173: "Space required after the Public Identifier\n");
1.123 daniel 4174: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 4175: ctxt->wellFormed = 0;
1.180 daniel 4176: ctxt->disableSAX = 1;
1.67 daniel 4177: }
4178: } else {
4179: /*
4180: * We handle [83] so we return immediately, if
4181: * "S SystemLiteral" is not detected. From a purely parsing
4182: * point of view that's a nice mess.
4183: */
1.135 daniel 4184: const xmlChar *ptr;
4185: GROW;
4186:
4187: ptr = CUR_PTR;
1.67 daniel 4188: if (!IS_BLANK(*ptr)) return(NULL);
4189:
4190: while (IS_BLANK(*ptr)) ptr++;
1.173 daniel 4191: if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
1.59 daniel 4192: }
1.42 daniel 4193: SKIP_BLANKS;
1.39 daniel 4194: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 4195: if (URI == NULL) {
1.55 daniel 4196: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4197: ctxt->sax->error(ctxt->userData,
1.39 daniel 4198: "xmlParseExternalID: PUBLIC, no URI\n");
1.123 daniel 4199: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.59 daniel 4200: ctxt->wellFormed = 0;
1.180 daniel 4201: ctxt->disableSAX = 1;
1.59 daniel 4202: }
1.22 daniel 4203: }
1.39 daniel 4204: return(URI);
1.22 daniel 4205: }
4206:
1.50 daniel 4207: /**
4208: * xmlParseComment:
1.69 daniel 4209: * @ctxt: an XML parser context
1.50 daniel 4210: *
1.3 veillard 4211: * Skip an XML (SGML) comment <!-- .... -->
1.38 daniel 4212: * The spec says that "For compatibility, the string "--" (double-hyphen)
4213: * must not occur within comments. "
1.22 daniel 4214: *
4215: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 4216: */
1.72 daniel 4217: void
1.114 daniel 4218: xmlParseComment(xmlParserCtxtPtr ctxt) {
1.135 daniel 4219: xmlChar *buf = NULL;
4220: int len = 0;
1.140 daniel 4221: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 4222: int q, ql;
4223: int r, rl;
4224: int cur, l;
1.140 daniel 4225: xmlParserInputState state;
1.187 daniel 4226: xmlParserInputPtr input = ctxt->input;
1.3 veillard 4227:
4228: /*
1.22 daniel 4229: * Check that there is a comment right here.
1.3 veillard 4230: */
1.152 daniel 4231: if ((RAW != '<') || (NXT(1) != '!') ||
1.72 daniel 4232: (NXT(2) != '-') || (NXT(3) != '-')) return;
1.3 veillard 4233:
1.140 daniel 4234: state = ctxt->instate;
1.97 daniel 4235: ctxt->instate = XML_PARSER_COMMENT;
1.91 daniel 4236: SHRINK;
1.40 daniel 4237: SKIP(4);
1.135 daniel 4238: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4239: if (buf == NULL) {
4240: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 4241: ctxt->instate = state;
1.135 daniel 4242: return;
4243: }
1.152 daniel 4244: q = CUR_CHAR(ql);
4245: NEXTL(ql);
4246: r = CUR_CHAR(rl);
4247: NEXTL(rl);
4248: cur = CUR_CHAR(l);
1.135 daniel 4249: while (IS_CHAR(cur) &&
4250: ((cur != '>') ||
4251: (r != '-') || (q != '-'))) {
4252: if ((r == '-') && (q == '-')) {
1.55 daniel 4253: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4254: ctxt->sax->error(ctxt->userData,
1.38 daniel 4255: "Comment must not contain '--' (double-hyphen)`\n");
1.123 daniel 4256: ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
1.59 daniel 4257: ctxt->wellFormed = 0;
1.180 daniel 4258: ctxt->disableSAX = 1;
1.59 daniel 4259: }
1.152 daniel 4260: if (len + 5 >= size) {
1.135 daniel 4261: size *= 2;
4262: buf = xmlRealloc(buf, size * sizeof(xmlChar));
4263: if (buf == NULL) {
4264: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 4265: ctxt->instate = state;
1.135 daniel 4266: return;
4267: }
4268: }
1.152 daniel 4269: COPY_BUF(ql,buf,len,q);
1.135 daniel 4270: q = r;
1.152 daniel 4271: ql = rl;
1.135 daniel 4272: r = cur;
1.152 daniel 4273: rl = l;
4274: NEXTL(l);
4275: cur = CUR_CHAR(l);
1.135 daniel 4276: if (cur == 0) {
4277: SHRINK;
4278: GROW;
1.152 daniel 4279: cur = CUR_CHAR(l);
1.135 daniel 4280: }
1.3 veillard 4281: }
1.135 daniel 4282: buf[len] = 0;
4283: if (!IS_CHAR(cur)) {
1.55 daniel 4284: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4285: ctxt->sax->error(ctxt->userData,
1.135 daniel 4286: "Comment not terminated \n<!--%.50s\n", buf);
1.123 daniel 4287: ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
1.59 daniel 4288: ctxt->wellFormed = 0;
1.180 daniel 4289: ctxt->disableSAX = 1;
1.178 daniel 4290: xmlFree(buf);
1.3 veillard 4291: } else {
1.187 daniel 4292: if (input != ctxt->input) {
4293: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4294: ctxt->sax->error(ctxt->userData,
4295: "Comment doesn't start and stop in the same entity\n");
4296: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4297: ctxt->wellFormed = 0;
4298: ctxt->disableSAX = 1;
4299: }
1.40 daniel 4300: NEXT;
1.171 daniel 4301: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4302: (!ctxt->disableSAX))
1.135 daniel 4303: ctxt->sax->comment(ctxt->userData, buf);
4304: xmlFree(buf);
1.3 veillard 4305: }
1.140 daniel 4306: ctxt->instate = state;
1.3 veillard 4307: }
4308:
1.50 daniel 4309: /**
4310: * xmlParsePITarget:
4311: * @ctxt: an XML parser context
4312: *
4313: * parse the name of a PI
1.22 daniel 4314: *
4315: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1.68 daniel 4316: *
4317: * Returns the PITarget name or NULL
1.22 daniel 4318: */
4319:
1.123 daniel 4320: xmlChar *
1.55 daniel 4321: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1.123 daniel 4322: xmlChar *name;
1.22 daniel 4323:
4324: name = xmlParseName(ctxt);
1.139 daniel 4325: if ((name != NULL) &&
1.22 daniel 4326: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 4327: ((name[1] == 'm') || (name[1] == 'M')) &&
4328: ((name[2] == 'l') || (name[2] == 'L'))) {
1.139 daniel 4329: int i;
1.177 daniel 4330: if ((name[0] == 'x') && (name[1] == 'm') &&
4331: (name[2] == 'l') && (name[3] == 0)) {
1.151 daniel 4332: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4333: ctxt->sax->error(ctxt->userData,
4334: "XML declaration allowed only at the start of the document\n");
4335: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
4336: ctxt->wellFormed = 0;
1.180 daniel 4337: ctxt->disableSAX = 1;
1.151 daniel 4338: return(name);
4339: } else if (name[3] == 0) {
4340: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4341: ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
4342: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
4343: ctxt->wellFormed = 0;
1.180 daniel 4344: ctxt->disableSAX = 1;
1.151 daniel 4345: return(name);
4346: }
1.139 daniel 4347: for (i = 0;;i++) {
4348: if (xmlW3CPIs[i] == NULL) break;
4349: if (!xmlStrcmp(name, (const xmlChar *)xmlW3CPIs[i]))
4350: return(name);
4351: }
4352: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
4353: ctxt->sax->warning(ctxt->userData,
1.122 daniel 4354: "xmlParsePItarget: invalid name prefix 'xml'\n");
1.123 daniel 4355: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
1.122 daniel 4356: }
1.22 daniel 4357: }
4358: return(name);
4359: }
4360:
1.50 daniel 4361: /**
4362: * xmlParsePI:
4363: * @ctxt: an XML parser context
4364: *
4365: * parse an XML Processing Instruction.
1.22 daniel 4366: *
4367: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.68 daniel 4368: *
1.69 daniel 4369: * The processing is transfered to SAX once parsed.
1.3 veillard 4370: */
4371:
1.55 daniel 4372: void
4373: xmlParsePI(xmlParserCtxtPtr ctxt) {
1.135 daniel 4374: xmlChar *buf = NULL;
4375: int len = 0;
1.140 daniel 4376: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 4377: int cur, l;
1.123 daniel 4378: xmlChar *target;
1.140 daniel 4379: xmlParserInputState state;
1.22 daniel 4380:
1.152 daniel 4381: if ((RAW == '<') && (NXT(1) == '?')) {
1.187 daniel 4382: xmlParserInputPtr input = ctxt->input;
1.140 daniel 4383: state = ctxt->instate;
4384: ctxt->instate = XML_PARSER_PI;
1.3 veillard 4385: /*
4386: * this is a Processing Instruction.
4387: */
1.40 daniel 4388: SKIP(2);
1.91 daniel 4389: SHRINK;
1.3 veillard 4390:
4391: /*
1.22 daniel 4392: * Parse the target name and check for special support like
4393: * namespace.
1.3 veillard 4394: */
1.22 daniel 4395: target = xmlParsePITarget(ctxt);
4396: if (target != NULL) {
1.156 daniel 4397: if ((RAW == '?') && (NXT(1) == '>')) {
1.187 daniel 4398: if (input != ctxt->input) {
4399: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4400: ctxt->sax->error(ctxt->userData,
4401: "PI declaration doesn't start and stop in the same entity\n");
4402: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4403: ctxt->wellFormed = 0;
4404: ctxt->disableSAX = 1;
4405: }
1.156 daniel 4406: SKIP(2);
4407:
4408: /*
4409: * SAX: PI detected.
4410: */
1.171 daniel 4411: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.156 daniel 4412: (ctxt->sax->processingInstruction != NULL))
4413: ctxt->sax->processingInstruction(ctxt->userData,
4414: target, NULL);
4415: ctxt->instate = state;
1.170 daniel 4416: xmlFree(target);
1.156 daniel 4417: return;
4418: }
1.135 daniel 4419: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4420: if (buf == NULL) {
4421: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 4422: ctxt->instate = state;
1.135 daniel 4423: return;
4424: }
4425: cur = CUR;
4426: if (!IS_BLANK(cur)) {
1.114 daniel 4427: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4428: ctxt->sax->error(ctxt->userData,
4429: "xmlParsePI: PI %s space expected\n", target);
1.123 daniel 4430: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.114 daniel 4431: ctxt->wellFormed = 0;
1.180 daniel 4432: ctxt->disableSAX = 1;
1.114 daniel 4433: }
4434: SKIP_BLANKS;
1.152 daniel 4435: cur = CUR_CHAR(l);
1.135 daniel 4436: while (IS_CHAR(cur) &&
4437: ((cur != '?') || (NXT(1) != '>'))) {
1.152 daniel 4438: if (len + 5 >= size) {
1.135 daniel 4439: size *= 2;
4440: buf = xmlRealloc(buf, size * sizeof(xmlChar));
4441: if (buf == NULL) {
4442: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 4443: ctxt->instate = state;
1.135 daniel 4444: return;
4445: }
4446: }
1.152 daniel 4447: COPY_BUF(l,buf,len,cur);
4448: NEXTL(l);
4449: cur = CUR_CHAR(l);
1.135 daniel 4450: if (cur == 0) {
4451: SHRINK;
4452: GROW;
1.152 daniel 4453: cur = CUR_CHAR(l);
1.135 daniel 4454: }
4455: }
4456: buf[len] = 0;
1.152 daniel 4457: if (cur != '?') {
1.72 daniel 4458: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4459: ctxt->sax->error(ctxt->userData,
1.72 daniel 4460: "xmlParsePI: PI %s never end ...\n", target);
1.123 daniel 4461: ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
1.72 daniel 4462: ctxt->wellFormed = 0;
1.180 daniel 4463: ctxt->disableSAX = 1;
1.22 daniel 4464: } else {
1.187 daniel 4465: if (input != ctxt->input) {
4466: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4467: ctxt->sax->error(ctxt->userData,
4468: "PI declaration doesn't start and stop in the same entity\n");
4469: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4470: ctxt->wellFormed = 0;
4471: ctxt->disableSAX = 1;
4472: }
1.72 daniel 4473: SKIP(2);
1.44 daniel 4474:
1.72 daniel 4475: /*
4476: * SAX: PI detected.
4477: */
1.171 daniel 4478: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.72 daniel 4479: (ctxt->sax->processingInstruction != NULL))
1.99 daniel 4480: ctxt->sax->processingInstruction(ctxt->userData,
1.135 daniel 4481: target, buf);
1.22 daniel 4482: }
1.135 daniel 4483: xmlFree(buf);
1.119 daniel 4484: xmlFree(target);
1.3 veillard 4485: } else {
1.55 daniel 4486: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.99 daniel 4487: ctxt->sax->error(ctxt->userData,
4488: "xmlParsePI : no target name\n");
1.123 daniel 4489: ctxt->errNo = XML_ERR_PI_NOT_STARTED;
1.59 daniel 4490: ctxt->wellFormed = 0;
1.180 daniel 4491: ctxt->disableSAX = 1;
1.22 daniel 4492: }
1.140 daniel 4493: ctxt->instate = state;
1.22 daniel 4494: }
4495: }
4496:
1.50 daniel 4497: /**
4498: * xmlParseNotationDecl:
4499: * @ctxt: an XML parser context
4500: *
4501: * parse a notation declaration
1.22 daniel 4502: *
4503: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4504: *
4505: * Hence there is actually 3 choices:
4506: * 'PUBLIC' S PubidLiteral
4507: * 'PUBLIC' S PubidLiteral S SystemLiteral
4508: * and 'SYSTEM' S SystemLiteral
1.50 daniel 4509: *
1.67 daniel 4510: * See the NOTE on xmlParseExternalID().
1.22 daniel 4511: */
4512:
1.55 daniel 4513: void
4514: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4515: xmlChar *name;
4516: xmlChar *Pubid;
4517: xmlChar *Systemid;
1.22 daniel 4518:
1.152 daniel 4519: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4520: (NXT(2) == 'N') && (NXT(3) == 'O') &&
4521: (NXT(4) == 'T') && (NXT(5) == 'A') &&
4522: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1.67 daniel 4523: (NXT(8) == 'O') && (NXT(9) == 'N')) {
1.187 daniel 4524: xmlParserInputPtr input = ctxt->input;
1.91 daniel 4525: SHRINK;
1.40 daniel 4526: SKIP(10);
1.67 daniel 4527: if (!IS_BLANK(CUR)) {
4528: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4529: ctxt->sax->error(ctxt->userData,
4530: "Space required after '<!NOTATION'\n");
1.123 daniel 4531: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 4532: ctxt->wellFormed = 0;
1.180 daniel 4533: ctxt->disableSAX = 1;
1.67 daniel 4534: return;
4535: }
4536: SKIP_BLANKS;
1.22 daniel 4537:
4538: name = xmlParseName(ctxt);
4539: if (name == NULL) {
1.55 daniel 4540: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4541: ctxt->sax->error(ctxt->userData,
4542: "NOTATION: Name expected here\n");
1.123 daniel 4543: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.67 daniel 4544: ctxt->wellFormed = 0;
1.180 daniel 4545: ctxt->disableSAX = 1;
1.67 daniel 4546: return;
4547: }
4548: if (!IS_BLANK(CUR)) {
4549: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4550: ctxt->sax->error(ctxt->userData,
1.67 daniel 4551: "Space required after the NOTATION name'\n");
1.123 daniel 4552: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4553: ctxt->wellFormed = 0;
1.180 daniel 4554: ctxt->disableSAX = 1;
1.22 daniel 4555: return;
4556: }
1.42 daniel 4557: SKIP_BLANKS;
1.67 daniel 4558:
1.22 daniel 4559: /*
1.67 daniel 4560: * Parse the IDs.
1.22 daniel 4561: */
1.160 daniel 4562: Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
1.67 daniel 4563: SKIP_BLANKS;
4564:
1.152 daniel 4565: if (RAW == '>') {
1.187 daniel 4566: if (input != ctxt->input) {
4567: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4568: ctxt->sax->error(ctxt->userData,
4569: "Notation declaration doesn't start and stop in the same entity\n");
4570: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4571: ctxt->wellFormed = 0;
4572: ctxt->disableSAX = 1;
4573: }
1.40 daniel 4574: NEXT;
1.171 daniel 4575: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4576: (ctxt->sax->notationDecl != NULL))
1.74 daniel 4577: ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
1.67 daniel 4578: } else {
4579: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4580: ctxt->sax->error(ctxt->userData,
1.67 daniel 4581: "'>' required to close NOTATION declaration\n");
1.123 daniel 4582: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.67 daniel 4583: ctxt->wellFormed = 0;
1.180 daniel 4584: ctxt->disableSAX = 1;
1.67 daniel 4585: }
1.119 daniel 4586: xmlFree(name);
4587: if (Systemid != NULL) xmlFree(Systemid);
4588: if (Pubid != NULL) xmlFree(Pubid);
1.22 daniel 4589: }
4590: }
4591:
1.50 daniel 4592: /**
4593: * xmlParseEntityDecl:
4594: * @ctxt: an XML parser context
4595: *
4596: * parse <!ENTITY declarations
1.22 daniel 4597: *
4598: * [70] EntityDecl ::= GEDecl | PEDecl
4599: *
4600: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4601: *
4602: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4603: *
4604: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4605: *
4606: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 4607: *
4608: * [76] NDataDecl ::= S 'NDATA' S Name
1.99 daniel 4609: *
4610: * [ VC: Notation Declared ]
1.116 daniel 4611: * The Name must match the declared name of a notation.
1.22 daniel 4612: */
4613:
1.55 daniel 4614: void
4615: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4616: xmlChar *name = NULL;
4617: xmlChar *value = NULL;
4618: xmlChar *URI = NULL, *literal = NULL;
4619: xmlChar *ndata = NULL;
1.39 daniel 4620: int isParameter = 0;
1.123 daniel 4621: xmlChar *orig = NULL;
1.22 daniel 4622:
1.94 daniel 4623: GROW;
1.152 daniel 4624: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4625: (NXT(2) == 'E') && (NXT(3) == 'N') &&
4626: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1.59 daniel 4627: (NXT(6) == 'T') && (NXT(7) == 'Y')) {
1.187 daniel 4628: xmlParserInputPtr input = ctxt->input;
1.96 daniel 4629: ctxt->instate = XML_PARSER_ENTITY_DECL;
1.91 daniel 4630: SHRINK;
1.40 daniel 4631: SKIP(8);
1.59 daniel 4632: if (!IS_BLANK(CUR)) {
4633: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4634: ctxt->sax->error(ctxt->userData,
4635: "Space required after '<!ENTITY'\n");
1.123 daniel 4636: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4637: ctxt->wellFormed = 0;
1.180 daniel 4638: ctxt->disableSAX = 1;
1.59 daniel 4639: }
4640: SKIP_BLANKS;
1.40 daniel 4641:
1.152 daniel 4642: if (RAW == '%') {
1.40 daniel 4643: NEXT;
1.59 daniel 4644: if (!IS_BLANK(CUR)) {
4645: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4646: ctxt->sax->error(ctxt->userData,
4647: "Space required after '%'\n");
1.123 daniel 4648: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4649: ctxt->wellFormed = 0;
1.180 daniel 4650: ctxt->disableSAX = 1;
1.59 daniel 4651: }
1.42 daniel 4652: SKIP_BLANKS;
1.39 daniel 4653: isParameter = 1;
1.22 daniel 4654: }
4655:
4656: name = xmlParseName(ctxt);
1.24 daniel 4657: if (name == NULL) {
1.55 daniel 4658: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4659: ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
1.123 daniel 4660: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 4661: ctxt->wellFormed = 0;
1.180 daniel 4662: ctxt->disableSAX = 1;
1.24 daniel 4663: return;
4664: }
1.59 daniel 4665: if (!IS_BLANK(CUR)) {
4666: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4667: ctxt->sax->error(ctxt->userData,
1.59 daniel 4668: "Space required after the entity name\n");
1.123 daniel 4669: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4670: ctxt->wellFormed = 0;
1.180 daniel 4671: ctxt->disableSAX = 1;
1.59 daniel 4672: }
1.42 daniel 4673: SKIP_BLANKS;
1.24 daniel 4674:
1.22 daniel 4675: /*
1.68 daniel 4676: * handle the various case of definitions...
1.22 daniel 4677: */
1.39 daniel 4678: if (isParameter) {
1.152 daniel 4679: if ((RAW == '"') || (RAW == '\''))
1.78 daniel 4680: value = xmlParseEntityValue(ctxt, &orig);
1.39 daniel 4681: if (value) {
1.171 daniel 4682: if ((ctxt->sax != NULL) &&
4683: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4684: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4685: XML_INTERNAL_PARAMETER_ENTITY,
4686: NULL, NULL, value);
4687: }
1.24 daniel 4688: else {
1.67 daniel 4689: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 4690: if ((URI == NULL) && (literal == NULL)) {
4691: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4692: ctxt->sax->error(ctxt->userData,
4693: "Entity value required\n");
4694: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
4695: ctxt->wellFormed = 0;
1.180 daniel 4696: ctxt->disableSAX = 1;
1.169 daniel 4697: }
1.39 daniel 4698: if (URI) {
1.171 daniel 4699: if ((ctxt->sax != NULL) &&
4700: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4701: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4702: XML_EXTERNAL_PARAMETER_ENTITY,
4703: literal, URI, NULL);
4704: }
1.24 daniel 4705: }
4706: } else {
1.152 daniel 4707: if ((RAW == '"') || (RAW == '\'')) {
1.78 daniel 4708: value = xmlParseEntityValue(ctxt, &orig);
1.171 daniel 4709: if ((ctxt->sax != NULL) &&
4710: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4711: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4712: XML_INTERNAL_GENERAL_ENTITY,
4713: NULL, NULL, value);
4714: } else {
1.67 daniel 4715: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 4716: if ((URI == NULL) && (literal == NULL)) {
4717: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4718: ctxt->sax->error(ctxt->userData,
4719: "Entity value required\n");
4720: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
4721: ctxt->wellFormed = 0;
1.180 daniel 4722: ctxt->disableSAX = 1;
1.169 daniel 4723: }
1.152 daniel 4724: if ((RAW != '>') && (!IS_BLANK(CUR))) {
1.59 daniel 4725: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4726: ctxt->sax->error(ctxt->userData,
1.59 daniel 4727: "Space required before 'NDATA'\n");
1.123 daniel 4728: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4729: ctxt->wellFormed = 0;
1.180 daniel 4730: ctxt->disableSAX = 1;
1.59 daniel 4731: }
1.42 daniel 4732: SKIP_BLANKS;
1.152 daniel 4733: if ((RAW == 'N') && (NXT(1) == 'D') &&
1.40 daniel 4734: (NXT(2) == 'A') && (NXT(3) == 'T') &&
4735: (NXT(4) == 'A')) {
4736: SKIP(5);
1.59 daniel 4737: if (!IS_BLANK(CUR)) {
4738: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4739: ctxt->sax->error(ctxt->userData,
1.59 daniel 4740: "Space required after 'NDATA'\n");
1.123 daniel 4741: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4742: ctxt->wellFormed = 0;
1.180 daniel 4743: ctxt->disableSAX = 1;
1.59 daniel 4744: }
1.42 daniel 4745: SKIP_BLANKS;
1.24 daniel 4746: ndata = xmlParseName(ctxt);
1.171 daniel 4747: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.116 daniel 4748: (ctxt->sax->unparsedEntityDecl != NULL))
4749: ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
1.39 daniel 4750: literal, URI, ndata);
4751: } else {
1.171 daniel 4752: if ((ctxt->sax != NULL) &&
4753: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4754: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4755: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4756: literal, URI, NULL);
1.24 daniel 4757: }
4758: }
4759: }
1.42 daniel 4760: SKIP_BLANKS;
1.152 daniel 4761: if (RAW != '>') {
1.55 daniel 4762: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4763: ctxt->sax->error(ctxt->userData,
1.31 daniel 4764: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.123 daniel 4765: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.59 daniel 4766: ctxt->wellFormed = 0;
1.180 daniel 4767: ctxt->disableSAX = 1;
1.187 daniel 4768: } else {
4769: if (input != ctxt->input) {
4770: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4771: ctxt->sax->error(ctxt->userData,
4772: "Entity declaration doesn't start and stop in the same entity\n");
4773: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4774: ctxt->wellFormed = 0;
4775: ctxt->disableSAX = 1;
4776: }
1.40 daniel 4777: NEXT;
1.187 daniel 4778: }
1.78 daniel 4779: if (orig != NULL) {
4780: /*
1.98 daniel 4781: * Ugly mechanism to save the raw entity value.
1.78 daniel 4782: */
4783: xmlEntityPtr cur = NULL;
4784:
1.98 daniel 4785: if (isParameter) {
4786: if ((ctxt->sax != NULL) &&
4787: (ctxt->sax->getParameterEntity != NULL))
1.120 daniel 4788: cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.98 daniel 4789: } else {
4790: if ((ctxt->sax != NULL) &&
4791: (ctxt->sax->getEntity != NULL))
1.120 daniel 4792: cur = ctxt->sax->getEntity(ctxt->userData, name);
1.98 daniel 4793: }
4794: if (cur != NULL) {
4795: if (cur->orig != NULL)
1.119 daniel 4796: xmlFree(orig);
1.98 daniel 4797: else
4798: cur->orig = orig;
4799: } else
1.119 daniel 4800: xmlFree(orig);
1.78 daniel 4801: }
1.119 daniel 4802: if (name != NULL) xmlFree(name);
4803: if (value != NULL) xmlFree(value);
4804: if (URI != NULL) xmlFree(URI);
4805: if (literal != NULL) xmlFree(literal);
4806: if (ndata != NULL) xmlFree(ndata);
1.22 daniel 4807: }
4808: }
4809:
1.50 daniel 4810: /**
1.59 daniel 4811: * xmlParseDefaultDecl:
4812: * @ctxt: an XML parser context
4813: * @value: Receive a possible fixed default value for the attribute
4814: *
4815: * Parse an attribute default declaration
4816: *
4817: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4818: *
1.99 daniel 4819: * [ VC: Required Attribute ]
1.117 daniel 4820: * if the default declaration is the keyword #REQUIRED, then the
4821: * attribute must be specified for all elements of the type in the
4822: * attribute-list declaration.
1.99 daniel 4823: *
4824: * [ VC: Attribute Default Legal ]
1.102 daniel 4825: * The declared default value must meet the lexical constraints of
4826: * the declared attribute type c.f. xmlValidateAttributeDecl()
1.99 daniel 4827: *
4828: * [ VC: Fixed Attribute Default ]
1.117 daniel 4829: * if an attribute has a default value declared with the #FIXED
4830: * keyword, instances of that attribute must match the default value.
1.99 daniel 4831: *
4832: * [ WFC: No < in Attribute Values ]
4833: * handled in xmlParseAttValue()
4834: *
1.59 daniel 4835: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4836: * or XML_ATTRIBUTE_FIXED.
4837: */
4838:
4839: int
1.123 daniel 4840: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
1.59 daniel 4841: int val;
1.123 daniel 4842: xmlChar *ret;
1.59 daniel 4843:
4844: *value = NULL;
1.152 daniel 4845: if ((RAW == '#') && (NXT(1) == 'R') &&
1.59 daniel 4846: (NXT(2) == 'E') && (NXT(3) == 'Q') &&
4847: (NXT(4) == 'U') && (NXT(5) == 'I') &&
4848: (NXT(6) == 'R') && (NXT(7) == 'E') &&
4849: (NXT(8) == 'D')) {
4850: SKIP(9);
4851: return(XML_ATTRIBUTE_REQUIRED);
4852: }
1.152 daniel 4853: if ((RAW == '#') && (NXT(1) == 'I') &&
1.59 daniel 4854: (NXT(2) == 'M') && (NXT(3) == 'P') &&
4855: (NXT(4) == 'L') && (NXT(5) == 'I') &&
4856: (NXT(6) == 'E') && (NXT(7) == 'D')) {
4857: SKIP(8);
4858: return(XML_ATTRIBUTE_IMPLIED);
4859: }
4860: val = XML_ATTRIBUTE_NONE;
1.152 daniel 4861: if ((RAW == '#') && (NXT(1) == 'F') &&
1.59 daniel 4862: (NXT(2) == 'I') && (NXT(3) == 'X') &&
4863: (NXT(4) == 'E') && (NXT(5) == 'D')) {
4864: SKIP(6);
4865: val = XML_ATTRIBUTE_FIXED;
4866: if (!IS_BLANK(CUR)) {
4867: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4868: ctxt->sax->error(ctxt->userData,
4869: "Space required after '#FIXED'\n");
1.123 daniel 4870: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4871: ctxt->wellFormed = 0;
1.180 daniel 4872: ctxt->disableSAX = 1;
1.59 daniel 4873: }
4874: SKIP_BLANKS;
4875: }
4876: ret = xmlParseAttValue(ctxt);
1.96 daniel 4877: ctxt->instate = XML_PARSER_DTD;
1.59 daniel 4878: if (ret == NULL) {
4879: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4880: ctxt->sax->error(ctxt->userData,
1.59 daniel 4881: "Attribute default value declaration error\n");
4882: ctxt->wellFormed = 0;
1.180 daniel 4883: ctxt->disableSAX = 1;
1.59 daniel 4884: } else
4885: *value = ret;
4886: return(val);
4887: }
4888:
4889: /**
1.66 daniel 4890: * xmlParseNotationType:
4891: * @ctxt: an XML parser context
4892: *
4893: * parse an Notation attribute type.
4894: *
1.99 daniel 4895: * Note: the leading 'NOTATION' S part has already being parsed...
4896: *
1.66 daniel 4897: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4898: *
1.99 daniel 4899: * [ VC: Notation Attributes ]
1.117 daniel 4900: * Values of this type must match one of the notation names included
1.99 daniel 4901: * in the declaration; all notation names in the declaration must be declared.
1.66 daniel 4902: *
4903: * Returns: the notation attribute tree built while parsing
4904: */
4905:
4906: xmlEnumerationPtr
4907: xmlParseNotationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 4908: xmlChar *name;
1.66 daniel 4909: xmlEnumerationPtr ret = NULL, last = NULL, cur;
4910:
1.152 daniel 4911: if (RAW != '(') {
1.66 daniel 4912: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4913: ctxt->sax->error(ctxt->userData,
4914: "'(' required to start 'NOTATION'\n");
1.123 daniel 4915: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.66 daniel 4916: ctxt->wellFormed = 0;
1.180 daniel 4917: ctxt->disableSAX = 1;
1.66 daniel 4918: return(NULL);
4919: }
1.91 daniel 4920: SHRINK;
1.66 daniel 4921: do {
4922: NEXT;
4923: SKIP_BLANKS;
4924: name = xmlParseName(ctxt);
4925: if (name == NULL) {
4926: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4927: ctxt->sax->error(ctxt->userData,
1.66 daniel 4928: "Name expected in NOTATION declaration\n");
1.123 daniel 4929: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.66 daniel 4930: ctxt->wellFormed = 0;
1.180 daniel 4931: ctxt->disableSAX = 1;
1.66 daniel 4932: return(ret);
4933: }
4934: cur = xmlCreateEnumeration(name);
1.119 daniel 4935: xmlFree(name);
1.66 daniel 4936: if (cur == NULL) return(ret);
4937: if (last == NULL) ret = last = cur;
4938: else {
4939: last->next = cur;
4940: last = cur;
4941: }
4942: SKIP_BLANKS;
1.152 daniel 4943: } while (RAW == '|');
4944: if (RAW != ')') {
1.66 daniel 4945: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4946: ctxt->sax->error(ctxt->userData,
1.66 daniel 4947: "')' required to finish NOTATION declaration\n");
1.123 daniel 4948: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.66 daniel 4949: ctxt->wellFormed = 0;
1.180 daniel 4950: ctxt->disableSAX = 1;
1.170 daniel 4951: if ((last != NULL) && (last != ret))
4952: xmlFreeEnumeration(last);
1.66 daniel 4953: return(ret);
4954: }
4955: NEXT;
4956: return(ret);
4957: }
4958:
4959: /**
4960: * xmlParseEnumerationType:
4961: * @ctxt: an XML parser context
4962: *
4963: * parse an Enumeration attribute type.
4964: *
4965: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4966: *
1.99 daniel 4967: * [ VC: Enumeration ]
1.117 daniel 4968: * Values of this type must match one of the Nmtoken tokens in
1.99 daniel 4969: * the declaration
4970: *
1.66 daniel 4971: * Returns: the enumeration attribute tree built while parsing
4972: */
4973:
4974: xmlEnumerationPtr
4975: xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 4976: xmlChar *name;
1.66 daniel 4977: xmlEnumerationPtr ret = NULL, last = NULL, cur;
4978:
1.152 daniel 4979: if (RAW != '(') {
1.66 daniel 4980: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4981: ctxt->sax->error(ctxt->userData,
1.66 daniel 4982: "'(' required to start ATTLIST enumeration\n");
1.123 daniel 4983: ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
1.66 daniel 4984: ctxt->wellFormed = 0;
1.180 daniel 4985: ctxt->disableSAX = 1;
1.66 daniel 4986: return(NULL);
4987: }
1.91 daniel 4988: SHRINK;
1.66 daniel 4989: do {
4990: NEXT;
4991: SKIP_BLANKS;
4992: name = xmlParseNmtoken(ctxt);
4993: if (name == NULL) {
4994: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4995: ctxt->sax->error(ctxt->userData,
1.66 daniel 4996: "NmToken expected in ATTLIST enumeration\n");
1.123 daniel 4997: ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
1.66 daniel 4998: ctxt->wellFormed = 0;
1.180 daniel 4999: ctxt->disableSAX = 1;
1.66 daniel 5000: return(ret);
5001: }
5002: cur = xmlCreateEnumeration(name);
1.119 daniel 5003: xmlFree(name);
1.66 daniel 5004: if (cur == NULL) return(ret);
5005: if (last == NULL) ret = last = cur;
5006: else {
5007: last->next = cur;
5008: last = cur;
5009: }
5010: SKIP_BLANKS;
1.152 daniel 5011: } while (RAW == '|');
5012: if (RAW != ')') {
1.66 daniel 5013: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5014: ctxt->sax->error(ctxt->userData,
1.66 daniel 5015: "')' required to finish ATTLIST enumeration\n");
1.123 daniel 5016: ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
1.66 daniel 5017: ctxt->wellFormed = 0;
1.180 daniel 5018: ctxt->disableSAX = 1;
1.66 daniel 5019: return(ret);
5020: }
5021: NEXT;
5022: return(ret);
5023: }
5024:
5025: /**
1.50 daniel 5026: * xmlParseEnumeratedType:
5027: * @ctxt: an XML parser context
1.66 daniel 5028: * @tree: the enumeration tree built while parsing
1.50 daniel 5029: *
1.66 daniel 5030: * parse an Enumerated attribute type.
1.22 daniel 5031: *
5032: * [57] EnumeratedType ::= NotationType | Enumeration
5033: *
5034: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5035: *
1.50 daniel 5036: *
1.66 daniel 5037: * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
1.22 daniel 5038: */
5039:
1.66 daniel 5040: int
5041: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.152 daniel 5042: if ((RAW == 'N') && (NXT(1) == 'O') &&
1.66 daniel 5043: (NXT(2) == 'T') && (NXT(3) == 'A') &&
5044: (NXT(4) == 'T') && (NXT(5) == 'I') &&
5045: (NXT(6) == 'O') && (NXT(7) == 'N')) {
5046: SKIP(8);
5047: if (!IS_BLANK(CUR)) {
5048: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5049: ctxt->sax->error(ctxt->userData,
5050: "Space required after 'NOTATION'\n");
1.123 daniel 5051: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.66 daniel 5052: ctxt->wellFormed = 0;
1.180 daniel 5053: ctxt->disableSAX = 1;
1.66 daniel 5054: return(0);
5055: }
5056: SKIP_BLANKS;
5057: *tree = xmlParseNotationType(ctxt);
5058: if (*tree == NULL) return(0);
5059: return(XML_ATTRIBUTE_NOTATION);
5060: }
5061: *tree = xmlParseEnumerationType(ctxt);
5062: if (*tree == NULL) return(0);
5063: return(XML_ATTRIBUTE_ENUMERATION);
1.22 daniel 5064: }
5065:
1.50 daniel 5066: /**
5067: * xmlParseAttributeType:
5068: * @ctxt: an XML parser context
1.66 daniel 5069: * @tree: the enumeration tree built while parsing
1.50 daniel 5070: *
1.59 daniel 5071: * parse the Attribute list def for an element
1.22 daniel 5072: *
5073: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5074: *
5075: * [55] StringType ::= 'CDATA'
5076: *
5077: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5078: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1.50 daniel 5079: *
1.102 daniel 5080: * Validity constraints for attribute values syntax are checked in
5081: * xmlValidateAttributeValue()
5082: *
1.99 daniel 5083: * [ VC: ID ]
1.117 daniel 5084: * Values of type ID must match the Name production. A name must not
1.99 daniel 5085: * appear more than once in an XML document as a value of this type;
5086: * i.e., ID values must uniquely identify the elements which bear them.
5087: *
5088: * [ VC: One ID per Element Type ]
1.117 daniel 5089: * No element type may have more than one ID attribute specified.
1.99 daniel 5090: *
5091: * [ VC: ID Attribute Default ]
1.117 daniel 5092: * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
1.99 daniel 5093: *
5094: * [ VC: IDREF ]
1.102 daniel 5095: * Values of type IDREF must match the Name production, and values
1.140 daniel 5096: * of type IDREFS must match Names; each IDREF Name must match the value
1.117 daniel 5097: * of an ID attribute on some element in the XML document; i.e. IDREF
1.99 daniel 5098: * values must match the value of some ID attribute.
5099: *
5100: * [ VC: Entity Name ]
1.102 daniel 5101: * Values of type ENTITY must match the Name production, values
1.140 daniel 5102: * of type ENTITIES must match Names; each Entity Name must match the
1.117 daniel 5103: * name of an unparsed entity declared in the DTD.
1.99 daniel 5104: *
5105: * [ VC: Name Token ]
1.102 daniel 5106: * Values of type NMTOKEN must match the Nmtoken production; values
1.99 daniel 5107: * of type NMTOKENS must match Nmtokens.
5108: *
1.69 daniel 5109: * Returns the attribute type
1.22 daniel 5110: */
1.59 daniel 5111: int
1.66 daniel 5112: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.91 daniel 5113: SHRINK;
1.152 daniel 5114: if ((RAW == 'C') && (NXT(1) == 'D') &&
1.40 daniel 5115: (NXT(2) == 'A') && (NXT(3) == 'T') &&
5116: (NXT(4) == 'A')) {
5117: SKIP(5);
1.66 daniel 5118: return(XML_ATTRIBUTE_CDATA);
1.152 daniel 5119: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.40 daniel 5120: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.97 daniel 5121: (NXT(4) == 'F') && (NXT(5) == 'S')) {
5122: SKIP(6);
5123: return(XML_ATTRIBUTE_IDREFS);
1.152 daniel 5124: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.97 daniel 5125: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.40 daniel 5126: (NXT(4) == 'F')) {
5127: SKIP(5);
1.59 daniel 5128: return(XML_ATTRIBUTE_IDREF);
1.152 daniel 5129: } else if ((RAW == 'I') && (NXT(1) == 'D')) {
1.66 daniel 5130: SKIP(2);
5131: return(XML_ATTRIBUTE_ID);
1.152 daniel 5132: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 5133: (NXT(2) == 'T') && (NXT(3) == 'I') &&
5134: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
5135: SKIP(6);
1.59 daniel 5136: return(XML_ATTRIBUTE_ENTITY);
1.152 daniel 5137: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 5138: (NXT(2) == 'T') && (NXT(3) == 'I') &&
5139: (NXT(4) == 'T') && (NXT(5) == 'I') &&
5140: (NXT(6) == 'E') && (NXT(7) == 'S')) {
5141: SKIP(8);
1.59 daniel 5142: return(XML_ATTRIBUTE_ENTITIES);
1.152 daniel 5143: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.40 daniel 5144: (NXT(2) == 'T') && (NXT(3) == 'O') &&
5145: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.66 daniel 5146: (NXT(6) == 'N') && (NXT(7) == 'S')) {
5147: SKIP(8);
5148: return(XML_ATTRIBUTE_NMTOKENS);
1.152 daniel 5149: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.66 daniel 5150: (NXT(2) == 'T') && (NXT(3) == 'O') &&
5151: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.40 daniel 5152: (NXT(6) == 'N')) {
5153: SKIP(7);
1.59 daniel 5154: return(XML_ATTRIBUTE_NMTOKEN);
1.22 daniel 5155: }
1.66 daniel 5156: return(xmlParseEnumeratedType(ctxt, tree));
1.22 daniel 5157: }
5158:
1.50 daniel 5159: /**
5160: * xmlParseAttributeListDecl:
5161: * @ctxt: an XML parser context
5162: *
5163: * : parse the Attribute list def for an element
1.22 daniel 5164: *
5165: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5166: *
5167: * [53] AttDef ::= S Name S AttType S DefaultDecl
1.50 daniel 5168: *
1.22 daniel 5169: */
1.55 daniel 5170: void
5171: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 5172: xmlChar *elemName;
5173: xmlChar *attrName;
1.103 daniel 5174: xmlEnumerationPtr tree;
1.22 daniel 5175:
1.152 daniel 5176: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 5177: (NXT(2) == 'A') && (NXT(3) == 'T') &&
5178: (NXT(4) == 'T') && (NXT(5) == 'L') &&
5179: (NXT(6) == 'I') && (NXT(7) == 'S') &&
1.59 daniel 5180: (NXT(8) == 'T')) {
1.187 daniel 5181: xmlParserInputPtr input = ctxt->input;
5182:
1.40 daniel 5183: SKIP(9);
1.59 daniel 5184: if (!IS_BLANK(CUR)) {
5185: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5186: ctxt->sax->error(ctxt->userData,
5187: "Space required after '<!ATTLIST'\n");
1.123 daniel 5188: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5189: ctxt->wellFormed = 0;
1.180 daniel 5190: ctxt->disableSAX = 1;
1.59 daniel 5191: }
1.42 daniel 5192: SKIP_BLANKS;
1.59 daniel 5193: elemName = xmlParseName(ctxt);
5194: if (elemName == NULL) {
1.55 daniel 5195: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5196: ctxt->sax->error(ctxt->userData,
5197: "ATTLIST: no name for Element\n");
1.123 daniel 5198: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5199: ctxt->wellFormed = 0;
1.180 daniel 5200: ctxt->disableSAX = 1;
1.22 daniel 5201: return;
5202: }
1.42 daniel 5203: SKIP_BLANKS;
1.152 daniel 5204: while (RAW != '>') {
1.123 daniel 5205: const xmlChar *check = CUR_PTR;
1.59 daniel 5206: int type;
5207: int def;
1.123 daniel 5208: xmlChar *defaultValue = NULL;
1.59 daniel 5209:
1.103 daniel 5210: tree = NULL;
1.59 daniel 5211: attrName = xmlParseName(ctxt);
5212: if (attrName == NULL) {
5213: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5214: ctxt->sax->error(ctxt->userData,
5215: "ATTLIST: no name for Attribute\n");
1.123 daniel 5216: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5217: ctxt->wellFormed = 0;
1.180 daniel 5218: ctxt->disableSAX = 1;
1.59 daniel 5219: break;
5220: }
1.97 daniel 5221: GROW;
1.59 daniel 5222: if (!IS_BLANK(CUR)) {
5223: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5224: ctxt->sax->error(ctxt->userData,
1.59 daniel 5225: "Space required after the attribute name\n");
1.123 daniel 5226: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5227: ctxt->wellFormed = 0;
1.180 daniel 5228: ctxt->disableSAX = 1;
1.170 daniel 5229: if (attrName != NULL)
5230: xmlFree(attrName);
5231: if (defaultValue != NULL)
5232: xmlFree(defaultValue);
1.59 daniel 5233: break;
5234: }
5235: SKIP_BLANKS;
5236:
1.66 daniel 5237: type = xmlParseAttributeType(ctxt, &tree);
1.170 daniel 5238: if (type <= 0) {
5239: if (attrName != NULL)
5240: xmlFree(attrName);
5241: if (defaultValue != NULL)
5242: xmlFree(defaultValue);
5243: break;
5244: }
1.22 daniel 5245:
1.97 daniel 5246: GROW;
1.59 daniel 5247: if (!IS_BLANK(CUR)) {
5248: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5249: ctxt->sax->error(ctxt->userData,
1.59 daniel 5250: "Space required after the attribute type\n");
1.123 daniel 5251: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5252: ctxt->wellFormed = 0;
1.180 daniel 5253: ctxt->disableSAX = 1;
1.170 daniel 5254: if (attrName != NULL)
5255: xmlFree(attrName);
5256: if (defaultValue != NULL)
5257: xmlFree(defaultValue);
5258: if (tree != NULL)
5259: xmlFreeEnumeration(tree);
1.59 daniel 5260: break;
5261: }
1.42 daniel 5262: SKIP_BLANKS;
1.59 daniel 5263:
5264: def = xmlParseDefaultDecl(ctxt, &defaultValue);
1.170 daniel 5265: if (def <= 0) {
5266: if (attrName != NULL)
5267: xmlFree(attrName);
5268: if (defaultValue != NULL)
5269: xmlFree(defaultValue);
5270: if (tree != NULL)
5271: xmlFreeEnumeration(tree);
5272: break;
5273: }
1.59 daniel 5274:
1.97 daniel 5275: GROW;
1.152 daniel 5276: if (RAW != '>') {
1.59 daniel 5277: if (!IS_BLANK(CUR)) {
5278: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5279: ctxt->sax->error(ctxt->userData,
1.59 daniel 5280: "Space required after the attribute default value\n");
1.123 daniel 5281: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5282: ctxt->wellFormed = 0;
1.180 daniel 5283: ctxt->disableSAX = 1;
1.170 daniel 5284: if (attrName != NULL)
5285: xmlFree(attrName);
5286: if (defaultValue != NULL)
5287: xmlFree(defaultValue);
5288: if (tree != NULL)
5289: xmlFreeEnumeration(tree);
1.59 daniel 5290: break;
5291: }
5292: SKIP_BLANKS;
5293: }
1.40 daniel 5294: if (check == CUR_PTR) {
1.55 daniel 5295: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5296: ctxt->sax->error(ctxt->userData,
1.59 daniel 5297: "xmlParseAttributeListDecl: detected internal error\n");
1.123 daniel 5298: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.170 daniel 5299: if (attrName != NULL)
5300: xmlFree(attrName);
5301: if (defaultValue != NULL)
5302: xmlFree(defaultValue);
5303: if (tree != NULL)
5304: xmlFreeEnumeration(tree);
1.22 daniel 5305: break;
5306: }
1.171 daniel 5307: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5308: (ctxt->sax->attributeDecl != NULL))
1.74 daniel 5309: ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
1.66 daniel 5310: type, def, defaultValue, tree);
1.59 daniel 5311: if (attrName != NULL)
1.119 daniel 5312: xmlFree(attrName);
1.59 daniel 5313: if (defaultValue != NULL)
1.119 daniel 5314: xmlFree(defaultValue);
1.97 daniel 5315: GROW;
1.22 daniel 5316: }
1.187 daniel 5317: if (RAW == '>') {
5318: if (input != ctxt->input) {
5319: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5320: ctxt->sax->error(ctxt->userData,
5321: "Attribute list declaration doesn't start and stop in the same entity\n");
5322: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5323: ctxt->wellFormed = 0;
5324: ctxt->disableSAX = 1;
5325: }
1.40 daniel 5326: NEXT;
1.187 daniel 5327: }
1.22 daniel 5328:
1.119 daniel 5329: xmlFree(elemName);
1.22 daniel 5330: }
5331: }
5332:
1.50 daniel 5333: /**
1.61 daniel 5334: * xmlParseElementMixedContentDecl:
5335: * @ctxt: an XML parser context
5336: *
5337: * parse the declaration for a Mixed Element content
5338: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5339: *
5340: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5341: * '(' S? '#PCDATA' S? ')'
5342: *
1.99 daniel 5343: * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5344: *
5345: * [ VC: No Duplicate Types ]
1.117 daniel 5346: * The same name must not appear more than once in a single
5347: * mixed-content declaration.
1.99 daniel 5348: *
1.61 daniel 5349: * returns: the list of the xmlElementContentPtr describing the element choices
5350: */
5351: xmlElementContentPtr
1.62 daniel 5352: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
1.64 daniel 5353: xmlElementContentPtr ret = NULL, cur = NULL, n;
1.123 daniel 5354: xmlChar *elem = NULL;
1.61 daniel 5355:
1.97 daniel 5356: GROW;
1.152 daniel 5357: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 5358: (NXT(2) == 'C') && (NXT(3) == 'D') &&
5359: (NXT(4) == 'A') && (NXT(5) == 'T') &&
5360: (NXT(6) == 'A')) {
5361: SKIP(7);
5362: SKIP_BLANKS;
1.91 daniel 5363: SHRINK;
1.152 daniel 5364: if (RAW == ')') {
1.187 daniel 5365: ctxt->entity = ctxt->input;
1.63 daniel 5366: NEXT;
5367: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
1.152 daniel 5368: if (RAW == '*') {
1.136 daniel 5369: ret->ocur = XML_ELEMENT_CONTENT_MULT;
5370: NEXT;
5371: }
1.63 daniel 5372: return(ret);
5373: }
1.152 daniel 5374: if ((RAW == '(') || (RAW == '|')) {
1.61 daniel 5375: ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
5376: if (ret == NULL) return(NULL);
1.99 daniel 5377: }
1.152 daniel 5378: while (RAW == '|') {
1.64 daniel 5379: NEXT;
1.61 daniel 5380: if (elem == NULL) {
5381: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5382: if (ret == NULL) return(NULL);
5383: ret->c1 = cur;
1.64 daniel 5384: cur = ret;
1.61 daniel 5385: } else {
1.64 daniel 5386: n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5387: if (n == NULL) return(NULL);
5388: n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
5389: cur->c2 = n;
5390: cur = n;
1.119 daniel 5391: xmlFree(elem);
1.61 daniel 5392: }
5393: SKIP_BLANKS;
5394: elem = xmlParseName(ctxt);
5395: if (elem == NULL) {
5396: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5397: ctxt->sax->error(ctxt->userData,
1.61 daniel 5398: "xmlParseElementMixedContentDecl : Name expected\n");
1.123 daniel 5399: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.61 daniel 5400: ctxt->wellFormed = 0;
1.180 daniel 5401: ctxt->disableSAX = 1;
1.61 daniel 5402: xmlFreeElementContent(cur);
5403: return(NULL);
5404: }
5405: SKIP_BLANKS;
1.97 daniel 5406: GROW;
1.61 daniel 5407: }
1.152 daniel 5408: if ((RAW == ')') && (NXT(1) == '*')) {
1.66 daniel 5409: if (elem != NULL) {
1.61 daniel 5410: cur->c2 = xmlNewElementContent(elem,
5411: XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 5412: xmlFree(elem);
1.66 daniel 5413: }
1.65 daniel 5414: ret->ocur = XML_ELEMENT_CONTENT_MULT;
1.187 daniel 5415: ctxt->entity = ctxt->input;
1.64 daniel 5416: SKIP(2);
1.61 daniel 5417: } else {
1.119 daniel 5418: if (elem != NULL) xmlFree(elem);
1.61 daniel 5419: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5420: ctxt->sax->error(ctxt->userData,
1.63 daniel 5421: "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
1.123 daniel 5422: ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
1.61 daniel 5423: ctxt->wellFormed = 0;
1.180 daniel 5424: ctxt->disableSAX = 1;
1.61 daniel 5425: xmlFreeElementContent(ret);
5426: return(NULL);
5427: }
5428:
5429: } else {
5430: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5431: ctxt->sax->error(ctxt->userData,
1.61 daniel 5432: "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
1.123 daniel 5433: ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
1.61 daniel 5434: ctxt->wellFormed = 0;
1.180 daniel 5435: ctxt->disableSAX = 1;
1.61 daniel 5436: }
5437: return(ret);
5438: }
5439:
5440: /**
5441: * xmlParseElementChildrenContentDecl:
1.50 daniel 5442: * @ctxt: an XML parser context
5443: *
1.61 daniel 5444: * parse the declaration for a Mixed Element content
5445: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
1.22 daniel 5446: *
1.61 daniel 5447: *
1.22 daniel 5448: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5449: *
5450: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5451: *
5452: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5453: *
5454: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5455: *
1.99 daniel 5456: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5457: * TODO Parameter-entity replacement text must be properly nested
5458: * with parenthetized groups. That is to say, if either of the
5459: * opening or closing parentheses in a choice, seq, or Mixed
5460: * construct is contained in the replacement text for a parameter
5461: * entity, both must be contained in the same replacement text. For
5462: * interoperability, if a parameter-entity reference appears in a
5463: * choice, seq, or Mixed construct, its replacement text should not
5464: * be empty, and neither the first nor last non-blank character of
5465: * the replacement text should be a connector (| or ,).
5466: *
1.62 daniel 5467: * returns: the tree of xmlElementContentPtr describing the element
1.61 daniel 5468: * hierarchy.
5469: */
5470: xmlElementContentPtr
1.62 daniel 5471: xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
1.63 daniel 5472: xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
1.123 daniel 5473: xmlChar *elem;
5474: xmlChar type = 0;
1.62 daniel 5475:
5476: SKIP_BLANKS;
1.94 daniel 5477: GROW;
1.152 daniel 5478: if (RAW == '(') {
1.63 daniel 5479: /* Recurse on first child */
1.62 daniel 5480: NEXT;
5481: SKIP_BLANKS;
5482: cur = ret = xmlParseElementChildrenContentDecl(ctxt);
5483: SKIP_BLANKS;
1.101 daniel 5484: GROW;
1.62 daniel 5485: } else {
5486: elem = xmlParseName(ctxt);
5487: if (elem == NULL) {
5488: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5489: ctxt->sax->error(ctxt->userData,
1.62 daniel 5490: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.123 daniel 5491: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 5492: ctxt->wellFormed = 0;
1.180 daniel 5493: ctxt->disableSAX = 1;
1.62 daniel 5494: return(NULL);
5495: }
5496: cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.101 daniel 5497: GROW;
1.152 daniel 5498: if (RAW == '?') {
1.104 daniel 5499: cur->ocur = XML_ELEMENT_CONTENT_OPT;
1.62 daniel 5500: NEXT;
1.152 daniel 5501: } else if (RAW == '*') {
1.104 daniel 5502: cur->ocur = XML_ELEMENT_CONTENT_MULT;
1.62 daniel 5503: NEXT;
1.152 daniel 5504: } else if (RAW == '+') {
1.104 daniel 5505: cur->ocur = XML_ELEMENT_CONTENT_PLUS;
1.62 daniel 5506: NEXT;
5507: } else {
1.104 daniel 5508: cur->ocur = XML_ELEMENT_CONTENT_ONCE;
1.62 daniel 5509: }
1.119 daniel 5510: xmlFree(elem);
1.101 daniel 5511: GROW;
1.62 daniel 5512: }
5513: SKIP_BLANKS;
1.91 daniel 5514: SHRINK;
1.152 daniel 5515: while (RAW != ')') {
1.63 daniel 5516: /*
5517: * Each loop we parse one separator and one element.
5518: */
1.152 daniel 5519: if (RAW == ',') {
1.62 daniel 5520: if (type == 0) type = CUR;
5521:
5522: /*
5523: * Detect "Name | Name , Name" error
5524: */
5525: else if (type != CUR) {
5526: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5527: ctxt->sax->error(ctxt->userData,
1.62 daniel 5528: "xmlParseElementChildrenContentDecl : '%c' expected\n",
5529: type);
1.123 daniel 5530: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 5531: ctxt->wellFormed = 0;
1.180 daniel 5532: ctxt->disableSAX = 1;
1.170 daniel 5533: if ((op != NULL) && (op != ret))
5534: xmlFreeElementContent(op);
5535: if ((last != NULL) && (last != ret))
5536: xmlFreeElementContent(last);
5537: if (ret != NULL)
5538: xmlFreeElementContent(ret);
1.62 daniel 5539: return(NULL);
5540: }
1.64 daniel 5541: NEXT;
1.62 daniel 5542:
1.63 daniel 5543: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
5544: if (op == NULL) {
5545: xmlFreeElementContent(ret);
5546: return(NULL);
5547: }
5548: if (last == NULL) {
5549: op->c1 = ret;
1.65 daniel 5550: ret = cur = op;
1.63 daniel 5551: } else {
5552: cur->c2 = op;
5553: op->c1 = last;
5554: cur =op;
1.65 daniel 5555: last = NULL;
1.63 daniel 5556: }
1.152 daniel 5557: } else if (RAW == '|') {
1.62 daniel 5558: if (type == 0) type = CUR;
5559:
5560: /*
1.63 daniel 5561: * Detect "Name , Name | Name" error
1.62 daniel 5562: */
5563: else if (type != CUR) {
5564: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5565: ctxt->sax->error(ctxt->userData,
1.62 daniel 5566: "xmlParseElementChildrenContentDecl : '%c' expected\n",
5567: type);
1.123 daniel 5568: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 5569: ctxt->wellFormed = 0;
1.180 daniel 5570: ctxt->disableSAX = 1;
1.170 daniel 5571: if ((op != NULL) && (op != ret))
5572: xmlFreeElementContent(op);
5573: if ((last != NULL) && (last != ret))
5574: xmlFreeElementContent(last);
5575: if (ret != NULL)
5576: xmlFreeElementContent(ret);
1.62 daniel 5577: return(NULL);
5578: }
1.64 daniel 5579: NEXT;
1.62 daniel 5580:
1.63 daniel 5581: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5582: if (op == NULL) {
1.170 daniel 5583: if ((op != NULL) && (op != ret))
5584: xmlFreeElementContent(op);
5585: if ((last != NULL) && (last != ret))
5586: xmlFreeElementContent(last);
5587: if (ret != NULL)
5588: xmlFreeElementContent(ret);
1.63 daniel 5589: return(NULL);
5590: }
5591: if (last == NULL) {
5592: op->c1 = ret;
1.65 daniel 5593: ret = cur = op;
1.63 daniel 5594: } else {
5595: cur->c2 = op;
5596: op->c1 = last;
5597: cur =op;
1.65 daniel 5598: last = NULL;
1.63 daniel 5599: }
1.62 daniel 5600: } else {
5601: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5602: ctxt->sax->error(ctxt->userData,
1.62 daniel 5603: "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
5604: ctxt->wellFormed = 0;
1.180 daniel 5605: ctxt->disableSAX = 1;
1.123 daniel 5606: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
1.170 daniel 5607: if ((op != NULL) && (op != ret))
5608: xmlFreeElementContent(op);
5609: if ((last != NULL) && (last != ret))
5610: xmlFreeElementContent(last);
5611: if (ret != NULL)
5612: xmlFreeElementContent(ret);
1.62 daniel 5613: return(NULL);
5614: }
1.101 daniel 5615: GROW;
1.62 daniel 5616: SKIP_BLANKS;
1.101 daniel 5617: GROW;
1.152 daniel 5618: if (RAW == '(') {
1.63 daniel 5619: /* Recurse on second child */
1.62 daniel 5620: NEXT;
5621: SKIP_BLANKS;
1.65 daniel 5622: last = xmlParseElementChildrenContentDecl(ctxt);
1.62 daniel 5623: SKIP_BLANKS;
5624: } else {
5625: elem = xmlParseName(ctxt);
5626: if (elem == NULL) {
5627: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5628: ctxt->sax->error(ctxt->userData,
1.122 daniel 5629: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.123 daniel 5630: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 5631: ctxt->wellFormed = 0;
1.180 daniel 5632: ctxt->disableSAX = 1;
1.170 daniel 5633: if ((op != NULL) && (op != ret))
5634: xmlFreeElementContent(op);
5635: if ((last != NULL) && (last != ret))
5636: xmlFreeElementContent(last);
5637: if (ret != NULL)
5638: xmlFreeElementContent(ret);
1.62 daniel 5639: return(NULL);
5640: }
1.65 daniel 5641: last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 5642: xmlFree(elem);
1.152 daniel 5643: if (RAW == '?') {
1.105 daniel 5644: last->ocur = XML_ELEMENT_CONTENT_OPT;
5645: NEXT;
1.152 daniel 5646: } else if (RAW == '*') {
1.105 daniel 5647: last->ocur = XML_ELEMENT_CONTENT_MULT;
5648: NEXT;
1.152 daniel 5649: } else if (RAW == '+') {
1.105 daniel 5650: last->ocur = XML_ELEMENT_CONTENT_PLUS;
5651: NEXT;
5652: } else {
5653: last->ocur = XML_ELEMENT_CONTENT_ONCE;
5654: }
1.63 daniel 5655: }
5656: SKIP_BLANKS;
1.97 daniel 5657: GROW;
1.64 daniel 5658: }
1.65 daniel 5659: if ((cur != NULL) && (last != NULL)) {
5660: cur->c2 = last;
1.62 daniel 5661: }
1.187 daniel 5662: ctxt->entity = ctxt->input;
1.62 daniel 5663: NEXT;
1.152 daniel 5664: if (RAW == '?') {
1.62 daniel 5665: ret->ocur = XML_ELEMENT_CONTENT_OPT;
5666: NEXT;
1.152 daniel 5667: } else if (RAW == '*') {
1.62 daniel 5668: ret->ocur = XML_ELEMENT_CONTENT_MULT;
5669: NEXT;
1.152 daniel 5670: } else if (RAW == '+') {
1.62 daniel 5671: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
5672: NEXT;
5673: }
5674: return(ret);
1.61 daniel 5675: }
5676:
5677: /**
5678: * xmlParseElementContentDecl:
5679: * @ctxt: an XML parser context
5680: * @name: the name of the element being defined.
5681: * @result: the Element Content pointer will be stored here if any
1.22 daniel 5682: *
1.61 daniel 5683: * parse the declaration for an Element content either Mixed or Children,
5684: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5685: *
5686: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1.50 daniel 5687: *
1.61 daniel 5688: * returns: the type of element content XML_ELEMENT_TYPE_xxx
1.22 daniel 5689: */
5690:
1.61 daniel 5691: int
1.123 daniel 5692: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
1.61 daniel 5693: xmlElementContentPtr *result) {
5694:
5695: xmlElementContentPtr tree = NULL;
1.187 daniel 5696: xmlParserInputPtr input = ctxt->input;
1.61 daniel 5697: int res;
5698:
5699: *result = NULL;
5700:
1.152 daniel 5701: if (RAW != '(') {
1.61 daniel 5702: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5703: ctxt->sax->error(ctxt->userData,
1.61 daniel 5704: "xmlParseElementContentDecl : '(' expected\n");
1.123 daniel 5705: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.61 daniel 5706: ctxt->wellFormed = 0;
1.180 daniel 5707: ctxt->disableSAX = 1;
1.61 daniel 5708: return(-1);
5709: }
5710: NEXT;
1.97 daniel 5711: GROW;
1.61 daniel 5712: SKIP_BLANKS;
1.152 daniel 5713: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 5714: (NXT(2) == 'C') && (NXT(3) == 'D') &&
5715: (NXT(4) == 'A') && (NXT(5) == 'T') &&
5716: (NXT(6) == 'A')) {
1.62 daniel 5717: tree = xmlParseElementMixedContentDecl(ctxt);
1.61 daniel 5718: res = XML_ELEMENT_TYPE_MIXED;
5719: } else {
1.62 daniel 5720: tree = xmlParseElementChildrenContentDecl(ctxt);
1.61 daniel 5721: res = XML_ELEMENT_TYPE_ELEMENT;
5722: }
1.187 daniel 5723: if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
5724: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5725: ctxt->sax->error(ctxt->userData,
5726: "Element content declaration doesn't start and stop in the same entity\n");
5727: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5728: ctxt->wellFormed = 0;
5729: ctxt->disableSAX = 1;
5730: }
1.61 daniel 5731: SKIP_BLANKS;
1.63 daniel 5732: /****************************
1.152 daniel 5733: if (RAW != ')') {
1.61 daniel 5734: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5735: ctxt->sax->error(ctxt->userData,
1.61 daniel 5736: "xmlParseElementContentDecl : ')' expected\n");
5737: ctxt->wellFormed = 0;
1.180 daniel 5738: ctxt->disableSAX = 1;
1.61 daniel 5739: return(-1);
5740: }
1.63 daniel 5741: ****************************/
5742: *result = tree;
1.61 daniel 5743: return(res);
1.22 daniel 5744: }
5745:
1.50 daniel 5746: /**
5747: * xmlParseElementDecl:
5748: * @ctxt: an XML parser context
5749: *
5750: * parse an Element declaration.
1.22 daniel 5751: *
5752: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5753: *
1.99 daniel 5754: * [ VC: Unique Element Type Declaration ]
1.117 daniel 5755: * No element type may be declared more than once
1.69 daniel 5756: *
5757: * Returns the type of the element, or -1 in case of error
1.22 daniel 5758: */
1.59 daniel 5759: int
1.55 daniel 5760: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 5761: xmlChar *name;
1.59 daniel 5762: int ret = -1;
1.61 daniel 5763: xmlElementContentPtr content = NULL;
1.22 daniel 5764:
1.97 daniel 5765: GROW;
1.152 daniel 5766: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 5767: (NXT(2) == 'E') && (NXT(3) == 'L') &&
5768: (NXT(4) == 'E') && (NXT(5) == 'M') &&
5769: (NXT(6) == 'E') && (NXT(7) == 'N') &&
1.59 daniel 5770: (NXT(8) == 'T')) {
1.187 daniel 5771: xmlParserInputPtr input = ctxt->input;
5772:
1.40 daniel 5773: SKIP(9);
1.59 daniel 5774: if (!IS_BLANK(CUR)) {
5775: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5776: ctxt->sax->error(ctxt->userData,
1.59 daniel 5777: "Space required after 'ELEMENT'\n");
1.123 daniel 5778: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5779: ctxt->wellFormed = 0;
1.180 daniel 5780: ctxt->disableSAX = 1;
1.59 daniel 5781: }
1.42 daniel 5782: SKIP_BLANKS;
1.22 daniel 5783: name = xmlParseName(ctxt);
5784: if (name == NULL) {
1.55 daniel 5785: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5786: ctxt->sax->error(ctxt->userData,
1.59 daniel 5787: "xmlParseElementDecl: no name for Element\n");
1.123 daniel 5788: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5789: ctxt->wellFormed = 0;
1.180 daniel 5790: ctxt->disableSAX = 1;
1.59 daniel 5791: return(-1);
5792: }
5793: if (!IS_BLANK(CUR)) {
5794: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5795: ctxt->sax->error(ctxt->userData,
1.59 daniel 5796: "Space required after the element name\n");
1.123 daniel 5797: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5798: ctxt->wellFormed = 0;
1.180 daniel 5799: ctxt->disableSAX = 1;
1.22 daniel 5800: }
1.42 daniel 5801: SKIP_BLANKS;
1.152 daniel 5802: if ((RAW == 'E') && (NXT(1) == 'M') &&
1.40 daniel 5803: (NXT(2) == 'P') && (NXT(3) == 'T') &&
5804: (NXT(4) == 'Y')) {
5805: SKIP(5);
1.22 daniel 5806: /*
5807: * Element must always be empty.
5808: */
1.59 daniel 5809: ret = XML_ELEMENT_TYPE_EMPTY;
1.152 daniel 5810: } else if ((RAW == 'A') && (NXT(1) == 'N') &&
1.40 daniel 5811: (NXT(2) == 'Y')) {
5812: SKIP(3);
1.22 daniel 5813: /*
5814: * Element is a generic container.
5815: */
1.59 daniel 5816: ret = XML_ELEMENT_TYPE_ANY;
1.152 daniel 5817: } else if (RAW == '(') {
1.61 daniel 5818: ret = xmlParseElementContentDecl(ctxt, name, &content);
1.22 daniel 5819: } else {
1.98 daniel 5820: /*
5821: * [ WFC: PEs in Internal Subset ] error handling.
5822: */
1.152 daniel 5823: if ((RAW == '%') && (ctxt->external == 0) &&
1.98 daniel 5824: (ctxt->inputNr == 1)) {
5825: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5826: ctxt->sax->error(ctxt->userData,
5827: "PEReference: forbidden within markup decl in internal subset\n");
1.123 daniel 5828: ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
1.98 daniel 5829: } else {
5830: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5831: ctxt->sax->error(ctxt->userData,
5832: "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
1.123 daniel 5833: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.98 daniel 5834: }
1.61 daniel 5835: ctxt->wellFormed = 0;
1.180 daniel 5836: ctxt->disableSAX = 1;
1.119 daniel 5837: if (name != NULL) xmlFree(name);
1.61 daniel 5838: return(-1);
1.22 daniel 5839: }
1.142 daniel 5840:
5841: SKIP_BLANKS;
5842: /*
5843: * Pop-up of finished entities.
5844: */
1.152 daniel 5845: while ((RAW == 0) && (ctxt->inputNr > 1))
1.142 daniel 5846: xmlPopInput(ctxt);
1.42 daniel 5847: SKIP_BLANKS;
1.142 daniel 5848:
1.152 daniel 5849: if (RAW != '>') {
1.55 daniel 5850: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5851: ctxt->sax->error(ctxt->userData,
1.31 daniel 5852: "xmlParseElementDecl: expected '>' at the end\n");
1.123 daniel 5853: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.59 daniel 5854: ctxt->wellFormed = 0;
1.180 daniel 5855: ctxt->disableSAX = 1;
1.61 daniel 5856: } else {
1.187 daniel 5857: if (input != ctxt->input) {
5858: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5859: ctxt->sax->error(ctxt->userData,
5860: "Element declaration doesn't start and stop in the same entity\n");
5861: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5862: ctxt->wellFormed = 0;
5863: ctxt->disableSAX = 1;
5864: }
5865:
1.40 daniel 5866: NEXT;
1.171 daniel 5867: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5868: (ctxt->sax->elementDecl != NULL))
1.76 daniel 5869: ctxt->sax->elementDecl(ctxt->userData, name, ret,
5870: content);
1.61 daniel 5871: }
1.84 daniel 5872: if (content != NULL) {
5873: xmlFreeElementContent(content);
5874: }
1.61 daniel 5875: if (name != NULL) {
1.119 daniel 5876: xmlFree(name);
1.61 daniel 5877: }
1.22 daniel 5878: }
1.59 daniel 5879: return(ret);
1.22 daniel 5880: }
5881:
1.50 daniel 5882: /**
5883: * xmlParseMarkupDecl:
5884: * @ctxt: an XML parser context
5885: *
5886: * parse Markup declarations
1.22 daniel 5887: *
5888: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5889: * NotationDecl | PI | Comment
5890: *
1.98 daniel 5891: * [ VC: Proper Declaration/PE Nesting ]
5892: * TODO Parameter-entity replacement text must be properly nested with
5893: * markup declarations. That is to say, if either the first character
5894: * or the last character of a markup declaration (markupdecl above) is
5895: * contained in the replacement text for a parameter-entity reference,
5896: * both must be contained in the same replacement text.
5897: *
5898: * [ WFC: PEs in Internal Subset ]
5899: * In the internal DTD subset, parameter-entity references can occur
5900: * only where markup declarations can occur, not within markup declarations.
5901: * (This does not apply to references that occur in external parameter
5902: * entities or to the external subset.)
1.22 daniel 5903: */
1.55 daniel 5904: void
5905: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1.94 daniel 5906: GROW;
1.22 daniel 5907: xmlParseElementDecl(ctxt);
5908: xmlParseAttributeListDecl(ctxt);
5909: xmlParseEntityDecl(ctxt);
5910: xmlParseNotationDecl(ctxt);
5911: xmlParsePI(ctxt);
1.114 daniel 5912: xmlParseComment(ctxt);
1.98 daniel 5913: /*
5914: * This is only for internal subset. On external entities,
5915: * the replacement is done before parsing stage
5916: */
5917: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5918: xmlParsePEReference(ctxt);
1.97 daniel 5919: ctxt->instate = XML_PARSER_DTD;
1.22 daniel 5920: }
5921:
1.50 daniel 5922: /**
1.76 daniel 5923: * xmlParseTextDecl:
5924: * @ctxt: an XML parser context
5925: *
5926: * parse an XML declaration header for external entities
5927: *
5928: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
1.176 daniel 5929: *
5930: * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
1.76 daniel 5931: */
5932:
1.172 daniel 5933: void
1.76 daniel 5934: xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 5935: xmlChar *version;
1.76 daniel 5936:
5937: /*
5938: * We know that '<?xml' is here.
5939: */
5940: SKIP(5);
5941:
5942: if (!IS_BLANK(CUR)) {
5943: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5944: ctxt->sax->error(ctxt->userData,
5945: "Space needed after '<?xml'\n");
1.123 daniel 5946: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 5947: ctxt->wellFormed = 0;
1.180 daniel 5948: ctxt->disableSAX = 1;
1.76 daniel 5949: }
5950: SKIP_BLANKS;
5951:
5952: /*
5953: * We may have the VersionInfo here.
5954: */
5955: version = xmlParseVersionInfo(ctxt);
5956: if (version == NULL)
5957: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.172 daniel 5958: ctxt->input->version = version;
1.76 daniel 5959:
5960: /*
5961: * We must have the encoding declaration
5962: */
5963: if (!IS_BLANK(CUR)) {
5964: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5965: ctxt->sax->error(ctxt->userData, "Space needed here\n");
1.123 daniel 5966: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 5967: ctxt->wellFormed = 0;
1.180 daniel 5968: ctxt->disableSAX = 1;
1.76 daniel 5969: }
1.172 daniel 5970: ctxt->input->encoding = xmlParseEncodingDecl(ctxt);
1.76 daniel 5971:
5972: SKIP_BLANKS;
1.152 daniel 5973: if ((RAW == '?') && (NXT(1) == '>')) {
1.76 daniel 5974: SKIP(2);
1.152 daniel 5975: } else if (RAW == '>') {
1.76 daniel 5976: /* Deprecated old WD ... */
5977: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5978: ctxt->sax->error(ctxt->userData,
5979: "XML declaration must end-up with '?>'\n");
1.123 daniel 5980: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 5981: ctxt->wellFormed = 0;
1.180 daniel 5982: ctxt->disableSAX = 1;
1.76 daniel 5983: NEXT;
5984: } else {
5985: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5986: ctxt->sax->error(ctxt->userData,
5987: "parsing XML declaration: '?>' expected\n");
1.123 daniel 5988: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 5989: ctxt->wellFormed = 0;
1.180 daniel 5990: ctxt->disableSAX = 1;
1.76 daniel 5991: MOVETO_ENDTAG(CUR_PTR);
5992: NEXT;
5993: }
5994: }
5995:
5996: /*
5997: * xmlParseConditionalSections
5998: * @ctxt: an XML parser context
5999: *
6000: * TODO : Conditionnal section are not yet supported !
6001: *
6002: * [61] conditionalSect ::= includeSect | ignoreSect
6003: * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6004: * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6005: * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6006: * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6007: */
6008:
6009: void
6010: xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
1.165 daniel 6011: SKIP(3);
6012: SKIP_BLANKS;
1.168 daniel 6013: if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
6014: (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
6015: (NXT(6) == 'E')) {
1.165 daniel 6016: SKIP(7);
1.168 daniel 6017: SKIP_BLANKS;
6018: if (RAW != '[') {
6019: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6020: ctxt->sax->error(ctxt->userData,
6021: "XML conditional section '[' expected\n");
6022: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
6023: ctxt->wellFormed = 0;
1.180 daniel 6024: ctxt->disableSAX = 1;
1.168 daniel 6025: } else {
6026: NEXT;
6027: }
1.165 daniel 6028: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6029: (NXT(2) != '>'))) {
6030: const xmlChar *check = CUR_PTR;
6031: int cons = ctxt->input->consumed;
6032: int tok = ctxt->token;
6033:
6034: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6035: xmlParseConditionalSections(ctxt);
6036: } else if (IS_BLANK(CUR)) {
6037: NEXT;
6038: } else if (RAW == '%') {
6039: xmlParsePEReference(ctxt);
6040: } else
6041: xmlParseMarkupDecl(ctxt);
6042:
6043: /*
6044: * Pop-up of finished entities.
6045: */
6046: while ((RAW == 0) && (ctxt->inputNr > 1))
6047: xmlPopInput(ctxt);
6048:
6049: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
6050: (tok == ctxt->token)) {
6051: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6052: ctxt->sax->error(ctxt->userData,
6053: "Content error in the external subset\n");
6054: ctxt->wellFormed = 0;
1.180 daniel 6055: ctxt->disableSAX = 1;
1.165 daniel 6056: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
6057: break;
6058: }
6059: }
1.168 daniel 6060: } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
6061: (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
1.171 daniel 6062: int state;
6063:
1.168 daniel 6064: SKIP(6);
6065: SKIP_BLANKS;
6066: if (RAW != '[') {
6067: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6068: ctxt->sax->error(ctxt->userData,
6069: "XML conditional section '[' expected\n");
6070: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
6071: ctxt->wellFormed = 0;
1.180 daniel 6072: ctxt->disableSAX = 1;
1.168 daniel 6073: } else {
6074: NEXT;
6075: }
1.171 daniel 6076:
1.143 daniel 6077: /*
1.171 daniel 6078: * Parse up to the end of the conditionnal section
6079: * But disable SAX event generating DTD building in the meantime
1.143 daniel 6080: */
1.171 daniel 6081: state = ctxt->disableSAX;
1.165 daniel 6082: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6083: (NXT(2) != '>'))) {
1.171 daniel 6084: const xmlChar *check = CUR_PTR;
6085: int cons = ctxt->input->consumed;
6086: int tok = ctxt->token;
6087:
6088: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6089: xmlParseConditionalSections(ctxt);
6090: } else if (IS_BLANK(CUR)) {
6091: NEXT;
6092: } else if (RAW == '%') {
6093: xmlParsePEReference(ctxt);
6094: } else
6095: xmlParseMarkupDecl(ctxt);
6096:
1.165 daniel 6097: /*
6098: * Pop-up of finished entities.
6099: */
6100: while ((RAW == 0) && (ctxt->inputNr > 1))
6101: xmlPopInput(ctxt);
1.143 daniel 6102:
1.171 daniel 6103: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
6104: (tok == ctxt->token)) {
6105: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6106: ctxt->sax->error(ctxt->userData,
6107: "Content error in the external subset\n");
6108: ctxt->wellFormed = 0;
1.180 daniel 6109: ctxt->disableSAX = 1;
1.171 daniel 6110: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
6111: break;
6112: }
1.165 daniel 6113: }
1.171 daniel 6114: ctxt->disableSAX = state;
1.168 daniel 6115: } else {
6116: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6117: ctxt->sax->error(ctxt->userData,
6118: "XML conditional section INCLUDE or IGNORE keyword expected\n");
6119: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
6120: ctxt->wellFormed = 0;
1.180 daniel 6121: ctxt->disableSAX = 1;
1.143 daniel 6122: }
6123:
1.152 daniel 6124: if (RAW == 0)
1.143 daniel 6125: SHRINK;
6126:
1.152 daniel 6127: if (RAW == 0) {
1.76 daniel 6128: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6129: ctxt->sax->error(ctxt->userData,
6130: "XML conditional section not closed\n");
1.123 daniel 6131: ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
1.76 daniel 6132: ctxt->wellFormed = 0;
1.180 daniel 6133: ctxt->disableSAX = 1;
1.143 daniel 6134: } else {
6135: SKIP(3);
1.76 daniel 6136: }
6137: }
6138:
6139: /**
1.124 daniel 6140: * xmlParseExternalSubset:
1.76 daniel 6141: * @ctxt: an XML parser context
1.124 daniel 6142: * @ExternalID: the external identifier
6143: * @SystemID: the system identifier (or URL)
1.76 daniel 6144: *
6145: * parse Markup declarations from an external subset
6146: *
6147: * [30] extSubset ::= textDecl? extSubsetDecl
6148: *
6149: * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6150: */
6151: void
1.123 daniel 6152: xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6153: const xmlChar *SystemID) {
1.132 daniel 6154: GROW;
1.152 daniel 6155: if ((RAW == '<') && (NXT(1) == '?') &&
1.76 daniel 6156: (NXT(2) == 'x') && (NXT(3) == 'm') &&
6157: (NXT(4) == 'l')) {
1.172 daniel 6158: xmlParseTextDecl(ctxt);
1.76 daniel 6159: }
1.79 daniel 6160: if (ctxt->myDoc == NULL) {
1.116 daniel 6161: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
1.79 daniel 6162: }
6163: if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6164: xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6165:
1.96 daniel 6166: ctxt->instate = XML_PARSER_DTD;
1.101 daniel 6167: ctxt->external = 1;
1.152 daniel 6168: while (((RAW == '<') && (NXT(1) == '?')) ||
6169: ((RAW == '<') && (NXT(1) == '!')) ||
1.164 daniel 6170: IS_BLANK(CUR)) {
1.123 daniel 6171: const xmlChar *check = CUR_PTR;
1.115 daniel 6172: int cons = ctxt->input->consumed;
1.164 daniel 6173: int tok = ctxt->token;
1.115 daniel 6174:
1.152 daniel 6175: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
1.76 daniel 6176: xmlParseConditionalSections(ctxt);
6177: } else if (IS_BLANK(CUR)) {
6178: NEXT;
1.152 daniel 6179: } else if (RAW == '%') {
1.76 daniel 6180: xmlParsePEReference(ctxt);
6181: } else
6182: xmlParseMarkupDecl(ctxt);
1.77 daniel 6183:
6184: /*
6185: * Pop-up of finished entities.
6186: */
1.166 daniel 6187: while ((RAW == 0) && (ctxt->inputNr > 1))
1.77 daniel 6188: xmlPopInput(ctxt);
6189:
1.164 daniel 6190: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
6191: (tok == ctxt->token)) {
1.115 daniel 6192: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6193: ctxt->sax->error(ctxt->userData,
6194: "Content error in the external subset\n");
6195: ctxt->wellFormed = 0;
1.180 daniel 6196: ctxt->disableSAX = 1;
1.123 daniel 6197: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.115 daniel 6198: break;
6199: }
1.76 daniel 6200: }
6201:
1.152 daniel 6202: if (RAW != 0) {
1.76 daniel 6203: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6204: ctxt->sax->error(ctxt->userData,
6205: "Extra content at the end of the document\n");
1.123 daniel 6206: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.76 daniel 6207: ctxt->wellFormed = 0;
1.180 daniel 6208: ctxt->disableSAX = 1;
1.76 daniel 6209: }
6210:
6211: }
6212:
6213: /**
1.77 daniel 6214: * xmlParseReference:
6215: * @ctxt: an XML parser context
6216: *
6217: * parse and handle entity references in content, depending on the SAX
6218: * interface, this may end-up in a call to character() if this is a
1.79 daniel 6219: * CharRef, a predefined entity, if there is no reference() callback.
6220: * or if the parser was asked to switch to that mode.
1.77 daniel 6221: *
6222: * [67] Reference ::= EntityRef | CharRef
6223: */
6224: void
6225: xmlParseReference(xmlParserCtxtPtr ctxt) {
6226: xmlEntityPtr ent;
1.123 daniel 6227: xmlChar *val;
1.152 daniel 6228: if (RAW != '&') return;
1.77 daniel 6229:
1.113 daniel 6230: if (ctxt->inputNr > 1) {
1.123 daniel 6231: xmlChar cur[2] = { '&' , 0 } ;
1.113 daniel 6232:
1.171 daniel 6233: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6234: (!ctxt->disableSAX))
1.113 daniel 6235: ctxt->sax->characters(ctxt->userData, cur, 1);
6236: if (ctxt->token == '&')
6237: ctxt->token = 0;
6238: else {
6239: SKIP(1);
6240: }
6241: return;
6242: }
1.77 daniel 6243: if (NXT(1) == '#') {
1.152 daniel 6244: int i = 0;
1.153 daniel 6245: xmlChar out[10];
6246: int hex = NXT(2);
1.77 daniel 6247: int val = xmlParseCharRef(ctxt);
1.152 daniel 6248:
1.153 daniel 6249: if (ctxt->encoding != NULL) {
6250: /*
6251: * So we are using non-UTF-8 buffers
6252: * Check that the char fit on 8bits, if not
6253: * generate a CharRef.
6254: */
6255: if (val <= 0xFF) {
6256: out[0] = val;
6257: out[1] = 0;
1.171 daniel 6258: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6259: (!ctxt->disableSAX))
1.153 daniel 6260: ctxt->sax->characters(ctxt->userData, out, 1);
6261: } else {
6262: if ((hex == 'x') || (hex == 'X'))
6263: sprintf((char *)out, "#x%X", val);
6264: else
6265: sprintf((char *)out, "#%d", val);
1.171 daniel 6266: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6267: (!ctxt->disableSAX))
1.153 daniel 6268: ctxt->sax->reference(ctxt->userData, out);
6269: }
6270: } else {
6271: /*
6272: * Just encode the value in UTF-8
6273: */
6274: COPY_BUF(0 ,out, i, val);
6275: out[i] = 0;
1.171 daniel 6276: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6277: (!ctxt->disableSAX))
1.153 daniel 6278: ctxt->sax->characters(ctxt->userData, out, i);
6279: }
1.77 daniel 6280: } else {
6281: ent = xmlParseEntityRef(ctxt);
6282: if (ent == NULL) return;
6283: if ((ent->name != NULL) &&
1.159 daniel 6284: (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
1.180 daniel 6285: xmlNodePtr list = NULL;
6286: int ret;
6287:
6288:
6289: /*
6290: * The first reference to the entity trigger a parsing phase
6291: * where the ent->children is filled with the result from
6292: * the parsing.
6293: */
6294: if (ent->children == NULL) {
6295: xmlChar *value;
6296: value = ent->content;
6297:
6298: /*
6299: * Check that this entity is well formed
6300: */
6301: if ((value != NULL) &&
6302: (value[1] == 0) && (value[0] == '<') &&
6303: (!xmlStrcmp(ent->name, BAD_CAST "lt"))) {
6304: /*
6305: * TODO: get definite answer on this !!!
6306: * Lots of entity decls are used to declare a single
6307: * char
6308: * <!ENTITY lt "<">
6309: * Which seems to be valid since
6310: * 2.4: The ampersand character (&) and the left angle
6311: * bracket (<) may appear in their literal form only
6312: * when used ... They are also legal within the literal
6313: * entity value of an internal entity declaration;i
6314: * see "4.3.2 Well-Formed Parsed Entities".
6315: * IMHO 2.4 and 4.3.2 are directly in contradiction.
6316: * Looking at the OASIS test suite and James Clark
6317: * tests, this is broken. However the XML REC uses
6318: * it. Is the XML REC not well-formed ????
6319: * This is a hack to avoid this problem
6320: */
6321: list = xmlNewDocText(ctxt->myDoc, value);
6322: if (list != NULL) {
6323: if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6324: (ent->children == NULL)) {
6325: ent->children = list;
6326: ent->last = list;
6327: list->parent = (xmlNodePtr) ent;
6328: } else {
6329: xmlFreeNodeList(list);
6330: }
6331: } else if (list != NULL) {
6332: xmlFreeNodeList(list);
6333: }
1.181 daniel 6334: } else {
1.180 daniel 6335: /*
6336: * 4.3.2: An internal general parsed entity is well-formed
6337: * if its replacement text matches the production labeled
6338: * content.
6339: */
1.185 daniel 6340: if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6341: ctxt->depth++;
1.180 daniel 6342: ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
1.185 daniel 6343: ctxt->sax, NULL, ctxt->depth,
6344: value, &list);
6345: ctxt->depth--;
6346: } else if (ent->etype ==
6347: XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6348: ctxt->depth++;
1.180 daniel 6349: ret = xmlParseExternalEntity(ctxt->myDoc,
1.185 daniel 6350: ctxt->sax, NULL, ctxt->depth,
6351: ent->SystemID, ent->ExternalID, &list);
6352: ctxt->depth--;
6353: } else {
1.180 daniel 6354: ret = -1;
6355: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6356: ctxt->sax->error(ctxt->userData,
6357: "Internal: invalid entity type\n");
6358: }
1.185 daniel 6359: if (ret == XML_ERR_ENTITY_LOOP) {
6360: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6361: ctxt->sax->error(ctxt->userData,
6362: "Detected entity reference loop\n");
6363: ctxt->wellFormed = 0;
6364: ctxt->disableSAX = 1;
6365: ctxt->errNo = XML_ERR_ENTITY_LOOP;
6366: } else if ((ret == 0) && (list != NULL)) {
1.180 daniel 6367: if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6368: (ent->children == NULL)) {
6369: ent->children = list;
6370: while (list != NULL) {
6371: list->parent = (xmlNodePtr) ent;
6372: if (list->next == NULL)
6373: ent->last = list;
6374: list = list->next;
6375: }
6376: } else {
6377: xmlFreeNodeList(list);
6378: }
6379: } else if (ret > 0) {
6380: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6381: ctxt->sax->error(ctxt->userData,
6382: "Entity value required\n");
6383: ctxt->errNo = ret;
6384: ctxt->wellFormed = 0;
6385: ctxt->disableSAX = 1;
6386: } else if (list != NULL) {
6387: xmlFreeNodeList(list);
6388: }
6389: }
6390: }
1.113 daniel 6391: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
1.171 daniel 6392: (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
1.113 daniel 6393: /*
6394: * Create a node.
6395: */
6396: ctxt->sax->reference(ctxt->userData, ent->name);
6397: return;
6398: } else if (ctxt->replaceEntities) {
6399: xmlParserInputPtr input;
1.79 daniel 6400:
1.113 daniel 6401: input = xmlNewEntityInputStream(ctxt, ent);
6402: xmlPushInput(ctxt, input);
1.167 daniel 6403: if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
6404: (RAW == '<') && (NXT(1) == '?') &&
6405: (NXT(2) == 'x') && (NXT(3) == 'm') &&
6406: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 6407: xmlParseTextDecl(ctxt);
1.167 daniel 6408: if (input->standalone) {
6409: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6410: ctxt->sax->error(ctxt->userData,
6411: "external parsed entities cannot be standalone\n");
6412: ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
6413: ctxt->wellFormed = 0;
1.180 daniel 6414: ctxt->disableSAX = 1;
1.167 daniel 6415: }
6416: }
1.179 daniel 6417: /*
6418: * !!! TODO: build the tree under the entity first
6419: * 1234
6420: */
1.113 daniel 6421: return;
6422: }
1.77 daniel 6423: }
6424: val = ent->content;
6425: if (val == NULL) return;
6426: /*
6427: * inline the entity.
6428: */
1.171 daniel 6429: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6430: (!ctxt->disableSAX))
1.77 daniel 6431: ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6432: }
1.24 daniel 6433: }
6434:
1.50 daniel 6435: /**
6436: * xmlParseEntityRef:
6437: * @ctxt: an XML parser context
6438: *
6439: * parse ENTITY references declarations
1.24 daniel 6440: *
6441: * [68] EntityRef ::= '&' Name ';'
1.68 daniel 6442: *
1.98 daniel 6443: * [ WFC: Entity Declared ]
6444: * In a document without any DTD, a document with only an internal DTD
6445: * subset which contains no parameter entity references, or a document
6446: * with "standalone='yes'", the Name given in the entity reference
6447: * must match that in an entity declaration, except that well-formed
6448: * documents need not declare any of the following entities: amp, lt,
6449: * gt, apos, quot. The declaration of a parameter entity must precede
6450: * any reference to it. Similarly, the declaration of a general entity
6451: * must precede any reference to it which appears in a default value in an
6452: * attribute-list declaration. Note that if entities are declared in the
6453: * external subset or in external parameter entities, a non-validating
6454: * processor is not obligated to read and process their declarations;
6455: * for such documents, the rule that an entity must be declared is a
6456: * well-formedness constraint only if standalone='yes'.
6457: *
6458: * [ WFC: Parsed Entity ]
6459: * An entity reference must not contain the name of an unparsed entity
6460: *
1.77 daniel 6461: * Returns the xmlEntityPtr if found, or NULL otherwise.
1.24 daniel 6462: */
1.77 daniel 6463: xmlEntityPtr
1.55 daniel 6464: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1.123 daniel 6465: xmlChar *name;
1.72 daniel 6466: xmlEntityPtr ent = NULL;
1.24 daniel 6467:
1.91 daniel 6468: GROW;
1.111 daniel 6469:
1.152 daniel 6470: if (RAW == '&') {
1.40 daniel 6471: NEXT;
1.24 daniel 6472: name = xmlParseName(ctxt);
6473: if (name == NULL) {
1.55 daniel 6474: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 6475: ctxt->sax->error(ctxt->userData,
6476: "xmlParseEntityRef: no name\n");
1.123 daniel 6477: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 6478: ctxt->wellFormed = 0;
1.180 daniel 6479: ctxt->disableSAX = 1;
1.24 daniel 6480: } else {
1.152 daniel 6481: if (RAW == ';') {
1.40 daniel 6482: NEXT;
1.24 daniel 6483: /*
1.77 daniel 6484: * Ask first SAX for entity resolution, otherwise try the
6485: * predefined set.
6486: */
6487: if (ctxt->sax != NULL) {
6488: if (ctxt->sax->getEntity != NULL)
6489: ent = ctxt->sax->getEntity(ctxt->userData, name);
6490: if (ent == NULL)
6491: ent = xmlGetPredefinedEntity(name);
6492: }
6493: /*
1.98 daniel 6494: * [ WFC: Entity Declared ]
6495: * In a document without any DTD, a document with only an
6496: * internal DTD subset which contains no parameter entity
6497: * references, or a document with "standalone='yes'", the
6498: * Name given in the entity reference must match that in an
6499: * entity declaration, except that well-formed documents
6500: * need not declare any of the following entities: amp, lt,
6501: * gt, apos, quot.
6502: * The declaration of a parameter entity must precede any
6503: * reference to it.
6504: * Similarly, the declaration of a general entity must
6505: * precede any reference to it which appears in a default
6506: * value in an attribute-list declaration. Note that if
6507: * entities are declared in the external subset or in
6508: * external parameter entities, a non-validating processor
6509: * is not obligated to read and process their declarations;
6510: * for such documents, the rule that an entity must be
6511: * declared is a well-formedness constraint only if
6512: * standalone='yes'.
1.59 daniel 6513: */
1.77 daniel 6514: if (ent == NULL) {
1.98 daniel 6515: if ((ctxt->standalone == 1) ||
6516: ((ctxt->hasExternalSubset == 0) &&
6517: (ctxt->hasPErefs == 0))) {
6518: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 6519: ctxt->sax->error(ctxt->userData,
6520: "Entity '%s' not defined\n", name);
1.123 daniel 6521: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.77 daniel 6522: ctxt->wellFormed = 0;
1.180 daniel 6523: ctxt->disableSAX = 1;
1.77 daniel 6524: } else {
1.98 daniel 6525: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6526: ctxt->sax->warning(ctxt->userData,
6527: "Entity '%s' not defined\n", name);
1.123 daniel 6528: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
1.59 daniel 6529: }
1.77 daniel 6530: }
1.59 daniel 6531:
6532: /*
1.98 daniel 6533: * [ WFC: Parsed Entity ]
6534: * An entity reference must not contain the name of an
6535: * unparsed entity
6536: */
1.159 daniel 6537: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.98 daniel 6538: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6539: ctxt->sax->error(ctxt->userData,
6540: "Entity reference to unparsed entity %s\n", name);
1.123 daniel 6541: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 6542: ctxt->wellFormed = 0;
1.180 daniel 6543: ctxt->disableSAX = 1;
1.98 daniel 6544: }
6545:
6546: /*
6547: * [ WFC: No External Entity References ]
6548: * Attribute values cannot contain direct or indirect
6549: * entity references to external entities.
6550: */
6551: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 6552: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.98 daniel 6553: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6554: ctxt->sax->error(ctxt->userData,
6555: "Attribute references external entity '%s'\n", name);
1.123 daniel 6556: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
1.98 daniel 6557: ctxt->wellFormed = 0;
1.180 daniel 6558: ctxt->disableSAX = 1;
1.98 daniel 6559: }
6560: /*
6561: * [ WFC: No < in Attribute Values ]
6562: * The replacement text of any entity referred to directly or
6563: * indirectly in an attribute value (other than "<") must
6564: * not contain a <.
1.59 daniel 6565: */
1.98 daniel 6566: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.116 daniel 6567: (ent != NULL) &&
6568: (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
1.98 daniel 6569: (ent->content != NULL) &&
6570: (xmlStrchr(ent->content, '<'))) {
6571: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6572: ctxt->sax->error(ctxt->userData,
6573: "'<' in entity '%s' is not allowed in attributes values\n", name);
1.123 daniel 6574: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.98 daniel 6575: ctxt->wellFormed = 0;
1.180 daniel 6576: ctxt->disableSAX = 1;
1.98 daniel 6577: }
6578:
6579: /*
6580: * Internal check, no parameter entities here ...
6581: */
6582: else {
1.159 daniel 6583: switch (ent->etype) {
1.59 daniel 6584: case XML_INTERNAL_PARAMETER_ENTITY:
6585: case XML_EXTERNAL_PARAMETER_ENTITY:
6586: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6587: ctxt->sax->error(ctxt->userData,
1.59 daniel 6588: "Attempt to reference the parameter entity '%s'\n", name);
1.123 daniel 6589: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
1.59 daniel 6590: ctxt->wellFormed = 0;
1.180 daniel 6591: ctxt->disableSAX = 1;
6592: break;
6593: default:
1.59 daniel 6594: break;
6595: }
6596: }
6597:
6598: /*
1.98 daniel 6599: * [ WFC: No Recursion ]
1.117 daniel 6600: * TODO A parsed entity must not contain a recursive reference
6601: * to itself, either directly or indirectly.
1.59 daniel 6602: */
1.77 daniel 6603:
1.24 daniel 6604: } else {
1.55 daniel 6605: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6606: ctxt->sax->error(ctxt->userData,
1.59 daniel 6607: "xmlParseEntityRef: expecting ';'\n");
1.123 daniel 6608: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.59 daniel 6609: ctxt->wellFormed = 0;
1.180 daniel 6610: ctxt->disableSAX = 1;
1.24 daniel 6611: }
1.119 daniel 6612: xmlFree(name);
1.24 daniel 6613: }
6614: }
1.77 daniel 6615: return(ent);
1.24 daniel 6616: }
1.135 daniel 6617: /**
6618: * xmlParseStringEntityRef:
6619: * @ctxt: an XML parser context
6620: * @str: a pointer to an index in the string
6621: *
6622: * parse ENTITY references declarations, but this version parses it from
6623: * a string value.
6624: *
6625: * [68] EntityRef ::= '&' Name ';'
6626: *
6627: * [ WFC: Entity Declared ]
6628: * In a document without any DTD, a document with only an internal DTD
6629: * subset which contains no parameter entity references, or a document
6630: * with "standalone='yes'", the Name given in the entity reference
6631: * must match that in an entity declaration, except that well-formed
6632: * documents need not declare any of the following entities: amp, lt,
6633: * gt, apos, quot. The declaration of a parameter entity must precede
6634: * any reference to it. Similarly, the declaration of a general entity
6635: * must precede any reference to it which appears in a default value in an
6636: * attribute-list declaration. Note that if entities are declared in the
6637: * external subset or in external parameter entities, a non-validating
6638: * processor is not obligated to read and process their declarations;
6639: * for such documents, the rule that an entity must be declared is a
6640: * well-formedness constraint only if standalone='yes'.
6641: *
6642: * [ WFC: Parsed Entity ]
6643: * An entity reference must not contain the name of an unparsed entity
6644: *
6645: * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6646: * is updated to the current location in the string.
6647: */
6648: xmlEntityPtr
6649: xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6650: xmlChar *name;
6651: const xmlChar *ptr;
6652: xmlChar cur;
6653: xmlEntityPtr ent = NULL;
6654:
1.156 daniel 6655: if ((str == NULL) || (*str == NULL))
6656: return(NULL);
1.135 daniel 6657: ptr = *str;
6658: cur = *ptr;
6659: if (cur == '&') {
6660: ptr++;
6661: cur = *ptr;
6662: name = xmlParseStringName(ctxt, &ptr);
6663: if (name == NULL) {
6664: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6665: ctxt->sax->error(ctxt->userData,
6666: "xmlParseEntityRef: no name\n");
6667: ctxt->errNo = XML_ERR_NAME_REQUIRED;
6668: ctxt->wellFormed = 0;
1.180 daniel 6669: ctxt->disableSAX = 1;
1.135 daniel 6670: } else {
1.185 daniel 6671: if (*ptr == ';') {
6672: ptr++;
1.135 daniel 6673: /*
6674: * Ask first SAX for entity resolution, otherwise try the
6675: * predefined set.
6676: */
6677: if (ctxt->sax != NULL) {
6678: if (ctxt->sax->getEntity != NULL)
6679: ent = ctxt->sax->getEntity(ctxt->userData, name);
6680: if (ent == NULL)
6681: ent = xmlGetPredefinedEntity(name);
6682: }
6683: /*
6684: * [ WFC: Entity Declared ]
6685: * In a document without any DTD, a document with only an
6686: * internal DTD subset which contains no parameter entity
6687: * references, or a document with "standalone='yes'", the
6688: * Name given in the entity reference must match that in an
6689: * entity declaration, except that well-formed documents
6690: * need not declare any of the following entities: amp, lt,
6691: * gt, apos, quot.
6692: * The declaration of a parameter entity must precede any
6693: * reference to it.
6694: * Similarly, the declaration of a general entity must
6695: * precede any reference to it which appears in a default
6696: * value in an attribute-list declaration. Note that if
6697: * entities are declared in the external subset or in
6698: * external parameter entities, a non-validating processor
6699: * is not obligated to read and process their declarations;
6700: * for such documents, the rule that an entity must be
6701: * declared is a well-formedness constraint only if
6702: * standalone='yes'.
6703: */
6704: if (ent == NULL) {
6705: if ((ctxt->standalone == 1) ||
6706: ((ctxt->hasExternalSubset == 0) &&
6707: (ctxt->hasPErefs == 0))) {
6708: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6709: ctxt->sax->error(ctxt->userData,
6710: "Entity '%s' not defined\n", name);
6711: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6712: ctxt->wellFormed = 0;
1.180 daniel 6713: ctxt->disableSAX = 1;
1.135 daniel 6714: } else {
6715: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6716: ctxt->sax->warning(ctxt->userData,
6717: "Entity '%s' not defined\n", name);
6718: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
6719: }
6720: }
6721:
6722: /*
6723: * [ WFC: Parsed Entity ]
6724: * An entity reference must not contain the name of an
6725: * unparsed entity
6726: */
1.159 daniel 6727: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.135 daniel 6728: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6729: ctxt->sax->error(ctxt->userData,
6730: "Entity reference to unparsed entity %s\n", name);
6731: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
6732: ctxt->wellFormed = 0;
1.180 daniel 6733: ctxt->disableSAX = 1;
1.135 daniel 6734: }
6735:
6736: /*
6737: * [ WFC: No External Entity References ]
6738: * Attribute values cannot contain direct or indirect
6739: * entity references to external entities.
6740: */
6741: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 6742: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.135 daniel 6743: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6744: ctxt->sax->error(ctxt->userData,
6745: "Attribute references external entity '%s'\n", name);
6746: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
6747: ctxt->wellFormed = 0;
1.180 daniel 6748: ctxt->disableSAX = 1;
1.135 daniel 6749: }
6750: /*
6751: * [ WFC: No < in Attribute Values ]
6752: * The replacement text of any entity referred to directly or
6753: * indirectly in an attribute value (other than "<") must
6754: * not contain a <.
6755: */
6756: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6757: (ent != NULL) &&
6758: (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
6759: (ent->content != NULL) &&
6760: (xmlStrchr(ent->content, '<'))) {
6761: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6762: ctxt->sax->error(ctxt->userData,
6763: "'<' in entity '%s' is not allowed in attributes values\n", name);
6764: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
6765: ctxt->wellFormed = 0;
1.180 daniel 6766: ctxt->disableSAX = 1;
1.135 daniel 6767: }
6768:
6769: /*
6770: * Internal check, no parameter entities here ...
6771: */
6772: else {
1.159 daniel 6773: switch (ent->etype) {
1.135 daniel 6774: case XML_INTERNAL_PARAMETER_ENTITY:
6775: case XML_EXTERNAL_PARAMETER_ENTITY:
6776: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6777: ctxt->sax->error(ctxt->userData,
6778: "Attempt to reference the parameter entity '%s'\n", name);
6779: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
6780: ctxt->wellFormed = 0;
1.180 daniel 6781: ctxt->disableSAX = 1;
6782: break;
6783: default:
1.135 daniel 6784: break;
6785: }
6786: }
6787:
6788: /*
6789: * [ WFC: No Recursion ]
6790: * TODO A parsed entity must not contain a recursive reference
6791: * to itself, either directly or indirectly.
6792: */
6793:
6794: } else {
6795: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6796: ctxt->sax->error(ctxt->userData,
6797: "xmlParseEntityRef: expecting ';'\n");
6798: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6799: ctxt->wellFormed = 0;
1.180 daniel 6800: ctxt->disableSAX = 1;
1.135 daniel 6801: }
6802: xmlFree(name);
6803: }
6804: }
1.185 daniel 6805: *str = ptr;
1.135 daniel 6806: return(ent);
6807: }
1.24 daniel 6808:
1.50 daniel 6809: /**
6810: * xmlParsePEReference:
6811: * @ctxt: an XML parser context
6812: *
6813: * parse PEReference declarations
1.77 daniel 6814: * The entity content is handled directly by pushing it's content as
6815: * a new input stream.
1.22 daniel 6816: *
6817: * [69] PEReference ::= '%' Name ';'
1.68 daniel 6818: *
1.98 daniel 6819: * [ WFC: No Recursion ]
6820: * TODO A parsed entity must not contain a recursive
6821: * reference to itself, either directly or indirectly.
6822: *
6823: * [ WFC: Entity Declared ]
6824: * In a document without any DTD, a document with only an internal DTD
6825: * subset which contains no parameter entity references, or a document
6826: * with "standalone='yes'", ... ... The declaration of a parameter
6827: * entity must precede any reference to it...
6828: *
6829: * [ VC: Entity Declared ]
6830: * In a document with an external subset or external parameter entities
6831: * with "standalone='no'", ... ... The declaration of a parameter entity
6832: * must precede any reference to it...
6833: *
6834: * [ WFC: In DTD ]
6835: * Parameter-entity references may only appear in the DTD.
6836: * NOTE: misleading but this is handled.
1.22 daniel 6837: */
1.77 daniel 6838: void
1.55 daniel 6839: xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 6840: xmlChar *name;
1.72 daniel 6841: xmlEntityPtr entity = NULL;
1.50 daniel 6842: xmlParserInputPtr input;
1.22 daniel 6843:
1.152 daniel 6844: if (RAW == '%') {
1.40 daniel 6845: NEXT;
1.22 daniel 6846: name = xmlParseName(ctxt);
6847: if (name == NULL) {
1.55 daniel 6848: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6849: ctxt->sax->error(ctxt->userData,
6850: "xmlParsePEReference: no name\n");
1.123 daniel 6851: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 6852: ctxt->wellFormed = 0;
1.180 daniel 6853: ctxt->disableSAX = 1;
1.22 daniel 6854: } else {
1.152 daniel 6855: if (RAW == ';') {
1.40 daniel 6856: NEXT;
1.98 daniel 6857: if ((ctxt->sax != NULL) &&
6858: (ctxt->sax->getParameterEntity != NULL))
6859: entity = ctxt->sax->getParameterEntity(ctxt->userData,
6860: name);
1.45 daniel 6861: if (entity == NULL) {
1.98 daniel 6862: /*
6863: * [ WFC: Entity Declared ]
6864: * In a document without any DTD, a document with only an
6865: * internal DTD subset which contains no parameter entity
6866: * references, or a document with "standalone='yes'", ...
6867: * ... The declaration of a parameter entity must precede
6868: * any reference to it...
6869: */
6870: if ((ctxt->standalone == 1) ||
6871: ((ctxt->hasExternalSubset == 0) &&
6872: (ctxt->hasPErefs == 0))) {
6873: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6874: ctxt->sax->error(ctxt->userData,
6875: "PEReference: %%%s; not found\n", name);
1.123 daniel 6876: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.98 daniel 6877: ctxt->wellFormed = 0;
1.180 daniel 6878: ctxt->disableSAX = 1;
1.98 daniel 6879: } else {
6880: /*
6881: * [ VC: Entity Declared ]
6882: * In a document with an external subset or external
6883: * parameter entities with "standalone='no'", ...
6884: * ... The declaration of a parameter entity must precede
6885: * any reference to it...
6886: */
6887: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6888: ctxt->sax->warning(ctxt->userData,
6889: "PEReference: %%%s; not found\n", name);
6890: ctxt->valid = 0;
6891: }
1.50 daniel 6892: } else {
1.98 daniel 6893: /*
6894: * Internal checking in case the entity quest barfed
6895: */
1.159 daniel 6896: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6897: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.98 daniel 6898: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6899: ctxt->sax->warning(ctxt->userData,
6900: "Internal: %%%s; is not a parameter entity\n", name);
6901: } else {
1.164 daniel 6902: /*
6903: * TODO !!!
6904: * handle the extra spaces added before and after
6905: * c.f. http://www.w3.org/TR/REC-xml#as-PE
6906: */
1.98 daniel 6907: input = xmlNewEntityInputStream(ctxt, entity);
6908: xmlPushInput(ctxt, input);
1.164 daniel 6909: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6910: (RAW == '<') && (NXT(1) == '?') &&
6911: (NXT(2) == 'x') && (NXT(3) == 'm') &&
6912: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 6913: xmlParseTextDecl(ctxt);
1.164 daniel 6914: }
6915: if (ctxt->token == 0)
6916: ctxt->token = ' ';
1.98 daniel 6917: }
1.45 daniel 6918: }
1.98 daniel 6919: ctxt->hasPErefs = 1;
1.22 daniel 6920: } else {
1.55 daniel 6921: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6922: ctxt->sax->error(ctxt->userData,
1.59 daniel 6923: "xmlParsePEReference: expecting ';'\n");
1.123 daniel 6924: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.59 daniel 6925: ctxt->wellFormed = 0;
1.180 daniel 6926: ctxt->disableSAX = 1;
1.22 daniel 6927: }
1.119 daniel 6928: xmlFree(name);
1.3 veillard 6929: }
6930: }
6931: }
6932:
1.50 daniel 6933: /**
1.135 daniel 6934: * xmlParseStringPEReference:
6935: * @ctxt: an XML parser context
6936: * @str: a pointer to an index in the string
6937: *
6938: * parse PEReference declarations
6939: *
6940: * [69] PEReference ::= '%' Name ';'
6941: *
6942: * [ WFC: No Recursion ]
6943: * TODO A parsed entity must not contain a recursive
6944: * reference to itself, either directly or indirectly.
6945: *
6946: * [ WFC: Entity Declared ]
6947: * In a document without any DTD, a document with only an internal DTD
6948: * subset which contains no parameter entity references, or a document
6949: * with "standalone='yes'", ... ... The declaration of a parameter
6950: * entity must precede any reference to it...
6951: *
6952: * [ VC: Entity Declared ]
6953: * In a document with an external subset or external parameter entities
6954: * with "standalone='no'", ... ... The declaration of a parameter entity
6955: * must precede any reference to it...
6956: *
6957: * [ WFC: In DTD ]
6958: * Parameter-entity references may only appear in the DTD.
6959: * NOTE: misleading but this is handled.
6960: *
6961: * Returns the string of the entity content.
6962: * str is updated to the current value of the index
6963: */
6964: xmlEntityPtr
6965: xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6966: const xmlChar *ptr;
6967: xmlChar cur;
6968: xmlChar *name;
6969: xmlEntityPtr entity = NULL;
6970:
6971: if ((str == NULL) || (*str == NULL)) return(NULL);
6972: ptr = *str;
6973: cur = *ptr;
6974: if (cur == '%') {
6975: ptr++;
6976: cur = *ptr;
6977: name = xmlParseStringName(ctxt, &ptr);
6978: if (name == NULL) {
6979: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6980: ctxt->sax->error(ctxt->userData,
6981: "xmlParseStringPEReference: no name\n");
6982: ctxt->errNo = XML_ERR_NAME_REQUIRED;
6983: ctxt->wellFormed = 0;
1.180 daniel 6984: ctxt->disableSAX = 1;
1.135 daniel 6985: } else {
6986: cur = *ptr;
6987: if (cur == ';') {
6988: ptr++;
6989: cur = *ptr;
6990: if ((ctxt->sax != NULL) &&
6991: (ctxt->sax->getParameterEntity != NULL))
6992: entity = ctxt->sax->getParameterEntity(ctxt->userData,
6993: name);
6994: if (entity == NULL) {
6995: /*
6996: * [ WFC: Entity Declared ]
6997: * In a document without any DTD, a document with only an
6998: * internal DTD subset which contains no parameter entity
6999: * references, or a document with "standalone='yes'", ...
7000: * ... The declaration of a parameter entity must precede
7001: * any reference to it...
7002: */
7003: if ((ctxt->standalone == 1) ||
7004: ((ctxt->hasExternalSubset == 0) &&
7005: (ctxt->hasPErefs == 0))) {
7006: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7007: ctxt->sax->error(ctxt->userData,
7008: "PEReference: %%%s; not found\n", name);
7009: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
7010: ctxt->wellFormed = 0;
1.180 daniel 7011: ctxt->disableSAX = 1;
1.135 daniel 7012: } else {
7013: /*
7014: * [ VC: Entity Declared ]
7015: * In a document with an external subset or external
7016: * parameter entities with "standalone='no'", ...
7017: * ... The declaration of a parameter entity must
7018: * precede any reference to it...
7019: */
7020: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7021: ctxt->sax->warning(ctxt->userData,
7022: "PEReference: %%%s; not found\n", name);
7023: ctxt->valid = 0;
7024: }
7025: } else {
7026: /*
7027: * Internal checking in case the entity quest barfed
7028: */
1.159 daniel 7029: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7030: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.135 daniel 7031: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7032: ctxt->sax->warning(ctxt->userData,
7033: "Internal: %%%s; is not a parameter entity\n", name);
7034: }
7035: }
7036: ctxt->hasPErefs = 1;
7037: } else {
7038: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7039: ctxt->sax->error(ctxt->userData,
7040: "xmlParseStringPEReference: expecting ';'\n");
7041: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
7042: ctxt->wellFormed = 0;
1.180 daniel 7043: ctxt->disableSAX = 1;
1.135 daniel 7044: }
7045: xmlFree(name);
7046: }
7047: }
7048: *str = ptr;
7049: return(entity);
7050: }
7051:
7052: /**
1.181 daniel 7053: * xmlParseDocTypeDecl:
1.50 daniel 7054: * @ctxt: an XML parser context
7055: *
7056: * parse a DOCTYPE declaration
1.21 daniel 7057: *
1.22 daniel 7058: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7059: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.98 daniel 7060: *
7061: * [ VC: Root Element Type ]
1.99 daniel 7062: * The Name in the document type declaration must match the element
1.98 daniel 7063: * type of the root element.
1.21 daniel 7064: */
7065:
1.55 daniel 7066: void
7067: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.165 daniel 7068: xmlChar *name = NULL;
1.123 daniel 7069: xmlChar *ExternalID = NULL;
7070: xmlChar *URI = NULL;
1.21 daniel 7071:
7072: /*
7073: * We know that '<!DOCTYPE' has been detected.
7074: */
1.40 daniel 7075: SKIP(9);
1.21 daniel 7076:
1.42 daniel 7077: SKIP_BLANKS;
1.21 daniel 7078:
7079: /*
7080: * Parse the DOCTYPE name.
7081: */
7082: name = xmlParseName(ctxt);
7083: if (name == NULL) {
1.55 daniel 7084: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7085: ctxt->sax->error(ctxt->userData,
7086: "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.59 daniel 7087: ctxt->wellFormed = 0;
1.180 daniel 7088: ctxt->disableSAX = 1;
1.123 daniel 7089: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.21 daniel 7090: }
1.165 daniel 7091: ctxt->intSubName = name;
1.21 daniel 7092:
1.42 daniel 7093: SKIP_BLANKS;
1.21 daniel 7094:
7095: /*
1.22 daniel 7096: * Check for SystemID and ExternalID
7097: */
1.67 daniel 7098: URI = xmlParseExternalID(ctxt, &ExternalID, 1);
1.98 daniel 7099:
7100: if ((URI != NULL) || (ExternalID != NULL)) {
7101: ctxt->hasExternalSubset = 1;
7102: }
1.165 daniel 7103: ctxt->extSubURI = URI;
7104: ctxt->extSubSystem = ExternalID;
1.98 daniel 7105:
1.42 daniel 7106: SKIP_BLANKS;
1.36 daniel 7107:
1.76 daniel 7108: /*
1.165 daniel 7109: * Create and update the internal subset.
1.76 daniel 7110: */
1.171 daniel 7111: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7112: (!ctxt->disableSAX))
1.74 daniel 7113: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
1.22 daniel 7114:
7115: /*
1.140 daniel 7116: * Is there any internal subset declarations ?
7117: * they are handled separately in xmlParseInternalSubset()
7118: */
1.152 daniel 7119: if (RAW == '[')
1.140 daniel 7120: return;
7121:
7122: /*
7123: * We should be at the end of the DOCTYPE declaration.
7124: */
1.152 daniel 7125: if (RAW != '>') {
1.140 daniel 7126: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7127: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
7128: ctxt->wellFormed = 0;
1.180 daniel 7129: ctxt->disableSAX = 1;
1.140 daniel 7130: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
7131: }
7132: NEXT;
7133: }
7134:
7135: /**
1.181 daniel 7136: * xmlParseInternalsubset:
1.140 daniel 7137: * @ctxt: an XML parser context
7138: *
7139: * parse the internal subset declaration
7140: *
7141: * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7142: */
7143:
7144: void
7145: xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7146: /*
1.22 daniel 7147: * Is there any DTD definition ?
7148: */
1.152 daniel 7149: if (RAW == '[') {
1.96 daniel 7150: ctxt->instate = XML_PARSER_DTD;
1.40 daniel 7151: NEXT;
1.22 daniel 7152: /*
7153: * Parse the succession of Markup declarations and
7154: * PEReferences.
7155: * Subsequence (markupdecl | PEReference | S)*
7156: */
1.152 daniel 7157: while (RAW != ']') {
1.123 daniel 7158: const xmlChar *check = CUR_PTR;
1.115 daniel 7159: int cons = ctxt->input->consumed;
1.22 daniel 7160:
1.42 daniel 7161: SKIP_BLANKS;
1.22 daniel 7162: xmlParseMarkupDecl(ctxt);
1.50 daniel 7163: xmlParsePEReference(ctxt);
1.22 daniel 7164:
1.115 daniel 7165: /*
7166: * Pop-up of finished entities.
7167: */
1.152 daniel 7168: while ((RAW == 0) && (ctxt->inputNr > 1))
1.115 daniel 7169: xmlPopInput(ctxt);
7170:
1.118 daniel 7171: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
1.55 daniel 7172: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7173: ctxt->sax->error(ctxt->userData,
1.140 daniel 7174: "xmlParseInternalSubset: error detected in Markup declaration\n");
1.59 daniel 7175: ctxt->wellFormed = 0;
1.180 daniel 7176: ctxt->disableSAX = 1;
1.123 daniel 7177: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.22 daniel 7178: break;
7179: }
7180: }
1.152 daniel 7181: if (RAW == ']') NEXT;
1.22 daniel 7182: }
7183:
7184: /*
7185: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 7186: */
1.152 daniel 7187: if (RAW != '>') {
1.55 daniel 7188: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7189: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
1.59 daniel 7190: ctxt->wellFormed = 0;
1.180 daniel 7191: ctxt->disableSAX = 1;
1.123 daniel 7192: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
1.21 daniel 7193: }
1.40 daniel 7194: NEXT;
1.21 daniel 7195: }
7196:
1.50 daniel 7197: /**
7198: * xmlParseAttribute:
7199: * @ctxt: an XML parser context
1.123 daniel 7200: * @value: a xmlChar ** used to store the value of the attribute
1.50 daniel 7201: *
7202: * parse an attribute
1.3 veillard 7203: *
1.22 daniel 7204: * [41] Attribute ::= Name Eq AttValue
7205: *
1.98 daniel 7206: * [ WFC: No External Entity References ]
7207: * Attribute values cannot contain direct or indirect entity references
7208: * to external entities.
7209: *
7210: * [ WFC: No < in Attribute Values ]
7211: * The replacement text of any entity referred to directly or indirectly in
7212: * an attribute value (other than "<") must not contain a <.
7213: *
7214: * [ VC: Attribute Value Type ]
1.117 daniel 7215: * The attribute must have been declared; the value must be of the type
1.99 daniel 7216: * declared for it.
1.98 daniel 7217: *
1.22 daniel 7218: * [25] Eq ::= S? '=' S?
7219: *
1.29 daniel 7220: * With namespace:
7221: *
7222: * [NS 11] Attribute ::= QName Eq AttValue
1.43 daniel 7223: *
7224: * Also the case QName == xmlns:??? is handled independently as a namespace
7225: * definition.
1.69 daniel 7226: *
1.72 daniel 7227: * Returns the attribute name, and the value in *value.
1.3 veillard 7228: */
7229:
1.123 daniel 7230: xmlChar *
7231: xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
7232: xmlChar *name, *val;
1.3 veillard 7233:
1.72 daniel 7234: *value = NULL;
7235: name = xmlParseName(ctxt);
1.22 daniel 7236: if (name == NULL) {
1.55 daniel 7237: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7238: ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
1.59 daniel 7239: ctxt->wellFormed = 0;
1.180 daniel 7240: ctxt->disableSAX = 1;
1.123 daniel 7241: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.52 daniel 7242: return(NULL);
1.3 veillard 7243: }
7244:
7245: /*
1.29 daniel 7246: * read the value
1.3 veillard 7247: */
1.42 daniel 7248: SKIP_BLANKS;
1.152 daniel 7249: if (RAW == '=') {
1.40 daniel 7250: NEXT;
1.42 daniel 7251: SKIP_BLANKS;
1.72 daniel 7252: val = xmlParseAttValue(ctxt);
1.96 daniel 7253: ctxt->instate = XML_PARSER_CONTENT;
1.29 daniel 7254: } else {
1.55 daniel 7255: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7256: ctxt->sax->error(ctxt->userData,
1.59 daniel 7257: "Specification mandate value for attribute %s\n", name);
1.123 daniel 7258: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
1.59 daniel 7259: ctxt->wellFormed = 0;
1.180 daniel 7260: ctxt->disableSAX = 1;
1.170 daniel 7261: xmlFree(name);
1.52 daniel 7262: return(NULL);
1.43 daniel 7263: }
7264:
1.172 daniel 7265: /*
7266: * Check that xml:lang conforms to the specification
7267: */
7268: if (!xmlStrcmp(name, BAD_CAST "xml:lang")) {
7269: if (!xmlCheckLanguageID(val)) {
7270: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7271: ctxt->sax->error(ctxt->userData,
7272: "Invalid value for xml:lang : %s\n", val);
7273: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
7274: ctxt->wellFormed = 0;
1.180 daniel 7275: ctxt->disableSAX = 1;
1.172 daniel 7276: }
7277: }
7278:
1.176 daniel 7279: /*
7280: * Check that xml:space conforms to the specification
7281: */
7282: if (!xmlStrcmp(name, BAD_CAST "xml:space")) {
7283: if (!xmlStrcmp(val, BAD_CAST "default"))
7284: *(ctxt->space) = 0;
7285: else if (!xmlStrcmp(val, BAD_CAST "preserve"))
7286: *(ctxt->space) = 1;
7287: else {
7288: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7289: ctxt->sax->error(ctxt->userData,
7290: "Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
7291: val);
7292: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
7293: ctxt->wellFormed = 0;
1.180 daniel 7294: ctxt->disableSAX = 1;
1.176 daniel 7295: }
7296: }
7297:
1.72 daniel 7298: *value = val;
7299: return(name);
1.3 veillard 7300: }
7301:
1.50 daniel 7302: /**
7303: * xmlParseStartTag:
7304: * @ctxt: an XML parser context
7305: *
7306: * parse a start of tag either for rule element or
7307: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 7308: *
7309: * [40] STag ::= '<' Name (S Attribute)* S? '>'
7310: *
1.98 daniel 7311: * [ WFC: Unique Att Spec ]
7312: * No attribute name may appear more than once in the same start-tag or
7313: * empty-element tag.
7314: *
1.29 daniel 7315: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7316: *
1.98 daniel 7317: * [ WFC: Unique Att Spec ]
7318: * No attribute name may appear more than once in the same start-tag or
7319: * empty-element tag.
7320: *
1.29 daniel 7321: * With namespace:
7322: *
7323: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7324: *
7325: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.83 daniel 7326: *
1.129 daniel 7327: * Returne the element name parsed
1.2 veillard 7328: */
7329:
1.123 daniel 7330: xmlChar *
1.69 daniel 7331: xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 7332: xmlChar *name;
7333: xmlChar *attname;
7334: xmlChar *attvalue;
7335: const xmlChar **atts = NULL;
1.72 daniel 7336: int nbatts = 0;
7337: int maxatts = 0;
7338: int i;
1.2 veillard 7339:
1.152 daniel 7340: if (RAW != '<') return(NULL);
1.40 daniel 7341: NEXT;
1.3 veillard 7342:
1.72 daniel 7343: name = xmlParseName(ctxt);
1.59 daniel 7344: if (name == NULL) {
7345: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7346: ctxt->sax->error(ctxt->userData,
1.59 daniel 7347: "xmlParseStartTag: invalid element name\n");
1.123 daniel 7348: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 7349: ctxt->wellFormed = 0;
1.180 daniel 7350: ctxt->disableSAX = 1;
1.83 daniel 7351: return(NULL);
1.50 daniel 7352: }
7353:
7354: /*
1.3 veillard 7355: * Now parse the attributes, it ends up with the ending
7356: *
7357: * (S Attribute)* S?
7358: */
1.42 daniel 7359: SKIP_BLANKS;
1.91 daniel 7360: GROW;
1.168 daniel 7361:
1.153 daniel 7362: while ((IS_CHAR(RAW)) &&
1.152 daniel 7363: (RAW != '>') &&
7364: ((RAW != '/') || (NXT(1) != '>'))) {
1.123 daniel 7365: const xmlChar *q = CUR_PTR;
1.91 daniel 7366: int cons = ctxt->input->consumed;
1.29 daniel 7367:
1.72 daniel 7368: attname = xmlParseAttribute(ctxt, &attvalue);
7369: if ((attname != NULL) && (attvalue != NULL)) {
7370: /*
1.98 daniel 7371: * [ WFC: Unique Att Spec ]
7372: * No attribute name may appear more than once in the same
7373: * start-tag or empty-element tag.
1.72 daniel 7374: */
7375: for (i = 0; i < nbatts;i += 2) {
7376: if (!xmlStrcmp(atts[i], attname)) {
7377: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 7378: ctxt->sax->error(ctxt->userData,
7379: "Attribute %s redefined\n",
7380: attname);
1.72 daniel 7381: ctxt->wellFormed = 0;
1.180 daniel 7382: ctxt->disableSAX = 1;
1.123 daniel 7383: ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
1.119 daniel 7384: xmlFree(attname);
7385: xmlFree(attvalue);
1.98 daniel 7386: goto failed;
1.72 daniel 7387: }
7388: }
7389:
7390: /*
7391: * Add the pair to atts
7392: */
7393: if (atts == NULL) {
7394: maxatts = 10;
1.123 daniel 7395: atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
1.72 daniel 7396: if (atts == NULL) {
1.86 daniel 7397: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 7398: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 7399: return(NULL);
1.72 daniel 7400: }
1.127 daniel 7401: } else if (nbatts + 4 > maxatts) {
1.72 daniel 7402: maxatts *= 2;
1.123 daniel 7403: atts = (const xmlChar **) xmlRealloc(atts,
7404: maxatts * sizeof(xmlChar *));
1.72 daniel 7405: if (atts == NULL) {
1.86 daniel 7406: fprintf(stderr, "realloc of %ld byte failed\n",
1.123 daniel 7407: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 7408: return(NULL);
1.72 daniel 7409: }
7410: }
7411: atts[nbatts++] = attname;
7412: atts[nbatts++] = attvalue;
7413: atts[nbatts] = NULL;
7414: atts[nbatts + 1] = NULL;
1.176 daniel 7415: } else {
7416: if (attname != NULL)
7417: xmlFree(attname);
7418: if (attvalue != NULL)
7419: xmlFree(attvalue);
1.72 daniel 7420: }
7421:
1.116 daniel 7422: failed:
1.168 daniel 7423:
7424: if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7425: break;
7426: if (!IS_BLANK(RAW)) {
7427: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7428: ctxt->sax->error(ctxt->userData,
7429: "attributes construct error\n");
7430: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7431: ctxt->wellFormed = 0;
1.180 daniel 7432: ctxt->disableSAX = 1;
1.168 daniel 7433: }
1.42 daniel 7434: SKIP_BLANKS;
1.91 daniel 7435: if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
1.55 daniel 7436: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7437: ctxt->sax->error(ctxt->userData,
1.31 daniel 7438: "xmlParseStartTag: problem parsing attributes\n");
1.123 daniel 7439: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 7440: ctxt->wellFormed = 0;
1.180 daniel 7441: ctxt->disableSAX = 1;
1.29 daniel 7442: break;
1.3 veillard 7443: }
1.91 daniel 7444: GROW;
1.3 veillard 7445: }
7446:
1.43 daniel 7447: /*
1.72 daniel 7448: * SAX: Start of Element !
1.43 daniel 7449: */
1.171 daniel 7450: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
7451: (!ctxt->disableSAX))
1.74 daniel 7452: ctxt->sax->startElement(ctxt->userData, name, atts);
1.43 daniel 7453:
1.72 daniel 7454: if (atts != NULL) {
1.123 daniel 7455: for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
1.119 daniel 7456: xmlFree(atts);
1.72 daniel 7457: }
1.83 daniel 7458: return(name);
1.3 veillard 7459: }
7460:
1.50 daniel 7461: /**
7462: * xmlParseEndTag:
7463: * @ctxt: an XML parser context
7464: *
7465: * parse an end of tag
1.27 daniel 7466: *
7467: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 7468: *
7469: * With namespace
7470: *
1.72 daniel 7471: * [NS 9] ETag ::= '</' QName S? '>'
1.7 veillard 7472: */
7473:
1.55 daniel 7474: void
1.140 daniel 7475: xmlParseEndTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 7476: xmlChar *name;
1.140 daniel 7477: xmlChar *oldname;
1.7 veillard 7478:
1.91 daniel 7479: GROW;
1.152 daniel 7480: if ((RAW != '<') || (NXT(1) != '/')) {
1.55 daniel 7481: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7482: ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
1.59 daniel 7483: ctxt->wellFormed = 0;
1.180 daniel 7484: ctxt->disableSAX = 1;
1.123 daniel 7485: ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
1.27 daniel 7486: return;
7487: }
1.40 daniel 7488: SKIP(2);
1.7 veillard 7489:
1.72 daniel 7490: name = xmlParseName(ctxt);
1.7 veillard 7491:
7492: /*
7493: * We should definitely be at the ending "S? '>'" part
7494: */
1.91 daniel 7495: GROW;
1.42 daniel 7496: SKIP_BLANKS;
1.153 daniel 7497: if ((!IS_CHAR(RAW)) || (RAW != '>')) {
1.55 daniel 7498: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7499: ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
1.123 daniel 7500: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.59 daniel 7501: ctxt->wellFormed = 0;
1.180 daniel 7502: ctxt->disableSAX = 1;
1.7 veillard 7503: } else
1.40 daniel 7504: NEXT;
1.7 veillard 7505:
1.72 daniel 7506: /*
1.98 daniel 7507: * [ WFC: Element Type Match ]
7508: * The Name in an element's end-tag must match the element type in the
7509: * start-tag.
7510: *
1.83 daniel 7511: */
1.147 daniel 7512: if ((name == NULL) || (ctxt->name == NULL) ||
7513: (xmlStrcmp(name, ctxt->name))) {
7514: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
7515: if ((name != NULL) && (ctxt->name != NULL)) {
7516: ctxt->sax->error(ctxt->userData,
7517: "Opening and ending tag mismatch: %s and %s\n",
7518: ctxt->name, name);
7519: } else if (ctxt->name != NULL) {
7520: ctxt->sax->error(ctxt->userData,
7521: "Ending tag eror for: %s\n", ctxt->name);
7522: } else {
7523: ctxt->sax->error(ctxt->userData,
7524: "Ending tag error: internal error ???\n");
7525: }
1.122 daniel 7526:
1.147 daniel 7527: }
1.123 daniel 7528: ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
1.83 daniel 7529: ctxt->wellFormed = 0;
1.180 daniel 7530: ctxt->disableSAX = 1;
1.83 daniel 7531: }
7532:
7533: /*
1.72 daniel 7534: * SAX: End of Tag
7535: */
1.171 daniel 7536: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7537: (!ctxt->disableSAX))
1.74 daniel 7538: ctxt->sax->endElement(ctxt->userData, name);
1.72 daniel 7539:
7540: if (name != NULL)
1.119 daniel 7541: xmlFree(name);
1.140 daniel 7542: oldname = namePop(ctxt);
1.176 daniel 7543: spacePop(ctxt);
1.140 daniel 7544: if (oldname != NULL) {
7545: #ifdef DEBUG_STACK
7546: fprintf(stderr,"Close: popped %s\n", oldname);
7547: #endif
7548: xmlFree(oldname);
7549: }
1.7 veillard 7550: return;
7551: }
7552:
1.50 daniel 7553: /**
7554: * xmlParseCDSect:
7555: * @ctxt: an XML parser context
7556: *
7557: * Parse escaped pure raw content.
1.29 daniel 7558: *
7559: * [18] CDSect ::= CDStart CData CDEnd
7560: *
7561: * [19] CDStart ::= '<![CDATA['
7562: *
7563: * [20] Data ::= (Char* - (Char* ']]>' Char*))
7564: *
7565: * [21] CDEnd ::= ']]>'
1.3 veillard 7566: */
1.55 daniel 7567: void
7568: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.135 daniel 7569: xmlChar *buf = NULL;
7570: int len = 0;
1.140 daniel 7571: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 7572: int r, rl;
7573: int s, sl;
7574: int cur, l;
1.3 veillard 7575:
1.106 daniel 7576: if ((NXT(0) == '<') && (NXT(1) == '!') &&
1.40 daniel 7577: (NXT(2) == '[') && (NXT(3) == 'C') &&
7578: (NXT(4) == 'D') && (NXT(5) == 'A') &&
7579: (NXT(6) == 'T') && (NXT(7) == 'A') &&
7580: (NXT(8) == '[')) {
7581: SKIP(9);
1.29 daniel 7582: } else
1.45 daniel 7583: return;
1.109 daniel 7584:
7585: ctxt->instate = XML_PARSER_CDATA_SECTION;
1.152 daniel 7586: r = CUR_CHAR(rl);
7587: if (!IS_CHAR(r)) {
1.55 daniel 7588: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7589: ctxt->sax->error(ctxt->userData,
1.135 daniel 7590: "CData section not finished\n");
1.59 daniel 7591: ctxt->wellFormed = 0;
1.180 daniel 7592: ctxt->disableSAX = 1;
1.123 daniel 7593: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.109 daniel 7594: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 7595: return;
1.3 veillard 7596: }
1.152 daniel 7597: NEXTL(rl);
7598: s = CUR_CHAR(sl);
7599: if (!IS_CHAR(s)) {
1.55 daniel 7600: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7601: ctxt->sax->error(ctxt->userData,
1.135 daniel 7602: "CData section not finished\n");
1.123 daniel 7603: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.59 daniel 7604: ctxt->wellFormed = 0;
1.180 daniel 7605: ctxt->disableSAX = 1;
1.109 daniel 7606: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 7607: return;
1.3 veillard 7608: }
1.152 daniel 7609: NEXTL(sl);
7610: cur = CUR_CHAR(l);
1.135 daniel 7611: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7612: if (buf == NULL) {
7613: fprintf(stderr, "malloc of %d byte failed\n", size);
7614: return;
7615: }
1.108 veillard 7616: while (IS_CHAR(cur) &&
1.110 daniel 7617: ((r != ']') || (s != ']') || (cur != '>'))) {
1.152 daniel 7618: if (len + 5 >= size) {
1.135 daniel 7619: size *= 2;
7620: buf = xmlRealloc(buf, size * sizeof(xmlChar));
7621: if (buf == NULL) {
7622: fprintf(stderr, "realloc of %d byte failed\n", size);
7623: return;
7624: }
7625: }
1.152 daniel 7626: COPY_BUF(rl,buf,len,r);
1.110 daniel 7627: r = s;
1.152 daniel 7628: rl = sl;
1.110 daniel 7629: s = cur;
1.152 daniel 7630: sl = l;
7631: NEXTL(l);
7632: cur = CUR_CHAR(l);
1.3 veillard 7633: }
1.135 daniel 7634: buf[len] = 0;
1.109 daniel 7635: ctxt->instate = XML_PARSER_CONTENT;
1.152 daniel 7636: if (cur != '>') {
1.55 daniel 7637: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7638: ctxt->sax->error(ctxt->userData,
1.135 daniel 7639: "CData section not finished\n%.50s\n", buf);
1.123 daniel 7640: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.59 daniel 7641: ctxt->wellFormed = 0;
1.180 daniel 7642: ctxt->disableSAX = 1;
1.135 daniel 7643: xmlFree(buf);
1.45 daniel 7644: return;
1.3 veillard 7645: }
1.152 daniel 7646: NEXTL(l);
1.16 daniel 7647:
1.45 daniel 7648: /*
1.135 daniel 7649: * Ok the buffer is to be consumed as cdata.
1.45 daniel 7650: */
1.171 daniel 7651: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.107 daniel 7652: if (ctxt->sax->cdataBlock != NULL)
1.135 daniel 7653: ctxt->sax->cdataBlock(ctxt->userData, buf, len);
1.45 daniel 7654: }
1.135 daniel 7655: xmlFree(buf);
1.2 veillard 7656: }
7657:
1.50 daniel 7658: /**
7659: * xmlParseContent:
7660: * @ctxt: an XML parser context
7661: *
7662: * Parse a content:
1.2 veillard 7663: *
1.27 daniel 7664: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 7665: */
7666:
1.55 daniel 7667: void
7668: xmlParseContent(xmlParserCtxtPtr ctxt) {
1.97 daniel 7669: GROW;
1.176 daniel 7670: while (((RAW != 0) || (ctxt->token != 0)) &&
7671: ((RAW != '<') || (NXT(1) != '/'))) {
1.123 daniel 7672: const xmlChar *test = CUR_PTR;
1.91 daniel 7673: int cons = ctxt->input->consumed;
1.123 daniel 7674: xmlChar tok = ctxt->token;
1.27 daniel 7675:
7676: /*
1.152 daniel 7677: * Handle possible processed charrefs.
7678: */
7679: if (ctxt->token != 0) {
7680: xmlParseCharData(ctxt, 0);
7681: }
7682: /*
1.27 daniel 7683: * First case : a Processing Instruction.
7684: */
1.152 daniel 7685: else if ((RAW == '<') && (NXT(1) == '?')) {
1.27 daniel 7686: xmlParsePI(ctxt);
7687: }
1.72 daniel 7688:
1.27 daniel 7689: /*
7690: * Second case : a CDSection
7691: */
1.152 daniel 7692: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7693: (NXT(2) == '[') && (NXT(3) == 'C') &&
7694: (NXT(4) == 'D') && (NXT(5) == 'A') &&
7695: (NXT(6) == 'T') && (NXT(7) == 'A') &&
7696: (NXT(8) == '[')) {
1.45 daniel 7697: xmlParseCDSect(ctxt);
1.27 daniel 7698: }
1.72 daniel 7699:
1.27 daniel 7700: /*
7701: * Third case : a comment
7702: */
1.152 daniel 7703: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7704: (NXT(2) == '-') && (NXT(3) == '-')) {
1.114 daniel 7705: xmlParseComment(ctxt);
1.97 daniel 7706: ctxt->instate = XML_PARSER_CONTENT;
1.27 daniel 7707: }
1.72 daniel 7708:
1.27 daniel 7709: /*
7710: * Fourth case : a sub-element.
7711: */
1.152 daniel 7712: else if (RAW == '<') {
1.72 daniel 7713: xmlParseElement(ctxt);
1.45 daniel 7714: }
1.72 daniel 7715:
1.45 daniel 7716: /*
1.50 daniel 7717: * Fifth case : a reference. If if has not been resolved,
7718: * parsing returns it's Name, create the node
1.45 daniel 7719: */
1.97 daniel 7720:
1.152 daniel 7721: else if (RAW == '&') {
1.77 daniel 7722: xmlParseReference(ctxt);
1.27 daniel 7723: }
1.72 daniel 7724:
1.27 daniel 7725: /*
7726: * Last case, text. Note that References are handled directly.
7727: */
7728: else {
1.45 daniel 7729: xmlParseCharData(ctxt, 0);
1.3 veillard 7730: }
1.14 veillard 7731:
1.91 daniel 7732: GROW;
1.14 veillard 7733: /*
1.45 daniel 7734: * Pop-up of finished entities.
1.14 veillard 7735: */
1.152 daniel 7736: while ((RAW == 0) && (ctxt->inputNr > 1))
1.69 daniel 7737: xmlPopInput(ctxt);
1.135 daniel 7738: SHRINK;
1.45 daniel 7739:
1.113 daniel 7740: if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
7741: (tok == ctxt->token)) {
1.55 daniel 7742: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7743: ctxt->sax->error(ctxt->userData,
1.59 daniel 7744: "detected an error in element content\n");
1.123 daniel 7745: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 7746: ctxt->wellFormed = 0;
1.180 daniel 7747: ctxt->disableSAX = 1;
1.29 daniel 7748: break;
7749: }
1.3 veillard 7750: }
1.2 veillard 7751: }
7752:
1.50 daniel 7753: /**
7754: * xmlParseElement:
7755: * @ctxt: an XML parser context
7756: *
7757: * parse an XML element, this is highly recursive
1.26 daniel 7758: *
7759: * [39] element ::= EmptyElemTag | STag content ETag
7760: *
1.98 daniel 7761: * [ WFC: Element Type Match ]
7762: * The Name in an element's end-tag must match the element type in the
7763: * start-tag.
7764: *
7765: * [ VC: Element Valid ]
1.117 daniel 7766: * An element is valid if there is a declaration matching elementdecl
1.99 daniel 7767: * where the Name matches the element type and one of the following holds:
7768: * - The declaration matches EMPTY and the element has no content.
7769: * - The declaration matches children and the sequence of child elements
7770: * belongs to the language generated by the regular expression in the
7771: * content model, with optional white space (characters matching the
7772: * nonterminal S) between each pair of child elements.
7773: * - The declaration matches Mixed and the content consists of character
7774: * data and child elements whose types match names in the content model.
7775: * - The declaration matches ANY, and the types of any child elements have
7776: * been declared.
1.2 veillard 7777: */
1.26 daniel 7778:
1.72 daniel 7779: void
1.69 daniel 7780: xmlParseElement(xmlParserCtxtPtr ctxt) {
1.123 daniel 7781: const xmlChar *openTag = CUR_PTR;
7782: xmlChar *name;
1.140 daniel 7783: xmlChar *oldname;
1.32 daniel 7784: xmlParserNodeInfo node_info;
1.118 daniel 7785: xmlNodePtr ret;
1.2 veillard 7786:
1.32 daniel 7787: /* Capture start position */
1.118 daniel 7788: if (ctxt->record_info) {
7789: node_info.begin_pos = ctxt->input->consumed +
7790: (CUR_PTR - ctxt->input->base);
7791: node_info.begin_line = ctxt->input->line;
7792: }
1.32 daniel 7793:
1.176 daniel 7794: if (ctxt->spaceNr == 0)
7795: spacePush(ctxt, -1);
7796: else
7797: spacePush(ctxt, *ctxt->space);
7798:
1.83 daniel 7799: name = xmlParseStartTag(ctxt);
7800: if (name == NULL) {
1.176 daniel 7801: spacePop(ctxt);
1.83 daniel 7802: return;
7803: }
1.140 daniel 7804: namePush(ctxt, name);
1.118 daniel 7805: ret = ctxt->node;
1.2 veillard 7806:
7807: /*
1.99 daniel 7808: * [ VC: Root Element Type ]
7809: * The Name in the document type declaration must match the element
7810: * type of the root element.
7811: */
1.105 daniel 7812: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 7813: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.102 daniel 7814: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
1.99 daniel 7815:
7816: /*
1.2 veillard 7817: * Check for an Empty Element.
7818: */
1.152 daniel 7819: if ((RAW == '/') && (NXT(1) == '>')) {
1.40 daniel 7820: SKIP(2);
1.171 daniel 7821: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7822: (!ctxt->disableSAX))
1.83 daniel 7823: ctxt->sax->endElement(ctxt->userData, name);
1.140 daniel 7824: oldname = namePop(ctxt);
1.176 daniel 7825: spacePop(ctxt);
1.140 daniel 7826: if (oldname != NULL) {
7827: #ifdef DEBUG_STACK
7828: fprintf(stderr,"Close: popped %s\n", oldname);
7829: #endif
7830: xmlFree(oldname);
7831: }
1.72 daniel 7832: return;
1.2 veillard 7833: }
1.152 daniel 7834: if (RAW == '>') {
1.91 daniel 7835: NEXT;
7836: } else {
1.55 daniel 7837: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7838: ctxt->sax->error(ctxt->userData,
7839: "Couldn't find end of Start Tag\n%.30s\n",
1.57 daniel 7840: openTag);
1.59 daniel 7841: ctxt->wellFormed = 0;
1.180 daniel 7842: ctxt->disableSAX = 1;
1.123 daniel 7843: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.45 daniel 7844:
7845: /*
7846: * end of parsing of this node.
7847: */
7848: nodePop(ctxt);
1.140 daniel 7849: oldname = namePop(ctxt);
1.176 daniel 7850: spacePop(ctxt);
1.140 daniel 7851: if (oldname != NULL) {
7852: #ifdef DEBUG_STACK
7853: fprintf(stderr,"Close: popped %s\n", oldname);
7854: #endif
7855: xmlFree(oldname);
7856: }
1.118 daniel 7857:
7858: /*
7859: * Capture end position and add node
7860: */
7861: if ( ret != NULL && ctxt->record_info ) {
7862: node_info.end_pos = ctxt->input->consumed +
7863: (CUR_PTR - ctxt->input->base);
7864: node_info.end_line = ctxt->input->line;
7865: node_info.node = ret;
7866: xmlParserAddNodeInfo(ctxt, &node_info);
7867: }
1.72 daniel 7868: return;
1.2 veillard 7869: }
7870:
7871: /*
7872: * Parse the content of the element:
7873: */
1.45 daniel 7874: xmlParseContent(ctxt);
1.153 daniel 7875: if (!IS_CHAR(RAW)) {
1.55 daniel 7876: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7877: ctxt->sax->error(ctxt->userData,
1.57 daniel 7878: "Premature end of data in tag %.30s\n", openTag);
1.59 daniel 7879: ctxt->wellFormed = 0;
1.180 daniel 7880: ctxt->disableSAX = 1;
1.123 daniel 7881: ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
1.45 daniel 7882:
7883: /*
7884: * end of parsing of this node.
7885: */
7886: nodePop(ctxt);
1.140 daniel 7887: oldname = namePop(ctxt);
1.176 daniel 7888: spacePop(ctxt);
1.140 daniel 7889: if (oldname != NULL) {
7890: #ifdef DEBUG_STACK
7891: fprintf(stderr,"Close: popped %s\n", oldname);
7892: #endif
7893: xmlFree(oldname);
7894: }
1.72 daniel 7895: return;
1.2 veillard 7896: }
7897:
7898: /*
1.27 daniel 7899: * parse the end of tag: '</' should be here.
1.2 veillard 7900: */
1.140 daniel 7901: xmlParseEndTag(ctxt);
1.118 daniel 7902:
7903: /*
7904: * Capture end position and add node
7905: */
7906: if ( ret != NULL && ctxt->record_info ) {
7907: node_info.end_pos = ctxt->input->consumed +
7908: (CUR_PTR - ctxt->input->base);
7909: node_info.end_line = ctxt->input->line;
7910: node_info.node = ret;
7911: xmlParserAddNodeInfo(ctxt, &node_info);
7912: }
1.2 veillard 7913: }
7914:
1.50 daniel 7915: /**
7916: * xmlParseVersionNum:
7917: * @ctxt: an XML parser context
7918: *
7919: * parse the XML version value.
1.29 daniel 7920: *
7921: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
1.68 daniel 7922: *
7923: * Returns the string giving the XML version number, or NULL
1.29 daniel 7924: */
1.123 daniel 7925: xmlChar *
1.55 daniel 7926: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.135 daniel 7927: xmlChar *buf = NULL;
7928: int len = 0;
7929: int size = 10;
7930: xmlChar cur;
1.29 daniel 7931:
1.135 daniel 7932: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7933: if (buf == NULL) {
7934: fprintf(stderr, "malloc of %d byte failed\n", size);
7935: return(NULL);
7936: }
7937: cur = CUR;
1.152 daniel 7938: while (((cur >= 'a') && (cur <= 'z')) ||
7939: ((cur >= 'A') && (cur <= 'Z')) ||
7940: ((cur >= '0') && (cur <= '9')) ||
7941: (cur == '_') || (cur == '.') ||
7942: (cur == ':') || (cur == '-')) {
1.135 daniel 7943: if (len + 1 >= size) {
7944: size *= 2;
7945: buf = xmlRealloc(buf, size * sizeof(xmlChar));
7946: if (buf == NULL) {
7947: fprintf(stderr, "realloc of %d byte failed\n", size);
7948: return(NULL);
7949: }
7950: }
7951: buf[len++] = cur;
7952: NEXT;
7953: cur=CUR;
7954: }
7955: buf[len] = 0;
7956: return(buf);
1.29 daniel 7957: }
7958:
1.50 daniel 7959: /**
7960: * xmlParseVersionInfo:
7961: * @ctxt: an XML parser context
7962: *
7963: * parse the XML version.
1.29 daniel 7964: *
7965: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7966: *
7967: * [25] Eq ::= S? '=' S?
1.50 daniel 7968: *
1.68 daniel 7969: * Returns the version string, e.g. "1.0"
1.29 daniel 7970: */
7971:
1.123 daniel 7972: xmlChar *
1.55 daniel 7973: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
1.123 daniel 7974: xmlChar *version = NULL;
7975: const xmlChar *q;
1.29 daniel 7976:
1.152 daniel 7977: if ((RAW == 'v') && (NXT(1) == 'e') &&
1.40 daniel 7978: (NXT(2) == 'r') && (NXT(3) == 's') &&
7979: (NXT(4) == 'i') && (NXT(5) == 'o') &&
7980: (NXT(6) == 'n')) {
7981: SKIP(7);
1.42 daniel 7982: SKIP_BLANKS;
1.152 daniel 7983: if (RAW != '=') {
1.55 daniel 7984: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7985: ctxt->sax->error(ctxt->userData,
7986: "xmlParseVersionInfo : expected '='\n");
1.59 daniel 7987: ctxt->wellFormed = 0;
1.180 daniel 7988: ctxt->disableSAX = 1;
1.123 daniel 7989: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.31 daniel 7990: return(NULL);
7991: }
1.40 daniel 7992: NEXT;
1.42 daniel 7993: SKIP_BLANKS;
1.152 daniel 7994: if (RAW == '"') {
1.40 daniel 7995: NEXT;
7996: q = CUR_PTR;
1.29 daniel 7997: version = xmlParseVersionNum(ctxt);
1.152 daniel 7998: if (RAW != '"') {
1.55 daniel 7999: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8000: ctxt->sax->error(ctxt->userData,
8001: "String not closed\n%.50s\n", q);
1.59 daniel 8002: ctxt->wellFormed = 0;
1.180 daniel 8003: ctxt->disableSAX = 1;
1.123 daniel 8004: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8005: } else
1.40 daniel 8006: NEXT;
1.152 daniel 8007: } else if (RAW == '\''){
1.40 daniel 8008: NEXT;
8009: q = CUR_PTR;
1.29 daniel 8010: version = xmlParseVersionNum(ctxt);
1.152 daniel 8011: if (RAW != '\'') {
1.55 daniel 8012: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8013: ctxt->sax->error(ctxt->userData,
8014: "String not closed\n%.50s\n", q);
1.123 daniel 8015: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.59 daniel 8016: ctxt->wellFormed = 0;
1.180 daniel 8017: ctxt->disableSAX = 1;
1.55 daniel 8018: } else
1.40 daniel 8019: NEXT;
1.31 daniel 8020: } else {
1.55 daniel 8021: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8022: ctxt->sax->error(ctxt->userData,
1.59 daniel 8023: "xmlParseVersionInfo : expected ' or \"\n");
1.122 daniel 8024: ctxt->wellFormed = 0;
1.180 daniel 8025: ctxt->disableSAX = 1;
1.123 daniel 8026: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.29 daniel 8027: }
8028: }
8029: return(version);
8030: }
8031:
1.50 daniel 8032: /**
8033: * xmlParseEncName:
8034: * @ctxt: an XML parser context
8035: *
8036: * parse the XML encoding name
1.29 daniel 8037: *
8038: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1.50 daniel 8039: *
1.68 daniel 8040: * Returns the encoding name value or NULL
1.29 daniel 8041: */
1.123 daniel 8042: xmlChar *
1.55 daniel 8043: xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.135 daniel 8044: xmlChar *buf = NULL;
8045: int len = 0;
8046: int size = 10;
8047: xmlChar cur;
1.29 daniel 8048:
1.135 daniel 8049: cur = CUR;
8050: if (((cur >= 'a') && (cur <= 'z')) ||
8051: ((cur >= 'A') && (cur <= 'Z'))) {
8052: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
8053: if (buf == NULL) {
8054: fprintf(stderr, "malloc of %d byte failed\n", size);
8055: return(NULL);
8056: }
8057:
8058: buf[len++] = cur;
1.40 daniel 8059: NEXT;
1.135 daniel 8060: cur = CUR;
1.152 daniel 8061: while (((cur >= 'a') && (cur <= 'z')) ||
8062: ((cur >= 'A') && (cur <= 'Z')) ||
8063: ((cur >= '0') && (cur <= '9')) ||
8064: (cur == '.') || (cur == '_') ||
8065: (cur == '-')) {
1.135 daniel 8066: if (len + 1 >= size) {
8067: size *= 2;
8068: buf = xmlRealloc(buf, size * sizeof(xmlChar));
8069: if (buf == NULL) {
8070: fprintf(stderr, "realloc of %d byte failed\n", size);
8071: return(NULL);
8072: }
8073: }
8074: buf[len++] = cur;
8075: NEXT;
8076: cur = CUR;
8077: if (cur == 0) {
8078: SHRINK;
8079: GROW;
8080: cur = CUR;
8081: }
8082: }
8083: buf[len] = 0;
1.29 daniel 8084: } else {
1.55 daniel 8085: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8086: ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
1.59 daniel 8087: ctxt->wellFormed = 0;
1.180 daniel 8088: ctxt->disableSAX = 1;
1.123 daniel 8089: ctxt->errNo = XML_ERR_ENCODING_NAME;
1.29 daniel 8090: }
1.135 daniel 8091: return(buf);
1.29 daniel 8092: }
8093:
1.50 daniel 8094: /**
8095: * xmlParseEncodingDecl:
8096: * @ctxt: an XML parser context
8097: *
8098: * parse the XML encoding declaration
1.29 daniel 8099: *
8100: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
1.50 daniel 8101: *
8102: * TODO: this should setup the conversion filters.
8103: *
1.68 daniel 8104: * Returns the encoding value or NULL
1.29 daniel 8105: */
8106:
1.123 daniel 8107: xmlChar *
1.55 daniel 8108: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 8109: xmlChar *encoding = NULL;
8110: const xmlChar *q;
1.29 daniel 8111:
1.42 daniel 8112: SKIP_BLANKS;
1.152 daniel 8113: if ((RAW == 'e') && (NXT(1) == 'n') &&
1.40 daniel 8114: (NXT(2) == 'c') && (NXT(3) == 'o') &&
8115: (NXT(4) == 'd') && (NXT(5) == 'i') &&
8116: (NXT(6) == 'n') && (NXT(7) == 'g')) {
8117: SKIP(8);
1.42 daniel 8118: SKIP_BLANKS;
1.152 daniel 8119: if (RAW != '=') {
1.55 daniel 8120: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8121: ctxt->sax->error(ctxt->userData,
8122: "xmlParseEncodingDecl : expected '='\n");
1.59 daniel 8123: ctxt->wellFormed = 0;
1.180 daniel 8124: ctxt->disableSAX = 1;
1.123 daniel 8125: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.31 daniel 8126: return(NULL);
8127: }
1.40 daniel 8128: NEXT;
1.42 daniel 8129: SKIP_BLANKS;
1.152 daniel 8130: if (RAW == '"') {
1.40 daniel 8131: NEXT;
8132: q = CUR_PTR;
1.29 daniel 8133: encoding = xmlParseEncName(ctxt);
1.152 daniel 8134: if (RAW != '"') {
1.55 daniel 8135: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8136: ctxt->sax->error(ctxt->userData,
8137: "String not closed\n%.50s\n", q);
1.59 daniel 8138: ctxt->wellFormed = 0;
1.180 daniel 8139: ctxt->disableSAX = 1;
1.123 daniel 8140: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8141: } else
1.40 daniel 8142: NEXT;
1.152 daniel 8143: } else if (RAW == '\''){
1.40 daniel 8144: NEXT;
8145: q = CUR_PTR;
1.29 daniel 8146: encoding = xmlParseEncName(ctxt);
1.152 daniel 8147: if (RAW != '\'') {
1.55 daniel 8148: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8149: ctxt->sax->error(ctxt->userData,
8150: "String not closed\n%.50s\n", q);
1.59 daniel 8151: ctxt->wellFormed = 0;
1.180 daniel 8152: ctxt->disableSAX = 1;
1.123 daniel 8153: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8154: } else
1.40 daniel 8155: NEXT;
1.152 daniel 8156: } else if (RAW == '"'){
1.55 daniel 8157: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8158: ctxt->sax->error(ctxt->userData,
1.59 daniel 8159: "xmlParseEncodingDecl : expected ' or \"\n");
8160: ctxt->wellFormed = 0;
1.180 daniel 8161: ctxt->disableSAX = 1;
1.123 daniel 8162: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.29 daniel 8163: }
8164: }
8165: return(encoding);
8166: }
8167:
1.50 daniel 8168: /**
8169: * xmlParseSDDecl:
8170: * @ctxt: an XML parser context
8171: *
8172: * parse the XML standalone declaration
1.29 daniel 8173: *
8174: * [32] SDDecl ::= S 'standalone' Eq
8175: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
1.99 daniel 8176: *
8177: * [ VC: Standalone Document Declaration ]
8178: * TODO The standalone document declaration must have the value "no"
8179: * if any external markup declarations contain declarations of:
8180: * - attributes with default values, if elements to which these
8181: * attributes apply appear in the document without specifications
8182: * of values for these attributes, or
8183: * - entities (other than amp, lt, gt, apos, quot), if references
8184: * to those entities appear in the document, or
8185: * - attributes with values subject to normalization, where the
8186: * attribute appears in the document with a value which will change
8187: * as a result of normalization, or
8188: * - element types with element content, if white space occurs directly
8189: * within any instance of those types.
1.68 daniel 8190: *
8191: * Returns 1 if standalone, 0 otherwise
1.29 daniel 8192: */
8193:
1.55 daniel 8194: int
8195: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 8196: int standalone = -1;
8197:
1.42 daniel 8198: SKIP_BLANKS;
1.152 daniel 8199: if ((RAW == 's') && (NXT(1) == 't') &&
1.40 daniel 8200: (NXT(2) == 'a') && (NXT(3) == 'n') &&
8201: (NXT(4) == 'd') && (NXT(5) == 'a') &&
8202: (NXT(6) == 'l') && (NXT(7) == 'o') &&
8203: (NXT(8) == 'n') && (NXT(9) == 'e')) {
8204: SKIP(10);
1.81 daniel 8205: SKIP_BLANKS;
1.152 daniel 8206: if (RAW != '=') {
1.55 daniel 8207: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8208: ctxt->sax->error(ctxt->userData,
1.59 daniel 8209: "XML standalone declaration : expected '='\n");
1.123 daniel 8210: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.59 daniel 8211: ctxt->wellFormed = 0;
1.180 daniel 8212: ctxt->disableSAX = 1;
1.32 daniel 8213: return(standalone);
8214: }
1.40 daniel 8215: NEXT;
1.42 daniel 8216: SKIP_BLANKS;
1.152 daniel 8217: if (RAW == '\''){
1.40 daniel 8218: NEXT;
1.152 daniel 8219: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 8220: standalone = 0;
1.40 daniel 8221: SKIP(2);
1.152 daniel 8222: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 8223: (NXT(2) == 's')) {
1.29 daniel 8224: standalone = 1;
1.40 daniel 8225: SKIP(3);
1.29 daniel 8226: } else {
1.55 daniel 8227: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8228: ctxt->sax->error(ctxt->userData,
8229: "standalone accepts only 'yes' or 'no'\n");
1.123 daniel 8230: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.59 daniel 8231: ctxt->wellFormed = 0;
1.180 daniel 8232: ctxt->disableSAX = 1;
1.29 daniel 8233: }
1.152 daniel 8234: if (RAW != '\'') {
1.55 daniel 8235: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8236: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.123 daniel 8237: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.59 daniel 8238: ctxt->wellFormed = 0;
1.180 daniel 8239: ctxt->disableSAX = 1;
1.55 daniel 8240: } else
1.40 daniel 8241: NEXT;
1.152 daniel 8242: } else if (RAW == '"'){
1.40 daniel 8243: NEXT;
1.152 daniel 8244: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 8245: standalone = 0;
1.40 daniel 8246: SKIP(2);
1.152 daniel 8247: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 8248: (NXT(2) == 's')) {
1.29 daniel 8249: standalone = 1;
1.40 daniel 8250: SKIP(3);
1.29 daniel 8251: } else {
1.55 daniel 8252: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8253: ctxt->sax->error(ctxt->userData,
1.59 daniel 8254: "standalone accepts only 'yes' or 'no'\n");
1.123 daniel 8255: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.59 daniel 8256: ctxt->wellFormed = 0;
1.180 daniel 8257: ctxt->disableSAX = 1;
1.29 daniel 8258: }
1.152 daniel 8259: if (RAW != '"') {
1.55 daniel 8260: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8261: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 8262: ctxt->wellFormed = 0;
1.180 daniel 8263: ctxt->disableSAX = 1;
1.123 daniel 8264: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8265: } else
1.40 daniel 8266: NEXT;
1.37 daniel 8267: } else {
1.55 daniel 8268: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8269: ctxt->sax->error(ctxt->userData,
8270: "Standalone value not found\n");
1.59 daniel 8271: ctxt->wellFormed = 0;
1.180 daniel 8272: ctxt->disableSAX = 1;
1.123 daniel 8273: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.37 daniel 8274: }
1.29 daniel 8275: }
8276: return(standalone);
8277: }
8278:
1.50 daniel 8279: /**
8280: * xmlParseXMLDecl:
8281: * @ctxt: an XML parser context
8282: *
8283: * parse an XML declaration header
1.29 daniel 8284: *
8285: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 8286: */
8287:
1.55 daniel 8288: void
8289: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 8290: xmlChar *version;
1.1 veillard 8291:
8292: /*
1.19 daniel 8293: * We know that '<?xml' is here.
1.1 veillard 8294: */
1.40 daniel 8295: SKIP(5);
1.1 veillard 8296:
1.153 daniel 8297: if (!IS_BLANK(RAW)) {
1.59 daniel 8298: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8299: ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
1.123 daniel 8300: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 8301: ctxt->wellFormed = 0;
1.180 daniel 8302: ctxt->disableSAX = 1;
1.59 daniel 8303: }
1.42 daniel 8304: SKIP_BLANKS;
1.1 veillard 8305:
8306: /*
1.29 daniel 8307: * We should have the VersionInfo here.
1.1 veillard 8308: */
1.29 daniel 8309: version = xmlParseVersionInfo(ctxt);
8310: if (version == NULL)
1.45 daniel 8311: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.72 daniel 8312: ctxt->version = xmlStrdup(version);
1.119 daniel 8313: xmlFree(version);
1.29 daniel 8314:
8315: /*
8316: * We may have the encoding declaration
8317: */
1.153 daniel 8318: if (!IS_BLANK(RAW)) {
1.152 daniel 8319: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 8320: SKIP(2);
8321: return;
8322: }
8323: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8324: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.123 daniel 8325: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 8326: ctxt->wellFormed = 0;
1.180 daniel 8327: ctxt->disableSAX = 1;
1.59 daniel 8328: }
1.164 daniel 8329: ctxt->input->encoding = xmlParseEncodingDecl(ctxt);
1.1 veillard 8330:
8331: /*
1.29 daniel 8332: * We may have the standalone status.
1.1 veillard 8333: */
1.164 daniel 8334: if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
1.152 daniel 8335: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 8336: SKIP(2);
8337: return;
8338: }
8339: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8340: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 8341: ctxt->wellFormed = 0;
1.180 daniel 8342: ctxt->disableSAX = 1;
1.123 daniel 8343: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 8344: }
8345: SKIP_BLANKS;
1.167 daniel 8346: ctxt->input->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 8347:
1.42 daniel 8348: SKIP_BLANKS;
1.152 daniel 8349: if ((RAW == '?') && (NXT(1) == '>')) {
1.40 daniel 8350: SKIP(2);
1.152 daniel 8351: } else if (RAW == '>') {
1.31 daniel 8352: /* Deprecated old WD ... */
1.55 daniel 8353: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8354: ctxt->sax->error(ctxt->userData,
8355: "XML declaration must end-up with '?>'\n");
1.59 daniel 8356: ctxt->wellFormed = 0;
1.180 daniel 8357: ctxt->disableSAX = 1;
1.123 daniel 8358: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.40 daniel 8359: NEXT;
1.29 daniel 8360: } else {
1.55 daniel 8361: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8362: ctxt->sax->error(ctxt->userData,
8363: "parsing XML declaration: '?>' expected\n");
1.59 daniel 8364: ctxt->wellFormed = 0;
1.180 daniel 8365: ctxt->disableSAX = 1;
1.123 daniel 8366: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.40 daniel 8367: MOVETO_ENDTAG(CUR_PTR);
8368: NEXT;
1.29 daniel 8369: }
1.1 veillard 8370: }
8371:
1.50 daniel 8372: /**
8373: * xmlParseMisc:
8374: * @ctxt: an XML parser context
8375: *
8376: * parse an XML Misc* optionnal field.
1.21 daniel 8377: *
1.22 daniel 8378: * [27] Misc ::= Comment | PI | S
1.1 veillard 8379: */
8380:
1.55 daniel 8381: void
8382: xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.152 daniel 8383: while (((RAW == '<') && (NXT(1) == '?')) ||
8384: ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 8385: (NXT(2) == '-') && (NXT(3) == '-')) ||
8386: IS_BLANK(CUR)) {
1.152 daniel 8387: if ((RAW == '<') && (NXT(1) == '?')) {
1.16 daniel 8388: xmlParsePI(ctxt);
1.40 daniel 8389: } else if (IS_BLANK(CUR)) {
8390: NEXT;
1.1 veillard 8391: } else
1.114 daniel 8392: xmlParseComment(ctxt);
1.1 veillard 8393: }
8394: }
8395:
1.50 daniel 8396: /**
1.181 daniel 8397: * xmlParseDocument:
1.50 daniel 8398: * @ctxt: an XML parser context
8399: *
8400: * parse an XML document (and build a tree if using the standard SAX
8401: * interface).
1.21 daniel 8402: *
1.22 daniel 8403: * [1] document ::= prolog element Misc*
1.29 daniel 8404: *
8405: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.50 daniel 8406: *
1.68 daniel 8407: * Returns 0, -1 in case of error. the parser context is augmented
1.50 daniel 8408: * as a result of the parsing.
1.1 veillard 8409: */
8410:
1.55 daniel 8411: int
8412: xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.156 daniel 8413: xmlChar start[4];
8414: xmlCharEncoding enc;
8415:
1.45 daniel 8416: xmlDefaultSAXHandlerInit();
8417:
1.91 daniel 8418: GROW;
8419:
1.14 veillard 8420: /*
1.44 daniel 8421: * SAX: beginning of the document processing.
8422: */
1.72 daniel 8423: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
1.74 daniel 8424: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
1.44 daniel 8425:
1.156 daniel 8426: /*
8427: * Get the 4 first bytes and decode the charset
8428: * if enc != XML_CHAR_ENCODING_NONE
8429: * plug some encoding conversion routines.
8430: */
8431: start[0] = RAW;
8432: start[1] = NXT(1);
8433: start[2] = NXT(2);
8434: start[3] = NXT(3);
8435: enc = xmlDetectCharEncoding(start, 4);
8436: if (enc != XML_CHAR_ENCODING_NONE) {
8437: xmlSwitchEncoding(ctxt, enc);
8438: }
8439:
1.1 veillard 8440:
1.59 daniel 8441: if (CUR == 0) {
8442: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8443: ctxt->sax->error(ctxt->userData, "Document is empty\n");
1.123 daniel 8444: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 8445: ctxt->wellFormed = 0;
1.180 daniel 8446: ctxt->disableSAX = 1;
1.59 daniel 8447: }
1.1 veillard 8448:
8449: /*
8450: * Check for the XMLDecl in the Prolog.
8451: */
1.91 daniel 8452: GROW;
1.152 daniel 8453: if ((RAW == '<') && (NXT(1) == '?') &&
1.40 daniel 8454: (NXT(2) == 'x') && (NXT(3) == 'm') &&
1.142 daniel 8455: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.19 daniel 8456: xmlParseXMLDecl(ctxt);
1.167 daniel 8457: ctxt->standalone = ctxt->input->standalone;
1.42 daniel 8458: SKIP_BLANKS;
1.164 daniel 8459: if ((ctxt->encoding == NULL) && (ctxt->input->encoding != NULL))
8460: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8461:
1.1 veillard 8462: } else {
1.72 daniel 8463: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.1 veillard 8464: }
1.171 daniel 8465: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
1.74 daniel 8466: ctxt->sax->startDocument(ctxt->userData);
1.1 veillard 8467:
8468: /*
8469: * The Misc part of the Prolog
8470: */
1.91 daniel 8471: GROW;
1.16 daniel 8472: xmlParseMisc(ctxt);
1.1 veillard 8473:
8474: /*
1.29 daniel 8475: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 8476: * (doctypedecl Misc*)?
8477: */
1.91 daniel 8478: GROW;
1.152 daniel 8479: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 8480: (NXT(2) == 'D') && (NXT(3) == 'O') &&
8481: (NXT(4) == 'C') && (NXT(5) == 'T') &&
8482: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
8483: (NXT(8) == 'E')) {
1.165 daniel 8484:
1.166 daniel 8485: ctxt->inSubset = 1;
1.22 daniel 8486: xmlParseDocTypeDecl(ctxt);
1.152 daniel 8487: if (RAW == '[') {
1.140 daniel 8488: ctxt->instate = XML_PARSER_DTD;
8489: xmlParseInternalSubset(ctxt);
8490: }
1.165 daniel 8491:
8492: /*
8493: * Create and update the external subset.
8494: */
1.166 daniel 8495: ctxt->inSubset = 2;
1.171 daniel 8496: if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8497: (!ctxt->disableSAX))
1.165 daniel 8498: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8499: ctxt->extSubSystem, ctxt->extSubURI);
1.166 daniel 8500: ctxt->inSubset = 0;
1.165 daniel 8501:
8502:
1.96 daniel 8503: ctxt->instate = XML_PARSER_PROLOG;
1.22 daniel 8504: xmlParseMisc(ctxt);
1.21 daniel 8505: }
8506:
8507: /*
8508: * Time to start parsing the tree itself
1.1 veillard 8509: */
1.91 daniel 8510: GROW;
1.152 daniel 8511: if (RAW != '<') {
1.59 daniel 8512: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8513: ctxt->sax->error(ctxt->userData,
1.151 daniel 8514: "Start tag expected, '<' not found\n");
1.140 daniel 8515: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 8516: ctxt->wellFormed = 0;
1.180 daniel 8517: ctxt->disableSAX = 1;
1.140 daniel 8518: ctxt->instate = XML_PARSER_EOF;
8519: } else {
8520: ctxt->instate = XML_PARSER_CONTENT;
8521: xmlParseElement(ctxt);
8522: ctxt->instate = XML_PARSER_EPILOG;
8523:
8524:
8525: /*
8526: * The Misc part at the end
8527: */
8528: xmlParseMisc(ctxt);
8529:
1.152 daniel 8530: if (RAW != 0) {
1.140 daniel 8531: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8532: ctxt->sax->error(ctxt->userData,
8533: "Extra content at the end of the document\n");
8534: ctxt->wellFormed = 0;
1.180 daniel 8535: ctxt->disableSAX = 1;
1.140 daniel 8536: ctxt->errNo = XML_ERR_DOCUMENT_END;
8537: }
8538: ctxt->instate = XML_PARSER_EOF;
1.59 daniel 8539: }
8540:
1.44 daniel 8541: /*
8542: * SAX: end of the document processing.
8543: */
1.171 daniel 8544: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8545: (!ctxt->disableSAX))
1.74 daniel 8546: ctxt->sax->endDocument(ctxt->userData);
1.151 daniel 8547:
8548: /*
8549: * Grab the encoding if it was added on-the-fly
8550: */
8551: if ((ctxt->encoding != NULL) && (ctxt->myDoc != NULL) &&
8552: (ctxt->myDoc->encoding == NULL)) {
8553: ctxt->myDoc->encoding = ctxt->encoding;
8554: ctxt->encoding = NULL;
8555: }
1.59 daniel 8556: if (! ctxt->wellFormed) return(-1);
1.16 daniel 8557: return(0);
8558: }
8559:
1.98 daniel 8560: /************************************************************************
8561: * *
1.128 daniel 8562: * Progressive parsing interfaces *
8563: * *
8564: ************************************************************************/
8565:
8566: /**
8567: * xmlParseLookupSequence:
8568: * @ctxt: an XML parser context
8569: * @first: the first char to lookup
1.140 daniel 8570: * @next: the next char to lookup or zero
8571: * @third: the next char to lookup or zero
1.128 daniel 8572: *
1.140 daniel 8573: * Try to find if a sequence (first, next, third) or just (first next) or
8574: * (first) is available in the input stream.
8575: * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8576: * to avoid rescanning sequences of bytes, it DOES change the state of the
8577: * parser, do not use liberally.
1.128 daniel 8578: *
1.140 daniel 8579: * Returns the index to the current parsing point if the full sequence
8580: * is available, -1 otherwise.
1.128 daniel 8581: */
8582: int
1.140 daniel 8583: xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8584: xmlChar next, xmlChar third) {
8585: int base, len;
8586: xmlParserInputPtr in;
8587: const xmlChar *buf;
8588:
8589: in = ctxt->input;
8590: if (in == NULL) return(-1);
8591: base = in->cur - in->base;
8592: if (base < 0) return(-1);
8593: if (ctxt->checkIndex > base)
8594: base = ctxt->checkIndex;
8595: if (in->buf == NULL) {
8596: buf = in->base;
8597: len = in->length;
8598: } else {
8599: buf = in->buf->buffer->content;
8600: len = in->buf->buffer->use;
8601: }
8602: /* take into account the sequence length */
8603: if (third) len -= 2;
8604: else if (next) len --;
8605: for (;base < len;base++) {
8606: if (buf[base] == first) {
8607: if (third != 0) {
8608: if ((buf[base + 1] != next) ||
8609: (buf[base + 2] != third)) continue;
8610: } else if (next != 0) {
8611: if (buf[base + 1] != next) continue;
8612: }
8613: ctxt->checkIndex = 0;
8614: #ifdef DEBUG_PUSH
8615: if (next == 0)
8616: fprintf(stderr, "PP: lookup '%c' found at %d\n",
8617: first, base);
8618: else if (third == 0)
8619: fprintf(stderr, "PP: lookup '%c%c' found at %d\n",
8620: first, next, base);
8621: else
8622: fprintf(stderr, "PP: lookup '%c%c%c' found at %d\n",
8623: first, next, third, base);
8624: #endif
8625: return(base - (in->cur - in->base));
8626: }
8627: }
8628: ctxt->checkIndex = base;
8629: #ifdef DEBUG_PUSH
8630: if (next == 0)
8631: fprintf(stderr, "PP: lookup '%c' failed\n", first);
8632: else if (third == 0)
8633: fprintf(stderr, "PP: lookup '%c%c' failed\n", first, next);
8634: else
8635: fprintf(stderr, "PP: lookup '%c%c%c' failed\n", first, next, third);
8636: #endif
8637: return(-1);
1.128 daniel 8638: }
8639:
8640: /**
1.143 daniel 8641: * xmlParseTryOrFinish:
1.128 daniel 8642: * @ctxt: an XML parser context
1.143 daniel 8643: * @terminate: last chunk indicator
1.128 daniel 8644: *
8645: * Try to progress on parsing
8646: *
8647: * Returns zero if no parsing was possible
8648: */
8649: int
1.143 daniel 8650: xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
1.128 daniel 8651: int ret = 0;
1.140 daniel 8652: int avail;
8653: xmlChar cur, next;
8654:
8655: #ifdef DEBUG_PUSH
8656: switch (ctxt->instate) {
8657: case XML_PARSER_EOF:
8658: fprintf(stderr, "PP: try EOF\n"); break;
8659: case XML_PARSER_START:
8660: fprintf(stderr, "PP: try START\n"); break;
8661: case XML_PARSER_MISC:
8662: fprintf(stderr, "PP: try MISC\n");break;
8663: case XML_PARSER_COMMENT:
8664: fprintf(stderr, "PP: try COMMENT\n");break;
8665: case XML_PARSER_PROLOG:
8666: fprintf(stderr, "PP: try PROLOG\n");break;
8667: case XML_PARSER_START_TAG:
8668: fprintf(stderr, "PP: try START_TAG\n");break;
8669: case XML_PARSER_CONTENT:
8670: fprintf(stderr, "PP: try CONTENT\n");break;
8671: case XML_PARSER_CDATA_SECTION:
8672: fprintf(stderr, "PP: try CDATA_SECTION\n");break;
8673: case XML_PARSER_END_TAG:
8674: fprintf(stderr, "PP: try END_TAG\n");break;
8675: case XML_PARSER_ENTITY_DECL:
8676: fprintf(stderr, "PP: try ENTITY_DECL\n");break;
8677: case XML_PARSER_ENTITY_VALUE:
8678: fprintf(stderr, "PP: try ENTITY_VALUE\n");break;
8679: case XML_PARSER_ATTRIBUTE_VALUE:
8680: fprintf(stderr, "PP: try ATTRIBUTE_VALUE\n");break;
8681: case XML_PARSER_DTD:
8682: fprintf(stderr, "PP: try DTD\n");break;
8683: case XML_PARSER_EPILOG:
8684: fprintf(stderr, "PP: try EPILOG\n");break;
8685: case XML_PARSER_PI:
8686: fprintf(stderr, "PP: try PI\n");break;
8687: }
8688: #endif
1.128 daniel 8689:
8690: while (1) {
1.140 daniel 8691: /*
8692: * Pop-up of finished entities.
8693: */
1.152 daniel 8694: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 8695: xmlPopInput(ctxt);
8696:
1.184 daniel 8697: if (ctxt->input ==NULL) break;
8698: if (ctxt->input->buf == NULL)
8699: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8700: else
1.184 daniel 8701: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8702: if (avail < 1)
8703: goto done;
1.128 daniel 8704: switch (ctxt->instate) {
8705: case XML_PARSER_EOF:
1.140 daniel 8706: /*
8707: * Document parsing is done !
8708: */
8709: goto done;
8710: case XML_PARSER_START:
8711: /*
8712: * Very first chars read from the document flow.
8713: */
1.184 daniel 8714: cur = ctxt->input->cur[0];
1.140 daniel 8715: if (IS_BLANK(cur)) {
8716: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8717: ctxt->sax->setDocumentLocator(ctxt->userData,
8718: &xmlDefaultSAXLocator);
8719: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8720: ctxt->sax->error(ctxt->userData,
8721: "Extra spaces at the beginning of the document are not allowed\n");
8722: ctxt->errNo = XML_ERR_DOCUMENT_START;
8723: ctxt->wellFormed = 0;
1.180 daniel 8724: ctxt->disableSAX = 1;
1.140 daniel 8725: SKIP_BLANKS;
8726: ret++;
1.184 daniel 8727: if (ctxt->input->buf == NULL)
8728: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8729: else
1.184 daniel 8730: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8731: }
8732: if (avail < 2)
8733: goto done;
8734:
1.184 daniel 8735: cur = ctxt->input->cur[0];
8736: next = ctxt->input->cur[1];
1.140 daniel 8737: if (cur == 0) {
8738: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8739: ctxt->sax->setDocumentLocator(ctxt->userData,
8740: &xmlDefaultSAXLocator);
8741: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8742: ctxt->sax->error(ctxt->userData, "Document is empty\n");
8743: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8744: ctxt->wellFormed = 0;
1.180 daniel 8745: ctxt->disableSAX = 1;
1.140 daniel 8746: ctxt->instate = XML_PARSER_EOF;
8747: #ifdef DEBUG_PUSH
8748: fprintf(stderr, "PP: entering EOF\n");
8749: #endif
8750: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8751: ctxt->sax->endDocument(ctxt->userData);
8752: goto done;
8753: }
8754: if ((cur == '<') && (next == '?')) {
8755: /* PI or XML decl */
8756: if (avail < 5) return(ret);
1.143 daniel 8757: if ((!terminate) &&
8758: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8759: return(ret);
8760: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8761: ctxt->sax->setDocumentLocator(ctxt->userData,
8762: &xmlDefaultSAXLocator);
1.184 daniel 8763: if ((ctxt->input->cur[2] == 'x') &&
8764: (ctxt->input->cur[3] == 'm') &&
8765: (ctxt->input->cur[4] == 'l') &&
8766: (IS_BLANK(ctxt->input->cur[5]))) {
1.140 daniel 8767: ret += 5;
8768: #ifdef DEBUG_PUSH
8769: fprintf(stderr, "PP: Parsing XML Decl\n");
8770: #endif
8771: xmlParseXMLDecl(ctxt);
1.167 daniel 8772: ctxt->standalone = ctxt->input->standalone;
1.164 daniel 8773: if ((ctxt->encoding == NULL) &&
8774: (ctxt->input->encoding != NULL))
8775: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1.171 daniel 8776: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8777: (!ctxt->disableSAX))
1.140 daniel 8778: ctxt->sax->startDocument(ctxt->userData);
8779: ctxt->instate = XML_PARSER_MISC;
8780: #ifdef DEBUG_PUSH
8781: fprintf(stderr, "PP: entering MISC\n");
8782: #endif
8783: } else {
8784: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 daniel 8785: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8786: (!ctxt->disableSAX))
1.140 daniel 8787: ctxt->sax->startDocument(ctxt->userData);
8788: ctxt->instate = XML_PARSER_MISC;
8789: #ifdef DEBUG_PUSH
8790: fprintf(stderr, "PP: entering MISC\n");
8791: #endif
8792: }
8793: } else {
8794: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8795: ctxt->sax->setDocumentLocator(ctxt->userData,
8796: &xmlDefaultSAXLocator);
8797: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 daniel 8798: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8799: (!ctxt->disableSAX))
1.140 daniel 8800: ctxt->sax->startDocument(ctxt->userData);
8801: ctxt->instate = XML_PARSER_MISC;
8802: #ifdef DEBUG_PUSH
8803: fprintf(stderr, "PP: entering MISC\n");
8804: #endif
8805: }
8806: break;
8807: case XML_PARSER_MISC:
8808: SKIP_BLANKS;
1.184 daniel 8809: if (ctxt->input->buf == NULL)
8810: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8811: else
1.184 daniel 8812: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8813: if (avail < 2)
8814: goto done;
1.184 daniel 8815: cur = ctxt->input->cur[0];
8816: next = ctxt->input->cur[1];
1.140 daniel 8817: if ((cur == '<') && (next == '?')) {
1.143 daniel 8818: if ((!terminate) &&
8819: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8820: goto done;
8821: #ifdef DEBUG_PUSH
8822: fprintf(stderr, "PP: Parsing PI\n");
8823: #endif
8824: xmlParsePI(ctxt);
8825: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 8826: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 8827: if ((!terminate) &&
8828: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 8829: goto done;
8830: #ifdef DEBUG_PUSH
8831: fprintf(stderr, "PP: Parsing Comment\n");
8832: #endif
8833: xmlParseComment(ctxt);
8834: ctxt->instate = XML_PARSER_MISC;
8835: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 8836: (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
8837: (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
8838: (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
8839: (ctxt->input->cur[8] == 'E')) {
1.143 daniel 8840: if ((!terminate) &&
8841: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 8842: goto done;
8843: #ifdef DEBUG_PUSH
8844: fprintf(stderr, "PP: Parsing internal subset\n");
8845: #endif
1.166 daniel 8846: ctxt->inSubset = 1;
1.140 daniel 8847: xmlParseDocTypeDecl(ctxt);
1.152 daniel 8848: if (RAW == '[') {
1.140 daniel 8849: ctxt->instate = XML_PARSER_DTD;
8850: #ifdef DEBUG_PUSH
8851: fprintf(stderr, "PP: entering DTD\n");
8852: #endif
8853: } else {
1.166 daniel 8854: /*
8855: * Create and update the external subset.
8856: */
8857: ctxt->inSubset = 2;
1.171 daniel 8858: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 8859: (ctxt->sax->externalSubset != NULL))
8860: ctxt->sax->externalSubset(ctxt->userData,
8861: ctxt->intSubName, ctxt->extSubSystem,
8862: ctxt->extSubURI);
8863: ctxt->inSubset = 0;
1.140 daniel 8864: ctxt->instate = XML_PARSER_PROLOG;
8865: #ifdef DEBUG_PUSH
8866: fprintf(stderr, "PP: entering PROLOG\n");
8867: #endif
8868: }
8869: } else if ((cur == '<') && (next == '!') &&
8870: (avail < 9)) {
8871: goto done;
8872: } else {
8873: ctxt->instate = XML_PARSER_START_TAG;
8874: #ifdef DEBUG_PUSH
8875: fprintf(stderr, "PP: entering START_TAG\n");
8876: #endif
8877: }
8878: break;
1.128 daniel 8879: case XML_PARSER_PROLOG:
1.140 daniel 8880: SKIP_BLANKS;
1.184 daniel 8881: if (ctxt->input->buf == NULL)
8882: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8883: else
1.184 daniel 8884: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8885: if (avail < 2)
8886: goto done;
1.184 daniel 8887: cur = ctxt->input->cur[0];
8888: next = ctxt->input->cur[1];
1.140 daniel 8889: if ((cur == '<') && (next == '?')) {
1.143 daniel 8890: if ((!terminate) &&
8891: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8892: goto done;
8893: #ifdef DEBUG_PUSH
8894: fprintf(stderr, "PP: Parsing PI\n");
8895: #endif
8896: xmlParsePI(ctxt);
8897: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 8898: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 8899: if ((!terminate) &&
8900: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 8901: goto done;
8902: #ifdef DEBUG_PUSH
8903: fprintf(stderr, "PP: Parsing Comment\n");
8904: #endif
8905: xmlParseComment(ctxt);
8906: ctxt->instate = XML_PARSER_PROLOG;
8907: } else if ((cur == '<') && (next == '!') &&
8908: (avail < 4)) {
8909: goto done;
8910: } else {
8911: ctxt->instate = XML_PARSER_START_TAG;
8912: #ifdef DEBUG_PUSH
8913: fprintf(stderr, "PP: entering START_TAG\n");
8914: #endif
8915: }
8916: break;
8917: case XML_PARSER_EPILOG:
8918: SKIP_BLANKS;
1.184 daniel 8919: if (ctxt->input->buf == NULL)
8920: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8921: else
1.184 daniel 8922: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8923: if (avail < 2)
8924: goto done;
1.184 daniel 8925: cur = ctxt->input->cur[0];
8926: next = ctxt->input->cur[1];
1.140 daniel 8927: if ((cur == '<') && (next == '?')) {
1.143 daniel 8928: if ((!terminate) &&
8929: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8930: goto done;
8931: #ifdef DEBUG_PUSH
8932: fprintf(stderr, "PP: Parsing PI\n");
8933: #endif
8934: xmlParsePI(ctxt);
8935: ctxt->instate = XML_PARSER_EPILOG;
8936: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 8937: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 8938: if ((!terminate) &&
8939: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 8940: goto done;
8941: #ifdef DEBUG_PUSH
8942: fprintf(stderr, "PP: Parsing Comment\n");
8943: #endif
8944: xmlParseComment(ctxt);
8945: ctxt->instate = XML_PARSER_EPILOG;
8946: } else if ((cur == '<') && (next == '!') &&
8947: (avail < 4)) {
8948: goto done;
8949: } else {
8950: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8951: ctxt->sax->error(ctxt->userData,
8952: "Extra content at the end of the document\n");
8953: ctxt->wellFormed = 0;
1.180 daniel 8954: ctxt->disableSAX = 1;
1.140 daniel 8955: ctxt->errNo = XML_ERR_DOCUMENT_END;
8956: ctxt->instate = XML_PARSER_EOF;
8957: #ifdef DEBUG_PUSH
8958: fprintf(stderr, "PP: entering EOF\n");
8959: #endif
1.171 daniel 8960: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8961: (!ctxt->disableSAX))
1.140 daniel 8962: ctxt->sax->endDocument(ctxt->userData);
8963: goto done;
8964: }
8965: break;
8966: case XML_PARSER_START_TAG: {
8967: xmlChar *name, *oldname;
8968:
1.184 daniel 8969: if ((avail < 2) && (ctxt->inputNr == 1))
1.140 daniel 8970: goto done;
1.184 daniel 8971: cur = ctxt->input->cur[0];
1.140 daniel 8972: if (cur != '<') {
8973: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8974: ctxt->sax->error(ctxt->userData,
8975: "Start tag expect, '<' not found\n");
8976: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8977: ctxt->wellFormed = 0;
1.180 daniel 8978: ctxt->disableSAX = 1;
1.140 daniel 8979: ctxt->instate = XML_PARSER_EOF;
8980: #ifdef DEBUG_PUSH
8981: fprintf(stderr, "PP: entering EOF\n");
8982: #endif
1.171 daniel 8983: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8984: (!ctxt->disableSAX))
1.140 daniel 8985: ctxt->sax->endDocument(ctxt->userData);
8986: goto done;
8987: }
1.143 daniel 8988: if ((!terminate) &&
8989: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 8990: goto done;
1.176 daniel 8991: if (ctxt->spaceNr == 0)
8992: spacePush(ctxt, -1);
8993: else
8994: spacePush(ctxt, *ctxt->space);
1.140 daniel 8995: name = xmlParseStartTag(ctxt);
8996: if (name == NULL) {
1.176 daniel 8997: spacePop(ctxt);
1.140 daniel 8998: ctxt->instate = XML_PARSER_EOF;
8999: #ifdef DEBUG_PUSH
9000: fprintf(stderr, "PP: entering EOF\n");
9001: #endif
1.171 daniel 9002: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9003: (!ctxt->disableSAX))
1.140 daniel 9004: ctxt->sax->endDocument(ctxt->userData);
9005: goto done;
9006: }
9007: namePush(ctxt, xmlStrdup(name));
9008:
9009: /*
9010: * [ VC: Root Element Type ]
9011: * The Name in the document type declaration must match
9012: * the element type of the root element.
9013: */
9014: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 9015: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.140 daniel 9016: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9017:
9018: /*
9019: * Check for an Empty Element.
9020: */
1.152 daniel 9021: if ((RAW == '/') && (NXT(1) == '>')) {
1.140 daniel 9022: SKIP(2);
1.171 daniel 9023: if ((ctxt->sax != NULL) &&
9024: (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
1.140 daniel 9025: ctxt->sax->endElement(ctxt->userData, name);
9026: xmlFree(name);
9027: oldname = namePop(ctxt);
1.176 daniel 9028: spacePop(ctxt);
1.140 daniel 9029: if (oldname != NULL) {
9030: #ifdef DEBUG_STACK
9031: fprintf(stderr,"Close: popped %s\n", oldname);
9032: #endif
9033: xmlFree(oldname);
9034: }
9035: if (ctxt->name == NULL) {
9036: ctxt->instate = XML_PARSER_EPILOG;
9037: #ifdef DEBUG_PUSH
9038: fprintf(stderr, "PP: entering EPILOG\n");
9039: #endif
9040: } else {
9041: ctxt->instate = XML_PARSER_CONTENT;
9042: #ifdef DEBUG_PUSH
9043: fprintf(stderr, "PP: entering CONTENT\n");
9044: #endif
9045: }
9046: break;
9047: }
1.152 daniel 9048: if (RAW == '>') {
1.140 daniel 9049: NEXT;
9050: } else {
9051: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9052: ctxt->sax->error(ctxt->userData,
9053: "Couldn't find end of Start Tag %s\n",
9054: name);
9055: ctxt->wellFormed = 0;
1.180 daniel 9056: ctxt->disableSAX = 1;
1.140 daniel 9057: ctxt->errNo = XML_ERR_GT_REQUIRED;
9058:
9059: /*
9060: * end of parsing of this node.
9061: */
9062: nodePop(ctxt);
9063: oldname = namePop(ctxt);
1.176 daniel 9064: spacePop(ctxt);
1.140 daniel 9065: if (oldname != NULL) {
9066: #ifdef DEBUG_STACK
9067: fprintf(stderr,"Close: popped %s\n", oldname);
9068: #endif
9069: xmlFree(oldname);
9070: }
9071: }
9072: xmlFree(name);
9073: ctxt->instate = XML_PARSER_CONTENT;
9074: #ifdef DEBUG_PUSH
9075: fprintf(stderr, "PP: entering CONTENT\n");
9076: #endif
9077: break;
9078: }
1.128 daniel 9079: case XML_PARSER_CONTENT:
1.140 daniel 9080: /*
9081: * Handle preparsed entities and charRef
9082: */
9083: if (ctxt->token != 0) {
9084: xmlChar cur[2] = { 0 , 0 } ;
9085:
9086: cur[0] = (xmlChar) ctxt->token;
1.171 daniel 9087: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9088: (ctxt->sax->characters != NULL))
1.140 daniel 9089: ctxt->sax->characters(ctxt->userData, cur, 1);
9090: ctxt->token = 0;
9091: }
1.184 daniel 9092: if ((avail < 2) && (ctxt->inputNr == 1))
1.140 daniel 9093: goto done;
1.184 daniel 9094: cur = ctxt->input->cur[0];
9095: next = ctxt->input->cur[1];
1.140 daniel 9096: if ((cur == '<') && (next == '?')) {
1.143 daniel 9097: if ((!terminate) &&
9098: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 9099: goto done;
9100: #ifdef DEBUG_PUSH
9101: fprintf(stderr, "PP: Parsing PI\n");
9102: #endif
9103: xmlParsePI(ctxt);
9104: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 9105: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 9106: if ((!terminate) &&
9107: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 9108: goto done;
9109: #ifdef DEBUG_PUSH
9110: fprintf(stderr, "PP: Parsing Comment\n");
9111: #endif
9112: xmlParseComment(ctxt);
9113: ctxt->instate = XML_PARSER_CONTENT;
1.184 daniel 9114: } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9115: (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
9116: (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
9117: (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
9118: (ctxt->input->cur[8] == '[')) {
1.140 daniel 9119: SKIP(9);
9120: ctxt->instate = XML_PARSER_CDATA_SECTION;
9121: #ifdef DEBUG_PUSH
9122: fprintf(stderr, "PP: entering CDATA_SECTION\n");
9123: #endif
9124: break;
9125: } else if ((cur == '<') && (next == '!') &&
9126: (avail < 9)) {
9127: goto done;
9128: } else if ((cur == '<') && (next == '/')) {
9129: ctxt->instate = XML_PARSER_END_TAG;
9130: #ifdef DEBUG_PUSH
9131: fprintf(stderr, "PP: entering END_TAG\n");
9132: #endif
9133: break;
9134: } else if (cur == '<') {
9135: ctxt->instate = XML_PARSER_START_TAG;
9136: #ifdef DEBUG_PUSH
9137: fprintf(stderr, "PP: entering START_TAG\n");
9138: #endif
9139: break;
9140: } else if (cur == '&') {
1.143 daniel 9141: if ((!terminate) &&
9142: (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
1.140 daniel 9143: goto done;
9144: #ifdef DEBUG_PUSH
9145: fprintf(stderr, "PP: Parsing Reference\n");
9146: #endif
9147: /* TODO: check generation of subtrees if noent !!! */
9148: xmlParseReference(ctxt);
9149: } else {
1.156 daniel 9150: /* TODO Avoid the extra copy, handle directly !!! */
1.140 daniel 9151: /*
1.181 daniel 9152: * Goal of the following test is:
1.140 daniel 9153: * - minimize calls to the SAX 'character' callback
9154: * when they are mergeable
9155: * - handle an problem for isBlank when we only parse
9156: * a sequence of blank chars and the next one is
9157: * not available to check against '<' presence.
9158: * - tries to homogenize the differences in SAX
9159: * callbacks beween the push and pull versions
9160: * of the parser.
9161: */
9162: if ((ctxt->inputNr == 1) &&
9163: (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
1.143 daniel 9164: if ((!terminate) &&
9165: (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
1.140 daniel 9166: goto done;
9167: }
9168: ctxt->checkIndex = 0;
9169: #ifdef DEBUG_PUSH
9170: fprintf(stderr, "PP: Parsing char data\n");
9171: #endif
9172: xmlParseCharData(ctxt, 0);
9173: }
9174: /*
9175: * Pop-up of finished entities.
9176: */
1.152 daniel 9177: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 9178: xmlPopInput(ctxt);
9179: break;
9180: case XML_PARSER_CDATA_SECTION: {
9181: /*
9182: * The Push mode need to have the SAX callback for
9183: * cdataBlock merge back contiguous callbacks.
9184: */
9185: int base;
9186:
9187: base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9188: if (base < 0) {
9189: if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
1.171 daniel 9190: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.140 daniel 9191: if (ctxt->sax->cdataBlock != NULL)
1.184 daniel 9192: ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
1.140 daniel 9193: XML_PARSER_BIG_BUFFER_SIZE);
9194: }
9195: SKIP(XML_PARSER_BIG_BUFFER_SIZE);
9196: ctxt->checkIndex = 0;
9197: }
9198: goto done;
9199: } else {
1.171 daniel 9200: if ((ctxt->sax != NULL) && (base > 0) &&
9201: (!ctxt->disableSAX)) {
1.140 daniel 9202: if (ctxt->sax->cdataBlock != NULL)
9203: ctxt->sax->cdataBlock(ctxt->userData,
1.184 daniel 9204: ctxt->input->cur, base);
1.140 daniel 9205: }
9206: SKIP(base + 3);
9207: ctxt->checkIndex = 0;
9208: ctxt->instate = XML_PARSER_CONTENT;
9209: #ifdef DEBUG_PUSH
9210: fprintf(stderr, "PP: entering CONTENT\n");
9211: #endif
9212: }
9213: break;
9214: }
1.141 daniel 9215: case XML_PARSER_END_TAG:
1.140 daniel 9216: if (avail < 2)
9217: goto done;
1.143 daniel 9218: if ((!terminate) &&
9219: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 9220: goto done;
9221: xmlParseEndTag(ctxt);
9222: if (ctxt->name == NULL) {
9223: ctxt->instate = XML_PARSER_EPILOG;
9224: #ifdef DEBUG_PUSH
9225: fprintf(stderr, "PP: entering EPILOG\n");
9226: #endif
9227: } else {
9228: ctxt->instate = XML_PARSER_CONTENT;
9229: #ifdef DEBUG_PUSH
9230: fprintf(stderr, "PP: entering CONTENT\n");
9231: #endif
9232: }
9233: break;
9234: case XML_PARSER_DTD: {
9235: /*
9236: * Sorry but progressive parsing of the internal subset
9237: * is not expected to be supported. We first check that
9238: * the full content of the internal subset is available and
9239: * the parsing is launched only at that point.
9240: * Internal subset ends up with "']' S? '>'" in an unescaped
9241: * section and not in a ']]>' sequence which are conditional
9242: * sections (whoever argued to keep that crap in XML deserve
9243: * a place in hell !).
9244: */
9245: int base, i;
9246: xmlChar *buf;
9247: xmlChar quote = 0;
9248:
1.184 daniel 9249: base = ctxt->input->cur - ctxt->input->base;
1.140 daniel 9250: if (base < 0) return(0);
9251: if (ctxt->checkIndex > base)
9252: base = ctxt->checkIndex;
1.184 daniel 9253: buf = ctxt->input->buf->buffer->content;
9254: for (;base < ctxt->input->buf->buffer->use;base++) {
1.140 daniel 9255: if (quote != 0) {
9256: if (buf[base] == quote)
9257: quote = 0;
9258: continue;
9259: }
9260: if (buf[base] == '"') {
9261: quote = '"';
9262: continue;
9263: }
9264: if (buf[base] == '\'') {
9265: quote = '\'';
9266: continue;
9267: }
9268: if (buf[base] == ']') {
1.184 daniel 9269: if (base +1 >= ctxt->input->buf->buffer->use)
1.140 daniel 9270: break;
9271: if (buf[base + 1] == ']') {
9272: /* conditional crap, skip both ']' ! */
9273: base++;
9274: continue;
9275: }
1.184 daniel 9276: for (i = 0;base + i < ctxt->input->buf->buffer->use;i++) {
1.140 daniel 9277: if (buf[base + i] == '>')
9278: goto found_end_int_subset;
9279: }
9280: break;
9281: }
9282: }
9283: /*
9284: * We didn't found the end of the Internal subset
9285: */
9286: if (quote == 0)
9287: ctxt->checkIndex = base;
9288: #ifdef DEBUG_PUSH
9289: if (next == 0)
9290: fprintf(stderr, "PP: lookup of int subset end filed\n");
9291: #endif
9292: goto done;
9293:
9294: found_end_int_subset:
9295: xmlParseInternalSubset(ctxt);
1.166 daniel 9296: ctxt->inSubset = 2;
1.171 daniel 9297: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 9298: (ctxt->sax->externalSubset != NULL))
9299: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9300: ctxt->extSubSystem, ctxt->extSubURI);
9301: ctxt->inSubset = 0;
1.140 daniel 9302: ctxt->instate = XML_PARSER_PROLOG;
9303: ctxt->checkIndex = 0;
9304: #ifdef DEBUG_PUSH
9305: fprintf(stderr, "PP: entering PROLOG\n");
9306: #endif
9307: break;
9308: }
9309: case XML_PARSER_COMMENT:
9310: fprintf(stderr, "PP: internal error, state == COMMENT\n");
9311: ctxt->instate = XML_PARSER_CONTENT;
9312: #ifdef DEBUG_PUSH
9313: fprintf(stderr, "PP: entering CONTENT\n");
9314: #endif
9315: break;
9316: case XML_PARSER_PI:
9317: fprintf(stderr, "PP: internal error, state == PI\n");
9318: ctxt->instate = XML_PARSER_CONTENT;
9319: #ifdef DEBUG_PUSH
9320: fprintf(stderr, "PP: entering CONTENT\n");
9321: #endif
9322: break;
1.128 daniel 9323: case XML_PARSER_ENTITY_DECL:
1.140 daniel 9324: fprintf(stderr, "PP: internal error, state == ENTITY_DECL\n");
9325: ctxt->instate = XML_PARSER_DTD;
9326: #ifdef DEBUG_PUSH
9327: fprintf(stderr, "PP: entering DTD\n");
9328: #endif
9329: break;
1.128 daniel 9330: case XML_PARSER_ENTITY_VALUE:
1.140 daniel 9331: fprintf(stderr, "PP: internal error, state == ENTITY_VALUE\n");
9332: ctxt->instate = XML_PARSER_CONTENT;
9333: #ifdef DEBUG_PUSH
9334: fprintf(stderr, "PP: entering DTD\n");
9335: #endif
9336: break;
1.128 daniel 9337: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 9338: fprintf(stderr, "PP: internal error, state == ATTRIBUTE_VALUE\n");
1.168 daniel 9339: ctxt->instate = XML_PARSER_START_TAG;
9340: #ifdef DEBUG_PUSH
9341: fprintf(stderr, "PP: entering START_TAG\n");
9342: #endif
9343: break;
9344: case XML_PARSER_SYSTEM_LITERAL:
9345: fprintf(stderr, "PP: internal error, state == SYSTEM_LITERAL\n");
1.140 daniel 9346: ctxt->instate = XML_PARSER_START_TAG;
9347: #ifdef DEBUG_PUSH
9348: fprintf(stderr, "PP: entering START_TAG\n");
9349: #endif
9350: break;
1.128 daniel 9351: }
9352: }
1.140 daniel 9353: done:
9354: #ifdef DEBUG_PUSH
9355: fprintf(stderr, "PP: done %d\n", ret);
9356: #endif
1.128 daniel 9357: return(ret);
9358: }
9359:
9360: /**
1.143 daniel 9361: * xmlParseTry:
9362: * @ctxt: an XML parser context
9363: *
9364: * Try to progress on parsing
9365: *
9366: * Returns zero if no parsing was possible
9367: */
9368: int
9369: xmlParseTry(xmlParserCtxtPtr ctxt) {
9370: return(xmlParseTryOrFinish(ctxt, 0));
9371: }
9372:
9373: /**
1.128 daniel 9374: * xmlParseChunk:
9375: * @ctxt: an XML parser context
9376: * @chunk: an char array
9377: * @size: the size in byte of the chunk
9378: * @terminate: last chunk indicator
9379: *
9380: * Parse a Chunk of memory
9381: *
9382: * Returns zero if no error, the xmlParserErrors otherwise.
9383: */
1.140 daniel 9384: int
1.128 daniel 9385: xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
9386: int terminate) {
1.132 daniel 9387: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
1.140 daniel 9388: (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
9389: int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9390: int cur = ctxt->input->cur - ctxt->input->base;
9391:
1.132 daniel 9392: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
1.140 daniel 9393: ctxt->input->base = ctxt->input->buf->buffer->content + base;
9394: ctxt->input->cur = ctxt->input->base + cur;
9395: #ifdef DEBUG_PUSH
9396: fprintf(stderr, "PP: pushed %d\n", size);
9397: #endif
9398:
1.150 daniel 9399: if ((terminate) || (ctxt->input->buf->buffer->use > 80))
9400: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 9401: } else if (ctxt->instate != XML_PARSER_EOF)
1.143 daniel 9402: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 9403: if (terminate) {
1.151 daniel 9404: /*
9405: * Grab the encoding if it was added on-the-fly
9406: */
9407: if ((ctxt->encoding != NULL) && (ctxt->myDoc != NULL) &&
9408: (ctxt->myDoc->encoding == NULL)) {
9409: ctxt->myDoc->encoding = ctxt->encoding;
9410: ctxt->encoding = NULL;
9411: }
9412:
9413: /*
9414: * Check for termination
9415: */
1.140 daniel 9416: if ((ctxt->instate != XML_PARSER_EOF) &&
9417: (ctxt->instate != XML_PARSER_EPILOG)) {
9418: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9419: ctxt->sax->error(ctxt->userData,
9420: "Extra content at the end of the document\n");
9421: ctxt->wellFormed = 0;
1.180 daniel 9422: ctxt->disableSAX = 1;
1.140 daniel 9423: ctxt->errNo = XML_ERR_DOCUMENT_END;
9424: }
9425: if (ctxt->instate != XML_PARSER_EOF) {
1.171 daniel 9426: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9427: (!ctxt->disableSAX))
1.140 daniel 9428: ctxt->sax->endDocument(ctxt->userData);
9429: }
9430: ctxt->instate = XML_PARSER_EOF;
1.128 daniel 9431: }
9432: return((xmlParserErrors) ctxt->errNo);
9433: }
9434:
9435: /************************************************************************
9436: * *
1.98 daniel 9437: * I/O front end functions to the parser *
9438: * *
9439: ************************************************************************/
9440:
1.50 daniel 9441: /**
1.181 daniel 9442: * xmlCreatePushParserCtxt:
1.140 daniel 9443: * @sax: a SAX handler
9444: * @user_data: The user data returned on SAX callbacks
9445: * @chunk: a pointer to an array of chars
9446: * @size: number of chars in the array
9447: * @filename: an optional file name or URI
9448: *
9449: * Create a parser context for using the XML parser in push mode
9450: * To allow content encoding detection, @size should be >= 4
9451: * The value of @filename is used for fetching external entities
9452: * and error/warning reports.
9453: *
9454: * Returns the new parser context or NULL
9455: */
9456: xmlParserCtxtPtr
9457: xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9458: const char *chunk, int size, const char *filename) {
9459: xmlParserCtxtPtr ctxt;
9460: xmlParserInputPtr inputStream;
9461: xmlParserInputBufferPtr buf;
9462: xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9463:
9464: /*
1.156 daniel 9465: * plug some encoding conversion routines
1.140 daniel 9466: */
9467: if ((chunk != NULL) && (size >= 4))
1.156 daniel 9468: enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
1.140 daniel 9469:
9470: buf = xmlAllocParserInputBuffer(enc);
9471: if (buf == NULL) return(NULL);
9472:
9473: ctxt = xmlNewParserCtxt();
9474: if (ctxt == NULL) {
9475: xmlFree(buf);
9476: return(NULL);
9477: }
9478: if (sax != NULL) {
9479: if (ctxt->sax != &xmlDefaultSAXHandler)
9480: xmlFree(ctxt->sax);
9481: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9482: if (ctxt->sax == NULL) {
9483: xmlFree(buf);
9484: xmlFree(ctxt);
9485: return(NULL);
9486: }
9487: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9488: if (user_data != NULL)
9489: ctxt->userData = user_data;
9490: }
9491: if (filename == NULL) {
9492: ctxt->directory = NULL;
9493: } else {
9494: ctxt->directory = xmlParserGetDirectory(filename);
9495: }
9496:
9497: inputStream = xmlNewInputStream(ctxt);
9498: if (inputStream == NULL) {
9499: xmlFreeParserCtxt(ctxt);
9500: return(NULL);
9501: }
9502:
9503: if (filename == NULL)
9504: inputStream->filename = NULL;
9505: else
9506: inputStream->filename = xmlMemStrdup(filename);
9507: inputStream->buf = buf;
9508: inputStream->base = inputStream->buf->buffer->content;
9509: inputStream->cur = inputStream->buf->buffer->content;
1.156 daniel 9510: if (enc != XML_CHAR_ENCODING_NONE) {
9511: xmlSwitchEncoding(ctxt, enc);
9512: }
1.140 daniel 9513:
9514: inputPush(ctxt, inputStream);
9515:
9516: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9517: (ctxt->input->buf != NULL)) {
9518: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9519: #ifdef DEBUG_PUSH
9520: fprintf(stderr, "PP: pushed %d\n", size);
9521: #endif
9522: }
9523:
9524: return(ctxt);
9525: }
9526:
9527: /**
1.181 daniel 9528: * xmlCreateDocParserCtxt:
1.123 daniel 9529: * @cur: a pointer to an array of xmlChar
1.50 daniel 9530: *
1.69 daniel 9531: * Create a parser context for an XML in-memory document.
9532: *
9533: * Returns the new parser context or NULL
1.16 daniel 9534: */
1.69 daniel 9535: xmlParserCtxtPtr
1.123 daniel 9536: xmlCreateDocParserCtxt(xmlChar *cur) {
1.16 daniel 9537: xmlParserCtxtPtr ctxt;
1.40 daniel 9538: xmlParserInputPtr input;
1.16 daniel 9539:
1.97 daniel 9540: ctxt = xmlNewParserCtxt();
1.16 daniel 9541: if (ctxt == NULL) {
9542: return(NULL);
9543: }
1.96 daniel 9544: input = xmlNewInputStream(ctxt);
1.40 daniel 9545: if (input == NULL) {
1.97 daniel 9546: xmlFreeParserCtxt(ctxt);
1.40 daniel 9547: return(NULL);
9548: }
9549:
9550: input->base = cur;
9551: input->cur = cur;
9552:
9553: inputPush(ctxt, input);
1.69 daniel 9554: return(ctxt);
9555: }
9556:
9557: /**
1.181 daniel 9558: * xmlSAXParseDoc:
1.69 daniel 9559: * @sax: the SAX handler block
1.123 daniel 9560: * @cur: a pointer to an array of xmlChar
1.69 daniel 9561: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9562: * documents
9563: *
9564: * parse an XML in-memory document and build a tree.
9565: * It use the given SAX function block to handle the parsing callback.
9566: * If sax is NULL, fallback to the default DOM tree building routines.
9567: *
9568: * Returns the resulting document tree
9569: */
9570:
9571: xmlDocPtr
1.123 daniel 9572: xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
1.69 daniel 9573: xmlDocPtr ret;
9574: xmlParserCtxtPtr ctxt;
9575:
9576: if (cur == NULL) return(NULL);
1.16 daniel 9577:
9578:
1.69 daniel 9579: ctxt = xmlCreateDocParserCtxt(cur);
9580: if (ctxt == NULL) return(NULL);
1.74 daniel 9581: if (sax != NULL) {
9582: ctxt->sax = sax;
9583: ctxt->userData = NULL;
9584: }
1.69 daniel 9585:
1.16 daniel 9586: xmlParseDocument(ctxt);
1.72 daniel 9587: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 9588: else {
9589: ret = NULL;
1.72 daniel 9590: xmlFreeDoc(ctxt->myDoc);
9591: ctxt->myDoc = NULL;
1.59 daniel 9592: }
1.86 daniel 9593: if (sax != NULL)
9594: ctxt->sax = NULL;
1.69 daniel 9595: xmlFreeParserCtxt(ctxt);
1.16 daniel 9596:
1.1 veillard 9597: return(ret);
9598: }
9599:
1.50 daniel 9600: /**
1.181 daniel 9601: * xmlParseDoc:
1.123 daniel 9602: * @cur: a pointer to an array of xmlChar
1.55 daniel 9603: *
9604: * parse an XML in-memory document and build a tree.
9605: *
1.68 daniel 9606: * Returns the resulting document tree
1.55 daniel 9607: */
9608:
1.69 daniel 9609: xmlDocPtr
1.123 daniel 9610: xmlParseDoc(xmlChar *cur) {
1.59 daniel 9611: return(xmlSAXParseDoc(NULL, cur, 0));
1.76 daniel 9612: }
9613:
9614: /**
1.181 daniel 9615: * xmlSAXParseDTD:
1.76 daniel 9616: * @sax: the SAX handler block
9617: * @ExternalID: a NAME* containing the External ID of the DTD
9618: * @SystemID: a NAME* containing the URL to the DTD
9619: *
9620: * Load and parse an external subset.
9621: *
9622: * Returns the resulting xmlDtdPtr or NULL in case of error.
9623: */
9624:
9625: xmlDtdPtr
1.123 daniel 9626: xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9627: const xmlChar *SystemID) {
1.76 daniel 9628: xmlDtdPtr ret = NULL;
9629: xmlParserCtxtPtr ctxt;
1.83 daniel 9630: xmlParserInputPtr input = NULL;
1.76 daniel 9631: xmlCharEncoding enc;
9632:
9633: if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9634:
1.97 daniel 9635: ctxt = xmlNewParserCtxt();
1.76 daniel 9636: if (ctxt == NULL) {
9637: return(NULL);
9638: }
9639:
9640: /*
9641: * Set-up the SAX context
9642: */
9643: if (ctxt == NULL) return(NULL);
9644: if (sax != NULL) {
1.93 veillard 9645: if (ctxt->sax != NULL)
1.119 daniel 9646: xmlFree(ctxt->sax);
1.76 daniel 9647: ctxt->sax = sax;
9648: ctxt->userData = NULL;
9649: }
9650:
9651: /*
9652: * Ask the Entity resolver to load the damn thing
9653: */
9654:
9655: if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9656: input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9657: if (input == NULL) {
1.86 daniel 9658: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 9659: xmlFreeParserCtxt(ctxt);
9660: return(NULL);
9661: }
9662:
9663: /*
1.156 daniel 9664: * plug some encoding conversion routines here.
1.76 daniel 9665: */
9666: xmlPushInput(ctxt, input);
1.156 daniel 9667: enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
1.76 daniel 9668: xmlSwitchEncoding(ctxt, enc);
9669:
1.95 veillard 9670: if (input->filename == NULL)
1.156 daniel 9671: input->filename = (char *) xmlStrdup(SystemID);
1.76 daniel 9672: input->line = 1;
9673: input->col = 1;
9674: input->base = ctxt->input->cur;
9675: input->cur = ctxt->input->cur;
9676: input->free = NULL;
9677:
9678: /*
9679: * let's parse that entity knowing it's an external subset.
9680: */
1.79 daniel 9681: xmlParseExternalSubset(ctxt, ExternalID, SystemID);
1.76 daniel 9682:
9683: if (ctxt->myDoc != NULL) {
9684: if (ctxt->wellFormed) {
9685: ret = ctxt->myDoc->intSubset;
9686: ctxt->myDoc->intSubset = NULL;
9687: } else {
9688: ret = NULL;
9689: }
9690: xmlFreeDoc(ctxt->myDoc);
9691: ctxt->myDoc = NULL;
9692: }
1.86 daniel 9693: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 9694: xmlFreeParserCtxt(ctxt);
9695:
9696: return(ret);
9697: }
9698:
9699: /**
1.181 daniel 9700: * xmlParseDTD:
1.76 daniel 9701: * @ExternalID: a NAME* containing the External ID of the DTD
9702: * @SystemID: a NAME* containing the URL to the DTD
9703: *
9704: * Load and parse an external subset.
9705: *
9706: * Returns the resulting xmlDtdPtr or NULL in case of error.
9707: */
9708:
9709: xmlDtdPtr
1.123 daniel 9710: xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
1.76 daniel 9711: return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
1.59 daniel 9712: }
9713:
9714: /**
1.181 daniel 9715: * xmlSAXParseBalancedChunk:
1.144 daniel 9716: * @ctx: an XML parser context (possibly NULL)
9717: * @sax: the SAX handler bloc (possibly NULL)
9718: * @user_data: The user data returned on SAX callbacks (possibly NULL)
9719: * @input: a parser input stream
9720: * @enc: the encoding
9721: *
9722: * Parse a well-balanced chunk of an XML document
9723: * The user has to provide SAX callback block whose routines will be
9724: * called by the parser
9725: * The allowed sequence for the Well Balanced Chunk is the one defined by
9726: * the content production in the XML grammar:
9727: *
9728: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9729: *
1.176 daniel 9730: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
1.144 daniel 9731: * the error code otherwise
9732: */
9733:
9734: int
9735: xmlSAXParseBalancedChunk(xmlParserCtxtPtr ctx, xmlSAXHandlerPtr sax,
9736: void *user_data, xmlParserInputPtr input,
9737: xmlCharEncoding enc) {
9738: xmlParserCtxtPtr ctxt;
9739: int ret;
9740:
9741: if (input == NULL) return(-1);
9742:
9743: if (ctx != NULL)
9744: ctxt = ctx;
9745: else {
9746: ctxt = xmlNewParserCtxt();
9747: if (ctxt == NULL)
9748: return(-1);
9749: if (sax == NULL)
9750: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9751: }
9752:
9753: /*
9754: * Set-up the SAX context
9755: */
9756: if (sax != NULL) {
9757: if (ctxt->sax != NULL)
9758: xmlFree(ctxt->sax);
9759: ctxt->sax = sax;
9760: ctxt->userData = user_data;
9761: }
9762:
9763: /*
9764: * plug some encoding conversion routines here.
9765: */
9766: xmlPushInput(ctxt, input);
9767: if (enc != XML_CHAR_ENCODING_NONE)
9768: xmlSwitchEncoding(ctxt, enc);
9769:
9770: /*
9771: * let's parse that entity knowing it's an external subset.
9772: */
9773: xmlParseContent(ctxt);
9774: ret = ctxt->errNo;
9775:
9776: if (ctx == NULL) {
9777: if (sax != NULL)
9778: ctxt->sax = NULL;
9779: else
9780: xmlFreeDoc(ctxt->myDoc);
9781: xmlFreeParserCtxt(ctxt);
9782: }
9783: return(ret);
9784: }
9785:
9786: /**
1.181 daniel 9787: * xmlParseExternalEntity:
9788: * @doc: the document the chunk pertains to
9789: * @sax: the SAX handler bloc (possibly NULL)
9790: * @user_data: The user data returned on SAX callbacks (possibly NULL)
1.185 daniel 9791: * @depth: Used for loop detection, use 0
1.181 daniel 9792: * @URL: the URL for the entity to load
9793: * @ID: the System ID for the entity to load
9794: * @list: the return value for the set of parsed nodes
9795: *
9796: * Parse an external general entity
9797: * An external general parsed entity is well-formed if it matches the
9798: * production labeled extParsedEnt.
9799: *
9800: * [78] extParsedEnt ::= TextDecl? content
9801: *
9802: * Returns 0 if the entity is well formed, -1 in case of args problem and
9803: * the parser error code otherwise
9804: */
9805:
9806: int
9807: xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
1.185 daniel 9808: int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
1.181 daniel 9809: xmlParserCtxtPtr ctxt;
9810: xmlDocPtr newDoc;
9811: xmlSAXHandlerPtr oldsax = NULL;
9812: int ret = 0;
9813:
1.185 daniel 9814: if (depth > 40) {
9815: return(XML_ERR_ENTITY_LOOP);
9816: }
9817:
9818:
1.181 daniel 9819:
9820: if (list != NULL)
9821: *list = NULL;
9822: if ((URL == NULL) && (ID == NULL))
9823: return(-1);
9824:
9825:
9826: ctxt = xmlCreateEntityParserCtxt(URL, ID, doc->URL);
9827: if (ctxt == NULL) return(-1);
9828: ctxt->userData = ctxt;
9829: if (sax != NULL) {
9830: oldsax = ctxt->sax;
9831: ctxt->sax = sax;
9832: if (user_data != NULL)
9833: ctxt->userData = user_data;
9834: }
9835: newDoc = xmlNewDoc(BAD_CAST "1.0");
9836: if (newDoc == NULL) {
9837: xmlFreeParserCtxt(ctxt);
9838: return(-1);
9839: }
9840: if (doc != NULL) {
9841: newDoc->intSubset = doc->intSubset;
9842: newDoc->extSubset = doc->extSubset;
9843: }
9844: if (doc->URL != NULL) {
9845: newDoc->URL = xmlStrdup(doc->URL);
9846: }
9847: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9848: if (newDoc->children == NULL) {
9849: if (sax != NULL)
9850: ctxt->sax = oldsax;
9851: xmlFreeParserCtxt(ctxt);
9852: newDoc->intSubset = NULL;
9853: newDoc->extSubset = NULL;
9854: xmlFreeDoc(newDoc);
9855: return(-1);
9856: }
9857: nodePush(ctxt, newDoc->children);
9858: if (doc == NULL) {
9859: ctxt->myDoc = newDoc;
9860: } else {
9861: ctxt->myDoc = doc;
9862: newDoc->children->doc = doc;
9863: }
9864:
9865: /*
9866: * Parse a possible text declaration first
9867: */
9868: GROW;
9869: if ((RAW == '<') && (NXT(1) == '?') &&
9870: (NXT(2) == 'x') && (NXT(3) == 'm') &&
9871: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9872: xmlParseTextDecl(ctxt);
9873: }
9874:
9875: /*
9876: * Doing validity checking on chunk doesn't make sense
9877: */
9878: ctxt->instate = XML_PARSER_CONTENT;
9879: ctxt->validate = 0;
1.185 daniel 9880: ctxt->depth = depth;
1.181 daniel 9881:
9882: xmlParseContent(ctxt);
9883:
9884: if ((RAW == '<') && (NXT(1) == '/')) {
9885: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9886: ctxt->sax->error(ctxt->userData,
9887: "chunk is not well balanced\n");
9888: ctxt->wellFormed = 0;
9889: ctxt->disableSAX = 1;
9890: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9891: } else if (RAW != 0) {
9892: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9893: ctxt->sax->error(ctxt->userData,
9894: "extra content at the end of well balanced chunk\n");
9895: ctxt->wellFormed = 0;
9896: ctxt->disableSAX = 1;
9897: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9898: }
9899: if (ctxt->node != newDoc->children) {
9900: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9901: ctxt->sax->error(ctxt->userData,
9902: "chunk is not well balanced\n");
9903: ctxt->wellFormed = 0;
9904: ctxt->disableSAX = 1;
9905: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9906: }
9907:
9908: if (!ctxt->wellFormed) {
9909: if (ctxt->errNo == 0)
9910: ret = 1;
9911: else
9912: ret = ctxt->errNo;
9913: } else {
9914: if (list != NULL) {
9915: xmlNodePtr cur;
9916:
9917: /*
9918: * Return the newly created nodeset after unlinking it from
9919: * they pseudo parent.
9920: */
9921: cur = newDoc->children->children;
9922: *list = cur;
9923: while (cur != NULL) {
9924: cur->parent = NULL;
9925: cur = cur->next;
9926: }
9927: newDoc->children->children = NULL;
9928: }
9929: ret = 0;
9930: }
9931: if (sax != NULL)
9932: ctxt->sax = oldsax;
9933: xmlFreeParserCtxt(ctxt);
9934: newDoc->intSubset = NULL;
9935: newDoc->extSubset = NULL;
9936: xmlFreeDoc(newDoc);
9937:
9938: return(ret);
9939: }
9940:
9941: /**
9942: * xmlParseBalancedChunkMemory:
1.176 daniel 9943: * @doc: the document the chunk pertains to
9944: * @sax: the SAX handler bloc (possibly NULL)
9945: * @user_data: The user data returned on SAX callbacks (possibly NULL)
1.185 daniel 9946: * @depth: Used for loop detection, use 0
1.176 daniel 9947: * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9948: * @list: the return value for the set of parsed nodes
9949: *
9950: * Parse a well-balanced chunk of an XML document
9951: * called by the parser
9952: * The allowed sequence for the Well Balanced Chunk is the one defined by
9953: * the content production in the XML grammar:
1.144 daniel 9954: *
1.175 daniel 9955: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9956: *
1.176 daniel 9957: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9958: * the parser error code otherwise
1.144 daniel 9959: */
9960:
1.175 daniel 9961: int
9962: xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
1.185 daniel 9963: void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
1.176 daniel 9964: xmlParserCtxtPtr ctxt;
1.175 daniel 9965: xmlDocPtr newDoc;
1.181 daniel 9966: xmlSAXHandlerPtr oldsax = NULL;
1.175 daniel 9967: int size;
1.176 daniel 9968: int ret = 0;
1.175 daniel 9969:
1.185 daniel 9970: if (depth > 40) {
9971: return(XML_ERR_ENTITY_LOOP);
9972: }
9973:
1.175 daniel 9974:
1.176 daniel 9975: if (list != NULL)
9976: *list = NULL;
9977: if (string == NULL)
9978: return(-1);
9979:
9980: size = xmlStrlen(string);
9981:
1.183 daniel 9982: ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
1.176 daniel 9983: if (ctxt == NULL) return(-1);
9984: ctxt->userData = ctxt;
1.175 daniel 9985: if (sax != NULL) {
1.176 daniel 9986: oldsax = ctxt->sax;
9987: ctxt->sax = sax;
9988: if (user_data != NULL)
9989: ctxt->userData = user_data;
1.175 daniel 9990: }
9991: newDoc = xmlNewDoc(BAD_CAST "1.0");
1.176 daniel 9992: if (newDoc == NULL) {
9993: xmlFreeParserCtxt(ctxt);
9994: return(-1);
9995: }
1.175 daniel 9996: if (doc != NULL) {
9997: newDoc->intSubset = doc->intSubset;
9998: newDoc->extSubset = doc->extSubset;
9999: }
1.176 daniel 10000: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10001: if (newDoc->children == NULL) {
10002: if (sax != NULL)
10003: ctxt->sax = oldsax;
10004: xmlFreeParserCtxt(ctxt);
10005: newDoc->intSubset = NULL;
10006: newDoc->extSubset = NULL;
10007: xmlFreeDoc(newDoc);
10008: return(-1);
10009: }
10010: nodePush(ctxt, newDoc->children);
10011: if (doc == NULL) {
10012: ctxt->myDoc = newDoc;
10013: } else {
10014: ctxt->myDoc = doc;
10015: newDoc->children->doc = doc;
10016: }
10017: ctxt->instate = XML_PARSER_CONTENT;
1.185 daniel 10018: ctxt->depth = depth;
1.176 daniel 10019:
10020: /*
10021: * Doing validity checking on chunk doesn't make sense
10022: */
10023: ctxt->validate = 0;
10024:
1.175 daniel 10025: xmlParseContent(ctxt);
1.176 daniel 10026:
10027: if ((RAW == '<') && (NXT(1) == '/')) {
10028: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10029: ctxt->sax->error(ctxt->userData,
10030: "chunk is not well balanced\n");
10031: ctxt->wellFormed = 0;
1.180 daniel 10032: ctxt->disableSAX = 1;
1.176 daniel 10033: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10034: } else if (RAW != 0) {
10035: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10036: ctxt->sax->error(ctxt->userData,
10037: "extra content at the end of well balanced chunk\n");
10038: ctxt->wellFormed = 0;
1.180 daniel 10039: ctxt->disableSAX = 1;
1.176 daniel 10040: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10041: }
10042: if (ctxt->node != newDoc->children) {
10043: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10044: ctxt->sax->error(ctxt->userData,
10045: "chunk is not well balanced\n");
10046: ctxt->wellFormed = 0;
1.180 daniel 10047: ctxt->disableSAX = 1;
1.176 daniel 10048: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10049: }
1.175 daniel 10050:
1.176 daniel 10051: if (!ctxt->wellFormed) {
10052: if (ctxt->errNo == 0)
10053: ret = 1;
10054: else
10055: ret = ctxt->errNo;
10056: } else {
10057: if (list != NULL) {
10058: xmlNodePtr cur;
1.175 daniel 10059:
1.176 daniel 10060: /*
10061: * Return the newly created nodeset after unlinking it from
10062: * they pseudo parent.
10063: */
10064: cur = newDoc->children->children;
10065: *list = cur;
10066: while (cur != NULL) {
10067: cur->parent = NULL;
10068: cur = cur->next;
10069: }
10070: newDoc->children->children = NULL;
10071: }
10072: ret = 0;
1.175 daniel 10073: }
1.176 daniel 10074: if (sax != NULL)
10075: ctxt->sax = oldsax;
1.175 daniel 10076: xmlFreeParserCtxt(ctxt);
10077: newDoc->intSubset = NULL;
10078: newDoc->extSubset = NULL;
1.176 daniel 10079: xmlFreeDoc(newDoc);
1.175 daniel 10080:
1.176 daniel 10081: return(ret);
1.144 daniel 10082: }
10083:
10084: /**
1.181 daniel 10085: * xmlParseBalancedChunkFile:
1.144 daniel 10086: * @doc: the document the chunk pertains to
10087: *
10088: * Parse a well-balanced chunk of an XML document contained in a file
10089: *
10090: * Returns the resulting list of nodes resulting from the parsing,
10091: * they are not added to @node
10092: */
10093:
10094: xmlNodePtr
10095: xmlParseBalancedChunkFile(xmlDocPtr doc, xmlNodePtr node) {
1.156 daniel 10096: /* TODO !!! */
10097: return(NULL);
1.144 daniel 10098: }
10099:
10100: /**
1.181 daniel 10101: * xmlRecoverDoc:
1.123 daniel 10102: * @cur: a pointer to an array of xmlChar
1.59 daniel 10103: *
10104: * parse an XML in-memory document and build a tree.
10105: * In the case the document is not Well Formed, a tree is built anyway
10106: *
1.68 daniel 10107: * Returns the resulting document tree
1.59 daniel 10108: */
10109:
1.69 daniel 10110: xmlDocPtr
1.123 daniel 10111: xmlRecoverDoc(xmlChar *cur) {
1.59 daniel 10112: return(xmlSAXParseDoc(NULL, cur, 1));
1.55 daniel 10113: }
10114:
10115: /**
1.181 daniel 10116: * xmlCreateEntityParserCtxt:
10117: * @URL: the entity URL
10118: * @ID: the entity PUBLIC ID
10119: * @base: a posible base for the target URI
10120: *
10121: * Create a parser context for an external entity
10122: * Automatic support for ZLIB/Compress compressed document is provided
10123: * by default if found at compile-time.
10124: *
10125: * Returns the new parser context or NULL
10126: */
10127: xmlParserCtxtPtr
10128: xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
10129: const xmlChar *base) {
10130: xmlParserCtxtPtr ctxt;
10131: xmlParserInputPtr inputStream;
10132: char *directory = NULL;
10133:
10134: ctxt = xmlNewParserCtxt();
10135: if (ctxt == NULL) {
10136: return(NULL);
10137: }
10138:
1.182 daniel 10139: inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
1.181 daniel 10140: if (inputStream == NULL) {
10141: xmlFreeParserCtxt(ctxt);
10142: return(NULL);
10143: }
10144:
10145: inputPush(ctxt, inputStream);
10146:
10147: if ((ctxt->directory == NULL) && (directory == NULL))
1.182 daniel 10148: directory = xmlParserGetDirectory((char *)URL);
1.181 daniel 10149: if ((ctxt->directory == NULL) && (directory != NULL))
10150: ctxt->directory = directory;
10151:
10152: return(ctxt);
10153: }
10154:
10155: /**
10156: * xmlCreateFileParserCtxt:
1.50 daniel 10157: * @filename: the filename
10158: *
1.69 daniel 10159: * Create a parser context for a file content.
10160: * Automatic support for ZLIB/Compress compressed document is provided
10161: * by default if found at compile-time.
1.50 daniel 10162: *
1.69 daniel 10163: * Returns the new parser context or NULL
1.9 httpng 10164: */
1.69 daniel 10165: xmlParserCtxtPtr
10166: xmlCreateFileParserCtxt(const char *filename)
10167: {
10168: xmlParserCtxtPtr ctxt;
1.40 daniel 10169: xmlParserInputPtr inputStream;
1.91 daniel 10170: xmlParserInputBufferPtr buf;
1.111 daniel 10171: char *directory = NULL;
1.9 httpng 10172:
1.91 daniel 10173: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
10174: if (buf == NULL) return(NULL);
1.9 httpng 10175:
1.97 daniel 10176: ctxt = xmlNewParserCtxt();
1.16 daniel 10177: if (ctxt == NULL) {
10178: return(NULL);
10179: }
1.97 daniel 10180:
1.96 daniel 10181: inputStream = xmlNewInputStream(ctxt);
1.40 daniel 10182: if (inputStream == NULL) {
1.97 daniel 10183: xmlFreeParserCtxt(ctxt);
1.40 daniel 10184: return(NULL);
10185: }
10186:
1.119 daniel 10187: inputStream->filename = xmlMemStrdup(filename);
1.91 daniel 10188: inputStream->buf = buf;
10189: inputStream->base = inputStream->buf->buffer->content;
10190: inputStream->cur = inputStream->buf->buffer->content;
1.16 daniel 10191:
1.40 daniel 10192: inputPush(ctxt, inputStream);
1.110 daniel 10193: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 10194: directory = xmlParserGetDirectory(filename);
10195: if ((ctxt->directory == NULL) && (directory != NULL))
1.110 daniel 10196: ctxt->directory = directory;
1.106 daniel 10197:
1.69 daniel 10198: return(ctxt);
10199: }
10200:
10201: /**
1.181 daniel 10202: * xmlSAXParseFile:
1.69 daniel 10203: * @sax: the SAX handler block
10204: * @filename: the filename
10205: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10206: * documents
10207: *
10208: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10209: * compressed document is provided by default if found at compile-time.
10210: * It use the given SAX function block to handle the parsing callback.
10211: * If sax is NULL, fallback to the default DOM tree building routines.
10212: *
10213: * Returns the resulting document tree
10214: */
10215:
1.79 daniel 10216: xmlDocPtr
10217: xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
1.69 daniel 10218: int recovery) {
10219: xmlDocPtr ret;
10220: xmlParserCtxtPtr ctxt;
1.111 daniel 10221: char *directory = NULL;
1.69 daniel 10222:
10223: ctxt = xmlCreateFileParserCtxt(filename);
10224: if (ctxt == NULL) return(NULL);
1.74 daniel 10225: if (sax != NULL) {
1.93 veillard 10226: if (ctxt->sax != NULL)
1.119 daniel 10227: xmlFree(ctxt->sax);
1.74 daniel 10228: ctxt->sax = sax;
10229: ctxt->userData = NULL;
10230: }
1.106 daniel 10231:
1.110 daniel 10232: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 10233: directory = xmlParserGetDirectory(filename);
10234: if ((ctxt->directory == NULL) && (directory != NULL))
1.156 daniel 10235: ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
1.16 daniel 10236:
10237: xmlParseDocument(ctxt);
1.40 daniel 10238:
1.72 daniel 10239: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 10240: else {
10241: ret = NULL;
1.72 daniel 10242: xmlFreeDoc(ctxt->myDoc);
10243: ctxt->myDoc = NULL;
1.59 daniel 10244: }
1.86 daniel 10245: if (sax != NULL)
10246: ctxt->sax = NULL;
1.69 daniel 10247: xmlFreeParserCtxt(ctxt);
1.20 daniel 10248:
10249: return(ret);
10250: }
10251:
1.55 daniel 10252: /**
1.181 daniel 10253: * xmlParseFile:
1.55 daniel 10254: * @filename: the filename
10255: *
10256: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10257: * compressed document is provided by default if found at compile-time.
10258: *
1.68 daniel 10259: * Returns the resulting document tree
1.55 daniel 10260: */
10261:
1.79 daniel 10262: xmlDocPtr
10263: xmlParseFile(const char *filename) {
1.59 daniel 10264: return(xmlSAXParseFile(NULL, filename, 0));
10265: }
10266:
10267: /**
1.181 daniel 10268: * xmlRecoverFile:
1.59 daniel 10269: * @filename: the filename
10270: *
10271: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10272: * compressed document is provided by default if found at compile-time.
10273: * In the case the document is not Well Formed, a tree is built anyway
10274: *
1.68 daniel 10275: * Returns the resulting document tree
1.59 daniel 10276: */
10277:
1.79 daniel 10278: xmlDocPtr
10279: xmlRecoverFile(const char *filename) {
1.59 daniel 10280: return(xmlSAXParseFile(NULL, filename, 1));
1.55 daniel 10281: }
1.32 daniel 10282:
1.50 daniel 10283: /**
1.181 daniel 10284: * xmlCreateMemoryParserCtxt:
10285: * @buffer: a pointer to a zero terminated char array
10286: * @size: the size of the array (without the trailing 0)
1.50 daniel 10287: *
1.69 daniel 10288: * Create a parser context for an XML in-memory document.
1.50 daniel 10289: *
1.69 daniel 10290: * Returns the new parser context or NULL
1.20 daniel 10291: */
1.69 daniel 10292: xmlParserCtxtPtr
10293: xmlCreateMemoryParserCtxt(char *buffer, int size) {
1.20 daniel 10294: xmlParserCtxtPtr ctxt;
1.40 daniel 10295: xmlParserInputPtr input;
10296:
1.179 daniel 10297: if (buffer[size] != 0)
1.181 daniel 10298: return(NULL);
1.40 daniel 10299:
1.97 daniel 10300: ctxt = xmlNewParserCtxt();
1.181 daniel 10301: if (ctxt == NULL)
1.20 daniel 10302: return(NULL);
1.97 daniel 10303:
1.96 daniel 10304: input = xmlNewInputStream(ctxt);
1.40 daniel 10305: if (input == NULL) {
1.97 daniel 10306: xmlFreeParserCtxt(ctxt);
1.40 daniel 10307: return(NULL);
10308: }
1.20 daniel 10309:
1.40 daniel 10310: input->filename = NULL;
10311: input->line = 1;
10312: input->col = 1;
1.96 daniel 10313: input->buf = NULL;
1.91 daniel 10314: input->consumed = 0;
1.75 daniel 10315:
1.116 daniel 10316: input->base = BAD_CAST buffer;
10317: input->cur = BAD_CAST buffer;
1.69 daniel 10318: input->free = NULL;
1.20 daniel 10319:
1.40 daniel 10320: inputPush(ctxt, input);
1.69 daniel 10321: return(ctxt);
10322: }
10323:
10324: /**
1.181 daniel 10325: * xmlSAXParseMemory:
1.69 daniel 10326: * @sax: the SAX handler block
10327: * @buffer: an pointer to a char array
1.127 daniel 10328: * @size: the size of the array
10329: * @recovery: work in recovery mode, i.e. tries to read not Well Formed
1.69 daniel 10330: * documents
10331: *
10332: * parse an XML in-memory block and use the given SAX function block
10333: * to handle the parsing callback. If sax is NULL, fallback to the default
10334: * DOM tree building routines.
10335: *
10336: * Returns the resulting document tree
10337: */
10338: xmlDocPtr
10339: xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
10340: xmlDocPtr ret;
10341: xmlParserCtxtPtr ctxt;
10342:
10343: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10344: if (ctxt == NULL) return(NULL);
1.74 daniel 10345: if (sax != NULL) {
10346: ctxt->sax = sax;
10347: ctxt->userData = NULL;
10348: }
1.20 daniel 10349:
10350: xmlParseDocument(ctxt);
1.40 daniel 10351:
1.72 daniel 10352: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 10353: else {
10354: ret = NULL;
1.72 daniel 10355: xmlFreeDoc(ctxt->myDoc);
10356: ctxt->myDoc = NULL;
1.59 daniel 10357: }
1.86 daniel 10358: if (sax != NULL)
10359: ctxt->sax = NULL;
1.69 daniel 10360: xmlFreeParserCtxt(ctxt);
1.16 daniel 10361:
1.9 httpng 10362: return(ret);
1.17 daniel 10363: }
10364:
1.55 daniel 10365: /**
1.181 daniel 10366: * xmlParseMemory:
1.68 daniel 10367: * @buffer: an pointer to a char array
1.55 daniel 10368: * @size: the size of the array
10369: *
10370: * parse an XML in-memory block and build a tree.
10371: *
1.68 daniel 10372: * Returns the resulting document tree
1.55 daniel 10373: */
10374:
10375: xmlDocPtr xmlParseMemory(char *buffer, int size) {
1.59 daniel 10376: return(xmlSAXParseMemory(NULL, buffer, size, 0));
10377: }
10378:
10379: /**
1.181 daniel 10380: * xmlRecoverMemory:
1.68 daniel 10381: * @buffer: an pointer to a char array
1.59 daniel 10382: * @size: the size of the array
10383: *
10384: * parse an XML in-memory block and build a tree.
10385: * In the case the document is not Well Formed, a tree is built anyway
10386: *
1.68 daniel 10387: * Returns the resulting document tree
1.59 daniel 10388: */
10389:
10390: xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
10391: return(xmlSAXParseMemory(NULL, buffer, size, 1));
1.17 daniel 10392: }
10393:
10394:
1.50 daniel 10395: /**
10396: * xmlSetupParserForBuffer:
10397: * @ctxt: an XML parser context
1.123 daniel 10398: * @buffer: a xmlChar * buffer
1.50 daniel 10399: * @filename: a file name
10400: *
1.19 daniel 10401: * Setup the parser context to parse a new buffer; Clears any prior
10402: * contents from the parser context. The buffer parameter must not be
10403: * NULL, but the filename parameter can be
10404: */
1.55 daniel 10405: void
1.123 daniel 10406: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
1.17 daniel 10407: const char* filename)
10408: {
1.96 daniel 10409: xmlParserInputPtr input;
1.40 daniel 10410:
1.96 daniel 10411: input = xmlNewInputStream(ctxt);
10412: if (input == NULL) {
10413: perror("malloc");
1.119 daniel 10414: xmlFree(ctxt);
1.145 daniel 10415: return;
1.96 daniel 10416: }
10417:
10418: xmlClearParserCtxt(ctxt);
10419: if (filename != NULL)
1.119 daniel 10420: input->filename = xmlMemStrdup(filename);
1.96 daniel 10421: input->base = buffer;
10422: input->cur = buffer;
10423: inputPush(ctxt, input);
1.17 daniel 10424: }
10425:
1.123 daniel 10426: /**
10427: * xmlSAXUserParseFile:
10428: * @sax: a SAX handler
10429: * @user_data: The user data returned on SAX callbacks
10430: * @filename: a file name
10431: *
10432: * parse an XML file and call the given SAX handler routines.
10433: * Automatic support for ZLIB/Compress compressed document is provided
10434: *
10435: * Returns 0 in case of success or a error number otherwise
10436: */
1.131 daniel 10437: int
10438: xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10439: const char *filename) {
1.123 daniel 10440: int ret = 0;
10441: xmlParserCtxtPtr ctxt;
10442:
10443: ctxt = xmlCreateFileParserCtxt(filename);
10444: if (ctxt == NULL) return -1;
1.134 daniel 10445: if (ctxt->sax != &xmlDefaultSAXHandler)
10446: xmlFree(ctxt->sax);
1.123 daniel 10447: ctxt->sax = sax;
1.140 daniel 10448: if (user_data != NULL)
10449: ctxt->userData = user_data;
1.123 daniel 10450:
10451: xmlParseDocument(ctxt);
10452:
10453: if (ctxt->wellFormed)
10454: ret = 0;
10455: else {
10456: if (ctxt->errNo != 0)
10457: ret = ctxt->errNo;
10458: else
10459: ret = -1;
10460: }
10461: if (sax != NULL)
10462: ctxt->sax = NULL;
10463: xmlFreeParserCtxt(ctxt);
10464:
10465: return ret;
10466: }
10467:
10468: /**
10469: * xmlSAXUserParseMemory:
10470: * @sax: a SAX handler
10471: * @user_data: The user data returned on SAX callbacks
10472: * @buffer: an in-memory XML document input
1.127 daniel 10473: * @size: the length of the XML document in bytes
1.123 daniel 10474: *
10475: * A better SAX parsing routine.
10476: * parse an XML in-memory buffer and call the given SAX handler routines.
10477: *
10478: * Returns 0 in case of success or a error number otherwise
10479: */
10480: int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
10481: char *buffer, int size) {
10482: int ret = 0;
10483: xmlParserCtxtPtr ctxt;
10484:
10485: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10486: if (ctxt == NULL) return -1;
10487: ctxt->sax = sax;
10488: ctxt->userData = user_data;
10489:
10490: xmlParseDocument(ctxt);
10491:
10492: if (ctxt->wellFormed)
10493: ret = 0;
10494: else {
10495: if (ctxt->errNo != 0)
10496: ret = ctxt->errNo;
10497: else
10498: ret = -1;
10499: }
10500: if (sax != NULL)
10501: ctxt->sax = NULL;
10502: xmlFreeParserCtxt(ctxt);
10503:
10504: return ret;
10505: }
10506:
1.32 daniel 10507:
1.98 daniel 10508: /************************************************************************
10509: * *
1.127 daniel 10510: * Miscellaneous *
1.98 daniel 10511: * *
10512: ************************************************************************/
10513:
/**
 * xmlCleanupParser:
 *
 * Cleanup function for the XML parser. It tries to reclaim all
 * parsing related global memory allocated for the parser processing:
 * the character encoding handlers first, then the predefined entities.
 * It doesn't deallocate any document related memory. Calling this
 * function should not prevent reusing the parser.
 */

void
xmlCleanupParser(void)
{
    /* global encoding handler registry */
    xmlCleanupCharEncodingHandlers();

    /* global predefined entities */
    xmlCleanupPredefinedEntities();
}
1.98 daniel 10528:
1.50 daniel 10529: /**
10530: * xmlParserFindNodeInfo:
10531: * @ctxt: an XML parser context
10532: * @node: an XML node within the tree
10533: *
10534: * Find the parser node info struct for a given node
10535: *
1.68 daniel 10536: * Returns an xmlParserNodeInfo block pointer or NULL
1.32 daniel 10537: */
10538: const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
10539: const xmlNode* node)
10540: {
10541: unsigned long pos;
10542:
10543: /* Find position where node should be at */
10544: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
10545: if ( ctx->node_seq.buffer[pos].node == node )
10546: return &ctx->node_seq.buffer[pos];
10547: else
10548: return NULL;
10549: }
10550:
10551:
1.50 daniel 10552: /**
1.181 daniel 10553: * xmlInitNodeInfoSeq:
1.50 daniel 10554: * @seq: a node info sequence pointer
10555: *
10556: * -- Initialize (set to initial state) node info sequence
1.32 daniel 10557: */
1.55 daniel 10558: void
10559: xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 10560: {
10561: seq->length = 0;
10562: seq->maximum = 0;
10563: seq->buffer = NULL;
10564: }
10565:
1.50 daniel 10566: /**
1.181 daniel 10567: * xmlClearNodeInfoSeq:
1.50 daniel 10568: * @seq: a node info sequence pointer
10569: *
10570: * -- Clear (release memory and reinitialize) node
1.32 daniel 10571: * info sequence
10572: */
1.55 daniel 10573: void
10574: xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 10575: {
10576: if ( seq->buffer != NULL )
1.119 daniel 10577: xmlFree(seq->buffer);
1.32 daniel 10578: xmlInitNodeInfoSeq(seq);
10579: }
10580:
10581:
1.50 daniel 10582: /**
10583: * xmlParserFindNodeInfoIndex:
10584: * @seq: a node info sequence pointer
10585: * @node: an XML node pointer
10586: *
10587: *
1.32 daniel 10588: * xmlParserFindNodeInfoIndex : Find the index that the info record for
10589: * the given node is or should be at in a sorted sequence
1.68 daniel 10590: *
10591: * Returns a long indicating the position of the record
1.32 daniel 10592: */
10593: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
10594: const xmlNode* node)
10595: {
10596: unsigned long upper, lower, middle;
10597: int found = 0;
10598:
10599: /* Do a binary search for the key */
10600: lower = 1;
10601: upper = seq->length;
10602: middle = 0;
10603: while ( lower <= upper && !found) {
10604: middle = lower + (upper - lower) / 2;
10605: if ( node == seq->buffer[middle - 1].node )
10606: found = 1;
10607: else if ( node < seq->buffer[middle - 1].node )
10608: upper = middle - 1;
10609: else
10610: lower = middle + 1;
10611: }
10612:
10613: /* Return position */
10614: if ( middle == 0 || seq->buffer[middle - 1].node < node )
10615: return middle;
10616: else
10617: return middle - 1;
10618: }
10619:
10620:
1.50 daniel 10621: /**
10622: * xmlParserAddNodeInfo:
10623: * @ctxt: an XML parser context
1.68 daniel 10624: * @info: a node info sequence pointer
1.50 daniel 10625: *
10626: * Insert node info record into the sorted sequence
1.32 daniel 10627: */
1.55 daniel 10628: void
10629: xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
1.68 daniel 10630: const xmlParserNodeInfo* info)
1.32 daniel 10631: {
10632: unsigned long pos;
10633: static unsigned int block_size = 5;
10634:
10635: /* Find pos and check to see if node is already in the sequence */
1.55 daniel 10636: pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
10637: if ( pos < ctxt->node_seq.length
10638: && ctxt->node_seq.buffer[pos].node == info->node ) {
10639: ctxt->node_seq.buffer[pos] = *info;
1.32 daniel 10640: }
10641:
10642: /* Otherwise, we need to add new node to buffer */
10643: else {
10644: /* Expand buffer by 5 if needed */
1.55 daniel 10645: if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
1.32 daniel 10646: xmlParserNodeInfo* tmp_buffer;
1.55 daniel 10647: unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
10648: *(ctxt->node_seq.maximum + block_size));
1.32 daniel 10649:
1.55 daniel 10650: if ( ctxt->node_seq.buffer == NULL )
1.119 daniel 10651: tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
1.32 daniel 10652: else
1.119 daniel 10653: tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
1.32 daniel 10654:
10655: if ( tmp_buffer == NULL ) {
1.55 daniel 10656: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 10657: ctxt->sax->error(ctxt->userData, "Out of memory\n");
1.123 daniel 10658: ctxt->errNo = XML_ERR_NO_MEMORY;
1.32 daniel 10659: return;
10660: }
1.55 daniel 10661: ctxt->node_seq.buffer = tmp_buffer;
10662: ctxt->node_seq.maximum += block_size;
1.32 daniel 10663: }
10664:
10665: /* If position is not at end, move elements out of the way */
1.55 daniel 10666: if ( pos != ctxt->node_seq.length ) {
1.32 daniel 10667: unsigned long i;
10668:
1.55 daniel 10669: for ( i = ctxt->node_seq.length; i > pos; i-- )
10670: ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
1.32 daniel 10671: }
10672:
10673: /* Copy element and increase length */
1.55 daniel 10674: ctxt->node_seq.buffer[pos] = *info;
10675: ctxt->node_seq.length++;
1.32 daniel 10676: }
10677: }
1.77 daniel 10678:
1.98 daniel 10679:
10680: /**
1.181 daniel 10681: * xmlSubstituteEntitiesDefault:
1.98 daniel 10682: * @val: int 0 or 1
10683: *
10684: * Set and return the previous value for default entity support.
10685: * Initially the parser always keep entity references instead of substituting
10686: * entity values in the output. This function has to be used to change the
10687: * default parser behaviour
10688: * SAX::subtituteEntities() has to be used for changing that on a file by
10689: * file basis.
10690: *
10691: * Returns the last value for 0 for no substitution, 1 for substitution.
10692: */
10693:
10694: int
10695: xmlSubstituteEntitiesDefault(int val) {
10696: int old = xmlSubstituteEntitiesDefaultValue;
10697:
10698: xmlSubstituteEntitiesDefaultValue = val;
1.180 daniel 10699: return(old);
10700: }
10701:
10702: /**
10703: * xmlKeepBlanksDefault:
10704: * @val: int 0 or 1
10705: *
10706: * Set and return the previous value for default blanks text nodes support.
10707: * The 1.x version of the parser used an heuristic to try to detect
10708: * ignorable white spaces. As a result the SAX callback was generating
10709: * ignorableWhitespace() callbacks instead of characters() one, and when
10710: * using the DOM output text nodes containing those blanks were not generated.
10711: * The 2.x and later version will switch to the XML standard way and
10712: * ignorableWhitespace() are only generated when running the parser in
10713: * validating mode and when the current element doesn't allow CDATA or
10714: * mixed content.
10715: * This function is provided as a way to force the standard behaviour
10716: * on 1.X libs and to switch back to the old mode for compatibility when
10717: * running 1.X client code on 2.X . Upgrade of 1.X code should be done
10718: * by using xmlIsBlankNode() commodity function to detect the "empty"
10719: * nodes generated.
10720: * This value also affect autogeneration of indentation when saving code
10721: * if blanks sections are kept, indentation is not generated.
10722: *
10723: * Returns the last value for 0 for no substitution, 1 for substitution.
10724: */
10725:
10726: int
10727: xmlKeepBlanksDefault(int val) {
10728: int old = xmlKeepBlanksDefaultValue;
10729:
10730: xmlKeepBlanksDefaultValue = val;
10731: xmlIndentTreeOutput = !val;
1.98 daniel 10732: return(old);
10733: }
1.77 daniel 10734:
Webmaster