Annotation of XML/parser.c, revision 1.196
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.15 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.60 daniel 6: * Daniel.Veillard@w3.org
1.1 veillard 7: */
8:
1.26 daniel 9: #ifdef WIN32
1.138 daniel 10: #include "win32config.h"
1.26 daniel 11: #else
1.121 daniel 12: #include "config.h"
1.26 daniel 13: #endif
1.121 daniel 14:
1.1 veillard 15: #include <stdio.h>
1.121 daniel 16: #include <string.h> /* for memset() only */
17: #ifdef HAVE_CTYPE_H
1.1 veillard 18: #include <ctype.h>
1.121 daniel 19: #endif
20: #ifdef HAVE_STDLIB_H
1.50 daniel 21: #include <stdlib.h>
1.121 daniel 22: #endif
23: #ifdef HAVE_SYS_STAT_H
1.9 httpng 24: #include <sys/stat.h>
1.121 daniel 25: #endif
1.9 httpng 26: #ifdef HAVE_FCNTL_H
27: #include <fcntl.h>
28: #endif
1.10 httpng 29: #ifdef HAVE_UNISTD_H
30: #include <unistd.h>
31: #endif
1.20 daniel 32: #ifdef HAVE_ZLIB_H
33: #include <zlib.h>
34: #endif
1.1 veillard 35:
1.188 daniel 36: #include <libxml/xmlmemory.h>
37: #include <libxml/tree.h>
38: #include <libxml/parser.h>
39: #include <libxml/entities.h>
40: #include <libxml/encoding.h>
41: #include <libxml/valid.h>
42: #include <libxml/parserInternals.h>
43: #include <libxml/xmlIO.h>
1.193 daniel 44: #include <libxml/uri.h>
1.122 daniel 45: #include "xml-error.h"
1.1 veillard 46:
1.140 daniel 47: #define XML_PARSER_BIG_BUFFER_SIZE 1000
48: #define XML_PARSER_BUFFER_SIZE 100
49:
1.188 daniel 50: const char *xmlParserVersion = LIBXML_VERSION_STRING;
1.160 daniel 51: int xmlGetWarningsDefaultValue = 1;
1.86 daniel 52:
1.139 daniel 53: /*
54: * List of XML prefixed PI allowed by W3C specs
55: */
56:
57: const char *xmlW3CPIs[] = {
58: "xml-stylesheet",
59: NULL
60: };
1.91 daniel 61:
1.151 daniel 62: void xmlParserHandleReference(xmlParserCtxtPtr ctxt);
63: void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
64: xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
65: const xmlChar **str);
1.91 daniel 66: /************************************************************************
67: * *
68: * Input handling functions for progressive parsing *
69: * *
70: ************************************************************************/
71:
72: /* #define DEBUG_INPUT */
1.140 daniel 73: /* #define DEBUG_STACK */
74: /* #define DEBUG_PUSH */
75:
1.91 daniel 76:
/* Threshold, in xmlChars, used by the input grow/shrink helpers below. */
#define INPUT_CHUNK 250
/* we need to keep enough input to show errors in context */
#define LINE_LEN 80

#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/**
 * check_buffer:
 * @in:  an XML parser input
 *
 * Debug-only consistency check (compiled when DEBUG_INPUT is defined).
 * Reports on stderr when in->base no longer matches the underlying
 * buffer content, or when in->cur lies outside [base, base + use],
 * then dumps the buffer geometry.
 */
void check_buffer(xmlParserInputPtr in) {
    if (in->base != in->buf->buffer->content) {
        fprintf(stderr, "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        fprintf(stderr, "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + in->buf->buffer->use) {
        fprintf(stderr, "xmlParserInput: cur > base + use problem\n");
    }
    /*
     * Pointers are printed with %p and the integral values are cast to
     * int explicitly, so every argument matches its conversion
     * specifier whatever the platform pointer width is.
     */
    fprintf(stderr,"buffer %p : content %p, cur %d, use %d, size %d\n",
            (const void *) in, (const void *) in->buf->buffer->content,
            (int) (in->cur - in->base),
            (int) in->buf->buffer->use, (int) in->buf->buffer->size);
}

#else
#define CHECK_BUFFER(in)
#endif
102:
1.91 daniel 103:
104: /**
105: * xmlParserInputRead:
106: * @in: an XML parser input
107: * @len: an indicative size for the lookahead
108: *
109: * This function refresh the input for the parser. It doesn't try to
110: * preserve pointers to the input buffer, and discard already read data
111: *
1.123 daniel 112: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
1.91 daniel 113: * end of this entity
114: */
115: int
116: xmlParserInputRead(xmlParserInputPtr in, int len) {
117: int ret;
118: int used;
119: int index;
120:
121: #ifdef DEBUG_INPUT
122: fprintf(stderr, "Read\n");
123: #endif
124: if (in->buf == NULL) return(-1);
125: if (in->base == NULL) return(-1);
126: if (in->cur == NULL) return(-1);
127: if (in->buf->buffer == NULL) return(-1);
128:
129: CHECK_BUFFER(in);
130:
131: used = in->cur - in->buf->buffer->content;
132: ret = xmlBufferShrink(in->buf->buffer, used);
133: if (ret > 0) {
134: in->cur -= ret;
135: in->consumed += ret;
136: }
137: ret = xmlParserInputBufferRead(in->buf, len);
138: if (in->base != in->buf->buffer->content) {
139: /*
140: * the buffer has been realloced
141: */
142: index = in->cur - in->base;
143: in->base = in->buf->buffer->content;
144: in->cur = &in->buf->buffer->content[index];
145: }
146:
147: CHECK_BUFFER(in);
148:
149: return(ret);
150: }
151:
152: /**
153: * xmlParserInputGrow:
154: * @in: an XML parser input
155: * @len: an indicative size for the lookahead
156: *
157: * This function increase the input for the parser. It tries to
158: * preserve pointers to the input buffer, and keep already read data
159: *
1.123 daniel 160: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
1.91 daniel 161: * end of this entity
162: */
163: int
164: xmlParserInputGrow(xmlParserInputPtr in, int len) {
165: int ret;
166: int index;
167:
168: #ifdef DEBUG_INPUT
169: fprintf(stderr, "Grow\n");
170: #endif
171: if (in->buf == NULL) return(-1);
172: if (in->base == NULL) return(-1);
173: if (in->cur == NULL) return(-1);
174: if (in->buf->buffer == NULL) return(-1);
175:
176: CHECK_BUFFER(in);
177:
178: index = in->cur - in->base;
179: if (in->buf->buffer->use > index + INPUT_CHUNK) {
180:
181: CHECK_BUFFER(in);
182:
183: return(0);
184: }
1.189 daniel 185: if (in->buf->readcallback != NULL)
1.140 daniel 186: ret = xmlParserInputBufferGrow(in->buf, len);
187: else
188: return(0);
1.135 daniel 189:
190: /*
191: * NOTE : in->base may be a "dandling" i.e. freed pointer in this
192: * block, but we use it really as an integer to do some
193: * pointer arithmetic. Insure will raise it as a bug but in
194: * that specific case, that's not !
195: */
1.91 daniel 196: if (in->base != in->buf->buffer->content) {
197: /*
198: * the buffer has been realloced
199: */
200: index = in->cur - in->base;
201: in->base = in->buf->buffer->content;
202: in->cur = &in->buf->buffer->content[index];
203: }
204:
205: CHECK_BUFFER(in);
206:
207: return(ret);
208: }
209:
210: /**
211: * xmlParserInputShrink:
212: * @in: an XML parser input
213: *
214: * This function removes used input for the parser.
215: */
216: void
217: xmlParserInputShrink(xmlParserInputPtr in) {
218: int used;
219: int ret;
220: int index;
221:
222: #ifdef DEBUG_INPUT
223: fprintf(stderr, "Shrink\n");
224: #endif
225: if (in->buf == NULL) return;
226: if (in->base == NULL) return;
227: if (in->cur == NULL) return;
228: if (in->buf->buffer == NULL) return;
229:
230: CHECK_BUFFER(in);
231:
232: used = in->cur - in->buf->buffer->content;
233: if (used > INPUT_CHUNK) {
1.110 daniel 234: ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1.91 daniel 235: if (ret > 0) {
236: in->cur -= ret;
237: in->consumed += ret;
238: }
239: }
240:
241: CHECK_BUFFER(in);
242:
243: if (in->buf->buffer->use > INPUT_CHUNK) {
244: return;
245: }
246: xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
247: if (in->base != in->buf->buffer->content) {
248: /*
249: * the buffer has been realloced
250: */
251: index = in->cur - in->base;
252: in->base = in->buf->buffer->content;
253: in->cur = &in->buf->buffer->content[index];
254: }
255:
256: CHECK_BUFFER(in);
257: }
258:
1.45 daniel 259: /************************************************************************
260: * *
261: * Parser stacks related functions and macros *
262: * *
263: ************************************************************************/
1.79 daniel 264:
265: int xmlSubstituteEntitiesDefaultValue = 0;
1.100 daniel 266: int xmlDoValidityCheckingDefaultValue = 0;
1.180 daniel 267: int xmlKeepBlanksDefaultValue = 1;
1.135 daniel 268: xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
269: const xmlChar ** str);
1.79 daniel 270:
1.1 veillard 271: /*
1.40 daniel 272: * Generic function for accessing stacks in the Parser Context
1.1 veillard 273: */
274:
1.140 daniel 275: #define PUSH_AND_POP(scope, type, name) \
276: scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
1.31 daniel 277: if (ctxt->name##Nr >= ctxt->name##Max) { \
278: ctxt->name##Max *= 2; \
1.119 daniel 279: ctxt->name##Tab = (void *) xmlRealloc(ctxt->name##Tab, \
1.40 daniel 280: ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
281: if (ctxt->name##Tab == NULL) { \
1.31 daniel 282: fprintf(stderr, "realloc failed !\n"); \
1.145 daniel 283: return(0); \
1.31 daniel 284: } \
285: } \
1.40 daniel 286: ctxt->name##Tab[ctxt->name##Nr] = value; \
287: ctxt->name = value; \
288: return(ctxt->name##Nr++); \
1.31 daniel 289: } \
1.140 daniel 290: scope type name##Pop(xmlParserCtxtPtr ctxt) { \
1.69 daniel 291: type ret; \
1.40 daniel 292: if (ctxt->name##Nr <= 0) return(0); \
293: ctxt->name##Nr--; \
1.50 daniel 294: if (ctxt->name##Nr > 0) \
295: ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
296: else \
297: ctxt->name = NULL; \
1.69 daniel 298: ret = ctxt->name##Tab[ctxt->name##Nr]; \
299: ctxt->name##Tab[ctxt->name##Nr] = 0; \
300: return(ret); \
1.31 daniel 301: } \
302:
1.140 daniel 303: PUSH_AND_POP(extern, xmlParserInputPtr, input)
304: PUSH_AND_POP(extern, xmlNodePtr, node)
305: PUSH_AND_POP(extern, xmlChar*, name)
1.40 daniel 306:
1.176 daniel 307: int spacePush(xmlParserCtxtPtr ctxt, int val) {
308: if (ctxt->spaceNr >= ctxt->spaceMax) {
309: ctxt->spaceMax *= 2;
310: ctxt->spaceTab = (void *) xmlRealloc(ctxt->spaceTab,
311: ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
312: if (ctxt->spaceTab == NULL) {
313: fprintf(stderr, "realloc failed !\n");
314: return(0);
315: }
316: }
317: ctxt->spaceTab[ctxt->spaceNr] = val;
318: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
319: return(ctxt->spaceNr++);
320: }
321:
322: int spacePop(xmlParserCtxtPtr ctxt) {
323: int ret;
324: if (ctxt->spaceNr <= 0) return(0);
325: ctxt->spaceNr--;
326: if (ctxt->spaceNr > 0)
327: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
328: else
329: ctxt->space = NULL;
330: ret = ctxt->spaceTab[ctxt->spaceNr];
331: ctxt->spaceTab[ctxt->spaceNr] = -1;
332: return(ret);
333: }
334:
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * All of them expect a variable named ctxt (the parser context) to be
 * in scope at the point of use.
 *
 * Dirty macros, i.e. one needs to make assumptions on the context to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value if
 *           compiled in ISO-Latin or UTF-8.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substrings.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
 *   CUR_CHAR Return the current char as an int as well as its length.
 *
 * WARNING: SKIP, SHRINK, GROW, NEXTL and COPY_BUF expand to several
 * statements which are not wrapped in a do { } while (0) block. Used
 * as the body of an unbraced if/else/for/while, only their first
 * statement is governed by the condition: always brace the call site.
 */

/* -1 while a pending ctxt->token is set, else the byte at the cursor. */
#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
/* The pending ctxt->token if non-zero, else the byte at the cursor. */
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * Advance the cursor and nbChars by val, hand a '%' or '&' found at the
 * new position to the reference handlers, then pop the current input
 * if it is exhausted and cannot be grown.
 */
#define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val); \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
            xmlPopInput(ctxt)

/* Shrink the current input, popping it if it turns out to be exhausted. */
#define SHRINK xmlParserInputShrink(ctxt->input); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
            xmlPopInput(ctxt)

/* Grow the current input, popping it if it turns out to be exhausted. */
#define GROW xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
            xmlPopInput(ctxt)

/* NOTE: the expansion of the next two macros already ends with a ';'. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt);

#define NEXT xmlNextChar(ctxt);

/*
 * Advance by l xmlChars (the length of the current char), keeping the
 * line/column counters up to date and clearing any pending token.
 * Unlike SKIP, it does not try to grow or pop the input.
 */
#define NEXTL(l) \
    if (*(ctxt->input->cur) == '\n') { \
        ctxt->input->line++; ctxt->input->col = 1; \
    } else ctxt->input->col++; \
    ctxt->token = 0; ctxt->input->cur += l; \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);

/* Both expansions end with a ';'; l must be an lvalue of type int. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l);
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l);

/*
 * Append char v (of encoded length l) to buffer b at index i and
 * advance i. The arguments are not parenthesized and i is evaluated
 * more than once: pass plain variables only.
 */
#define COPY_BUF(l,b,i,v) \
    if (l == 1) b[i++] = (xmlChar) v; \
    else i += xmlCopyChar(l,&b[i],v);
1.151 daniel 402:
403: /**
404: * xmlNextChar:
405: * @ctxt: the XML parser context
406: *
407: * Skip to the next char input char.
408: */
1.55 daniel 409:
1.151 daniel 410: void
411: xmlNextChar(xmlParserCtxtPtr ctxt) {
1.176 daniel 412: /*
413: * TODO: 2.11 End-of-Line Handling
414: * the literal two-character sequence "#xD#xA" or a standalone
415: * literal #xD, an XML processor must pass to the application
416: * the single character #xA.
417: */
1.151 daniel 418: if (ctxt->token != 0) ctxt->token = 0;
419: else {
420: if ((*ctxt->input->cur == 0) &&
421: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
422: (ctxt->instate != XML_PARSER_COMMENT)) {
423: /*
424: * If we are at the end of the current entity and
425: * the context allows it, we pop consumed entities
426: * automatically.
427: * TODO: the auto closing should be blocked in other cases
428: */
429: xmlPopInput(ctxt);
430: } else {
431: if (*(ctxt->input->cur) == '\n') {
432: ctxt->input->line++; ctxt->input->col = 1;
433: } else ctxt->input->col++;
434: if (ctxt->encoding == NULL) {
435: /*
436: * We are supposed to handle UTF8, check it's valid
437: * From rfc2044: encoding of the Unicode values on UTF-8:
438: *
439: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
440: * 0000 0000-0000 007F 0xxxxxxx
441: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
442: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
443: *
1.160 daniel 444: * Check for the 0x110000 limit too
1.151 daniel 445: */
446: const unsigned char *cur = ctxt->input->cur;
447: unsigned char c;
1.91 daniel 448:
1.151 daniel 449: c = *cur;
450: if (c & 0x80) {
451: if (cur[1] == 0)
452: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
453: if ((cur[1] & 0xc0) != 0x80)
454: goto encoding_error;
455: if ((c & 0xe0) == 0xe0) {
456: unsigned int val;
457:
458: if (cur[2] == 0)
459: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
460: if ((cur[2] & 0xc0) != 0x80)
461: goto encoding_error;
462: if ((c & 0xf0) == 0xf0) {
463: if (cur[3] == 0)
464: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
465: if (((c & 0xf8) != 0xf0) ||
466: ((cur[3] & 0xc0) != 0x80))
467: goto encoding_error;
468: /* 4-byte code */
469: ctxt->input->cur += 4;
470: val = (cur[0] & 0x7) << 18;
471: val |= (cur[1] & 0x3f) << 12;
472: val |= (cur[2] & 0x3f) << 6;
473: val |= cur[3] & 0x3f;
474: } else {
475: /* 3-byte code */
476: ctxt->input->cur += 3;
477: val = (cur[0] & 0xf) << 12;
478: val |= (cur[1] & 0x3f) << 6;
479: val |= cur[2] & 0x3f;
480: }
481: if (((val > 0xd7ff) && (val < 0xe000)) ||
482: ((val > 0xfffd) && (val < 0x10000)) ||
1.160 daniel 483: (val >= 0x110000)) {
1.151 daniel 484: if ((ctxt->sax != NULL) &&
485: (ctxt->sax->error != NULL))
486: ctxt->sax->error(ctxt->userData,
1.196 ! daniel 487: "Char 0x%X out of allowed range\n", val);
1.151 daniel 488: ctxt->errNo = XML_ERR_INVALID_ENCODING;
489: ctxt->wellFormed = 0;
1.180 daniel 490: ctxt->disableSAX = 1;
1.151 daniel 491: }
492: } else
493: /* 2-byte code */
494: ctxt->input->cur += 2;
495: } else
496: /* 1-byte code */
497: ctxt->input->cur++;
498: } else {
499: /*
500: * Assume it's a fixed lenght encoding (1) with
501: * a compatibke encoding for the ASCII set, since
502: * XML constructs only use < 128 chars
503: */
504: ctxt->input->cur++;
505: }
506: ctxt->nbChars++;
507: if (*ctxt->input->cur == 0)
508: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
509: }
510: }
1.154 daniel 511: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
512: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
1.168 daniel 513: if ((*ctxt->input->cur == 0) &&
514: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
515: xmlPopInput(ctxt);
1.151 daniel 516: return;
517: encoding_error:
518: /*
519: * If we detect an UTF8 error that probably mean that the
520: * input encoding didn't get properly advertized in the
521: * declaration header. Report the error and switch the encoding
522: * to ISO-Latin-1 (if you don't like this policy, just declare the
523: * encoding !)
524: */
525: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
526: ctxt->sax->error(ctxt->userData,
527: "Input is not proper UTF-8, indicate encoding !\n");
528: ctxt->errNo = XML_ERR_INVALID_ENCODING;
529:
530: ctxt->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
531: ctxt->input->cur++;
532: return;
533: }
1.42 daniel 534:
1.152 daniel 535: /**
536: * xmlCurrentChar:
537: * @ctxt: the XML parser context
538: * @len: pointer to the length of the char read
539: *
540: * The current char value, if using UTF-8 this may actaully span multiple
1.180 daniel 541: * bytes in the input buffer. Implement the end of line normalization:
542: * 2.11 End-of-Line Handling
543: * Wherever an external parsed entity or the literal entity value
544: * of an internal parsed entity contains either the literal two-character
545: * sequence "#xD#xA" or a standalone literal #xD, an XML processor
546: * must pass to the application the single character #xA.
547: * This behavior can conveniently be produced by normalizing all
548: * line breaks to #xA on input, before parsing.)
1.152 daniel 549: *
550: * Returns the current char value and its lenght
551: */
552:
553: int
554: xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
555: if (ctxt->token != 0) {
556: *len = 0;
557: return(ctxt->token);
558: }
559: if (ctxt->encoding == NULL) {
560: /*
561: * We are supposed to handle UTF8, check it's valid
562: * From rfc2044: encoding of the Unicode values on UTF-8:
563: *
564: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
565: * 0000 0000-0000 007F 0xxxxxxx
566: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
567: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
568: *
1.160 daniel 569: * Check for the 0x110000 limit too
1.152 daniel 570: */
571: const unsigned char *cur = ctxt->input->cur;
572: unsigned char c;
573: unsigned int val;
574:
575: c = *cur;
576: if (c & 0x80) {
577: if (cur[1] == 0)
578: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
579: if ((cur[1] & 0xc0) != 0x80)
580: goto encoding_error;
581: if ((c & 0xe0) == 0xe0) {
582:
583: if (cur[2] == 0)
584: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
585: if ((cur[2] & 0xc0) != 0x80)
586: goto encoding_error;
587: if ((c & 0xf0) == 0xf0) {
588: if (cur[3] == 0)
589: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
590: if (((c & 0xf8) != 0xf0) ||
591: ((cur[3] & 0xc0) != 0x80))
592: goto encoding_error;
593: /* 4-byte code */
594: *len = 4;
595: val = (cur[0] & 0x7) << 18;
596: val |= (cur[1] & 0x3f) << 12;
597: val |= (cur[2] & 0x3f) << 6;
598: val |= cur[3] & 0x3f;
599: } else {
600: /* 3-byte code */
601: *len = 3;
602: val = (cur[0] & 0xf) << 12;
603: val |= (cur[1] & 0x3f) << 6;
604: val |= cur[2] & 0x3f;
605: }
606: } else {
607: /* 2-byte code */
608: *len = 2;
609: val = (cur[0] & 0x1f) << 6;
1.168 daniel 610: val |= cur[1] & 0x3f;
1.152 daniel 611: }
612: if (!IS_CHAR(val)) {
613: if ((ctxt->sax != NULL) &&
614: (ctxt->sax->error != NULL))
615: ctxt->sax->error(ctxt->userData,
1.196 ! daniel 616: "Char 0x%X out of allowed range\n", val);
1.152 daniel 617: ctxt->errNo = XML_ERR_INVALID_ENCODING;
618: ctxt->wellFormed = 0;
1.180 daniel 619: ctxt->disableSAX = 1;
1.152 daniel 620: }
621: return(val);
622: } else {
623: /* 1-byte code */
624: *len = 1;
1.180 daniel 625: if (*ctxt->input->cur == 0xD) {
626: if (ctxt->input->cur[1] == 0xA) {
627: ctxt->nbChars++;
628: ctxt->input->cur++;
629: }
630: return(0xA);
631: }
1.152 daniel 632: return((int) *ctxt->input->cur);
633: }
634: }
635: /*
636: * Assume it's a fixed lenght encoding (1) with
637: * a compatibke encoding for the ASCII set, since
638: * XML constructs only use < 128 chars
639: */
640: *len = 1;
1.180 daniel 641: if (*ctxt->input->cur == 0xD) {
642: if (ctxt->input->cur[1] == 0xA) {
643: ctxt->nbChars++;
644: ctxt->input->cur++;
645: }
646: return(0xA);
647: }
1.152 daniel 648: return((int) *ctxt->input->cur);
649: encoding_error:
650: /*
651: * If we detect an UTF8 error that probably mean that the
652: * input encoding didn't get properly advertized in the
653: * declaration header. Report the error and switch the encoding
654: * to ISO-Latin-1 (if you don't like this policy, just declare the
655: * encoding !)
656: */
657: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
658: ctxt->sax->error(ctxt->userData,
659: "Input is not proper UTF-8, indicate encoding !\n");
660: ctxt->errNo = XML_ERR_INVALID_ENCODING;
661:
662: ctxt->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
663: *len = 1;
664: return((int) *ctxt->input->cur);
665: }
666:
667: /**
1.162 daniel 668: * xmlStringCurrentChar:
669: * @ctxt: the XML parser context
670: * @cur: pointer to the beginning of the char
671: * @len: pointer to the length of the char read
672: *
673: * The current char value, if using UTF-8 this may actaully span multiple
674: * bytes in the input buffer.
675: *
676: * Returns the current char value and its lenght
677: */
678:
679: int
680: xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
681: if (ctxt->encoding == NULL) {
682: /*
683: * We are supposed to handle UTF8, check it's valid
684: * From rfc2044: encoding of the Unicode values on UTF-8:
685: *
686: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
687: * 0000 0000-0000 007F 0xxxxxxx
688: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
689: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
690: *
691: * Check for the 0x110000 limit too
692: */
693: unsigned char c;
694: unsigned int val;
695:
696: c = *cur;
697: if (c & 0x80) {
698: if ((cur[1] & 0xc0) != 0x80)
699: goto encoding_error;
700: if ((c & 0xe0) == 0xe0) {
701:
702: if ((cur[2] & 0xc0) != 0x80)
703: goto encoding_error;
704: if ((c & 0xf0) == 0xf0) {
705: if (((c & 0xf8) != 0xf0) ||
706: ((cur[3] & 0xc0) != 0x80))
707: goto encoding_error;
708: /* 4-byte code */
709: *len = 4;
710: val = (cur[0] & 0x7) << 18;
711: val |= (cur[1] & 0x3f) << 12;
712: val |= (cur[2] & 0x3f) << 6;
713: val |= cur[3] & 0x3f;
714: } else {
715: /* 3-byte code */
716: *len = 3;
717: val = (cur[0] & 0xf) << 12;
718: val |= (cur[1] & 0x3f) << 6;
719: val |= cur[2] & 0x3f;
720: }
721: } else {
722: /* 2-byte code */
723: *len = 2;
724: val = (cur[0] & 0x1f) << 6;
725: val |= cur[2] & 0x3f;
726: }
727: if (!IS_CHAR(val)) {
728: if ((ctxt->sax != NULL) &&
729: (ctxt->sax->error != NULL))
730: ctxt->sax->error(ctxt->userData,
1.196 ! daniel 731: "Char 0x%X out of allowed range\n", val);
1.162 daniel 732: ctxt->errNo = XML_ERR_INVALID_ENCODING;
733: ctxt->wellFormed = 0;
1.180 daniel 734: ctxt->disableSAX = 1;
1.162 daniel 735: }
736: return(val);
737: } else {
738: /* 1-byte code */
739: *len = 1;
740: return((int) *cur);
741: }
742: }
743: /*
744: * Assume it's a fixed lenght encoding (1) with
745: * a compatibke encoding for the ASCII set, since
746: * XML constructs only use < 128 chars
747: */
748: *len = 1;
749: return((int) *cur);
750: encoding_error:
751: /*
752: * If we detect an UTF8 error that probably mean that the
753: * input encoding didn't get properly advertized in the
754: * declaration header. Report the error and switch the encoding
755: * to ISO-Latin-1 (if you don't like this policy, just declare the
756: * encoding !)
757: */
758: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
759: ctxt->sax->error(ctxt->userData,
760: "Input is not proper UTF-8, indicate encoding !\n");
761: ctxt->errNo = XML_ERR_INVALID_ENCODING;
762:
763: *len = 1;
764: return((int) *cur);
765: }
766:
767: /**
1.152 daniel 768: * xmlCopyChar:
769: * @len: pointer to the length of the char read (or zero)
770: * @array: pointer to an arry of xmlChar
771: * @val: the char value
772: *
773: * append the char value in the array
774: *
775: * Returns the number of xmlChar written
776: */
777:
778: int
779: xmlCopyChar(int len, xmlChar *out, int val) {
780: /*
781: * We are supposed to handle UTF8, check it's valid
782: * From rfc2044: encoding of the Unicode values on UTF-8:
783: *
784: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
785: * 0000 0000-0000 007F 0xxxxxxx
786: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
787: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
788: */
789: if (len == 0) {
790: if (val < 0) len = 0;
1.160 daniel 791: else if (val < 0x80) len = 1;
792: else if (val < 0x800) len = 2;
793: else if (val < 0x10000) len = 3;
794: else if (val < 0x110000) len = 4;
1.152 daniel 795: if (len == 0) {
796: fprintf(stderr, "Internal error, xmlCopyChar 0x%X out of bound\n",
797: val);
798: return(0);
799: }
800: }
801: if (len > 1) {
802: int bits;
803:
804: if (val < 0x80) { *out++= val; bits= -6; }
805: else if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
806: else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6; }
807: else { *out++= (val >> 18) | 0xF0; bits= 12; }
808:
809: for ( ; bits >= 0; bits-= 6)
810: *out++= ((val >> bits) & 0x3F) | 0x80 ;
811:
812: return(len);
813: }
814: *out = (xmlChar) val;
815: return(1);
1.155 daniel 816: }
817:
818: /**
819: * xmlSkipBlankChars:
820: * @ctxt: the XML parser context
821: *
822: * skip all blanks character found at that point in the input streams.
823: * It pops up finished entities in the process if allowable at that point.
824: *
825: * Returns the number of space chars skipped
826: */
827:
828: int
829: xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
830: int cur, res = 0;
831:
832: do {
833: cur = CUR;
834: while (IS_BLANK(cur)) {
835: NEXT;
836: cur = CUR;
837: res++;
838: }
839: while ((cur == 0) && (ctxt->inputNr > 1) &&
840: (ctxt->instate != XML_PARSER_COMMENT)) {
841: xmlPopInput(ctxt);
842: cur = CUR;
843: }
844: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
845: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
846: } while (IS_BLANK(cur));
847: return(res);
1.152 daniel 848: }
849:
1.97 daniel 850: /************************************************************************
851: * *
852: * Commodity functions to handle entities processing *
853: * *
854: ************************************************************************/
1.40 daniel 855:
1.50 daniel 856: /**
857: * xmlPopInput:
858: * @ctxt: an XML parser context
859: *
1.40 daniel 860: * xmlPopInput: the current input pointed by ctxt->input came to an end
861: * pop it and return the next char.
1.45 daniel 862: *
1.123 daniel 863: * Returns the current xmlChar in the parser context
1.40 daniel 864: */
1.123 daniel 865: xmlChar
1.55 daniel 866: xmlPopInput(xmlParserCtxtPtr ctxt) {
1.40 daniel 867: if (ctxt->inputNr == 1) return(0); /* End of main Input */
1.69 daniel 868: xmlFreeInputStream(inputPop(ctxt));
1.97 daniel 869: if ((*ctxt->input->cur == 0) &&
870: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
871: return(xmlPopInput(ctxt));
1.40 daniel 872: return(CUR);
873: }
874:
1.50 daniel 875: /**
876: * xmlPushInput:
877: * @ctxt: an XML parser context
878: * @input: an XML parser input fragment (entity, XML fragment ...).
879: *
1.40 daniel 880: * xmlPushInput: switch to a new input stream which is stacked on top
881: * of the previous one(s).
882: */
1.55 daniel 883: void
884: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1.40 daniel 885: if (input == NULL) return;
886: inputPush(ctxt, input);
1.164 daniel 887: GROW;
1.40 daniel 888: }
889:
1.50 daniel 890: /**
1.69 daniel 891: * xmlFreeInputStream:
1.127 daniel 892: * @input: an xmlParserInputPtr
1.69 daniel 893: *
894: * Free up an input stream.
895: */
896: void
897: xmlFreeInputStream(xmlParserInputPtr input) {
898: if (input == NULL) return;
899:
1.119 daniel 900: if (input->filename != NULL) xmlFree((char *) input->filename);
901: if (input->directory != NULL) xmlFree((char *) input->directory);
1.164 daniel 902: if (input->encoding != NULL) xmlFree((char *) input->encoding);
1.165 daniel 903: if (input->version != NULL) xmlFree((char *) input->version);
1.69 daniel 904: if ((input->free != NULL) && (input->base != NULL))
1.123 daniel 905: input->free((xmlChar *) input->base);
1.93 veillard 906: if (input->buf != NULL)
907: xmlFreeParserInputBuffer(input->buf);
1.69 daniel 908: memset(input, -1, sizeof(xmlParserInput));
1.119 daniel 909: xmlFree(input);
1.69 daniel 910: }
911:
912: /**
1.96 daniel 913: * xmlNewInputStream:
914: * @ctxt: an XML parser context
915: *
916: * Create a new input stream structure
917: * Returns the new input stream or NULL
918: */
919: xmlParserInputPtr
920: xmlNewInputStream(xmlParserCtxtPtr ctxt) {
921: xmlParserInputPtr input;
922:
1.119 daniel 923: input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1.96 daniel 924: if (input == NULL) {
1.190 daniel 925: if (ctxt != NULL) {
926: ctxt->errNo = XML_ERR_NO_MEMORY;
927: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
928: ctxt->sax->error(ctxt->userData,
929: "malloc: couldn't allocate a new input stream\n");
930: ctxt->errNo = XML_ERR_NO_MEMORY;
931: }
1.96 daniel 932: return(NULL);
933: }
1.165 daniel 934: memset(input, 0, sizeof(xmlParserInput));
1.96 daniel 935: input->line = 1;
936: input->col = 1;
1.167 daniel 937: input->standalone = -1;
1.96 daniel 938: return(input);
939: }
940:
941: /**
1.190 daniel 942: * xmlNewIOInputStream:
943: * @ctxt: an XML parser context
944: * @input: an I/O Input
945: * @enc: the charset encoding if known
946: *
947: * Create a new input stream structure encapsulating the @input into
948: * a stream suitable for the parser.
949: *
950: * Returns the new input stream or NULL
951: */
952: xmlParserInputPtr
953: xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
954: xmlCharEncoding enc) {
955: xmlParserInputPtr inputStream;
956:
957: inputStream = xmlNewInputStream(ctxt);
958: if (inputStream == NULL) {
959: return(NULL);
960: }
961: inputStream->filename = NULL;
962: inputStream->buf = input;
963: inputStream->base = inputStream->buf->buffer->content;
964: inputStream->cur = inputStream->buf->buffer->content;
965: if (enc != XML_CHAR_ENCODING_NONE) {
966: xmlSwitchEncoding(ctxt, enc);
967: }
968:
969: return(inputStream);
970: }
971:
972: /**
1.50 daniel 973: * xmlNewEntityInputStream:
974: * @ctxt: an XML parser context
975: * @entity: an Entity pointer
976: *
1.82 daniel 977: * Create a new input stream based on an xmlEntityPtr
1.113 daniel 978: *
979: * Returns the new input stream or NULL
1.45 daniel 980: */
1.50 daniel 981: xmlParserInputPtr
982: xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 983: xmlParserInputPtr input;
984:
985: if (entity == NULL) {
1.123 daniel 986: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 987: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 988: ctxt->sax->error(ctxt->userData,
1.45 daniel 989: "internal: xmlNewEntityInputStream entity = NULL\n");
1.123 daniel 990: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.50 daniel 991: return(NULL);
1.45 daniel 992: }
993: if (entity->content == NULL) {
1.159 daniel 994: switch (entity->etype) {
1.113 daniel 995: case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1.123 daniel 996: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.113 daniel 997: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
998: ctxt->sax->error(ctxt->userData,
999: "xmlNewEntityInputStream unparsed entity !\n");
1000: break;
1001: case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1002: case XML_EXTERNAL_PARAMETER_ENTITY:
1.116 daniel 1003: return(xmlLoadExternalEntity((char *) entity->SystemID,
1.142 daniel 1004: (char *) entity->ExternalID, ctxt));
1.113 daniel 1005: case XML_INTERNAL_GENERAL_ENTITY:
1006: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1007: ctxt->sax->error(ctxt->userData,
1008: "Internal entity %s without content !\n", entity->name);
1009: break;
1010: case XML_INTERNAL_PARAMETER_ENTITY:
1.123 daniel 1011: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.113 daniel 1012: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1013: ctxt->sax->error(ctxt->userData,
1014: "Internal parameter entity %s without content !\n", entity->name);
1015: break;
1016: case XML_INTERNAL_PREDEFINED_ENTITY:
1.123 daniel 1017: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.113 daniel 1018: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1019: ctxt->sax->error(ctxt->userData,
1020: "Predefined entity %s without content !\n", entity->name);
1021: break;
1022: }
1.50 daniel 1023: return(NULL);
1.45 daniel 1024: }
1.96 daniel 1025: input = xmlNewInputStream(ctxt);
1.45 daniel 1026: if (input == NULL) {
1.50 daniel 1027: return(NULL);
1.45 daniel 1028: }
1.156 daniel 1029: input->filename = (char *) entity->SystemID;
1.45 daniel 1030: input->base = entity->content;
1031: input->cur = entity->content;
1.140 daniel 1032: input->length = entity->length;
1.50 daniel 1033: return(input);
1.45 daniel 1034: }
1035:
1.59 daniel 1036: /**
1037: * xmlNewStringInputStream:
1038: * @ctxt: an XML parser context
1.96 daniel 1039: * @buffer: an memory buffer
1.59 daniel 1040: *
1041: * Create a new input stream based on a memory buffer.
1.68 daniel 1042: * Returns the new input stream
1.59 daniel 1043: */
1044: xmlParserInputPtr
1.123 daniel 1045: xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1.59 daniel 1046: xmlParserInputPtr input;
1047:
1.96 daniel 1048: if (buffer == NULL) {
1.123 daniel 1049: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 1050: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1051: ctxt->sax->error(ctxt->userData,
1.59 daniel 1052: "internal: xmlNewStringInputStream string = NULL\n");
1053: return(NULL);
1054: }
1.96 daniel 1055: input = xmlNewInputStream(ctxt);
1.59 daniel 1056: if (input == NULL) {
1057: return(NULL);
1058: }
1.96 daniel 1059: input->base = buffer;
1060: input->cur = buffer;
1.140 daniel 1061: input->length = xmlStrlen(buffer);
1.59 daniel 1062: return(input);
1063: }
1064:
1.76 daniel 1065: /**
1066: * xmlNewInputFromFile:
1067: * @ctxt: an XML parser context
1068: * @filename: the filename to use as entity
1069: *
1070: * Create a new input stream based on a file.
1071: *
1072: * Returns the new input stream or NULL in case of error
1073: */
1074: xmlParserInputPtr
1.79 daniel 1075: xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1.91 daniel 1076: xmlParserInputBufferPtr buf;
1.76 daniel 1077: xmlParserInputPtr inputStream;
1.111 daniel 1078: char *directory = NULL;
1.76 daniel 1079:
1.96 daniel 1080: if (ctxt == NULL) return(NULL);
1.91 daniel 1081: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1.94 daniel 1082: if (buf == NULL) {
1.140 daniel 1083: char name[XML_PARSER_BIG_BUFFER_SIZE];
1.106 daniel 1084:
1.94 daniel 1085: if ((ctxt->input != NULL) && (ctxt->input->directory != NULL)) {
1086: #ifdef WIN32
1087: sprintf(name, "%s\\%s", ctxt->input->directory, filename);
1088: #else
1089: sprintf(name, "%s/%s", ctxt->input->directory, filename);
1090: #endif
1091: buf = xmlParserInputBufferCreateFilename(name,
1092: XML_CHAR_ENCODING_NONE);
1.106 daniel 1093: if (buf != NULL)
1.142 daniel 1094: directory = xmlParserGetDirectory(name);
1.106 daniel 1095: }
1096: if ((buf == NULL) && (ctxt->directory != NULL)) {
1097: #ifdef WIN32
1098: sprintf(name, "%s\\%s", ctxt->directory, filename);
1099: #else
1100: sprintf(name, "%s/%s", ctxt->directory, filename);
1101: #endif
1102: buf = xmlParserInputBufferCreateFilename(name,
1103: XML_CHAR_ENCODING_NONE);
1104: if (buf != NULL)
1.142 daniel 1105: directory = xmlParserGetDirectory(name);
1.106 daniel 1106: }
1107: if (buf == NULL)
1.94 daniel 1108: return(NULL);
1109: }
1110: if (directory == NULL)
1111: directory = xmlParserGetDirectory(filename);
1.76 daniel 1112:
1.96 daniel 1113: inputStream = xmlNewInputStream(ctxt);
1.76 daniel 1114: if (inputStream == NULL) {
1.119 daniel 1115: if (directory != NULL) xmlFree((char *) directory);
1.76 daniel 1116: return(NULL);
1117: }
1118:
1.119 daniel 1119: inputStream->filename = xmlMemStrdup(filename);
1.94 daniel 1120: inputStream->directory = directory;
1.91 daniel 1121: inputStream->buf = buf;
1.76 daniel 1122:
1.91 daniel 1123: inputStream->base = inputStream->buf->buffer->content;
1124: inputStream->cur = inputStream->buf->buffer->content;
1.106 daniel 1125: if ((ctxt->directory == NULL) && (directory != NULL))
1.134 daniel 1126: ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
1.76 daniel 1127: return(inputStream);
1128: }
1129:
1.77 daniel 1130: /************************************************************************
1131: * *
1.97 daniel 1132: * Commodity functions to handle parser contexts *
1133: * *
1134: ************************************************************************/
1135:
1136: /**
1137: * xmlInitParserCtxt:
1138: * @ctxt: an XML parser context
1139: *
1140: * Initialize a parser context
1141: */
1142:
1143: void
1144: xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1145: {
1146: xmlSAXHandler *sax;
1147:
1.168 daniel 1148: xmlDefaultSAXHandlerInit();
1149:
1.119 daniel 1150: sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
1.97 daniel 1151: if (sax == NULL) {
1152: fprintf(stderr, "xmlInitParserCtxt: out of memory\n");
1153: }
1.180 daniel 1154: memset(sax, 0, sizeof(xmlSAXHandler));
1.97 daniel 1155:
1156: /* Allocate the Input stack */
1.119 daniel 1157: ctxt->inputTab = (xmlParserInputPtr *) xmlMalloc(5 * sizeof(xmlParserInputPtr));
1.97 daniel 1158: ctxt->inputNr = 0;
1159: ctxt->inputMax = 5;
1160: ctxt->input = NULL;
1.165 daniel 1161:
1.97 daniel 1162: ctxt->version = NULL;
1163: ctxt->encoding = NULL;
1164: ctxt->standalone = -1;
1.98 daniel 1165: ctxt->hasExternalSubset = 0;
1166: ctxt->hasPErefs = 0;
1.97 daniel 1167: ctxt->html = 0;
1.98 daniel 1168: ctxt->external = 0;
1.140 daniel 1169: ctxt->instate = XML_PARSER_START;
1.97 daniel 1170: ctxt->token = 0;
1.106 daniel 1171: ctxt->directory = NULL;
1.97 daniel 1172:
1173: /* Allocate the Node stack */
1.119 daniel 1174: ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
1.97 daniel 1175: ctxt->nodeNr = 0;
1176: ctxt->nodeMax = 10;
1177: ctxt->node = NULL;
1178:
1.140 daniel 1179: /* Allocate the Name stack */
1180: ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
1181: ctxt->nameNr = 0;
1182: ctxt->nameMax = 10;
1183: ctxt->name = NULL;
1184:
1.176 daniel 1185: /* Allocate the space stack */
1186: ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
1187: ctxt->spaceNr = 1;
1188: ctxt->spaceMax = 10;
1189: ctxt->spaceTab[0] = -1;
1190: ctxt->space = &ctxt->spaceTab[0];
1191:
1.160 daniel 1192: if (sax == NULL) {
1193: ctxt->sax = &xmlDefaultSAXHandler;
1194: } else {
1.97 daniel 1195: ctxt->sax = sax;
1196: memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
1197: }
1198: ctxt->userData = ctxt;
1199: ctxt->myDoc = NULL;
1200: ctxt->wellFormed = 1;
1.99 daniel 1201: ctxt->valid = 1;
1.100 daniel 1202: ctxt->validate = xmlDoValidityCheckingDefaultValue;
1.179 daniel 1203: ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
1.100 daniel 1204: ctxt->vctxt.userData = ctxt;
1.149 daniel 1205: if (ctxt->validate) {
1206: ctxt->vctxt.error = xmlParserValidityError;
1.160 daniel 1207: if (xmlGetWarningsDefaultValue == 0)
1208: ctxt->vctxt.warning = NULL;
1209: else
1210: ctxt->vctxt.warning = xmlParserValidityWarning;
1.180 daniel 1211: /* Allocate the Node stack */
1212: ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
1213: ctxt->vctxt.nodeNr = 0;
1214: ctxt->vctxt.nodeMax = 4;
1215: ctxt->vctxt.node = NULL;
1.149 daniel 1216: } else {
1217: ctxt->vctxt.error = NULL;
1218: ctxt->vctxt.warning = NULL;
1219: }
1.97 daniel 1220: ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1221: ctxt->record_info = 0;
1.135 daniel 1222: ctxt->nbChars = 0;
1.140 daniel 1223: ctxt->checkIndex = 0;
1.180 daniel 1224: ctxt->inSubset = 0;
1.140 daniel 1225: ctxt->errNo = XML_ERR_OK;
1.185 daniel 1226: ctxt->depth = 0;
1.97 daniel 1227: xmlInitNodeInfoSeq(&ctxt->node_seq);
1228: }
1229:
1230: /**
1231: * xmlFreeParserCtxt:
1232: * @ctxt: an XML parser context
1233: *
1234: * Free all the memory used by a parser context. However the parsed
1235: * document in ctxt->myDoc is not freed.
1236: */
1237:
1238: void
1239: xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
1240: {
1241: xmlParserInputPtr input;
1.140 daniel 1242: xmlChar *oldname;
1.97 daniel 1243:
1244: if (ctxt == NULL) return;
1245:
1246: while ((input = inputPop(ctxt)) != NULL) {
1247: xmlFreeInputStream(input);
1248: }
1.140 daniel 1249: while ((oldname = namePop(ctxt)) != NULL) {
1250: xmlFree(oldname);
1251: }
1.176 daniel 1252: if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
1.140 daniel 1253: if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
1.119 daniel 1254: if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
1255: if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
1256: if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
1257: if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
1.165 daniel 1258: if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
1259: if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
1260: if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
1.180 daniel 1261: if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
1.97 daniel 1262: if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
1.119 daniel 1263: xmlFree(ctxt->sax);
1264: if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
1265: xmlFree(ctxt);
1.97 daniel 1266: }
1267:
1268: /**
1269: * xmlNewParserCtxt:
1270: *
1271: * Allocate and initialize a new parser context.
1272: *
1273: * Returns the xmlParserCtxtPtr or NULL
1274: */
1275:
1276: xmlParserCtxtPtr
1277: xmlNewParserCtxt()
1278: {
1279: xmlParserCtxtPtr ctxt;
1280:
1.119 daniel 1281: ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
1.97 daniel 1282: if (ctxt == NULL) {
1283: fprintf(stderr, "xmlNewParserCtxt : cannot allocate context\n");
1284: perror("malloc");
1285: return(NULL);
1286: }
1.165 daniel 1287: memset(ctxt, 0, sizeof(xmlParserCtxt));
1.97 daniel 1288: xmlInitParserCtxt(ctxt);
1289: return(ctxt);
1290: }
1291:
1292: /**
1293: * xmlClearParserCtxt:
1294: * @ctxt: an XML parser context
1295: *
1296: * Clear (release owned resources) and reinitialize a parser context
1297: */
1298:
1299: void
1300: xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1301: {
1302: xmlClearNodeInfoSeq(&ctxt->node_seq);
1303: xmlInitParserCtxt(ctxt);
1304: }
1305:
1306: /************************************************************************
1307: * *
1.77 daniel 1308: * Commodity functions to handle entities *
1309: * *
1310: ************************************************************************/
1311:
1.174 daniel 1312: /**
1313: * xmlCheckEntity:
1314: * @ctxt: an XML parser context
1315: * @content: the entity content string
1316: *
1317: * Intended to parse an entity content and check the WF constraints.
1318: * NOTE: the body is currently empty, so neither argument is used and no check is performed.
1319: */
1320:
1321: void
1322: xmlCheckEntity(xmlParserCtxtPtr ctxt, const xmlChar *content) {
1323: }
1.97 daniel 1324:
1325: /**
1326: * xmlParseCharRef:
1327: * @ctxt: an XML parser context
1328: *
1329: * parse Reference declarations
1330: *
1331: * [66] CharRef ::= '&#' [0-9]+ ';' |
1332: * '&#x' [0-9a-fA-F]+ ';'
1333: *
1.98 daniel 1334: * [ WFC: Legal Character ]
1335: * Characters referred to using character references must match the
1336: * production for Char.
1337: *
1.135 daniel 1338: * Returns the value parsed (as an int), 0 in case of error
1.77 daniel 1339: */
1.97 daniel 1340: int
1341: xmlParseCharRef(xmlParserCtxtPtr ctxt) {
1342: int val = 0;
1343:
1.111 daniel 1344: if (ctxt->token != 0) {
1345: val = ctxt->token;
1346: ctxt->token = 0;
1347: return(val);
1348: }
1.152 daniel 1349: if ((RAW == '&') && (NXT(1) == '#') &&
1.97 daniel 1350: (NXT(2) == 'x')) {
1351: SKIP(3);
1.152 daniel 1352: while (RAW != ';') {
1353: if ((RAW >= '0') && (RAW <= '9'))
1.97 daniel 1354: val = val * 16 + (CUR - '0');
1.152 daniel 1355: else if ((RAW >= 'a') && (RAW <= 'f'))
1.97 daniel 1356: val = val * 16 + (CUR - 'a') + 10;
1.152 daniel 1357: else if ((RAW >= 'A') && (RAW <= 'F'))
1.97 daniel 1358: val = val * 16 + (CUR - 'A') + 10;
1359: else {
1.123 daniel 1360: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1.97 daniel 1361: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1362: ctxt->sax->error(ctxt->userData,
1363: "xmlParseCharRef: invalid hexadecimal value\n");
1364: ctxt->wellFormed = 0;
1.180 daniel 1365: ctxt->disableSAX = 1;
1.97 daniel 1366: val = 0;
1367: break;
1368: }
1369: NEXT;
1370: }
1.164 daniel 1371: if (RAW == ';') {
1372: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1373: ctxt->nbChars ++;
1374: ctxt->input->cur++;
1375: }
1.152 daniel 1376: } else if ((RAW == '&') && (NXT(1) == '#')) {
1.97 daniel 1377: SKIP(2);
1.152 daniel 1378: while (RAW != ';') {
1379: if ((RAW >= '0') && (RAW <= '9'))
1.97 daniel 1380: val = val * 10 + (CUR - '0');
1381: else {
1.123 daniel 1382: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1.97 daniel 1383: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1384: ctxt->sax->error(ctxt->userData,
1385: "xmlParseCharRef: invalid decimal value\n");
1386: ctxt->wellFormed = 0;
1.180 daniel 1387: ctxt->disableSAX = 1;
1.97 daniel 1388: val = 0;
1389: break;
1390: }
1391: NEXT;
1392: }
1.164 daniel 1393: if (RAW == ';') {
1394: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1395: ctxt->nbChars ++;
1396: ctxt->input->cur++;
1397: }
1.97 daniel 1398: } else {
1.123 daniel 1399: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1.97 daniel 1400: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 1401: ctxt->sax->error(ctxt->userData,
1402: "xmlParseCharRef: invalid value\n");
1.97 daniel 1403: ctxt->wellFormed = 0;
1.180 daniel 1404: ctxt->disableSAX = 1;
1.97 daniel 1405: }
1.98 daniel 1406:
1.97 daniel 1407: /*
1.98 daniel 1408: * [ WFC: Legal Character ]
1409: * Characters referred to using character references must match the
1410: * production for Char.
1.97 daniel 1411: */
1412: if (IS_CHAR(val)) {
1413: return(val);
1414: } else {
1.123 daniel 1415: ctxt->errNo = XML_ERR_INVALID_CHAR;
1.97 daniel 1416: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.123 daniel 1417: ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
1.97 daniel 1418: val);
1419: ctxt->wellFormed = 0;
1.180 daniel 1420: ctxt->disableSAX = 1;
1.97 daniel 1421: }
1422: return(0);
1.77 daniel 1423: }
1424:
1.96 daniel 1425: /**
1.135 daniel 1426: * xmlParseStringCharRef:
1427: * @ctxt: an XML parser context
1428: * @str: a pointer to an index in the string
1429: *
1430: * parse Reference declarations, variant parsing from a string rather
1431: * than an an input flow.
1432: *
1433: * [66] CharRef ::= '&#' [0-9]+ ';' |
1434: * '&#x' [0-9a-fA-F]+ ';'
1435: *
1436: * [ WFC: Legal Character ]
1437: * Characters referred to using character references must match the
1438: * production for Char.
1439: *
1440: * Returns the value parsed (as an int), 0 in case of error, str will be
1441: * updated to the current value of the index
1442: */
1443: int
1444: xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1445: const xmlChar *ptr;
1446: xmlChar cur;
1447: int val = 0;
1448:
1449: if ((str == NULL) || (*str == NULL)) return(0);
1450: ptr = *str;
1451: cur = *ptr;
1.137 daniel 1452: if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1.135 daniel 1453: ptr += 3;
1454: cur = *ptr;
1455: while (cur != ';') {
1456: if ((cur >= '0') && (cur <= '9'))
1457: val = val * 16 + (cur - '0');
1458: else if ((cur >= 'a') && (cur <= 'f'))
1459: val = val * 16 + (cur - 'a') + 10;
1460: else if ((cur >= 'A') && (cur <= 'F'))
1461: val = val * 16 + (cur - 'A') + 10;
1462: else {
1463: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1464: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1465: ctxt->sax->error(ctxt->userData,
1466: "xmlParseCharRef: invalid hexadecimal value\n");
1467: ctxt->wellFormed = 0;
1.180 daniel 1468: ctxt->disableSAX = 1;
1.135 daniel 1469: val = 0;
1470: break;
1471: }
1472: ptr++;
1473: cur = *ptr;
1474: }
1475: if (cur == ';')
1476: ptr++;
1.145 daniel 1477: } else if ((cur == '&') && (ptr[1] == '#')){
1.135 daniel 1478: ptr += 2;
1479: cur = *ptr;
1480: while (cur != ';') {
1481: if ((cur >= '0') && (cur <= '9'))
1482: val = val * 10 + (cur - '0');
1483: else {
1484: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1485: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1486: ctxt->sax->error(ctxt->userData,
1487: "xmlParseCharRef: invalid decimal value\n");
1488: ctxt->wellFormed = 0;
1.180 daniel 1489: ctxt->disableSAX = 1;
1.135 daniel 1490: val = 0;
1491: break;
1492: }
1493: ptr++;
1494: cur = *ptr;
1495: }
1496: if (cur == ';')
1497: ptr++;
1498: } else {
1499: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1500: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1501: ctxt->sax->error(ctxt->userData,
1502: "xmlParseCharRef: invalid value\n");
1503: ctxt->wellFormed = 0;
1.180 daniel 1504: ctxt->disableSAX = 1;
1.135 daniel 1505: return(0);
1506: }
1507: *str = ptr;
1508:
1509: /*
1510: * [ WFC: Legal Character ]
1511: * Characters referred to using character references must match the
1512: * production for Char.
1513: */
1514: if (IS_CHAR(val)) {
1515: return(val);
1516: } else {
1517: ctxt->errNo = XML_ERR_INVALID_CHAR;
1518: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1519: ctxt->sax->error(ctxt->userData,
1520: "CharRef: invalid xmlChar value %d\n", val);
1521: ctxt->wellFormed = 0;
1.180 daniel 1522: ctxt->disableSAX = 1;
1.135 daniel 1523: }
1524: return(0);
1525: }
1526:
1527: /**
1.96 daniel 1528: * xmlParserHandleReference:
1529: * @ctxt: the parser context
1530: *
1.97 daniel 1531: * [67] Reference ::= EntityRef | CharRef
1532: *
1.96 daniel 1533: * [68] EntityRef ::= '&' Name ';'
1534: *
1.98 daniel 1535: * [ WFC: Entity Declared ]
1536: * the Name given in the entity reference must match that in an entity
1537: * declaration, except that well-formed documents need not declare any
1538: * of the following entities: amp, lt, gt, apos, quot.
1539: *
1540: * [ WFC: Parsed Entity ]
1541: * An entity reference must not contain the name of an unparsed entity
1542: *
1.97 daniel 1543: * [66] CharRef ::= '&#' [0-9]+ ';' |
1544: * '&#x' [0-9a-fA-F]+ ';'
1545: *
1.96 daniel 1546: * A PEReference may have been detectect in the current input stream
1547: * the handling is done accordingly to
1548: * http://www.w3.org/TR/REC-xml#entproc
1549: */
1550: void
1551: xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
1.97 daniel 1552: xmlParserInputPtr input;
1.123 daniel 1553: xmlChar *name;
1.97 daniel 1554: xmlEntityPtr ent = NULL;
1555:
1.126 daniel 1556: if (ctxt->token != 0) {
1557: return;
1558: }
1.152 daniel 1559: if (RAW != '&') return;
1.97 daniel 1560: GROW;
1.152 daniel 1561: if ((RAW == '&') && (NXT(1) == '#')) {
1.97 daniel 1562: switch(ctxt->instate) {
1.140 daniel 1563: case XML_PARSER_ENTITY_DECL:
1564: case XML_PARSER_PI:
1.109 daniel 1565: case XML_PARSER_CDATA_SECTION:
1.140 daniel 1566: case XML_PARSER_COMMENT:
1.168 daniel 1567: case XML_PARSER_SYSTEM_LITERAL:
1.140 daniel 1568: /* we just ignore it there */
1569: return;
1570: case XML_PARSER_START_TAG:
1.109 daniel 1571: return;
1.140 daniel 1572: case XML_PARSER_END_TAG:
1.97 daniel 1573: return;
1574: case XML_PARSER_EOF:
1.123 daniel 1575: ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
1.97 daniel 1576: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1577: ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
1578: ctxt->wellFormed = 0;
1.180 daniel 1579: ctxt->disableSAX = 1;
1.97 daniel 1580: return;
1581: case XML_PARSER_PROLOG:
1.140 daniel 1582: case XML_PARSER_START:
1583: case XML_PARSER_MISC:
1.123 daniel 1584: ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
1.97 daniel 1585: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1586: ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
1587: ctxt->wellFormed = 0;
1.180 daniel 1588: ctxt->disableSAX = 1;
1.97 daniel 1589: return;
1590: case XML_PARSER_EPILOG:
1.123 daniel 1591: ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
1.97 daniel 1592: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1593: ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
1594: ctxt->wellFormed = 0;
1.180 daniel 1595: ctxt->disableSAX = 1;
1.97 daniel 1596: return;
1597: case XML_PARSER_DTD:
1.123 daniel 1598: ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
1.97 daniel 1599: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1600: ctxt->sax->error(ctxt->userData,
1601: "CharRef are forbiden in DTDs!\n");
1602: ctxt->wellFormed = 0;
1.180 daniel 1603: ctxt->disableSAX = 1;
1.97 daniel 1604: return;
1605: case XML_PARSER_ENTITY_VALUE:
1606: /*
1607: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1608: * substitution here since we need the literal
1.97 daniel 1609: * entity value to be able to save the internal
1610: * subset of the document.
1611: * This will be handled by xmlDecodeEntities
1612: */
1613: return;
1614: case XML_PARSER_CONTENT:
1615: case XML_PARSER_ATTRIBUTE_VALUE:
1616: ctxt->token = xmlParseCharRef(ctxt);
1617: return;
1618: }
1619: return;
1620: }
1621:
1622: switch(ctxt->instate) {
1.109 daniel 1623: case XML_PARSER_CDATA_SECTION:
1624: return;
1.140 daniel 1625: case XML_PARSER_PI:
1.97 daniel 1626: case XML_PARSER_COMMENT:
1.168 daniel 1627: case XML_PARSER_SYSTEM_LITERAL:
1628: case XML_PARSER_CONTENT:
1.97 daniel 1629: return;
1.140 daniel 1630: case XML_PARSER_START_TAG:
1631: return;
1632: case XML_PARSER_END_TAG:
1633: return;
1.97 daniel 1634: case XML_PARSER_EOF:
1.123 daniel 1635: ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
1.97 daniel 1636: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1637: ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
1638: ctxt->wellFormed = 0;
1.180 daniel 1639: ctxt->disableSAX = 1;
1.97 daniel 1640: return;
1641: case XML_PARSER_PROLOG:
1.140 daniel 1642: case XML_PARSER_START:
1643: case XML_PARSER_MISC:
1.123 daniel 1644: ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
1.97 daniel 1645: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1646: ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
1647: ctxt->wellFormed = 0;
1.180 daniel 1648: ctxt->disableSAX = 1;
1.97 daniel 1649: return;
1650: case XML_PARSER_EPILOG:
1.123 daniel 1651: ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
1.97 daniel 1652: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1653: ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
1654: ctxt->wellFormed = 0;
1.180 daniel 1655: ctxt->disableSAX = 1;
1.97 daniel 1656: return;
1657: case XML_PARSER_ENTITY_VALUE:
1658: /*
1659: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1660: * substitution here since we need the literal
1.97 daniel 1661: * entity value to be able to save the internal
1662: * subset of the document.
1663: * This will be handled by xmlDecodeEntities
1664: */
1665: return;
1666: case XML_PARSER_ATTRIBUTE_VALUE:
1667: /*
1668: * NOTE: in the case of attributes values, we don't do the
1669: * substitution here unless we are in a mode where
1670: * the parser is explicitely asked to substitute
1671: * entities. The SAX callback is called with values
1672: * without entity substitution.
1673: * This will then be handled by xmlDecodeEntities
1674: */
1.113 daniel 1675: return;
1.97 daniel 1676: case XML_PARSER_ENTITY_DECL:
1677: /*
1678: * we just ignore it there
1679: * the substitution will be done once the entity is referenced
1680: */
1681: return;
1682: case XML_PARSER_DTD:
1.123 daniel 1683: ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
1.97 daniel 1684: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1685: ctxt->sax->error(ctxt->userData,
1686: "Entity references are forbiden in DTDs!\n");
1687: ctxt->wellFormed = 0;
1.180 daniel 1688: ctxt->disableSAX = 1;
1.97 daniel 1689: return;
1690: }
1691:
1692: NEXT;
1693: name = xmlScanName(ctxt);
1694: if (name == NULL) {
1.123 daniel 1695: ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
1.97 daniel 1696: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1697: ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
1698: ctxt->wellFormed = 0;
1.180 daniel 1699: ctxt->disableSAX = 1;
1.97 daniel 1700: ctxt->token = '&';
1701: return;
1702: }
1703: if (NXT(xmlStrlen(name)) != ';') {
1.123 daniel 1704: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.97 daniel 1705: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1706: ctxt->sax->error(ctxt->userData,
1707: "Entity reference: ';' expected\n");
1708: ctxt->wellFormed = 0;
1.180 daniel 1709: ctxt->disableSAX = 1;
1.97 daniel 1710: ctxt->token = '&';
1.119 daniel 1711: xmlFree(name);
1.97 daniel 1712: return;
1713: }
1714: SKIP(xmlStrlen(name) + 1);
1715: if (ctxt->sax != NULL) {
1716: if (ctxt->sax->getEntity != NULL)
1717: ent = ctxt->sax->getEntity(ctxt->userData, name);
1718: }
1.98 daniel 1719:
1720: /*
1721: * [ WFC: Entity Declared ]
1722: * the Name given in the entity reference must match that in an entity
1723: * declaration, except that well-formed documents need not declare any
1724: * of the following entities: amp, lt, gt, apos, quot.
1725: */
1.97 daniel 1726: if (ent == NULL)
1727: ent = xmlGetPredefinedEntity(name);
1728: if (ent == NULL) {
1.123 daniel 1729: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.97 daniel 1730: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1731: ctxt->sax->error(ctxt->userData,
1.98 daniel 1732: "Entity reference: entity %s not declared\n",
1733: name);
1.97 daniel 1734: ctxt->wellFormed = 0;
1.180 daniel 1735: ctxt->disableSAX = 1;
1.119 daniel 1736: xmlFree(name);
1.97 daniel 1737: return;
1738: }
1.98 daniel 1739:
1740: /*
1741: * [ WFC: Parsed Entity ]
1742: * An entity reference must not contain the name of an unparsed entity
1743: */
1.159 daniel 1744: if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.123 daniel 1745: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 1746: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1747: ctxt->sax->error(ctxt->userData,
1748: "Entity reference to unparsed entity %s\n", name);
1749: ctxt->wellFormed = 0;
1.180 daniel 1750: ctxt->disableSAX = 1;
1.98 daniel 1751: }
1752:
1.159 daniel 1753: if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
1.97 daniel 1754: ctxt->token = ent->content[0];
1.119 daniel 1755: xmlFree(name);
1.97 daniel 1756: return;
1757: }
1758: input = xmlNewEntityInputStream(ctxt, ent);
1759: xmlPushInput(ctxt, input);
1.119 daniel 1760: xmlFree(name);
1.96 daniel 1761: return;
1762: }
1763:
1764: /**
1765: * xmlParserHandlePEReference:
1766: * @ctxt: the parser context
1767: *
1768: * [69] PEReference ::= '%' Name ';'
1769: *
1.98 daniel 1770: * [ WFC: No Recursion ]
1771: * TODO A parsed entity must not contain a recursive
1772: * reference to itself, either directly or indirectly.
1773: *
1774: * [ WFC: Entity Declared ]
1775: * In a document without any DTD, a document with only an internal DTD
1776: * subset which contains no parameter entity references, or a document
1777: * with "standalone='yes'", ... ... The declaration of a parameter
1778: * entity must precede any reference to it...
1779: *
1780: * [ VC: Entity Declared ]
1781: * In a document with an external subset or external parameter entities
1782: * with "standalone='no'", ... ... The declaration of a parameter entity
1783: * must precede any reference to it...
1784: *
1785: * [ WFC: In DTD ]
1786: * Parameter-entity references may only appear in the DTD.
1787: * NOTE: misleading but this is handled.
1788: *
1789: * A PEReference may have been detected in the current input stream
1.96 daniel 1790: * the handling is done accordingly to
1791: * http://www.w3.org/TR/REC-xml#entproc
1792: * i.e.
1793: * - Included in literal in entity values
1794: * - Included as Parameter Entity reference within DTDs
1795: */
1796: void
1797: xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 1798: xmlChar *name;
1.96 daniel 1799: xmlEntityPtr entity = NULL;
1800: xmlParserInputPtr input;
1801:
1.126 daniel 1802: if (ctxt->token != 0) {
1803: return;
1804: }
1.152 daniel 1805: if (RAW != '%') return;
1.96 daniel 1806: switch(ctxt->instate) {
1.109 daniel 1807: case XML_PARSER_CDATA_SECTION:
1808: return;
1.97 daniel 1809: case XML_PARSER_COMMENT:
1810: return;
1.140 daniel 1811: case XML_PARSER_START_TAG:
1812: return;
1813: case XML_PARSER_END_TAG:
1814: return;
1.96 daniel 1815: case XML_PARSER_EOF:
1.123 daniel 1816: ctxt->errNo = XML_ERR_PEREF_AT_EOF;
1.96 daniel 1817: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1818: ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
1819: ctxt->wellFormed = 0;
1.180 daniel 1820: ctxt->disableSAX = 1;
1.96 daniel 1821: return;
1822: case XML_PARSER_PROLOG:
1.140 daniel 1823: case XML_PARSER_START:
1824: case XML_PARSER_MISC:
1.123 daniel 1825: ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
1.96 daniel 1826: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1827: ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
1828: ctxt->wellFormed = 0;
1.180 daniel 1829: ctxt->disableSAX = 1;
1.96 daniel 1830: return;
1.97 daniel 1831: case XML_PARSER_ENTITY_DECL:
1.96 daniel 1832: case XML_PARSER_CONTENT:
1833: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 1834: case XML_PARSER_PI:
1.168 daniel 1835: case XML_PARSER_SYSTEM_LITERAL:
1.96 daniel 1836: /* we just ignore it there */
1837: return;
1838: case XML_PARSER_EPILOG:
1.123 daniel 1839: ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
1.96 daniel 1840: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.97 daniel 1841: ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
1.96 daniel 1842: ctxt->wellFormed = 0;
1.180 daniel 1843: ctxt->disableSAX = 1;
1.96 daniel 1844: return;
1.97 daniel 1845: case XML_PARSER_ENTITY_VALUE:
1846: /*
1847: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1848: * substitution here since we need the literal
1.97 daniel 1849: * entity value to be able to save the internal
1850: * subset of the document.
1851: * This will be handled by xmlDecodeEntities
1852: */
1853: return;
1.96 daniel 1854: case XML_PARSER_DTD:
1.98 daniel 1855: /*
1856: * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1857: * In the internal DTD subset, parameter-entity references
1858: * can occur only where markup declarations can occur, not
1859: * within markup declarations.
1860: * In that case this is handled in xmlParseMarkupDecl
1861: */
1862: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1863: return;
1.96 daniel 1864: }
1865:
1866: NEXT;
1867: name = xmlParseName(ctxt);
1868: if (name == NULL) {
1.123 daniel 1869: ctxt->errNo = XML_ERR_PEREF_NO_NAME;
1.96 daniel 1870: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1871: ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
1872: ctxt->wellFormed = 0;
1.180 daniel 1873: ctxt->disableSAX = 1;
1.96 daniel 1874: } else {
1.152 daniel 1875: if (RAW == ';') {
1.96 daniel 1876: NEXT;
1.98 daniel 1877: if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1878: entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.96 daniel 1879: if (entity == NULL) {
1.98 daniel 1880:
1881: /*
1882: * [ WFC: Entity Declared ]
1883: * In a document without any DTD, a document with only an
1884: * internal DTD subset which contains no parameter entity
1885: * references, or a document with "standalone='yes'", ...
1886: * ... The declaration of a parameter entity must precede
1887: * any reference to it...
1888: */
1889: if ((ctxt->standalone == 1) ||
1890: ((ctxt->hasExternalSubset == 0) &&
1891: (ctxt->hasPErefs == 0))) {
1892: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1893: ctxt->sax->error(ctxt->userData,
1894: "PEReference: %%%s; not found\n", name);
1895: ctxt->wellFormed = 0;
1.180 daniel 1896: ctxt->disableSAX = 1;
1.98 daniel 1897: } else {
1898: /*
1899: * [ VC: Entity Declared ]
1900: * In a document with an external subset or external
1901: * parameter entities with "standalone='no'", ...
1902: * ... The declaration of a parameter entity must precede
1903: * any reference to it...
1904: */
1905: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1906: ctxt->sax->warning(ctxt->userData,
1907: "PEReference: %%%s; not found\n", name);
1908: ctxt->valid = 0;
1909: }
1.96 daniel 1910: } else {
1.159 daniel 1911: if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1912: (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
1.96 daniel 1913: /*
1.156 daniel 1914: * TODO !!! handle the extra spaces added before and after
1.96 daniel 1915: * c.f. http://www.w3.org/TR/REC-xml#as-PE
1916: */
1917: input = xmlNewEntityInputStream(ctxt, entity);
1918: xmlPushInput(ctxt, input);
1.164 daniel 1919: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
1920: (RAW == '<') && (NXT(1) == '?') &&
1921: (NXT(2) == 'x') && (NXT(3) == 'm') &&
1922: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 1923: xmlParseTextDecl(ctxt);
1.164 daniel 1924: }
1925: if (ctxt->token == 0)
1926: ctxt->token = ' ';
1.96 daniel 1927: } else {
1928: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1929: ctxt->sax->error(ctxt->userData,
1930: "xmlHandlePEReference: %s is not a parameter entity\n",
1931: name);
1932: ctxt->wellFormed = 0;
1.180 daniel 1933: ctxt->disableSAX = 1;
1.96 daniel 1934: }
1935: }
1936: } else {
1.123 daniel 1937: ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
1.96 daniel 1938: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1939: ctxt->sax->error(ctxt->userData,
1940: "xmlHandlePEReference: expecting ';'\n");
1941: ctxt->wellFormed = 0;
1.180 daniel 1942: ctxt->disableSAX = 1;
1.96 daniel 1943: }
1.119 daniel 1944: xmlFree(name);
1.97 daniel 1945: }
1946: }
1947:
/*
 * growBuffer:
 * @buffer:  name of the xmlChar * variable to grow; an int variable
 *           named <buffer>_size must be in scope next to it
 *
 * Macro used to grow the current buffer: <buffer>_size is doubled and
 * @buffer is reallocated to hold that many xmlChar.
 *
 * If the reallocation fails, a message is printed with perror(), the
 * previous block is released (it used to be leaked because the pointer
 * was overwritten with NULL), @buffer is set to NULL and the ENCLOSING
 * FUNCTION returns NULL. The macro can therefore only be used inside
 * functions returning a pointer.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *grown_buffer;                                              \
    buffer##_size *= 2;                                                 \
    grown_buffer = (xmlChar *)                                          \
        xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));            \
    if (grown_buffer == NULL) {                                         \
        perror("realloc failed");                                       \
        xmlFree(buffer);                                                \
        buffer = NULL;                                                  \
        return(NULL);                                                   \
    }                                                                   \
    buffer = grown_buffer;                                              \
}
1.77 daniel 1960:
1961: /**
1962: * xmlDecodeEntities:
1963: * @ctxt: the parser context
1964: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1965: * @len: the len to decode (in bytes !), -1 for no size limit
1.123 daniel 1966: * @end: an end marker xmlChar, 0 if none
1967: * @end2: an end marker xmlChar, 0 if none
1968: * @end3: an end marker xmlChar, 0 if none
1.77 daniel 1969: *
1970: * [67] Reference ::= EntityRef | CharRef
1971: *
1972: * [69] PEReference ::= '%' Name ';'
1973: *
1974: * Returns A newly allocated string with the substitution done. The caller
1975: * must deallocate it !
1976: */
1.123 daniel 1977: xmlChar *
1.77 daniel 1978: xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
1.123 daniel 1979: xmlChar end, xmlChar end2, xmlChar end3) {
1980: xmlChar *buffer = NULL;
1.78 daniel 1981: int buffer_size = 0;
1.161 daniel 1982: int nbchars = 0;
1.78 daniel 1983:
1.123 daniel 1984: xmlChar *current = NULL;
1.77 daniel 1985: xmlEntityPtr ent;
1986: unsigned int max = (unsigned int) len;
1.161 daniel 1987: int c,l;
1.77 daniel 1988:
1.185 daniel 1989: if (ctxt->depth > 40) {
1990: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1991: ctxt->sax->error(ctxt->userData,
1992: "Detected entity reference loop\n");
1993: ctxt->wellFormed = 0;
1994: ctxt->disableSAX = 1;
1995: ctxt->errNo = XML_ERR_ENTITY_LOOP;
1996: return(NULL);
1997: }
1998:
1.77 daniel 1999: /*
2000: * allocate a translation buffer.
2001: */
1.140 daniel 2002: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.123 daniel 2003: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
1.77 daniel 2004: if (buffer == NULL) {
2005: perror("xmlDecodeEntities: malloc failed");
2006: return(NULL);
2007: }
2008:
1.78 daniel 2009: /*
2010: * Ok loop until we reach one of the ending char or a size limit.
2011: */
1.161 daniel 2012: c = CUR_CHAR(l);
2013: while ((nbchars < max) && (c != end) &&
2014: (c != end2) && (c != end3)) {
1.77 daniel 2015:
1.161 daniel 2016: if (c == 0) break;
2017: if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
1.98 daniel 2018: int val = xmlParseCharRef(ctxt);
1.161 daniel 2019: COPY_BUF(0,buffer,nbchars,val);
2020: NEXTL(l);
2021: } else if ((c == '&') && (ctxt->token != '&') &&
2022: (what & XML_SUBSTITUTE_REF)) {
1.98 daniel 2023: ent = xmlParseEntityRef(ctxt);
2024: if ((ent != NULL) &&
2025: (ctxt->replaceEntities != 0)) {
2026: current = ent->content;
2027: while (*current != 0) {
1.161 daniel 2028: buffer[nbchars++] = *current++;
2029: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.98 daniel 2030: growBuffer(buffer);
1.77 daniel 2031: }
2032: }
1.98 daniel 2033: } else if (ent != NULL) {
1.123 daniel 2034: const xmlChar *cur = ent->name;
1.98 daniel 2035:
1.161 daniel 2036: buffer[nbchars++] = '&';
2037: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.98 daniel 2038: growBuffer(buffer);
2039: }
1.161 daniel 2040: while (*cur != 0) {
2041: buffer[nbchars++] = *cur++;
2042: }
2043: buffer[nbchars++] = ';';
1.77 daniel 2044: }
1.161 daniel 2045: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1.97 daniel 2046: /*
1.77 daniel 2047: * a PEReference induce to switch the entity flow,
2048: * we break here to flush the current set of chars
2049: * parsed if any. We will be called back later.
1.97 daniel 2050: */
1.91 daniel 2051: if (nbchars != 0) break;
1.77 daniel 2052:
2053: xmlParsePEReference(ctxt);
1.79 daniel 2054:
1.97 daniel 2055: /*
1.79 daniel 2056: * Pop-up of finished entities.
1.97 daniel 2057: */
1.152 daniel 2058: while ((RAW == 0) && (ctxt->inputNr > 1))
1.79 daniel 2059: xmlPopInput(ctxt);
2060:
1.98 daniel 2061: break;
1.77 daniel 2062: } else {
1.161 daniel 2063: COPY_BUF(l,buffer,nbchars,c);
2064: NEXTL(l);
2065: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.86 daniel 2066: growBuffer(buffer);
2067: }
1.77 daniel 2068: }
1.161 daniel 2069: c = CUR_CHAR(l);
1.77 daniel 2070: }
1.161 daniel 2071: buffer[nbchars++] = 0;
1.77 daniel 2072: return(buffer);
2073: }
2074:
1.135 daniel 2075: /**
2076: * xmlStringDecodeEntities:
2077: * @ctxt: the parser context
2078: * @str: the input string
2079: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2080: * @end: an end marker xmlChar, 0 if none
2081: * @end2: an end marker xmlChar, 0 if none
2082: * @end3: an end marker xmlChar, 0 if none
2083: *
2084: * [67] Reference ::= EntityRef | CharRef
2085: *
2086: * [69] PEReference ::= '%' Name ';'
2087: *
2088: * Returns A newly allocated string with the substitution done. The caller
2089: * must deallocate it !
2090: */
2091: xmlChar *
2092: xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2093: xmlChar end, xmlChar end2, xmlChar end3) {
2094: xmlChar *buffer = NULL;
2095: int buffer_size = 0;
2096:
2097: xmlChar *current = NULL;
2098: xmlEntityPtr ent;
1.176 daniel 2099: int c,l;
2100: int nbchars = 0;
1.135 daniel 2101:
1.185 daniel 2102: if (ctxt->depth > 40) {
2103: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2104: ctxt->sax->error(ctxt->userData,
2105: "Detected entity reference loop\n");
2106: ctxt->wellFormed = 0;
2107: ctxt->disableSAX = 1;
2108: ctxt->errNo = XML_ERR_ENTITY_LOOP;
2109: return(NULL);
2110: }
2111:
1.135 daniel 2112: /*
2113: * allocate a translation buffer.
2114: */
1.140 daniel 2115: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.135 daniel 2116: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2117: if (buffer == NULL) {
2118: perror("xmlDecodeEntities: malloc failed");
2119: return(NULL);
2120: }
2121:
2122: /*
2123: * Ok loop until we reach one of the ending char or a size limit.
2124: */
1.176 daniel 2125: c = CUR_SCHAR(str, l);
2126: while ((c != 0) && (c != end) && (c != end2) && (c != end3)) {
1.135 daniel 2127:
1.176 daniel 2128: if (c == 0) break;
2129: if ((c == '&') && (str[1] == '#')) {
1.135 daniel 2130: int val = xmlParseStringCharRef(ctxt, &str);
1.176 daniel 2131: if (val != 0) {
2132: COPY_BUF(0,buffer,nbchars,val);
2133: }
2134: } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1.135 daniel 2135: ent = xmlParseStringEntityRef(ctxt, &str);
1.185 daniel 2136: if ((ent != NULL) && (ent->content != NULL)) {
2137: xmlChar *rep;
2138:
2139: ctxt->depth++;
2140: rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2141: 0, 0, 0);
2142: ctxt->depth--;
2143: if (rep != NULL) {
2144: current = rep;
2145: while (*current != 0) {
2146: buffer[nbchars++] = *current++;
2147: if (nbchars >
2148: buffer_size - XML_PARSER_BUFFER_SIZE) {
2149: growBuffer(buffer);
2150: }
1.135 daniel 2151: }
1.185 daniel 2152: xmlFree(rep);
1.135 daniel 2153: }
2154: } else if (ent != NULL) {
2155: int i = xmlStrlen(ent->name);
2156: const xmlChar *cur = ent->name;
2157:
1.176 daniel 2158: buffer[nbchars++] = '&';
2159: if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 2160: growBuffer(buffer);
2161: }
2162: for (;i > 0;i--)
1.176 daniel 2163: buffer[nbchars++] = *cur++;
2164: buffer[nbchars++] = ';';
1.135 daniel 2165: }
1.176 daniel 2166: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1.135 daniel 2167: ent = xmlParseStringPEReference(ctxt, &str);
2168: if (ent != NULL) {
1.185 daniel 2169: xmlChar *rep;
2170:
2171: ctxt->depth++;
2172: rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2173: 0, 0, 0);
2174: ctxt->depth--;
2175: if (rep != NULL) {
2176: current = rep;
2177: while (*current != 0) {
2178: buffer[nbchars++] = *current++;
2179: if (nbchars >
2180: buffer_size - XML_PARSER_BUFFER_SIZE) {
2181: growBuffer(buffer);
2182: }
1.135 daniel 2183: }
1.185 daniel 2184: xmlFree(rep);
1.135 daniel 2185: }
2186: }
2187: } else {
1.176 daniel 2188: COPY_BUF(l,buffer,nbchars,c);
2189: str += l;
2190: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 2191: growBuffer(buffer);
2192: }
2193: }
1.176 daniel 2194: c = CUR_SCHAR(str, l);
1.135 daniel 2195: }
1.176 daniel 2196: buffer[nbchars++] = 0;
1.135 daniel 2197: return(buffer);
2198: }
2199:
1.1 veillard 2200:
1.28 daniel 2201: /************************************************************************
2202: * *
1.75 daniel 2203: * Commodity functions to handle encodings *
2204: * *
2205: ************************************************************************/
2206:
1.172 daniel 2207: /*
2208: * xmlCheckLanguageID
2209: * @lang: pointer to the string value
2210: *
2211: * Checks that the value conforms to the LanguageID production:
2212: *
2213: * [33] LanguageID ::= Langcode ('-' Subcode)*
2214: * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2215: * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2216: * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2217: * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2218: * [38] Subcode ::= ([a-z] | [A-Z])+
2219: *
2220: * Returns 1 if correct 0 otherwise
2221: **/
2222: int
2223: xmlCheckLanguageID(const xmlChar *lang) {
2224: const xmlChar *cur = lang;
2225:
2226: if (cur == NULL)
2227: return(0);
2228: if (((cur[0] == 'i') && (cur[1] == '-')) ||
2229: ((cur[0] == 'I') && (cur[1] == '-'))) {
2230: /*
2231: * IANA code
2232: */
2233: cur += 2;
2234: while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2235: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2236: cur++;
2237: } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2238: ((cur[0] == 'X') && (cur[1] == '-'))) {
2239: /*
2240: * User code
2241: */
2242: cur += 2;
2243: while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2244: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2245: cur++;
2246: } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2247: ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2248: /*
2249: * ISO639
2250: */
2251: cur++;
2252: if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2253: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2254: cur++;
2255: else
2256: return(0);
2257: } else
2258: return(0);
2259: while (cur[0] != 0) {
2260: if (cur[0] != '-')
2261: return(0);
2262: cur++;
2263: if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2264: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2265: cur++;
2266: else
2267: return(0);
2268: while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2269: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2270: cur++;
2271: }
2272: return(1);
2273: }
2274:
1.75 daniel 2275: /**
2276: * xmlSwitchEncoding:
2277: * @ctxt: the parser context
1.124 daniel 2278: * @enc: the encoding value (number)
1.75 daniel 2279: *
2280: * change the input functions when discovering the character encoding
2281: * of a given entity.
1.193 daniel 2282: *
2283: * Returns 0 in case of success, -1 otherwise
1.75 daniel 2284: */
1.193 daniel 2285: int
1.75 daniel 2286: xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
2287: {
1.156 daniel 2288: xmlCharEncodingHandlerPtr handler;
2289:
1.193 daniel 2290: switch (enc) {
2291: case XML_CHAR_ENCODING_ERROR:
2292: ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
2293: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2294: ctxt->sax->error(ctxt->userData, "encoding unknown\n");
2295: ctxt->wellFormed = 0;
2296: ctxt->disableSAX = 1;
2297: break;
2298: case XML_CHAR_ENCODING_NONE:
2299: /* let's assume it's UTF-8 without the XML decl */
2300: return(0);
2301: case XML_CHAR_ENCODING_UTF8:
2302: /* default encoding, no conversion should be needed */
2303: return(0);
2304: default:
2305: break;
2306: }
1.156 daniel 2307: handler = xmlGetCharEncodingHandler(enc);
1.193 daniel 2308: if (handler == NULL) {
2309: /*
2310: * Default handlers.
2311: */
2312: switch (enc) {
2313: case XML_CHAR_ENCODING_ERROR:
2314: ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
2315: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2316: ctxt->sax->error(ctxt->userData, "encoding unknown\n");
2317: ctxt->wellFormed = 0;
2318: ctxt->disableSAX = 1;
2319: break;
2320: case XML_CHAR_ENCODING_NONE:
2321: /* let's assume it's UTF-8 without the XML decl */
2322: return(0);
2323: case XML_CHAR_ENCODING_UTF8:
2324: /* default encoding, no conversion should be needed */
2325: return(0);
2326: case XML_CHAR_ENCODING_UTF16LE:
2327: break;
2328: case XML_CHAR_ENCODING_UTF16BE:
2329: break;
2330: case XML_CHAR_ENCODING_UCS4LE:
2331: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2332: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2333: ctxt->sax->error(ctxt->userData,
2334: "char encoding USC4 little endian not supported\n");
2335: break;
2336: case XML_CHAR_ENCODING_UCS4BE:
2337: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2338: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2339: ctxt->sax->error(ctxt->userData,
2340: "char encoding USC4 big endian not supported\n");
2341: break;
2342: case XML_CHAR_ENCODING_EBCDIC:
2343: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2344: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2345: ctxt->sax->error(ctxt->userData,
2346: "char encoding EBCDIC not supported\n");
2347: break;
2348: case XML_CHAR_ENCODING_UCS4_2143:
2349: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2350: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2351: ctxt->sax->error(ctxt->userData,
2352: "char encoding UCS4 2143 not supported\n");
2353: break;
2354: case XML_CHAR_ENCODING_UCS4_3412:
2355: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2356: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2357: ctxt->sax->error(ctxt->userData,
2358: "char encoding UCS4 3412 not supported\n");
2359: break;
2360: case XML_CHAR_ENCODING_UCS2:
2361: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2362: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2363: ctxt->sax->error(ctxt->userData,
2364: "char encoding UCS2 not supported\n");
2365: break;
2366: case XML_CHAR_ENCODING_8859_1:
2367: case XML_CHAR_ENCODING_8859_2:
2368: case XML_CHAR_ENCODING_8859_3:
2369: case XML_CHAR_ENCODING_8859_4:
2370: case XML_CHAR_ENCODING_8859_5:
2371: case XML_CHAR_ENCODING_8859_6:
2372: case XML_CHAR_ENCODING_8859_7:
2373: case XML_CHAR_ENCODING_8859_8:
2374: case XML_CHAR_ENCODING_8859_9:
1.195 daniel 2375: /*
2376: * Keep the internal content in the document encoding
2377: */
2378: if ((ctxt->inputNr == 1) &&
2379: (ctxt->encoding == NULL) &&
2380: (ctxt->input->encoding != NULL)) {
2381: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
2382: }
2383: return(0);
1.193 daniel 2384: case XML_CHAR_ENCODING_2022_JP:
2385: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2386: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2387: ctxt->sax->error(ctxt->userData,
2388: "char encoding ISO-2022-JPnot supported\n");
2389: break;
2390: case XML_CHAR_ENCODING_SHIFT_JIS:
2391: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2392: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2393: ctxt->sax->error(ctxt->userData,
2394: "char encoding Shift_JIS not supported\n");
2395: break;
2396: case XML_CHAR_ENCODING_EUC_JP:
2397: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2398: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2399: ctxt->sax->error(ctxt->userData,
2400: "char encoding EUC-JPnot supported\n");
2401: break;
2402: }
2403: }
2404: if (handler == NULL)
2405: return(-1);
2406: return(xmlSwitchToEncoding(ctxt, handler));
2407: }
2408:
2409: /**
2410: * xmlSwitchToEncoding:
2411: * @ctxt: the parser context
2412: * @handler: the encoding handler
2413: *
2414: * change the input functions when discovering the character encoding
2415: * of a given entity.
2416: *
2417: * Returns 0 in case of success, -1 otherwise
2418: */
2419: int
2420: xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
2421: {
1.194 daniel 2422: int nbchars;
2423:
1.156 daniel 2424: if (handler != NULL) {
2425: if (ctxt->input != NULL) {
2426: if (ctxt->input->buf != NULL) {
2427: if (ctxt->input->buf->encoder != NULL) {
1.193 daniel 2428: if (ctxt->input->buf->encoder == handler)
2429: return(0);
1.156 daniel 2430: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2431: ctxt->sax->error(ctxt->userData,
2432: "xmlSwitchEncoding : encoder already regitered\n");
1.193 daniel 2433: return(-1);
1.156 daniel 2434: }
2435: ctxt->input->buf->encoder = handler;
2436:
2437: /*
1.194 daniel 2438: * Is there already some content down the pipe to convert ?
1.156 daniel 2439: */
2440: if ((ctxt->input->buf->buffer != NULL) &&
2441: (ctxt->input->buf->buffer->use > 0)) {
2442: int processed;
2443:
2444: /*
2445: * Specific handling of the Byte Order Mark for
2446: * UTF-16
2447: */
1.195 daniel 2448: if ((handler->name != NULL) &&
2449: (!strcmp(handler->name, "UTF-16LE")) &&
1.156 daniel 2450: (ctxt->input->cur[0] == 0xFF) &&
2451: (ctxt->input->cur[1] == 0xFE)) {
1.194 daniel 2452: ctxt->input->cur += 2;
1.156 daniel 2453: }
1.195 daniel 2454: if ((handler->name != NULL) &&
2455: (!strcmp(handler->name, "UTF-16BE")) &&
1.156 daniel 2456: (ctxt->input->cur[0] == 0xFE) &&
2457: (ctxt->input->cur[1] == 0xFF)) {
1.194 daniel 2458: ctxt->input->cur += 2;
1.156 daniel 2459: }
2460:
2461: /*
1.194 daniel 2462: * Shring the current input buffer.
2463: * Move it as the raw buffer and create a new input buffer
1.156 daniel 2464: */
2465: processed = ctxt->input->cur - ctxt->input->base;
1.194 daniel 2466: xmlBufferShrink(ctxt->input->buf->buffer, processed);
2467: ctxt->input->buf->raw = ctxt->input->buf->buffer;
2468: ctxt->input->buf->buffer = xmlBufferCreate();
2469:
2470: /*
2471: * convert as much as possible of the raw input
2472: * to the parser reading buffer.
2473: */
2474: nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
2475: ctxt->input->buf->buffer,
2476: ctxt->input->buf->raw);
2477: if (nbchars < 0) {
2478: fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
2479: return(-1);
1.156 daniel 2480: }
1.194 daniel 2481: ctxt->input->base =
2482: ctxt->input->cur = ctxt->input->buf->buffer->content;
1.156 daniel 2483: }
1.193 daniel 2484: return(0);
1.156 daniel 2485: } else {
2486: if (ctxt->input->length == 0) {
2487: /*
2488: * When parsing a static memory array one must know the
2489: * size to be able to convert the buffer.
2490: */
2491: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2492: ctxt->sax->error(ctxt->userData,
2493: "xmlSwitchEncoding : no input\n");
1.193 daniel 2494: return(-1);
1.156 daniel 2495: } else {
1.194 daniel 2496: int processed;
2497:
2498: /*
2499: * Shring the current input buffer.
2500: * Move it as the raw buffer and create a new input buffer
2501: */
2502: processed = ctxt->input->cur - ctxt->input->base;
2503: ctxt->input->buf->raw = xmlBufferCreate();
2504: xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
2505: ctxt->input->length - processed);
2506: ctxt->input->buf->buffer = xmlBufferCreate();
1.156 daniel 2507:
2508: /*
1.194 daniel 2509: * convert as much as possible of the raw input
2510: * to the parser reading buffer.
2511: */
2512: nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
2513: ctxt->input->buf->buffer,
2514: ctxt->input->buf->raw);
2515: if (nbchars < 0) {
2516: fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
2517: return(-1);
1.156 daniel 2518: }
1.194 daniel 2519:
1.156 daniel 2520: /*
2521: * Conversion succeeded, get rid of the old buffer
2522: */
2523: if ((ctxt->input->free != NULL) &&
2524: (ctxt->input->base != NULL))
2525: ctxt->input->free((xmlChar *) ctxt->input->base);
1.194 daniel 2526: ctxt->input->base =
2527: ctxt->input->cur = ctxt->input->buf->buffer->content;
1.156 daniel 2528: }
2529: }
2530: } else {
2531: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2532: ctxt->sax->error(ctxt->userData,
2533: "xmlSwitchEncoding : no input\n");
1.193 daniel 2534: return(-1);
1.156 daniel 2535: }
1.195 daniel 2536: /*
2537: * The parsing is now done in UTF8 natively
2538: */
2539: if (ctxt->encoding != NULL) {
2540: xmlFree((xmlChar *) ctxt->encoding);
2541: ctxt->encoding = NULL;
2542: }
1.193 daniel 2543: } else
2544: return(-1);
2545: return(0);
1.156 daniel 2546:
1.75 daniel 2547: }
2548:
2549: /************************************************************************
2550: * *
1.123 daniel 2551: * Commodity functions to handle xmlChars *
1.28 daniel 2552: * *
2553: ************************************************************************/
2554:
1.50 daniel 2555: /**
2556: * xmlStrndup:
1.123 daniel 2557: * @cur: the input xmlChar *
1.50 daniel 2558: * @len: the len of @cur
2559: *
1.123 daniel 2560: * a strndup for array of xmlChar's
1.68 daniel 2561: *
1.123 daniel 2562: * Returns a new xmlChar * or NULL
1.1 veillard 2563: */
1.123 daniel 2564: xmlChar *
2565: xmlStrndup(const xmlChar *cur, int len) {
1.135 daniel 2566: xmlChar *ret;
2567:
2568: if ((cur == NULL) || (len < 0)) return(NULL);
2569: ret = xmlMalloc((len + 1) * sizeof(xmlChar));
1.1 veillard 2570: if (ret == NULL) {
1.86 daniel 2571: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 2572: (len + 1) * (long)sizeof(xmlChar));
1.1 veillard 2573: return(NULL);
2574: }
1.123 daniel 2575: memcpy(ret, cur, len * sizeof(xmlChar));
1.1 veillard 2576: ret[len] = 0;
2577: return(ret);
2578: }
2579:
1.50 daniel 2580: /**
2581: * xmlStrdup:
1.123 daniel 2582: * @cur: the input xmlChar *
1.50 daniel 2583: *
1.152 daniel 2584: * a strdup for array of xmlChar's. Since they are supposed to be
2585: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2586: * a termination mark of '0'.
1.68 daniel 2587: *
1.123 daniel 2588: * Returns a new xmlChar * or NULL
1.1 veillard 2589: */
1.123 daniel 2590: xmlChar *
2591: xmlStrdup(const xmlChar *cur) {
2592: const xmlChar *p = cur;
1.1 veillard 2593:
1.135 daniel 2594: if (cur == NULL) return(NULL);
1.152 daniel 2595: while (*p != 0) p++;
1.1 veillard 2596: return(xmlStrndup(cur, p - cur));
2597: }
2598:
1.50 daniel 2599: /**
2600: * xmlCharStrndup:
2601: * @cur: the input char *
2602: * @len: the len of @cur
2603: *
1.123 daniel 2604: * a strndup for char's to xmlChar's
1.68 daniel 2605: *
1.123 daniel 2606: * Returns a new xmlChar * or NULL
1.45 daniel 2607: */
2608:
1.123 daniel 2609: xmlChar *
1.55 daniel 2610: xmlCharStrndup(const char *cur, int len) {
1.45 daniel 2611: int i;
1.135 daniel 2612: xmlChar *ret;
2613:
2614: if ((cur == NULL) || (len < 0)) return(NULL);
2615: ret = xmlMalloc((len + 1) * sizeof(xmlChar));
1.45 daniel 2616: if (ret == NULL) {
1.86 daniel 2617: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 2618: (len + 1) * (long)sizeof(xmlChar));
1.45 daniel 2619: return(NULL);
2620: }
2621: for (i = 0;i < len;i++)
1.123 daniel 2622: ret[i] = (xmlChar) cur[i];
1.45 daniel 2623: ret[len] = 0;
2624: return(ret);
2625: }
2626:
1.50 daniel 2627: /**
2628: * xmlCharStrdup:
2629: * @cur: the input char *
2630: * @len: the len of @cur
2631: *
1.123 daniel 2632: * a strdup for char's to xmlChar's
1.68 daniel 2633: *
1.123 daniel 2634: * Returns a new xmlChar * or NULL
1.45 daniel 2635: */
2636:
1.123 daniel 2637: xmlChar *
1.55 daniel 2638: xmlCharStrdup(const char *cur) {
1.45 daniel 2639: const char *p = cur;
2640:
1.135 daniel 2641: if (cur == NULL) return(NULL);
1.45 daniel 2642: while (*p != '\0') p++;
2643: return(xmlCharStrndup(cur, p - cur));
2644: }
2645:
1.50 daniel 2646: /**
2647: * xmlStrcmp:
1.123 daniel 2648: * @str1: the first xmlChar *
2649: * @str2: the second xmlChar *
1.50 daniel 2650: *
1.123 daniel 2651: * a strcmp for xmlChar's
1.68 daniel 2652: *
2653: * Returns the integer result of the comparison
1.14 veillard 2654: */
2655:
1.55 daniel 2656: int
1.123 daniel 2657: xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1.14 veillard 2658: register int tmp;
2659:
1.135 daniel 2660: if ((str1 == NULL) && (str2 == NULL)) return(0);
2661: if (str1 == NULL) return(-1);
2662: if (str2 == NULL) return(1);
1.14 veillard 2663: do {
2664: tmp = *str1++ - *str2++;
2665: if (tmp != 0) return(tmp);
2666: } while ((*str1 != 0) && (*str2 != 0));
2667: return (*str1 - *str2);
2668: }
2669:
1.50 daniel 2670: /**
2671: * xmlStrncmp:
1.123 daniel 2672: * @str1: the first xmlChar *
2673: * @str2: the second xmlChar *
1.50 daniel 2674: * @len: the max comparison length
2675: *
1.123 daniel 2676: * a strncmp for xmlChar's
1.68 daniel 2677: *
2678: * Returns the integer result of the comparison
1.14 veillard 2679: */
2680:
1.55 daniel 2681: int
1.123 daniel 2682: xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1.14 veillard 2683: register int tmp;
2684:
2685: if (len <= 0) return(0);
1.135 daniel 2686: if ((str1 == NULL) && (str2 == NULL)) return(0);
2687: if (str1 == NULL) return(-1);
2688: if (str2 == NULL) return(1);
1.14 veillard 2689: do {
2690: tmp = *str1++ - *str2++;
2691: if (tmp != 0) return(tmp);
2692: len--;
2693: if (len <= 0) return(0);
2694: } while ((*str1 != 0) && (*str2 != 0));
2695: return (*str1 - *str2);
2696: }
2697:
1.50 daniel 2698: /**
2699: * xmlStrchr:
1.123 daniel 2700: * @str: the xmlChar * array
2701: * @val: the xmlChar to search
1.50 daniel 2702: *
1.123 daniel 2703: * a strchr for xmlChar's
1.68 daniel 2704: *
1.123 daniel 2705: * Returns the xmlChar * for the first occurence or NULL.
1.14 veillard 2706: */
2707:
1.123 daniel 2708: const xmlChar *
2709: xmlStrchr(const xmlChar *str, xmlChar val) {
1.135 daniel 2710: if (str == NULL) return(NULL);
1.14 veillard 2711: while (*str != 0) {
1.123 daniel 2712: if (*str == val) return((xmlChar *) str);
1.14 veillard 2713: str++;
2714: }
2715: return(NULL);
1.89 daniel 2716: }
2717:
2718: /**
2719: * xmlStrstr:
1.123 daniel 2720: * @str: the xmlChar * array (haystack)
2721: * @val: the xmlChar to search (needle)
1.89 daniel 2722: *
1.123 daniel 2723: * a strstr for xmlChar's
1.89 daniel 2724: *
1.123 daniel 2725: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 2726: */
2727:
1.123 daniel 2728: const xmlChar *
2729: xmlStrstr(const xmlChar *str, xmlChar *val) {
1.89 daniel 2730: int n;
2731:
2732: if (str == NULL) return(NULL);
2733: if (val == NULL) return(NULL);
2734: n = xmlStrlen(val);
2735:
2736: if (n == 0) return(str);
2737: while (*str != 0) {
2738: if (*str == *val) {
1.123 daniel 2739: if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1.89 daniel 2740: }
2741: str++;
2742: }
2743: return(NULL);
2744: }
2745:
2746: /**
2747: * xmlStrsub:
1.123 daniel 2748: * @str: the xmlChar * array (haystack)
1.89 daniel 2749: * @start: the index of the first char (zero based)
2750: * @len: the length of the substring
2751: *
2752: * Extract a substring of a given string
2753: *
1.123 daniel 2754: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 2755: */
2756:
1.123 daniel 2757: xmlChar *
2758: xmlStrsub(const xmlChar *str, int start, int len) {
1.89 daniel 2759: int i;
2760:
2761: if (str == NULL) return(NULL);
2762: if (start < 0) return(NULL);
1.90 daniel 2763: if (len < 0) return(NULL);
1.89 daniel 2764:
2765: for (i = 0;i < start;i++) {
2766: if (*str == 0) return(NULL);
2767: str++;
2768: }
2769: if (*str == 0) return(NULL);
2770: return(xmlStrndup(str, len));
1.14 veillard 2771: }
1.28 daniel 2772:
1.50 daniel 2773: /**
2774: * xmlStrlen:
1.123 daniel 2775: * @str: the xmlChar * array
1.50 daniel 2776: *
1.127 daniel 2777: * length of a xmlChar's string
1.68 daniel 2778: *
1.123 daniel 2779: * Returns the number of xmlChar contained in the ARRAY.
1.45 daniel 2780: */
2781:
1.55 daniel 2782: int
1.123 daniel 2783: xmlStrlen(const xmlChar *str) {
1.45 daniel 2784: int len = 0;
2785:
2786: if (str == NULL) return(0);
2787: while (*str != 0) {
2788: str++;
2789: len++;
2790: }
2791: return(len);
2792: }
2793:
1.50 daniel 2794: /**
2795: * xmlStrncat:
1.123 daniel 2796: * @cur: the original xmlChar * array
2797: * @add: the xmlChar * array added
1.50 daniel 2798: * @len: the length of @add
2799: *
1.123 daniel 2800: * a strncat for array of xmlChar's
1.68 daniel 2801: *
1.123 daniel 2802: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 2803: */
2804:
1.123 daniel 2805: xmlChar *
2806: xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1.45 daniel 2807: int size;
1.123 daniel 2808: xmlChar *ret;
1.45 daniel 2809:
2810: if ((add == NULL) || (len == 0))
2811: return(cur);
2812: if (cur == NULL)
2813: return(xmlStrndup(add, len));
2814:
2815: size = xmlStrlen(cur);
1.123 daniel 2816: ret = xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1.45 daniel 2817: if (ret == NULL) {
1.86 daniel 2818: fprintf(stderr, "xmlStrncat: realloc of %ld byte failed\n",
1.123 daniel 2819: (size + len + 1) * (long)sizeof(xmlChar));
1.45 daniel 2820: return(cur);
2821: }
1.123 daniel 2822: memcpy(&ret[size], add, len * sizeof(xmlChar));
1.45 daniel 2823: ret[size + len] = 0;
2824: return(ret);
2825: }
2826:
1.50 daniel 2827: /**
2828: * xmlStrcat:
1.123 daniel 2829: * @cur: the original xmlChar * array
2830: * @add: the xmlChar * array added
1.50 daniel 2831: *
1.152 daniel 2832: * a strcat for array of xmlChar's. Since they are supposed to be
2833: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2834: * a termination mark of '0'.
1.68 daniel 2835: *
1.123 daniel 2836: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 2837: */
1.123 daniel 2838: xmlChar *
2839: xmlStrcat(xmlChar *cur, const xmlChar *add) {
2840: const xmlChar *p = add;
1.45 daniel 2841:
2842: if (add == NULL) return(cur);
2843: if (cur == NULL)
2844: return(xmlStrdup(add));
2845:
1.152 daniel 2846: while (*p != 0) p++;
1.45 daniel 2847: return(xmlStrncat(cur, add, p - add));
2848: }
2849:
2850: /************************************************************************
2851: * *
2852: * Commodity functions, cleanup needed ? *
2853: * *
2854: ************************************************************************/
2855:
1.50 daniel 2856: /**
2857: * areBlanks:
2858: * @ctxt: an XML parser context
1.123 daniel 2859: * @str: a xmlChar *
1.50 daniel 2860: * @len: the size of @str
2861: *
1.45 daniel 2862: * Is this a sequence of blank chars that one can ignore ?
1.50 daniel 2863: *
1.68 daniel 2864: * Returns 1 if ignorable 0 otherwise.
1.45 daniel 2865: */
2866:
1.123 daniel 2867: static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1.104 daniel 2868: int i, ret;
1.45 daniel 2869: xmlNodePtr lastChild;
2870:
1.176 daniel 2871: /*
2872: * Check for xml:space value.
2873: */
2874: if (*(ctxt->space) == 1)
2875: return(0);
2876:
2877: /*
2878: * Check that the string is made of blanks
2879: */
1.45 daniel 2880: for (i = 0;i < len;i++)
2881: if (!(IS_BLANK(str[i]))) return(0);
2882:
1.176 daniel 2883: /*
2884: * Look if the element is mixed content in the Dtd if available
2885: */
1.104 daniel 2886: if (ctxt->myDoc != NULL) {
2887: ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2888: if (ret == 0) return(1);
2889: if (ret == 1) return(0);
2890: }
1.176 daniel 2891:
1.104 daniel 2892: /*
1.176 daniel 2893: * Otherwise, heuristic :-\
1.104 daniel 2894: */
1.179 daniel 2895: if (ctxt->keepBlanks)
2896: return(0);
2897: if (RAW != '<') return(0);
2898: if (ctxt->node == NULL) return(0);
2899: if ((ctxt->node->children == NULL) &&
2900: (RAW == '<') && (NXT(1) == '/')) return(0);
2901:
1.45 daniel 2902: lastChild = xmlGetLastChild(ctxt->node);
2903: if (lastChild == NULL) {
2904: if (ctxt->node->content != NULL) return(0);
2905: } else if (xmlNodeIsText(lastChild))
2906: return(0);
1.157 daniel 2907: else if ((ctxt->node->children != NULL) &&
2908: (xmlNodeIsText(ctxt->node->children)))
1.104 daniel 2909: return(0);
1.45 daniel 2910: return(1);
2911: }
2912:
1.50 daniel 2913: /**
2914: * xmlHandleEntity:
2915: * @ctxt: an XML parser context
2916: * @entity: an XML entity pointer.
2917: *
2918: * Default handling of defined entities, when should we define a new input
1.45 daniel 2919: * stream ? When do we just handle that as a set of chars ?
1.99 daniel 2920: *
2921: * OBSOLETE: to be removed at some point.
1.45 daniel 2922: */
2923:
1.55 daniel 2924: void
2925: xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 2926: int len;
1.50 daniel 2927: xmlParserInputPtr input;
1.45 daniel 2928:
2929: if (entity->content == NULL) {
1.123 daniel 2930: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 2931: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2932: ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
1.45 daniel 2933: entity->name);
1.59 daniel 2934: ctxt->wellFormed = 0;
1.180 daniel 2935: ctxt->disableSAX = 1;
1.45 daniel 2936: return;
2937: }
2938: len = xmlStrlen(entity->content);
2939: if (len <= 2) goto handle_as_char;
2940:
2941: /*
2942: * Redefine its content as an input stream.
2943: */
1.50 daniel 2944: input = xmlNewEntityInputStream(ctxt, entity);
2945: xmlPushInput(ctxt, input);
1.45 daniel 2946: return;
2947:
2948: handle_as_char:
2949: /*
2950: * Just handle the content as a set of chars.
2951: */
1.171 daniel 2952: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
2953: (ctxt->sax->characters != NULL))
1.74 daniel 2954: ctxt->sax->characters(ctxt->userData, entity->content, len);
1.45 daniel 2955:
2956: }
2957:
/*
 * Forward declarations for recursive behaviour.
 */
1.77 daniel 2961: void xmlParsePEReference(xmlParserCtxtPtr ctxt);
2962: void xmlParseReference(xmlParserCtxtPtr ctxt);
1.45 daniel 2963:
1.28 daniel 2964: /************************************************************************
2965: * *
2966: * Extra stuff for namespace support *
2967: * Relates to http://www.w3.org/TR/WD-xml-names *
2968: * *
2969: ************************************************************************/
2970:
1.50 daniel 2971: /**
2972: * xmlNamespaceParseNCName:
2973: * @ctxt: an XML parser context
2974: *
2975: * parse an XML namespace name.
1.28 daniel 2976: *
2977: * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
2978: *
2979: * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
2980: * CombiningChar | Extender
1.68 daniel 2981: *
2982: * Returns the namespace name or NULL
1.28 daniel 2983: */
2984:
1.123 daniel 2985: xmlChar *
1.55 daniel 2986: xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
1.152 daniel 2987: xmlChar buf[XML_MAX_NAMELEN + 5];
2988: int len = 0, l;
2989: int cur = CUR_CHAR(l);
1.28 daniel 2990:
1.156 daniel 2991: /* load first the value of the char !!! */
1.152 daniel 2992: if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
1.28 daniel 2993:
1.152 daniel 2994: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
2995: (cur == '.') || (cur == '-') ||
2996: (cur == '_') ||
2997: (IS_COMBINING(cur)) ||
2998: (IS_EXTENDER(cur))) {
2999: COPY_BUF(l,buf,len,cur);
3000: NEXTL(l);
3001: cur = CUR_CHAR(l);
1.91 daniel 3002: if (len >= XML_MAX_NAMELEN) {
3003: fprintf(stderr,
3004: "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
1.152 daniel 3005: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
3006: (cur == '.') || (cur == '-') ||
3007: (cur == '_') ||
3008: (IS_COMBINING(cur)) ||
3009: (IS_EXTENDER(cur))) {
3010: NEXTL(l);
3011: cur = CUR_CHAR(l);
3012: }
1.91 daniel 3013: break;
3014: }
3015: }
3016: return(xmlStrndup(buf, len));
1.28 daniel 3017: }
3018:
1.50 daniel 3019: /**
3020: * xmlNamespaceParseQName:
3021: * @ctxt: an XML parser context
1.123 daniel 3022: * @prefix: a xmlChar **
1.50 daniel 3023: *
3024: * parse an XML qualified name
1.28 daniel 3025: *
3026: * [NS 5] QName ::= (Prefix ':')? LocalPart
3027: *
3028: * [NS 6] Prefix ::= NCName
3029: *
3030: * [NS 7] LocalPart ::= NCName
1.68 daniel 3031: *
1.127 daniel 3032: * Returns the local part, and prefix is updated
1.50 daniel 3033: * to get the Prefix if any.
1.28 daniel 3034: */
3035:
1.123 daniel 3036: xmlChar *
3037: xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, xmlChar **prefix) {
3038: xmlChar *ret = NULL;
1.28 daniel 3039:
3040: *prefix = NULL;
3041: ret = xmlNamespaceParseNCName(ctxt);
1.152 daniel 3042: if (RAW == ':') {
1.28 daniel 3043: *prefix = ret;
1.40 daniel 3044: NEXT;
1.28 daniel 3045: ret = xmlNamespaceParseNCName(ctxt);
3046: }
3047:
3048: return(ret);
3049: }
3050:
1.50 daniel 3051: /**
1.72 daniel 3052: * xmlSplitQName:
1.162 daniel 3053: * @ctxt: an XML parser context
1.72 daniel 3054: * @name: an XML parser context
1.123 daniel 3055: * @prefix: a xmlChar **
1.72 daniel 3056: *
3057: * parse an XML qualified name string
3058: *
3059: * [NS 5] QName ::= (Prefix ':')? LocalPart
3060: *
3061: * [NS 6] Prefix ::= NCName
3062: *
3063: * [NS 7] LocalPart ::= NCName
3064: *
1.127 daniel 3065: * Returns the local part, and prefix is updated
1.72 daniel 3066: * to get the Prefix if any.
3067: */
3068:
1.123 daniel 3069: xmlChar *
1.162 daniel 3070: xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
3071: xmlChar buf[XML_MAX_NAMELEN + 5];
3072: int len = 0;
1.123 daniel 3073: xmlChar *ret = NULL;
3074: const xmlChar *cur = name;
1.162 daniel 3075: int c,l;
1.72 daniel 3076:
3077: *prefix = NULL;
1.113 daniel 3078:
3079: /* xml: prefix is not really a namespace */
3080: if ((cur[0] == 'x') && (cur[1] == 'm') &&
3081: (cur[2] == 'l') && (cur[3] == ':'))
3082: return(xmlStrdup(name));
3083:
1.162 daniel 3084: /* nasty but valid */
3085: if (cur[0] == ':')
3086: return(xmlStrdup(name));
3087:
3088: c = CUR_SCHAR(cur, l);
3089: if (!IS_LETTER(c) && (c != '_')) return(NULL);
1.72 daniel 3090:
1.162 daniel 3091: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3092: (c == '.') || (c == '-') ||
3093: (c == '_') ||
3094: (IS_COMBINING(c)) ||
3095: (IS_EXTENDER(c))) {
3096: COPY_BUF(l,buf,len,c);
3097: cur += l;
3098: c = CUR_SCHAR(cur, l);
3099: }
1.72 daniel 3100:
1.162 daniel 3101: ret = xmlStrndup(buf, len);
1.72 daniel 3102:
1.162 daniel 3103: if (c == ':') {
3104: cur += l;
1.163 daniel 3105: c = CUR_SCHAR(cur, l);
1.162 daniel 3106: if (!IS_LETTER(c) && (c != '_')) return(ret);
1.72 daniel 3107: *prefix = ret;
1.162 daniel 3108: len = 0;
1.72 daniel 3109:
1.162 daniel 3110: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3111: (c == '.') || (c == '-') ||
3112: (c == '_') ||
3113: (IS_COMBINING(c)) ||
3114: (IS_EXTENDER(c))) {
3115: COPY_BUF(l,buf,len,c);
3116: cur += l;
3117: c = CUR_SCHAR(cur, l);
3118: }
1.72 daniel 3119:
1.162 daniel 3120: ret = xmlStrndup(buf, len);
1.72 daniel 3121: }
3122:
3123: return(ret);
3124: }
3125: /**
1.50 daniel 3126: * xmlNamespaceParseNSDef:
3127: * @ctxt: an XML parser context
3128: *
3129: * parse a namespace prefix declaration
1.28 daniel 3130: *
3131: * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
3132: *
3133: * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
1.68 daniel 3134: *
3135: * Returns the namespace name
1.28 daniel 3136: */
3137:
1.123 daniel 3138: xmlChar *
1.55 daniel 3139: xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
1.123 daniel 3140: xmlChar *name = NULL;
1.28 daniel 3141:
1.152 daniel 3142: if ((RAW == 'x') && (NXT(1) == 'm') &&
1.40 daniel 3143: (NXT(2) == 'l') && (NXT(3) == 'n') &&
3144: (NXT(4) == 's')) {
3145: SKIP(5);
1.152 daniel 3146: if (RAW == ':') {
1.40 daniel 3147: NEXT;
1.28 daniel 3148: name = xmlNamespaceParseNCName(ctxt);
3149: }
3150: }
1.39 daniel 3151: return(name);
1.28 daniel 3152: }
3153:
1.50 daniel 3154: /**
3155: * xmlParseQuotedString:
3156: * @ctxt: an XML parser context
3157: *
1.45 daniel 3158: * [OLD] Parse and return a string between quotes or doublequotes
1.110 daniel 3159: * To be removed at next drop of binary compatibility
1.68 daniel 3160: *
3161: * Returns the string parser or NULL.
1.45 daniel 3162: */
1.123 daniel 3163: xmlChar *
1.55 daniel 3164: xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
1.135 daniel 3165: xmlChar *buf = NULL;
1.152 daniel 3166: int len = 0,l;
1.140 daniel 3167: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3168: int c;
1.45 daniel 3169:
1.135 daniel 3170: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3171: if (buf == NULL) {
3172: fprintf(stderr, "malloc of %d byte failed\n", size);
3173: return(NULL);
3174: }
1.152 daniel 3175: if (RAW == '"') {
1.45 daniel 3176: NEXT;
1.152 daniel 3177: c = CUR_CHAR(l);
1.135 daniel 3178: while (IS_CHAR(c) && (c != '"')) {
1.152 daniel 3179: if (len + 5 >= size) {
1.135 daniel 3180: size *= 2;
3181: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3182: if (buf == NULL) {
3183: fprintf(stderr, "realloc of %d byte failed\n", size);
3184: return(NULL);
3185: }
3186: }
1.152 daniel 3187: COPY_BUF(l,buf,len,c);
3188: NEXTL(l);
3189: c = CUR_CHAR(l);
1.135 daniel 3190: }
3191: if (c != '"') {
1.123 daniel 3192: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 3193: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3194: ctxt->sax->error(ctxt->userData,
3195: "String not closed \"%.50s\"\n", buf);
1.59 daniel 3196: ctxt->wellFormed = 0;
1.180 daniel 3197: ctxt->disableSAX = 1;
1.55 daniel 3198: } else {
1.45 daniel 3199: NEXT;
3200: }
1.152 daniel 3201: } else if (RAW == '\''){
1.45 daniel 3202: NEXT;
1.135 daniel 3203: c = CUR;
3204: while (IS_CHAR(c) && (c != '\'')) {
3205: if (len + 1 >= size) {
3206: size *= 2;
3207: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3208: if (buf == NULL) {
3209: fprintf(stderr, "realloc of %d byte failed\n", size);
3210: return(NULL);
3211: }
3212: }
3213: buf[len++] = c;
3214: NEXT;
3215: c = CUR;
3216: }
1.152 daniel 3217: if (RAW != '\'') {
1.123 daniel 3218: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 3219: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3220: ctxt->sax->error(ctxt->userData,
3221: "String not closed \"%.50s\"\n", buf);
1.59 daniel 3222: ctxt->wellFormed = 0;
1.180 daniel 3223: ctxt->disableSAX = 1;
1.55 daniel 3224: } else {
1.45 daniel 3225: NEXT;
3226: }
3227: }
1.135 daniel 3228: return(buf);
1.45 daniel 3229: }
3230:
1.50 daniel 3231: /**
3232: * xmlParseNamespace:
3233: * @ctxt: an XML parser context
3234: *
1.45 daniel 3235: * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
3236: *
3237: * This is what the older xml-name Working Draft specified, a bunch of
3238: * other stuff may still rely on it, so support is still here as
1.127 daniel 3239: * if it was declared on the root of the Tree:-(
1.110 daniel 3240: *
3241: * To be removed at next drop of binary compatibility
1.45 daniel 3242: */
3243:
1.55 daniel 3244: void
3245: xmlParseNamespace(xmlParserCtxtPtr ctxt) {
1.123 daniel 3246: xmlChar *href = NULL;
3247: xmlChar *prefix = NULL;
1.45 daniel 3248: int garbage = 0;
3249:
3250: /*
3251: * We just skipped "namespace" or "xml:namespace"
3252: */
3253: SKIP_BLANKS;
3254:
1.153 daniel 3255: while (IS_CHAR(RAW) && (RAW != '>')) {
1.45 daniel 3256: /*
3257: * We can have "ns" or "prefix" attributes
3258: * Old encoding as 'href' or 'AS' attributes is still supported
3259: */
1.152 daniel 3260: if ((RAW == 'n') && (NXT(1) == 's')) {
1.45 daniel 3261: garbage = 0;
3262: SKIP(2);
3263: SKIP_BLANKS;
3264:
1.152 daniel 3265: if (RAW != '=') continue;
1.45 daniel 3266: NEXT;
3267: SKIP_BLANKS;
3268:
3269: href = xmlParseQuotedString(ctxt);
3270: SKIP_BLANKS;
1.152 daniel 3271: } else if ((RAW == 'h') && (NXT(1) == 'r') &&
1.45 daniel 3272: (NXT(2) == 'e') && (NXT(3) == 'f')) {
3273: garbage = 0;
3274: SKIP(4);
3275: SKIP_BLANKS;
3276:
1.152 daniel 3277: if (RAW != '=') continue;
1.45 daniel 3278: NEXT;
3279: SKIP_BLANKS;
3280:
3281: href = xmlParseQuotedString(ctxt);
3282: SKIP_BLANKS;
1.152 daniel 3283: } else if ((RAW == 'p') && (NXT(1) == 'r') &&
1.45 daniel 3284: (NXT(2) == 'e') && (NXT(3) == 'f') &&
3285: (NXT(4) == 'i') && (NXT(5) == 'x')) {
3286: garbage = 0;
3287: SKIP(6);
3288: SKIP_BLANKS;
3289:
1.152 daniel 3290: if (RAW != '=') continue;
1.45 daniel 3291: NEXT;
3292: SKIP_BLANKS;
3293:
3294: prefix = xmlParseQuotedString(ctxt);
3295: SKIP_BLANKS;
1.152 daniel 3296: } else if ((RAW == 'A') && (NXT(1) == 'S')) {
1.45 daniel 3297: garbage = 0;
3298: SKIP(2);
3299: SKIP_BLANKS;
3300:
1.152 daniel 3301: if (RAW != '=') continue;
1.45 daniel 3302: NEXT;
3303: SKIP_BLANKS;
3304:
3305: prefix = xmlParseQuotedString(ctxt);
3306: SKIP_BLANKS;
1.152 daniel 3307: } else if ((RAW == '?') && (NXT(1) == '>')) {
1.45 daniel 3308: garbage = 0;
1.91 daniel 3309: NEXT;
1.45 daniel 3310: } else {
3311: /*
3312: * Found garbage when parsing the namespace
3313: */
1.122 daniel 3314: if (!garbage) {
1.55 daniel 3315: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3316: ctxt->sax->error(ctxt->userData,
3317: "xmlParseNamespace found garbage\n");
3318: }
1.123 daniel 3319: ctxt->errNo = XML_ERR_NS_DECL_ERROR;
1.59 daniel 3320: ctxt->wellFormed = 0;
1.180 daniel 3321: ctxt->disableSAX = 1;
1.45 daniel 3322: NEXT;
3323: }
3324: }
3325:
3326: MOVETO_ENDTAG(CUR_PTR);
3327: NEXT;
3328:
3329: /*
3330: * Register the DTD.
1.72 daniel 3331: if (href != NULL)
3332: if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
1.74 daniel 3333: ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
1.45 daniel 3334: */
3335:
1.119 daniel 3336: if (prefix != NULL) xmlFree(prefix);
3337: if (href != NULL) xmlFree(href);
1.45 daniel 3338: }
3339:
1.28 daniel 3340: /************************************************************************
3341: * *
3342: * The parser itself *
3343: * Relates to http://www.w3.org/TR/REC-xml *
3344: * *
3345: ************************************************************************/
1.14 veillard 3346:
1.50 daniel 3347: /**
1.97 daniel 3348: * xmlScanName:
3349: * @ctxt: an XML parser context
3350: *
3351: * Trickery: parse an XML name but without consuming the input flow
3352: * Needed for rollback cases.
3353: *
3354: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3355: * CombiningChar | Extender
3356: *
3357: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3358: *
3359: * [6] Names ::= Name (S Name)*
3360: *
3361: * Returns the Name parsed or NULL
3362: */
3363:
1.123 daniel 3364: xmlChar *
1.97 daniel 3365: xmlScanName(xmlParserCtxtPtr ctxt) {
1.123 daniel 3366: xmlChar buf[XML_MAX_NAMELEN];
1.97 daniel 3367: int len = 0;
3368:
3369: GROW;
1.152 daniel 3370: if (!IS_LETTER(RAW) && (RAW != '_') &&
3371: (RAW != ':')) {
1.97 daniel 3372: return(NULL);
3373: }
3374:
3375: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
3376: (NXT(len) == '.') || (NXT(len) == '-') ||
3377: (NXT(len) == '_') || (NXT(len) == ':') ||
3378: (IS_COMBINING(NXT(len))) ||
3379: (IS_EXTENDER(NXT(len)))) {
3380: buf[len] = NXT(len);
3381: len++;
3382: if (len >= XML_MAX_NAMELEN) {
3383: fprintf(stderr,
3384: "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3385: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
3386: (NXT(len) == '.') || (NXT(len) == '-') ||
3387: (NXT(len) == '_') || (NXT(len) == ':') ||
3388: (IS_COMBINING(NXT(len))) ||
3389: (IS_EXTENDER(NXT(len))))
3390: len++;
3391: break;
3392: }
3393: }
3394: return(xmlStrndup(buf, len));
3395: }
3396:
3397: /**
1.50 daniel 3398: * xmlParseName:
3399: * @ctxt: an XML parser context
3400: *
3401: * parse an XML name.
1.22 daniel 3402: *
3403: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3404: * CombiningChar | Extender
3405: *
3406: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3407: *
3408: * [6] Names ::= Name (S Name)*
1.68 daniel 3409: *
3410: * Returns the Name parsed or NULL
1.1 veillard 3411: */
3412:
1.123 daniel 3413: xmlChar *
1.55 daniel 3414: xmlParseName(xmlParserCtxtPtr ctxt) {
1.160 daniel 3415: xmlChar buf[XML_MAX_NAMELEN + 5];
3416: int len = 0, l;
3417: int c;
1.1 veillard 3418:
1.91 daniel 3419: GROW;
1.160 daniel 3420: c = CUR_CHAR(l);
1.190 daniel 3421: if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3422: (!IS_LETTER(c) && (c != '_') &&
3423: (c != ':'))) {
1.91 daniel 3424: return(NULL);
3425: }
1.40 daniel 3426:
1.190 daniel 3427: while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3428: ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3429: (c == '.') || (c == '-') ||
3430: (c == '_') || (c == ':') ||
3431: (IS_COMBINING(c)) ||
3432: (IS_EXTENDER(c)))) {
1.160 daniel 3433: COPY_BUF(l,buf,len,c);
3434: NEXTL(l);
3435: c = CUR_CHAR(l);
1.91 daniel 3436: if (len >= XML_MAX_NAMELEN) {
3437: fprintf(stderr,
3438: "xmlParseName: reached XML_MAX_NAMELEN limit\n");
1.160 daniel 3439: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3440: (c == '.') || (c == '-') ||
3441: (c == '_') || (c == ':') ||
3442: (IS_COMBINING(c)) ||
3443: (IS_EXTENDER(c))) {
3444: NEXTL(l);
3445: c = CUR_CHAR(l);
1.97 daniel 3446: }
1.91 daniel 3447: break;
3448: }
3449: }
3450: return(xmlStrndup(buf, len));
1.22 daniel 3451: }
3452:
1.50 daniel 3453: /**
1.135 daniel 3454: * xmlParseStringName:
3455: * @ctxt: an XML parser context
3456: * @str: a pointer to an index in the string
3457: *
3458: * parse an XML name.
3459: *
3460: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3461: * CombiningChar | Extender
3462: *
3463: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3464: *
3465: * [6] Names ::= Name (S Name)*
3466: *
3467: * Returns the Name parsed or NULL. The str pointer
3468: * is updated to the current location in the string.
3469: */
3470:
3471: xmlChar *
3472: xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1.176 daniel 3473: xmlChar buf[XML_MAX_NAMELEN + 5];
3474: const xmlChar *cur = *str;
3475: int len = 0, l;
3476: int c;
1.135 daniel 3477:
1.176 daniel 3478: c = CUR_SCHAR(cur, l);
3479: if (!IS_LETTER(c) && (c != '_') &&
3480: (c != ':')) {
1.135 daniel 3481: return(NULL);
3482: }
3483:
1.176 daniel 3484: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3485: (c == '.') || (c == '-') ||
3486: (c == '_') || (c == ':') ||
3487: (IS_COMBINING(c)) ||
3488: (IS_EXTENDER(c))) {
3489: COPY_BUF(l,buf,len,c);
3490: cur += l;
3491: c = CUR_SCHAR(cur, l);
3492: if (len >= XML_MAX_NAMELEN) {
3493: fprintf(stderr,
3494: "xmlParseName: reached XML_MAX_NAMELEN limit\n");
3495: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3496: (c == '.') || (c == '-') ||
3497: (c == '_') || (c == ':') ||
3498: (IS_COMBINING(c)) ||
3499: (IS_EXTENDER(c))) {
3500: cur += l;
3501: c = CUR_SCHAR(cur, l);
3502: }
3503: break;
3504: }
1.135 daniel 3505: }
1.176 daniel 3506: *str = cur;
3507: return(xmlStrndup(buf, len));
1.135 daniel 3508: }
3509:
3510: /**
1.50 daniel 3511: * xmlParseNmtoken:
3512: * @ctxt: an XML parser context
3513: *
3514: * parse an XML Nmtoken.
1.22 daniel 3515: *
3516: * [7] Nmtoken ::= (NameChar)+
3517: *
3518: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1.68 daniel 3519: *
3520: * Returns the Nmtoken parsed or NULL
1.22 daniel 3521: */
3522:
1.123 daniel 3523: xmlChar *
1.55 daniel 3524: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1.123 daniel 3525: xmlChar buf[XML_MAX_NAMELEN];
1.91 daniel 3526: int len = 0;
1.160 daniel 3527: int c,l;
1.22 daniel 3528:
1.91 daniel 3529: GROW;
1.160 daniel 3530: c = CUR_CHAR(l);
3531: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3532: (c == '.') || (c == '-') ||
3533: (c == '_') || (c == ':') ||
3534: (IS_COMBINING(c)) ||
3535: (IS_EXTENDER(c))) {
3536: COPY_BUF(l,buf,len,c);
3537: NEXTL(l);
3538: c = CUR_CHAR(l);
1.91 daniel 3539: if (len >= XML_MAX_NAMELEN) {
3540: fprintf(stderr,
3541: "xmlParseNmtoken: reached XML_MAX_NAMELEN limit\n");
1.160 daniel 3542: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3543: (c == '.') || (c == '-') ||
3544: (c == '_') || (c == ':') ||
3545: (IS_COMBINING(c)) ||
3546: (IS_EXTENDER(c))) {
3547: NEXTL(l);
3548: c = CUR_CHAR(l);
3549: }
1.91 daniel 3550: break;
3551: }
3552: }
1.168 daniel 3553: if (len == 0)
3554: return(NULL);
1.91 daniel 3555: return(xmlStrndup(buf, len));
1.1 veillard 3556: }
3557:
1.50 daniel 3558: /**
3559: * xmlParseEntityValue:
3560: * @ctxt: an XML parser context
1.78 daniel 3561: * @orig: if non-NULL store a copy of the original entity value
1.50 daniel 3562: *
3563: * parse a value for ENTITY decl.
1.24 daniel 3564: *
3565: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3566: * "'" ([^%&'] | PEReference | Reference)* "'"
1.68 daniel 3567: *
1.78 daniel 3568: * Returns the EntityValue parsed with reference substitued or NULL
1.24 daniel 3569: */
3570:
1.123 daniel 3571: xmlChar *
3572: xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1.135 daniel 3573: xmlChar *buf = NULL;
3574: int len = 0;
1.140 daniel 3575: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3576: int c, l;
1.135 daniel 3577: xmlChar stop;
1.123 daniel 3578: xmlChar *ret = NULL;
1.176 daniel 3579: const xmlChar *cur = NULL;
1.98 daniel 3580: xmlParserInputPtr input;
1.24 daniel 3581:
1.152 daniel 3582: if (RAW == '"') stop = '"';
3583: else if (RAW == '\'') stop = '\'';
1.135 daniel 3584: else {
3585: ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
3586: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3587: ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
3588: ctxt->wellFormed = 0;
1.180 daniel 3589: ctxt->disableSAX = 1;
1.135 daniel 3590: return(NULL);
3591: }
3592: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3593: if (buf == NULL) {
3594: fprintf(stderr, "malloc of %d byte failed\n", size);
3595: return(NULL);
3596: }
1.94 daniel 3597:
1.135 daniel 3598: /*
3599: * The content of the entity definition is copied in a buffer.
3600: */
1.94 daniel 3601:
1.135 daniel 3602: ctxt->instate = XML_PARSER_ENTITY_VALUE;
3603: input = ctxt->input;
3604: GROW;
3605: NEXT;
1.152 daniel 3606: c = CUR_CHAR(l);
1.135 daniel 3607: /*
3608: * NOTE: 4.4.5 Included in Literal
3609: * When a parameter entity reference appears in a literal entity
3610: * value, ... a single or double quote character in the replacement
3611: * text is always treated as a normal data character and will not
3612: * terminate the literal.
3613: * In practice it means we stop the loop only when back at parsing
3614: * the initial entity and the quote is found
3615: */
3616: while (IS_CHAR(c) && ((c != stop) || (ctxt->input != input))) {
1.152 daniel 3617: if (len + 5 >= size) {
1.135 daniel 3618: size *= 2;
3619: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3620: if (buf == NULL) {
3621: fprintf(stderr, "realloc of %d byte failed\n", size);
3622: return(NULL);
1.94 daniel 3623: }
1.79 daniel 3624: }
1.152 daniel 3625: COPY_BUF(l,buf,len,c);
3626: NEXTL(l);
1.98 daniel 3627: /*
1.135 daniel 3628: * Pop-up of finished entities.
1.98 daniel 3629: */
1.152 daniel 3630: while ((RAW == 0) && (ctxt->inputNr > 1))
1.135 daniel 3631: xmlPopInput(ctxt);
1.152 daniel 3632:
3633: c = CUR_CHAR(l);
1.135 daniel 3634: if (c == 0) {
1.94 daniel 3635: GROW;
1.152 daniel 3636: c = CUR_CHAR(l);
1.79 daniel 3637: }
1.135 daniel 3638: }
3639: buf[len] = 0;
3640:
3641: /*
1.176 daniel 3642: * Raise problem w.r.t. '&' and '%' being used in non-entities
3643: * reference constructs. Note Charref will be handled in
3644: * xmlStringDecodeEntities()
3645: */
3646: cur = buf;
3647: while (*cur != 0) {
3648: if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3649: xmlChar *name;
3650: xmlChar tmp = *cur;
3651:
3652: cur++;
3653: name = xmlParseStringName(ctxt, &cur);
3654: if ((name == NULL) || (*cur != ';')) {
3655: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3656: ctxt->sax->error(ctxt->userData,
3657: "EntityValue: '%c' forbidden except for entities references\n",
3658: tmp);
3659: ctxt->wellFormed = 0;
1.180 daniel 3660: ctxt->disableSAX = 1;
1.176 daniel 3661: ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
3662: }
3663: if ((ctxt->inSubset == 1) && (tmp == '%')) {
3664: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3665: ctxt->sax->error(ctxt->userData,
3666: "EntityValue: PEReferences forbidden in internal subset\n",
3667: tmp);
3668: ctxt->wellFormed = 0;
1.180 daniel 3669: ctxt->disableSAX = 1;
1.176 daniel 3670: ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
3671: }
3672: if (name != NULL)
3673: xmlFree(name);
3674: }
3675: cur++;
3676: }
3677:
3678: /*
1.135 daniel 3679: * Then PEReference entities are substituted.
3680: */
3681: if (c != stop) {
3682: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.55 daniel 3683: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3684: ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
1.59 daniel 3685: ctxt->wellFormed = 0;
1.180 daniel 3686: ctxt->disableSAX = 1;
1.170 daniel 3687: xmlFree(buf);
1.135 daniel 3688: } else {
3689: NEXT;
3690: /*
3691: * NOTE: 4.4.7 Bypassed
3692: * When a general entity reference appears in the EntityValue in
3693: * an entity declaration, it is bypassed and left as is.
1.176 daniel 3694: * so XML_SUBSTITUTE_REF is not set here.
1.135 daniel 3695: */
3696: ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3697: 0, 0, 0);
3698: if (orig != NULL)
3699: *orig = buf;
3700: else
3701: xmlFree(buf);
1.24 daniel 3702: }
3703:
3704: return(ret);
3705: }
3706:
1.50 daniel 3707: /**
3708: * xmlParseAttValue:
3709: * @ctxt: an XML parser context
3710: *
3711: * parse a value for an attribute
1.78 daniel 3712: * Note: the parser won't do substitution of entities here, this
1.113 daniel 3713: * will be handled later in xmlStringGetNodeList
1.29 daniel 3714: *
3715: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3716: * "'" ([^<&'] | Reference)* "'"
1.68 daniel 3717: *
1.129 daniel 3718: * 3.3.3 Attribute-Value Normalization:
3719: * Before the value of an attribute is passed to the application or
3720: * checked for validity, the XML processor must normalize it as follows:
3721: * - a character reference is processed by appending the referenced
3722: * character to the attribute value
3723: * - an entity reference is processed by recursively processing the
3724: * replacement text of the entity
3725: * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3726: * appending #x20 to the normalized value, except that only a single
3727: * #x20 is appended for a "#xD#xA" sequence that is part of an external
3728: * parsed entity or the literal entity value of an internal parsed entity
3729: * - other characters are processed by appending them to the normalized value
1.130 daniel 3730: * If the declared value is not CDATA, then the XML processor must further
3731: * process the normalized attribute value by discarding any leading and
3732: * trailing space (#x20) characters, and by replacing sequences of space
3733: * (#x20) characters by a single space (#x20) character.
3734: * All attributes for which no declaration has been read should be treated
3735: * by a non-validating parser as if declared CDATA.
1.129 daniel 3736: *
3737: * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
1.29 daniel 3738: */
3739:
1.123 daniel 3740: xmlChar *
1.55 daniel 3741: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.129 daniel 3742: xmlChar limit = 0;
3743: xmlChar *buffer = NULL;
3744: int buffer_size = 0;
3745: xmlChar *out = NULL;
3746:
3747: xmlChar *current = NULL;
3748: xmlEntityPtr ent;
3749: xmlChar cur;
3750:
1.29 daniel 3751:
1.91 daniel 3752: SHRINK;
1.151 daniel 3753: if (NXT(0) == '"') {
1.96 daniel 3754: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.129 daniel 3755: limit = '"';
1.40 daniel 3756: NEXT;
1.151 daniel 3757: } else if (NXT(0) == '\'') {
1.129 daniel 3758: limit = '\'';
1.96 daniel 3759: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.40 daniel 3760: NEXT;
1.29 daniel 3761: } else {
1.123 daniel 3762: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
1.55 daniel 3763: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3764: ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
1.59 daniel 3765: ctxt->wellFormed = 0;
1.180 daniel 3766: ctxt->disableSAX = 1;
1.129 daniel 3767: return(NULL);
1.29 daniel 3768: }
3769:
1.129 daniel 3770: /*
3771: * allocate a translation buffer.
3772: */
1.140 daniel 3773: buffer_size = XML_PARSER_BUFFER_SIZE;
1.129 daniel 3774: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
3775: if (buffer == NULL) {
3776: perror("xmlParseAttValue: malloc failed");
3777: return(NULL);
3778: }
3779: out = buffer;
3780:
3781: /*
3782: * Ok loop until we reach one of the ending char or a size limit.
3783: */
3784: cur = CUR;
1.156 daniel 3785: while (((NXT(0) != limit) && (cur != '<')) || (ctxt->token != 0)) {
1.129 daniel 3786: if (cur == 0) break;
3787: if ((cur == '&') && (NXT(1) == '#')) {
3788: int val = xmlParseCharRef(ctxt);
3789: *out++ = val;
3790: } else if (cur == '&') {
3791: ent = xmlParseEntityRef(ctxt);
3792: if ((ent != NULL) &&
3793: (ctxt->replaceEntities != 0)) {
1.185 daniel 3794: xmlChar *rep;
3795:
1.186 daniel 3796: if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3797: rep = xmlStringDecodeEntities(ctxt, ent->content,
1.185 daniel 3798: XML_SUBSTITUTE_REF, 0, 0, 0);
1.186 daniel 3799: if (rep != NULL) {
3800: current = rep;
3801: while (*current != 0) {
3802: *out++ = *current++;
3803: if (out - buffer > buffer_size - 10) {
3804: int index = out - buffer;
1.129 daniel 3805:
1.186 daniel 3806: growBuffer(buffer);
3807: out = &buffer[index];
3808: }
1.185 daniel 3809: }
1.186 daniel 3810: xmlFree(rep);
1.129 daniel 3811: }
1.186 daniel 3812: } else {
3813: if (ent->content != NULL)
3814: *out++ = ent->content[0];
1.129 daniel 3815: }
3816: } else if (ent != NULL) {
3817: int i = xmlStrlen(ent->name);
3818: const xmlChar *cur = ent->name;
3819:
1.186 daniel 3820: /*
3821: * This may look absurd but is needed to detect
3822: * entities problems
3823: */
3824: if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3825: xmlChar *rep;
3826: rep = xmlStringDecodeEntities(ctxt, ent->content,
3827: XML_SUBSTITUTE_REF, 0, 0, 0);
3828: if (rep != NULL)
3829: xmlFree(rep);
3830: }
3831:
3832: /*
3833: * Just output the reference
3834: */
1.129 daniel 3835: *out++ = '&';
3836: if (out - buffer > buffer_size - i - 10) {
3837: int index = out - buffer;
3838:
3839: growBuffer(buffer);
3840: out = &buffer[index];
3841: }
3842: for (;i > 0;i--)
3843: *out++ = *cur++;
3844: *out++ = ';';
3845: }
3846: } else {
1.156 daniel 3847: /* invalid for UTF-8 , use COPY(out); !!! */
1.129 daniel 3848: if ((cur == 0x20) || (cur == 0xD) || (cur == 0xA) || (cur == 0x9)) {
1.130 daniel 3849: *out++ = 0x20;
3850: if (out - buffer > buffer_size - 10) {
3851: int index = out - buffer;
3852:
3853: growBuffer(buffer);
3854: out = &buffer[index];
1.129 daniel 3855: }
3856: } else {
3857: *out++ = cur;
3858: if (out - buffer > buffer_size - 10) {
3859: int index = out - buffer;
3860:
3861: growBuffer(buffer);
3862: out = &buffer[index];
3863: }
3864: }
3865: NEXT;
3866: }
3867: cur = CUR;
3868: }
3869: *out++ = 0;
1.152 daniel 3870: if (RAW == '<') {
1.129 daniel 3871: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3872: ctxt->sax->error(ctxt->userData,
3873: "Unescaped '<' not allowed in attributes values\n");
3874: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
3875: ctxt->wellFormed = 0;
1.180 daniel 3876: ctxt->disableSAX = 1;
1.152 daniel 3877: } else if (RAW != limit) {
1.129 daniel 3878: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3879: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
3880: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
3881: ctxt->wellFormed = 0;
1.180 daniel 3882: ctxt->disableSAX = 1;
1.129 daniel 3883: } else
3884: NEXT;
3885: return(buffer);
1.29 daniel 3886: }
3887:
1.50 daniel 3888: /**
3889: * xmlParseSystemLiteral:
3890: * @ctxt: an XML parser context
3891: *
3892: * parse an XML Literal
1.21 daniel 3893: *
1.22 daniel 3894: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.68 daniel 3895: *
3896: * Returns the SystemLiteral parsed or NULL
1.21 daniel 3897: */
3898:
1.123 daniel 3899: xmlChar *
1.55 daniel 3900: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 3901: xmlChar *buf = NULL;
3902: int len = 0;
1.140 daniel 3903: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3904: int cur, l;
1.135 daniel 3905: xmlChar stop;
1.168 daniel 3906: int state = ctxt->instate;
1.21 daniel 3907:
1.91 daniel 3908: SHRINK;
1.152 daniel 3909: if (RAW == '"') {
1.40 daniel 3910: NEXT;
1.135 daniel 3911: stop = '"';
1.152 daniel 3912: } else if (RAW == '\'') {
1.40 daniel 3913: NEXT;
1.135 daniel 3914: stop = '\'';
1.21 daniel 3915: } else {
1.55 daniel 3916: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3917: ctxt->sax->error(ctxt->userData,
3918: "SystemLiteral \" or ' expected\n");
1.123 daniel 3919: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.59 daniel 3920: ctxt->wellFormed = 0;
1.180 daniel 3921: ctxt->disableSAX = 1;
1.135 daniel 3922: return(NULL);
1.21 daniel 3923: }
3924:
1.135 daniel 3925: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3926: if (buf == NULL) {
3927: fprintf(stderr, "malloc of %d byte failed\n", size);
3928: return(NULL);
3929: }
1.168 daniel 3930: ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
1.152 daniel 3931: cur = CUR_CHAR(l);
1.135 daniel 3932: while ((IS_CHAR(cur)) && (cur != stop)) {
1.152 daniel 3933: if (len + 5 >= size) {
1.135 daniel 3934: size *= 2;
3935: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3936: if (buf == NULL) {
3937: fprintf(stderr, "realloc of %d byte failed\n", size);
1.168 daniel 3938: ctxt->instate = state;
1.135 daniel 3939: return(NULL);
3940: }
3941: }
1.152 daniel 3942: COPY_BUF(l,buf,len,cur);
3943: NEXTL(l);
3944: cur = CUR_CHAR(l);
1.135 daniel 3945: if (cur == 0) {
3946: GROW;
3947: SHRINK;
1.152 daniel 3948: cur = CUR_CHAR(l);
1.135 daniel 3949: }
3950: }
3951: buf[len] = 0;
1.168 daniel 3952: ctxt->instate = state;
1.135 daniel 3953: if (!IS_CHAR(cur)) {
3954: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3955: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
3956: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
3957: ctxt->wellFormed = 0;
1.180 daniel 3958: ctxt->disableSAX = 1;
1.135 daniel 3959: } else {
3960: NEXT;
3961: }
3962: return(buf);
1.21 daniel 3963: }
3964:
1.50 daniel 3965: /**
3966: * xmlParsePubidLiteral:
3967: * @ctxt: an XML parser context
1.21 daniel 3968: *
1.50 daniel 3969: * parse an XML public literal
1.68 daniel 3970: *
3971: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3972: *
3973: * Returns the PubidLiteral parsed or NULL.
1.21 daniel 3974: */
3975:
1.123 daniel 3976: xmlChar *
1.55 daniel 3977: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 3978: xmlChar *buf = NULL;
3979: int len = 0;
1.140 daniel 3980: int size = XML_PARSER_BUFFER_SIZE;
1.135 daniel 3981: xmlChar cur;
3982: xmlChar stop;
1.125 daniel 3983:
1.91 daniel 3984: SHRINK;
1.152 daniel 3985: if (RAW == '"') {
1.40 daniel 3986: NEXT;
1.135 daniel 3987: stop = '"';
1.152 daniel 3988: } else if (RAW == '\'') {
1.40 daniel 3989: NEXT;
1.135 daniel 3990: stop = '\'';
1.21 daniel 3991: } else {
1.55 daniel 3992: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3993: ctxt->sax->error(ctxt->userData,
3994: "SystemLiteral \" or ' expected\n");
1.123 daniel 3995: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.59 daniel 3996: ctxt->wellFormed = 0;
1.180 daniel 3997: ctxt->disableSAX = 1;
1.135 daniel 3998: return(NULL);
3999: }
4000: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4001: if (buf == NULL) {
4002: fprintf(stderr, "malloc of %d byte failed\n", size);
4003: return(NULL);
4004: }
4005: cur = CUR;
4006: while ((IS_PUBIDCHAR(cur)) && (cur != stop)) {
4007: if (len + 1 >= size) {
4008: size *= 2;
4009: buf = xmlRealloc(buf, size * sizeof(xmlChar));
4010: if (buf == NULL) {
4011: fprintf(stderr, "realloc of %d byte failed\n", size);
4012: return(NULL);
4013: }
4014: }
4015: buf[len++] = cur;
4016: NEXT;
4017: cur = CUR;
4018: if (cur == 0) {
4019: GROW;
4020: SHRINK;
4021: cur = CUR;
4022: }
4023: }
4024: buf[len] = 0;
4025: if (cur != stop) {
4026: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4027: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
4028: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
4029: ctxt->wellFormed = 0;
1.180 daniel 4030: ctxt->disableSAX = 1;
1.135 daniel 4031: } else {
4032: NEXT;
1.21 daniel 4033: }
1.135 daniel 4034: return(buf);
1.21 daniel 4035: }
4036:
1.50 daniel 4037: /**
4038: * xmlParseCharData:
4039: * @ctxt: an XML parser context
4040: * @cdata: int indicating whether we are within a CDATA section
4041: *
4042: * parse a CharData section.
4043: * if we are within a CDATA section ']]>' marks an end of section.
1.27 daniel 4044: *
1.151 daniel 4045: * The right angle bracket (>) may be represented using the string ">",
4046: * and must, for compatibility, be escaped using ">" or a character
4047: * reference when it appears in the string "]]>" in content, when that
4048: * string is not marking the end of a CDATA section.
4049: *
1.27 daniel 4050: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4051: */
4052:
1.55 daniel 4053: void
4054: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1.152 daniel 4055: xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
1.91 daniel 4056: int nbchar = 0;
1.152 daniel 4057: int cur, l;
1.27 daniel 4058:
1.91 daniel 4059: SHRINK;
1.152 daniel 4060: cur = CUR_CHAR(l);
1.190 daniel 4061: while (((cur != '<') || (ctxt->token == '<')) &&
4062: ((cur != '&') || (ctxt->token == '&')) &&
4063: (IS_CHAR(cur))) {
1.97 daniel 4064: if ((cur == ']') && (NXT(1) == ']') &&
1.59 daniel 4065: (NXT(2) == '>')) {
4066: if (cdata) break;
4067: else {
4068: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.151 daniel 4069: ctxt->sax->error(ctxt->userData,
1.59 daniel 4070: "Sequence ']]>' not allowed in content\n");
1.123 daniel 4071: ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
1.151 daniel 4072: /* Should this be relaxed ??? I see a "must here */
4073: ctxt->wellFormed = 0;
1.180 daniel 4074: ctxt->disableSAX = 1;
1.59 daniel 4075: }
4076: }
1.152 daniel 4077: COPY_BUF(l,buf,nbchar,cur);
4078: if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
1.91 daniel 4079: /*
4080: * Ok the segment is to be consumed as chars.
4081: */
1.171 daniel 4082: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 4083: if (areBlanks(ctxt, buf, nbchar)) {
4084: if (ctxt->sax->ignorableWhitespace != NULL)
4085: ctxt->sax->ignorableWhitespace(ctxt->userData,
4086: buf, nbchar);
4087: } else {
4088: if (ctxt->sax->characters != NULL)
4089: ctxt->sax->characters(ctxt->userData, buf, nbchar);
4090: }
4091: }
4092: nbchar = 0;
4093: }
1.152 daniel 4094: NEXTL(l);
4095: cur = CUR_CHAR(l);
1.27 daniel 4096: }
1.91 daniel 4097: if (nbchar != 0) {
4098: /*
4099: * Ok the segment is to be consumed as chars.
4100: */
1.171 daniel 4101: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 4102: if (areBlanks(ctxt, buf, nbchar)) {
4103: if (ctxt->sax->ignorableWhitespace != NULL)
4104: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4105: } else {
4106: if (ctxt->sax->characters != NULL)
4107: ctxt->sax->characters(ctxt->userData, buf, nbchar);
4108: }
4109: }
1.45 daniel 4110: }
1.27 daniel 4111: }
4112:
1.50 daniel 4113: /**
4114: * xmlParseExternalID:
4115: * @ctxt: an XML parser context
1.123 daniel 4116: * @publicID: a xmlChar** receiving PubidLiteral
1.67 daniel 4117: * @strict: indicate whether we should restrict parsing to only
4118: * production [75], see NOTE below
1.50 daniel 4119: *
1.67 daniel 4120: * Parse an External ID or a Public ID
4121: *
4122: * NOTE: Productions [75] and [83] interract badly since [75] can generate
4123: * 'PUBLIC' S PubidLiteral S SystemLiteral
1.22 daniel 4124: *
4125: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4126: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1.67 daniel 4127: *
4128: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4129: *
1.68 daniel 4130: * Returns the function returns SystemLiteral and in the second
1.67 daniel 4131: * case publicID receives PubidLiteral, is strict is off
4132: * it is possible to return NULL and have publicID set.
1.22 daniel 4133: */
4134:
1.123 daniel 4135: xmlChar *
4136: xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4137: xmlChar *URI = NULL;
1.22 daniel 4138:
1.91 daniel 4139: SHRINK;
1.152 daniel 4140: if ((RAW == 'S') && (NXT(1) == 'Y') &&
1.40 daniel 4141: (NXT(2) == 'S') && (NXT(3) == 'T') &&
4142: (NXT(4) == 'E') && (NXT(5) == 'M')) {
4143: SKIP(6);
1.59 daniel 4144: if (!IS_BLANK(CUR)) {
4145: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4146: ctxt->sax->error(ctxt->userData,
1.59 daniel 4147: "Space required after 'SYSTEM'\n");
1.123 daniel 4148: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4149: ctxt->wellFormed = 0;
1.180 daniel 4150: ctxt->disableSAX = 1;
1.59 daniel 4151: }
1.42 daniel 4152: SKIP_BLANKS;
1.39 daniel 4153: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 4154: if (URI == NULL) {
1.55 daniel 4155: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4156: ctxt->sax->error(ctxt->userData,
1.39 daniel 4157: "xmlParseExternalID: SYSTEM, no URI\n");
1.123 daniel 4158: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.59 daniel 4159: ctxt->wellFormed = 0;
1.180 daniel 4160: ctxt->disableSAX = 1;
1.59 daniel 4161: }
1.152 daniel 4162: } else if ((RAW == 'P') && (NXT(1) == 'U') &&
1.40 daniel 4163: (NXT(2) == 'B') && (NXT(3) == 'L') &&
4164: (NXT(4) == 'I') && (NXT(5) == 'C')) {
4165: SKIP(6);
1.59 daniel 4166: if (!IS_BLANK(CUR)) {
4167: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4168: ctxt->sax->error(ctxt->userData,
1.59 daniel 4169: "Space required after 'PUBLIC'\n");
1.123 daniel 4170: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4171: ctxt->wellFormed = 0;
1.180 daniel 4172: ctxt->disableSAX = 1;
1.59 daniel 4173: }
1.42 daniel 4174: SKIP_BLANKS;
1.39 daniel 4175: *publicID = xmlParsePubidLiteral(ctxt);
1.59 daniel 4176: if (*publicID == NULL) {
1.55 daniel 4177: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4178: ctxt->sax->error(ctxt->userData,
1.39 daniel 4179: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.123 daniel 4180: ctxt->errNo = XML_ERR_PUBID_REQUIRED;
1.59 daniel 4181: ctxt->wellFormed = 0;
1.180 daniel 4182: ctxt->disableSAX = 1;
1.59 daniel 4183: }
1.67 daniel 4184: if (strict) {
4185: /*
4186: * We don't handle [83] so "S SystemLiteral" is required.
4187: */
4188: if (!IS_BLANK(CUR)) {
4189: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4190: ctxt->sax->error(ctxt->userData,
1.67 daniel 4191: "Space required after the Public Identifier\n");
1.123 daniel 4192: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 4193: ctxt->wellFormed = 0;
1.180 daniel 4194: ctxt->disableSAX = 1;
1.67 daniel 4195: }
4196: } else {
4197: /*
4198: * We handle [83] so we return immediately, if
4199: * "S SystemLiteral" is not detected. From a purely parsing
4200: * point of view that's a nice mess.
4201: */
1.135 daniel 4202: const xmlChar *ptr;
4203: GROW;
4204:
4205: ptr = CUR_PTR;
1.67 daniel 4206: if (!IS_BLANK(*ptr)) return(NULL);
4207:
4208: while (IS_BLANK(*ptr)) ptr++;
1.173 daniel 4209: if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
1.59 daniel 4210: }
1.42 daniel 4211: SKIP_BLANKS;
1.39 daniel 4212: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 4213: if (URI == NULL) {
1.55 daniel 4214: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4215: ctxt->sax->error(ctxt->userData,
1.39 daniel 4216: "xmlParseExternalID: PUBLIC, no URI\n");
1.123 daniel 4217: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.59 daniel 4218: ctxt->wellFormed = 0;
1.180 daniel 4219: ctxt->disableSAX = 1;
1.59 daniel 4220: }
1.22 daniel 4221: }
1.39 daniel 4222: return(URI);
1.22 daniel 4223: }
4224:
1.50 daniel 4225: /**
4226: * xmlParseComment:
1.69 daniel 4227: * @ctxt: an XML parser context
1.50 daniel 4228: *
1.3 veillard 4229: * Skip an XML (SGML) comment <!-- .... -->
1.38 daniel 4230: * The spec says that "For compatibility, the string "--" (double-hyphen)
4231: * must not occur within comments. "
1.22 daniel 4232: *
4233: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 4234: */
1.72 daniel 4235: void
1.114 daniel 4236: xmlParseComment(xmlParserCtxtPtr ctxt) {
1.135 daniel 4237: xmlChar *buf = NULL;
1.195 daniel 4238: int len;
1.140 daniel 4239: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 4240: int q, ql;
4241: int r, rl;
4242: int cur, l;
1.140 daniel 4243: xmlParserInputState state;
1.187 daniel 4244: xmlParserInputPtr input = ctxt->input;
1.3 veillard 4245:
4246: /*
1.22 daniel 4247: * Check that there is a comment right here.
1.3 veillard 4248: */
1.152 daniel 4249: if ((RAW != '<') || (NXT(1) != '!') ||
1.72 daniel 4250: (NXT(2) != '-') || (NXT(3) != '-')) return;
1.3 veillard 4251:
1.140 daniel 4252: state = ctxt->instate;
1.97 daniel 4253: ctxt->instate = XML_PARSER_COMMENT;
1.91 daniel 4254: SHRINK;
1.40 daniel 4255: SKIP(4);
1.135 daniel 4256: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4257: if (buf == NULL) {
4258: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 4259: ctxt->instate = state;
1.135 daniel 4260: return;
4261: }
1.152 daniel 4262: q = CUR_CHAR(ql);
4263: NEXTL(ql);
4264: r = CUR_CHAR(rl);
4265: NEXTL(rl);
4266: cur = CUR_CHAR(l);
1.195 daniel 4267: len = 0;
1.135 daniel 4268: while (IS_CHAR(cur) &&
4269: ((cur != '>') ||
4270: (r != '-') || (q != '-'))) {
1.195 daniel 4271: if ((r == '-') && (q == '-') && (len > 1)) {
1.55 daniel 4272: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4273: ctxt->sax->error(ctxt->userData,
1.38 daniel 4274: "Comment must not contain '--' (double-hyphen)`\n");
1.123 daniel 4275: ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
1.59 daniel 4276: ctxt->wellFormed = 0;
1.180 daniel 4277: ctxt->disableSAX = 1;
1.59 daniel 4278: }
1.152 daniel 4279: if (len + 5 >= size) {
1.135 daniel 4280: size *= 2;
4281: buf = xmlRealloc(buf, size * sizeof(xmlChar));
4282: if (buf == NULL) {
4283: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 4284: ctxt->instate = state;
1.135 daniel 4285: return;
4286: }
4287: }
1.152 daniel 4288: COPY_BUF(ql,buf,len,q);
1.135 daniel 4289: q = r;
1.152 daniel 4290: ql = rl;
1.135 daniel 4291: r = cur;
1.152 daniel 4292: rl = l;
4293: NEXTL(l);
4294: cur = CUR_CHAR(l);
1.135 daniel 4295: if (cur == 0) {
4296: SHRINK;
4297: GROW;
1.152 daniel 4298: cur = CUR_CHAR(l);
1.135 daniel 4299: }
1.3 veillard 4300: }
1.135 daniel 4301: buf[len] = 0;
4302: if (!IS_CHAR(cur)) {
1.55 daniel 4303: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4304: ctxt->sax->error(ctxt->userData,
1.135 daniel 4305: "Comment not terminated \n<!--%.50s\n", buf);
1.123 daniel 4306: ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
1.59 daniel 4307: ctxt->wellFormed = 0;
1.180 daniel 4308: ctxt->disableSAX = 1;
1.178 daniel 4309: xmlFree(buf);
1.3 veillard 4310: } else {
1.187 daniel 4311: if (input != ctxt->input) {
4312: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4313: ctxt->sax->error(ctxt->userData,
4314: "Comment doesn't start and stop in the same entity\n");
4315: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4316: ctxt->wellFormed = 0;
4317: ctxt->disableSAX = 1;
4318: }
1.40 daniel 4319: NEXT;
1.171 daniel 4320: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4321: (!ctxt->disableSAX))
1.135 daniel 4322: ctxt->sax->comment(ctxt->userData, buf);
4323: xmlFree(buf);
1.3 veillard 4324: }
1.140 daniel 4325: ctxt->instate = state;
1.3 veillard 4326: }
4327:
1.50 daniel 4328: /**
4329: * xmlParsePITarget:
4330: * @ctxt: an XML parser context
4331: *
4332: * parse the name of a PI
1.22 daniel 4333: *
4334: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1.68 daniel 4335: *
4336: * Returns the PITarget name or NULL
1.22 daniel 4337: */
4338:
1.123 daniel 4339: xmlChar *
1.55 daniel 4340: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1.123 daniel 4341: xmlChar *name;
1.22 daniel 4342:
4343: name = xmlParseName(ctxt);
1.139 daniel 4344: if ((name != NULL) &&
1.22 daniel 4345: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 4346: ((name[1] == 'm') || (name[1] == 'M')) &&
4347: ((name[2] == 'l') || (name[2] == 'L'))) {
1.139 daniel 4348: int i;
1.177 daniel 4349: if ((name[0] == 'x') && (name[1] == 'm') &&
4350: (name[2] == 'l') && (name[3] == 0)) {
1.151 daniel 4351: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4352: ctxt->sax->error(ctxt->userData,
4353: "XML declaration allowed only at the start of the document\n");
4354: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
4355: ctxt->wellFormed = 0;
1.180 daniel 4356: ctxt->disableSAX = 1;
1.151 daniel 4357: return(name);
4358: } else if (name[3] == 0) {
4359: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4360: ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
4361: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
4362: ctxt->wellFormed = 0;
1.180 daniel 4363: ctxt->disableSAX = 1;
1.151 daniel 4364: return(name);
4365: }
1.139 daniel 4366: for (i = 0;;i++) {
4367: if (xmlW3CPIs[i] == NULL) break;
4368: if (!xmlStrcmp(name, (const xmlChar *)xmlW3CPIs[i]))
4369: return(name);
4370: }
4371: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
4372: ctxt->sax->warning(ctxt->userData,
1.122 daniel 4373: "xmlParsePItarget: invalid name prefix 'xml'\n");
1.123 daniel 4374: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
1.122 daniel 4375: }
1.22 daniel 4376: }
4377: return(name);
4378: }
4379:
1.50 daniel 4380: /**
4381: * xmlParsePI:
4382: * @ctxt: an XML parser context
4383: *
4384: * parse an XML Processing Instruction.
1.22 daniel 4385: *
4386: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.68 daniel 4387: *
1.69 daniel 4388: * The processing is transfered to SAX once parsed.
1.3 veillard 4389: */
4390:
1.55 daniel 4391: void
4392: xmlParsePI(xmlParserCtxtPtr ctxt) {
1.135 daniel 4393: xmlChar *buf = NULL;
4394: int len = 0;
1.140 daniel 4395: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 4396: int cur, l;
1.123 daniel 4397: xmlChar *target;
1.140 daniel 4398: xmlParserInputState state;
1.22 daniel 4399:
1.152 daniel 4400: if ((RAW == '<') && (NXT(1) == '?')) {
1.187 daniel 4401: xmlParserInputPtr input = ctxt->input;
1.140 daniel 4402: state = ctxt->instate;
4403: ctxt->instate = XML_PARSER_PI;
1.3 veillard 4404: /*
4405: * this is a Processing Instruction.
4406: */
1.40 daniel 4407: SKIP(2);
1.91 daniel 4408: SHRINK;
1.3 veillard 4409:
4410: /*
1.22 daniel 4411: * Parse the target name and check for special support like
4412: * namespace.
1.3 veillard 4413: */
1.22 daniel 4414: target = xmlParsePITarget(ctxt);
4415: if (target != NULL) {
1.156 daniel 4416: if ((RAW == '?') && (NXT(1) == '>')) {
1.187 daniel 4417: if (input != ctxt->input) {
4418: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4419: ctxt->sax->error(ctxt->userData,
4420: "PI declaration doesn't start and stop in the same entity\n");
4421: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4422: ctxt->wellFormed = 0;
4423: ctxt->disableSAX = 1;
4424: }
1.156 daniel 4425: SKIP(2);
4426:
4427: /*
4428: * SAX: PI detected.
4429: */
1.171 daniel 4430: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.156 daniel 4431: (ctxt->sax->processingInstruction != NULL))
4432: ctxt->sax->processingInstruction(ctxt->userData,
4433: target, NULL);
4434: ctxt->instate = state;
1.170 daniel 4435: xmlFree(target);
1.156 daniel 4436: return;
4437: }
1.135 daniel 4438: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4439: if (buf == NULL) {
4440: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 4441: ctxt->instate = state;
1.135 daniel 4442: return;
4443: }
4444: cur = CUR;
4445: if (!IS_BLANK(cur)) {
1.114 daniel 4446: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4447: ctxt->sax->error(ctxt->userData,
4448: "xmlParsePI: PI %s space expected\n", target);
1.123 daniel 4449: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.114 daniel 4450: ctxt->wellFormed = 0;
1.180 daniel 4451: ctxt->disableSAX = 1;
1.114 daniel 4452: }
4453: SKIP_BLANKS;
1.152 daniel 4454: cur = CUR_CHAR(l);
1.135 daniel 4455: while (IS_CHAR(cur) &&
4456: ((cur != '?') || (NXT(1) != '>'))) {
1.152 daniel 4457: if (len + 5 >= size) {
1.135 daniel 4458: size *= 2;
4459: buf = xmlRealloc(buf, size * sizeof(xmlChar));
4460: if (buf == NULL) {
4461: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 4462: ctxt->instate = state;
1.135 daniel 4463: return;
4464: }
4465: }
1.152 daniel 4466: COPY_BUF(l,buf,len,cur);
4467: NEXTL(l);
4468: cur = CUR_CHAR(l);
1.135 daniel 4469: if (cur == 0) {
4470: SHRINK;
4471: GROW;
1.152 daniel 4472: cur = CUR_CHAR(l);
1.135 daniel 4473: }
4474: }
4475: buf[len] = 0;
1.152 daniel 4476: if (cur != '?') {
1.72 daniel 4477: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4478: ctxt->sax->error(ctxt->userData,
1.72 daniel 4479: "xmlParsePI: PI %s never end ...\n", target);
1.123 daniel 4480: ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
1.72 daniel 4481: ctxt->wellFormed = 0;
1.180 daniel 4482: ctxt->disableSAX = 1;
1.22 daniel 4483: } else {
1.187 daniel 4484: if (input != ctxt->input) {
4485: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4486: ctxt->sax->error(ctxt->userData,
4487: "PI declaration doesn't start and stop in the same entity\n");
4488: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4489: ctxt->wellFormed = 0;
4490: ctxt->disableSAX = 1;
4491: }
1.72 daniel 4492: SKIP(2);
1.44 daniel 4493:
1.72 daniel 4494: /*
4495: * SAX: PI detected.
4496: */
1.171 daniel 4497: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.72 daniel 4498: (ctxt->sax->processingInstruction != NULL))
1.99 daniel 4499: ctxt->sax->processingInstruction(ctxt->userData,
1.135 daniel 4500: target, buf);
1.22 daniel 4501: }
1.135 daniel 4502: xmlFree(buf);
1.119 daniel 4503: xmlFree(target);
1.3 veillard 4504: } else {
1.55 daniel 4505: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.99 daniel 4506: ctxt->sax->error(ctxt->userData,
4507: "xmlParsePI : no target name\n");
1.123 daniel 4508: ctxt->errNo = XML_ERR_PI_NOT_STARTED;
1.59 daniel 4509: ctxt->wellFormed = 0;
1.180 daniel 4510: ctxt->disableSAX = 1;
1.22 daniel 4511: }
1.140 daniel 4512: ctxt->instate = state;
1.22 daniel 4513: }
4514: }
4515:
1.50 daniel 4516: /**
4517: * xmlParseNotationDecl:
4518: * @ctxt: an XML parser context
4519: *
4520: * parse a notation declaration
1.22 daniel 4521: *
4522: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4523: *
4524: * Hence there is actually 3 choices:
4525: * 'PUBLIC' S PubidLiteral
4526: * 'PUBLIC' S PubidLiteral S SystemLiteral
4527: * and 'SYSTEM' S SystemLiteral
1.50 daniel 4528: *
1.67 daniel 4529: * See the NOTE on xmlParseExternalID().
1.22 daniel 4530: */
4531:
1.55 daniel 4532: void
4533: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4534: xmlChar *name;
4535: xmlChar *Pubid;
4536: xmlChar *Systemid;
1.22 daniel 4537:
1.152 daniel 4538: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4539: (NXT(2) == 'N') && (NXT(3) == 'O') &&
4540: (NXT(4) == 'T') && (NXT(5) == 'A') &&
4541: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1.67 daniel 4542: (NXT(8) == 'O') && (NXT(9) == 'N')) {
1.187 daniel 4543: xmlParserInputPtr input = ctxt->input;
1.91 daniel 4544: SHRINK;
1.40 daniel 4545: SKIP(10);
1.67 daniel 4546: if (!IS_BLANK(CUR)) {
4547: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4548: ctxt->sax->error(ctxt->userData,
4549: "Space required after '<!NOTATION'\n");
1.123 daniel 4550: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 4551: ctxt->wellFormed = 0;
1.180 daniel 4552: ctxt->disableSAX = 1;
1.67 daniel 4553: return;
4554: }
4555: SKIP_BLANKS;
1.22 daniel 4556:
4557: name = xmlParseName(ctxt);
4558: if (name == NULL) {
1.55 daniel 4559: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4560: ctxt->sax->error(ctxt->userData,
4561: "NOTATION: Name expected here\n");
1.123 daniel 4562: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.67 daniel 4563: ctxt->wellFormed = 0;
1.180 daniel 4564: ctxt->disableSAX = 1;
1.67 daniel 4565: return;
4566: }
4567: if (!IS_BLANK(CUR)) {
4568: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4569: ctxt->sax->error(ctxt->userData,
1.67 daniel 4570: "Space required after the NOTATION name'\n");
1.123 daniel 4571: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4572: ctxt->wellFormed = 0;
1.180 daniel 4573: ctxt->disableSAX = 1;
1.22 daniel 4574: return;
4575: }
1.42 daniel 4576: SKIP_BLANKS;
1.67 daniel 4577:
1.22 daniel 4578: /*
1.67 daniel 4579: * Parse the IDs.
1.22 daniel 4580: */
1.160 daniel 4581: Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
1.67 daniel 4582: SKIP_BLANKS;
4583:
1.152 daniel 4584: if (RAW == '>') {
1.187 daniel 4585: if (input != ctxt->input) {
4586: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4587: ctxt->sax->error(ctxt->userData,
4588: "Notation declaration doesn't start and stop in the same entity\n");
4589: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4590: ctxt->wellFormed = 0;
4591: ctxt->disableSAX = 1;
4592: }
1.40 daniel 4593: NEXT;
1.171 daniel 4594: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4595: (ctxt->sax->notationDecl != NULL))
1.74 daniel 4596: ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
1.67 daniel 4597: } else {
4598: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4599: ctxt->sax->error(ctxt->userData,
1.67 daniel 4600: "'>' required to close NOTATION declaration\n");
1.123 daniel 4601: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.67 daniel 4602: ctxt->wellFormed = 0;
1.180 daniel 4603: ctxt->disableSAX = 1;
1.67 daniel 4604: }
1.119 daniel 4605: xmlFree(name);
4606: if (Systemid != NULL) xmlFree(Systemid);
4607: if (Pubid != NULL) xmlFree(Pubid);
1.22 daniel 4608: }
4609: }
4610:
1.50 daniel 4611: /**
4612: * xmlParseEntityDecl:
4613: * @ctxt: an XML parser context
4614: *
4615: * parse <!ENTITY declarations
1.22 daniel 4616: *
4617: * [70] EntityDecl ::= GEDecl | PEDecl
4618: *
4619: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4620: *
4621: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4622: *
4623: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4624: *
4625: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 4626: *
4627: * [76] NDataDecl ::= S 'NDATA' S Name
1.99 daniel 4628: *
4629: * [ VC: Notation Declared ]
1.116 daniel 4630: * The Name must match the declared name of a notation.
1.22 daniel 4631: */
4632:
1.55 daniel 4633: void
4634: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4635: xmlChar *name = NULL;
4636: xmlChar *value = NULL;
4637: xmlChar *URI = NULL, *literal = NULL;
4638: xmlChar *ndata = NULL;
1.39 daniel 4639: int isParameter = 0;
1.123 daniel 4640: xmlChar *orig = NULL;
1.22 daniel 4641:
1.94 daniel 4642: GROW;
1.152 daniel 4643: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4644: (NXT(2) == 'E') && (NXT(3) == 'N') &&
4645: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1.59 daniel 4646: (NXT(6) == 'T') && (NXT(7) == 'Y')) {
1.187 daniel 4647: xmlParserInputPtr input = ctxt->input;
1.96 daniel 4648: ctxt->instate = XML_PARSER_ENTITY_DECL;
1.91 daniel 4649: SHRINK;
1.40 daniel 4650: SKIP(8);
1.59 daniel 4651: if (!IS_BLANK(CUR)) {
4652: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4653: ctxt->sax->error(ctxt->userData,
4654: "Space required after '<!ENTITY'\n");
1.123 daniel 4655: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4656: ctxt->wellFormed = 0;
1.180 daniel 4657: ctxt->disableSAX = 1;
1.59 daniel 4658: }
4659: SKIP_BLANKS;
1.40 daniel 4660:
1.152 daniel 4661: if (RAW == '%') {
1.40 daniel 4662: NEXT;
1.59 daniel 4663: if (!IS_BLANK(CUR)) {
4664: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4665: ctxt->sax->error(ctxt->userData,
4666: "Space required after '%'\n");
1.123 daniel 4667: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4668: ctxt->wellFormed = 0;
1.180 daniel 4669: ctxt->disableSAX = 1;
1.59 daniel 4670: }
1.42 daniel 4671: SKIP_BLANKS;
1.39 daniel 4672: isParameter = 1;
1.22 daniel 4673: }
4674:
4675: name = xmlParseName(ctxt);
1.24 daniel 4676: if (name == NULL) {
1.55 daniel 4677: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4678: ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
1.123 daniel 4679: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 4680: ctxt->wellFormed = 0;
1.180 daniel 4681: ctxt->disableSAX = 1;
1.24 daniel 4682: return;
4683: }
1.59 daniel 4684: if (!IS_BLANK(CUR)) {
4685: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4686: ctxt->sax->error(ctxt->userData,
1.59 daniel 4687: "Space required after the entity name\n");
1.123 daniel 4688: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4689: ctxt->wellFormed = 0;
1.180 daniel 4690: ctxt->disableSAX = 1;
1.59 daniel 4691: }
1.42 daniel 4692: SKIP_BLANKS;
1.24 daniel 4693:
1.22 daniel 4694: /*
1.68 daniel 4695: * handle the various case of definitions...
1.22 daniel 4696: */
1.39 daniel 4697: if (isParameter) {
1.152 daniel 4698: if ((RAW == '"') || (RAW == '\''))
1.78 daniel 4699: value = xmlParseEntityValue(ctxt, &orig);
1.39 daniel 4700: if (value) {
1.171 daniel 4701: if ((ctxt->sax != NULL) &&
4702: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4703: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4704: XML_INTERNAL_PARAMETER_ENTITY,
4705: NULL, NULL, value);
4706: }
1.24 daniel 4707: else {
1.67 daniel 4708: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 4709: if ((URI == NULL) && (literal == NULL)) {
4710: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4711: ctxt->sax->error(ctxt->userData,
4712: "Entity value required\n");
4713: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
4714: ctxt->wellFormed = 0;
1.180 daniel 4715: ctxt->disableSAX = 1;
1.169 daniel 4716: }
1.39 daniel 4717: if (URI) {
1.193 daniel 4718: xmlURIPtr uri;
4719:
4720: uri = xmlParseURI((const char *) URI);
4721: if (uri == NULL) {
4722: if ((ctxt->sax != NULL) &&
4723: (!ctxt->disableSAX) &&
4724: (ctxt->sax->error != NULL))
4725: ctxt->sax->error(ctxt->userData,
4726: "Invalid URI: %s\n", URI);
4727: ctxt->wellFormed = 0;
4728: ctxt->errNo = XML_ERR_INVALID_URI;
4729: } else {
4730: if (uri->fragment != NULL) {
4731: if ((ctxt->sax != NULL) &&
4732: (!ctxt->disableSAX) &&
4733: (ctxt->sax->error != NULL))
4734: ctxt->sax->error(ctxt->userData,
4735: "Fragment not allowed: %s\n", URI);
4736: ctxt->wellFormed = 0;
4737: ctxt->errNo = XML_ERR_URI_FRAGMENT;
4738: } else {
4739: if ((ctxt->sax != NULL) &&
4740: (!ctxt->disableSAX) &&
4741: (ctxt->sax->entityDecl != NULL))
4742: ctxt->sax->entityDecl(ctxt->userData, name,
4743: XML_EXTERNAL_PARAMETER_ENTITY,
4744: literal, URI, NULL);
4745: }
4746: xmlFreeURI(uri);
4747: }
1.39 daniel 4748: }
1.24 daniel 4749: }
4750: } else {
1.152 daniel 4751: if ((RAW == '"') || (RAW == '\'')) {
1.78 daniel 4752: value = xmlParseEntityValue(ctxt, &orig);
1.171 daniel 4753: if ((ctxt->sax != NULL) &&
4754: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4755: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4756: XML_INTERNAL_GENERAL_ENTITY,
4757: NULL, NULL, value);
4758: } else {
1.67 daniel 4759: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 4760: if ((URI == NULL) && (literal == NULL)) {
4761: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4762: ctxt->sax->error(ctxt->userData,
4763: "Entity value required\n");
4764: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
4765: ctxt->wellFormed = 0;
1.180 daniel 4766: ctxt->disableSAX = 1;
1.169 daniel 4767: }
1.193 daniel 4768: if (URI) {
4769: xmlURIPtr uri;
4770:
4771: uri = xmlParseURI((const char *)URI);
4772: if (uri == NULL) {
4773: if ((ctxt->sax != NULL) &&
4774: (!ctxt->disableSAX) &&
4775: (ctxt->sax->error != NULL))
4776: ctxt->sax->error(ctxt->userData,
4777: "Invalid URI: %s\n", URI);
4778: ctxt->wellFormed = 0;
4779: ctxt->errNo = XML_ERR_INVALID_URI;
4780: } else {
4781: if (uri->fragment != NULL) {
4782: if ((ctxt->sax != NULL) &&
4783: (!ctxt->disableSAX) &&
4784: (ctxt->sax->error != NULL))
4785: ctxt->sax->error(ctxt->userData,
4786: "Fragment not allowed: %s\n", URI);
4787: ctxt->wellFormed = 0;
4788: ctxt->errNo = XML_ERR_URI_FRAGMENT;
4789: }
4790: xmlFreeURI(uri);
4791: }
4792: }
1.152 daniel 4793: if ((RAW != '>') && (!IS_BLANK(CUR))) {
1.59 daniel 4794: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4795: ctxt->sax->error(ctxt->userData,
1.59 daniel 4796: "Space required before 'NDATA'\n");
1.123 daniel 4797: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4798: ctxt->wellFormed = 0;
1.180 daniel 4799: ctxt->disableSAX = 1;
1.59 daniel 4800: }
1.42 daniel 4801: SKIP_BLANKS;
1.152 daniel 4802: if ((RAW == 'N') && (NXT(1) == 'D') &&
1.40 daniel 4803: (NXT(2) == 'A') && (NXT(3) == 'T') &&
4804: (NXT(4) == 'A')) {
4805: SKIP(5);
1.59 daniel 4806: if (!IS_BLANK(CUR)) {
4807: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4808: ctxt->sax->error(ctxt->userData,
1.59 daniel 4809: "Space required after 'NDATA'\n");
1.123 daniel 4810: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4811: ctxt->wellFormed = 0;
1.180 daniel 4812: ctxt->disableSAX = 1;
1.59 daniel 4813: }
1.42 daniel 4814: SKIP_BLANKS;
1.24 daniel 4815: ndata = xmlParseName(ctxt);
1.171 daniel 4816: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.116 daniel 4817: (ctxt->sax->unparsedEntityDecl != NULL))
4818: ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
1.39 daniel 4819: literal, URI, ndata);
4820: } else {
1.171 daniel 4821: if ((ctxt->sax != NULL) &&
4822: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4823: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4824: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4825: literal, URI, NULL);
1.24 daniel 4826: }
4827: }
4828: }
1.42 daniel 4829: SKIP_BLANKS;
1.152 daniel 4830: if (RAW != '>') {
1.55 daniel 4831: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4832: ctxt->sax->error(ctxt->userData,
1.31 daniel 4833: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.123 daniel 4834: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.59 daniel 4835: ctxt->wellFormed = 0;
1.180 daniel 4836: ctxt->disableSAX = 1;
1.187 daniel 4837: } else {
4838: if (input != ctxt->input) {
4839: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4840: ctxt->sax->error(ctxt->userData,
4841: "Entity declaration doesn't start and stop in the same entity\n");
4842: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4843: ctxt->wellFormed = 0;
4844: ctxt->disableSAX = 1;
4845: }
1.40 daniel 4846: NEXT;
1.187 daniel 4847: }
1.78 daniel 4848: if (orig != NULL) {
4849: /*
1.98 daniel 4850: * Ugly mechanism to save the raw entity value.
1.78 daniel 4851: */
4852: xmlEntityPtr cur = NULL;
4853:
1.98 daniel 4854: if (isParameter) {
4855: if ((ctxt->sax != NULL) &&
4856: (ctxt->sax->getParameterEntity != NULL))
1.120 daniel 4857: cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.98 daniel 4858: } else {
4859: if ((ctxt->sax != NULL) &&
4860: (ctxt->sax->getEntity != NULL))
1.120 daniel 4861: cur = ctxt->sax->getEntity(ctxt->userData, name);
1.98 daniel 4862: }
4863: if (cur != NULL) {
4864: if (cur->orig != NULL)
1.119 daniel 4865: xmlFree(orig);
1.98 daniel 4866: else
4867: cur->orig = orig;
4868: } else
1.119 daniel 4869: xmlFree(orig);
1.78 daniel 4870: }
1.119 daniel 4871: if (name != NULL) xmlFree(name);
4872: if (value != NULL) xmlFree(value);
4873: if (URI != NULL) xmlFree(URI);
4874: if (literal != NULL) xmlFree(literal);
4875: if (ndata != NULL) xmlFree(ndata);
1.22 daniel 4876: }
4877: }
4878:
1.50 daniel 4879: /**
1.59 daniel 4880: * xmlParseDefaultDecl:
4881: * @ctxt: an XML parser context
4882: * @value: Receive a possible fixed default value for the attribute
4883: *
4884: * Parse an attribute default declaration
4885: *
4886: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4887: *
1.99 daniel 4888: * [ VC: Required Attribute ]
1.117 daniel 4889: * if the default declaration is the keyword #REQUIRED, then the
4890: * attribute must be specified for all elements of the type in the
4891: * attribute-list declaration.
1.99 daniel 4892: *
4893: * [ VC: Attribute Default Legal ]
1.102 daniel 4894: * The declared default value must meet the lexical constraints of
4895: * the declared attribute type c.f. xmlValidateAttributeDecl()
1.99 daniel 4896: *
4897: * [ VC: Fixed Attribute Default ]
1.117 daniel 4898: * if an attribute has a default value declared with the #FIXED
4899: * keyword, instances of that attribute must match the default value.
1.99 daniel 4900: *
4901: * [ WFC: No < in Attribute Values ]
4902: * handled in xmlParseAttValue()
4903: *
1.59 daniel 4904: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4905: * or XML_ATTRIBUTE_FIXED.
4906: */
4907:
4908: int
1.123 daniel 4909: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
1.59 daniel 4910: int val;
1.123 daniel 4911: xmlChar *ret;
1.59 daniel 4912:
4913: *value = NULL;
1.152 daniel 4914: if ((RAW == '#') && (NXT(1) == 'R') &&
1.59 daniel 4915: (NXT(2) == 'E') && (NXT(3) == 'Q') &&
4916: (NXT(4) == 'U') && (NXT(5) == 'I') &&
4917: (NXT(6) == 'R') && (NXT(7) == 'E') &&
4918: (NXT(8) == 'D')) {
4919: SKIP(9);
4920: return(XML_ATTRIBUTE_REQUIRED);
4921: }
1.152 daniel 4922: if ((RAW == '#') && (NXT(1) == 'I') &&
1.59 daniel 4923: (NXT(2) == 'M') && (NXT(3) == 'P') &&
4924: (NXT(4) == 'L') && (NXT(5) == 'I') &&
4925: (NXT(6) == 'E') && (NXT(7) == 'D')) {
4926: SKIP(8);
4927: return(XML_ATTRIBUTE_IMPLIED);
4928: }
4929: val = XML_ATTRIBUTE_NONE;
1.152 daniel 4930: if ((RAW == '#') && (NXT(1) == 'F') &&
1.59 daniel 4931: (NXT(2) == 'I') && (NXT(3) == 'X') &&
4932: (NXT(4) == 'E') && (NXT(5) == 'D')) {
4933: SKIP(6);
4934: val = XML_ATTRIBUTE_FIXED;
4935: if (!IS_BLANK(CUR)) {
4936: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4937: ctxt->sax->error(ctxt->userData,
4938: "Space required after '#FIXED'\n");
1.123 daniel 4939: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4940: ctxt->wellFormed = 0;
1.180 daniel 4941: ctxt->disableSAX = 1;
1.59 daniel 4942: }
4943: SKIP_BLANKS;
4944: }
4945: ret = xmlParseAttValue(ctxt);
1.96 daniel 4946: ctxt->instate = XML_PARSER_DTD;
1.59 daniel 4947: if (ret == NULL) {
4948: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4949: ctxt->sax->error(ctxt->userData,
1.59 daniel 4950: "Attribute default value declaration error\n");
4951: ctxt->wellFormed = 0;
1.180 daniel 4952: ctxt->disableSAX = 1;
1.59 daniel 4953: } else
4954: *value = ret;
4955: return(val);
4956: }
4957:
4958: /**
1.66 daniel 4959: * xmlParseNotationType:
4960: * @ctxt: an XML parser context
4961: *
4962: * parse an Notation attribute type.
4963: *
1.99 daniel 4964: * Note: the leading 'NOTATION' S part has already being parsed...
4965: *
1.66 daniel 4966: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4967: *
1.99 daniel 4968: * [ VC: Notation Attributes ]
1.117 daniel 4969: * Values of this type must match one of the notation names included
1.99 daniel 4970: * in the declaration; all notation names in the declaration must be declared.
1.66 daniel 4971: *
4972: * Returns: the notation attribute tree built while parsing
4973: */
4974:
4975: xmlEnumerationPtr
4976: xmlParseNotationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 4977: xmlChar *name;
1.66 daniel 4978: xmlEnumerationPtr ret = NULL, last = NULL, cur;
4979:
1.152 daniel 4980: if (RAW != '(') {
1.66 daniel 4981: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4982: ctxt->sax->error(ctxt->userData,
4983: "'(' required to start 'NOTATION'\n");
1.123 daniel 4984: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.66 daniel 4985: ctxt->wellFormed = 0;
1.180 daniel 4986: ctxt->disableSAX = 1;
1.66 daniel 4987: return(NULL);
4988: }
1.91 daniel 4989: SHRINK;
1.66 daniel 4990: do {
4991: NEXT;
4992: SKIP_BLANKS;
4993: name = xmlParseName(ctxt);
4994: if (name == NULL) {
4995: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4996: ctxt->sax->error(ctxt->userData,
1.66 daniel 4997: "Name expected in NOTATION declaration\n");
1.123 daniel 4998: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.66 daniel 4999: ctxt->wellFormed = 0;
1.180 daniel 5000: ctxt->disableSAX = 1;
1.66 daniel 5001: return(ret);
5002: }
5003: cur = xmlCreateEnumeration(name);
1.119 daniel 5004: xmlFree(name);
1.66 daniel 5005: if (cur == NULL) return(ret);
5006: if (last == NULL) ret = last = cur;
5007: else {
5008: last->next = cur;
5009: last = cur;
5010: }
5011: SKIP_BLANKS;
1.152 daniel 5012: } while (RAW == '|');
5013: if (RAW != ')') {
1.66 daniel 5014: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5015: ctxt->sax->error(ctxt->userData,
1.66 daniel 5016: "')' required to finish NOTATION declaration\n");
1.123 daniel 5017: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.66 daniel 5018: ctxt->wellFormed = 0;
1.180 daniel 5019: ctxt->disableSAX = 1;
1.170 daniel 5020: if ((last != NULL) && (last != ret))
5021: xmlFreeEnumeration(last);
1.66 daniel 5022: return(ret);
5023: }
5024: NEXT;
5025: return(ret);
5026: }
5027:
5028: /**
5029: * xmlParseEnumerationType:
5030: * @ctxt: an XML parser context
5031: *
5032: * parse an Enumeration attribute type.
5033: *
5034: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5035: *
1.99 daniel 5036: * [ VC: Enumeration ]
1.117 daniel 5037: * Values of this type must match one of the Nmtoken tokens in
1.99 daniel 5038: * the declaration
5039: *
1.66 daniel 5040: * Returns: the enumeration attribute tree built while parsing
5041: */
5042:
5043: xmlEnumerationPtr
5044: xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 5045: xmlChar *name;
1.66 daniel 5046: xmlEnumerationPtr ret = NULL, last = NULL, cur;
5047:
1.152 daniel 5048: if (RAW != '(') {
1.66 daniel 5049: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5050: ctxt->sax->error(ctxt->userData,
1.66 daniel 5051: "'(' required to start ATTLIST enumeration\n");
1.123 daniel 5052: ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
1.66 daniel 5053: ctxt->wellFormed = 0;
1.180 daniel 5054: ctxt->disableSAX = 1;
1.66 daniel 5055: return(NULL);
5056: }
1.91 daniel 5057: SHRINK;
1.66 daniel 5058: do {
5059: NEXT;
5060: SKIP_BLANKS;
5061: name = xmlParseNmtoken(ctxt);
5062: if (name == NULL) {
5063: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5064: ctxt->sax->error(ctxt->userData,
1.66 daniel 5065: "NmToken expected in ATTLIST enumeration\n");
1.123 daniel 5066: ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
1.66 daniel 5067: ctxt->wellFormed = 0;
1.180 daniel 5068: ctxt->disableSAX = 1;
1.66 daniel 5069: return(ret);
5070: }
5071: cur = xmlCreateEnumeration(name);
1.119 daniel 5072: xmlFree(name);
1.66 daniel 5073: if (cur == NULL) return(ret);
5074: if (last == NULL) ret = last = cur;
5075: else {
5076: last->next = cur;
5077: last = cur;
5078: }
5079: SKIP_BLANKS;
1.152 daniel 5080: } while (RAW == '|');
5081: if (RAW != ')') {
1.66 daniel 5082: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5083: ctxt->sax->error(ctxt->userData,
1.66 daniel 5084: "')' required to finish ATTLIST enumeration\n");
1.123 daniel 5085: ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
1.66 daniel 5086: ctxt->wellFormed = 0;
1.180 daniel 5087: ctxt->disableSAX = 1;
1.66 daniel 5088: return(ret);
5089: }
5090: NEXT;
5091: return(ret);
5092: }
5093:
5094: /**
1.50 daniel 5095: * xmlParseEnumeratedType:
5096: * @ctxt: an XML parser context
1.66 daniel 5097: * @tree: the enumeration tree built while parsing
1.50 daniel 5098: *
1.66 daniel 5099: * parse an Enumerated attribute type.
1.22 daniel 5100: *
5101: * [57] EnumeratedType ::= NotationType | Enumeration
5102: *
5103: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5104: *
1.50 daniel 5105: *
1.66 daniel 5106: * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
1.22 daniel 5107: */
5108:
1.66 daniel 5109: int
5110: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.152 daniel 5111: if ((RAW == 'N') && (NXT(1) == 'O') &&
1.66 daniel 5112: (NXT(2) == 'T') && (NXT(3) == 'A') &&
5113: (NXT(4) == 'T') && (NXT(5) == 'I') &&
5114: (NXT(6) == 'O') && (NXT(7) == 'N')) {
5115: SKIP(8);
5116: if (!IS_BLANK(CUR)) {
5117: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5118: ctxt->sax->error(ctxt->userData,
5119: "Space required after 'NOTATION'\n");
1.123 daniel 5120: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.66 daniel 5121: ctxt->wellFormed = 0;
1.180 daniel 5122: ctxt->disableSAX = 1;
1.66 daniel 5123: return(0);
5124: }
5125: SKIP_BLANKS;
5126: *tree = xmlParseNotationType(ctxt);
5127: if (*tree == NULL) return(0);
5128: return(XML_ATTRIBUTE_NOTATION);
5129: }
5130: *tree = xmlParseEnumerationType(ctxt);
5131: if (*tree == NULL) return(0);
5132: return(XML_ATTRIBUTE_ENUMERATION);
1.22 daniel 5133: }
5134:
1.50 daniel 5135: /**
5136: * xmlParseAttributeType:
5137: * @ctxt: an XML parser context
1.66 daniel 5138: * @tree: the enumeration tree built while parsing
1.50 daniel 5139: *
1.59 daniel 5140: * parse the Attribute list def for an element
1.22 daniel 5141: *
5142: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5143: *
5144: * [55] StringType ::= 'CDATA'
5145: *
5146: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5147: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1.50 daniel 5148: *
1.102 daniel 5149: * Validity constraints for attribute values syntax are checked in
5150: * xmlValidateAttributeValue()
5151: *
1.99 daniel 5152: * [ VC: ID ]
1.117 daniel 5153: * Values of type ID must match the Name production. A name must not
1.99 daniel 5154: * appear more than once in an XML document as a value of this type;
5155: * i.e., ID values must uniquely identify the elements which bear them.
5156: *
5157: * [ VC: One ID per Element Type ]
1.117 daniel 5158: * No element type may have more than one ID attribute specified.
1.99 daniel 5159: *
5160: * [ VC: ID Attribute Default ]
1.117 daniel 5161: * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
1.99 daniel 5162: *
5163: * [ VC: IDREF ]
1.102 daniel 5164: * Values of type IDREF must match the Name production, and values
1.140 daniel 5165: * of type IDREFS must match Names; each IDREF Name must match the value
1.117 daniel 5166: * of an ID attribute on some element in the XML document; i.e. IDREF
1.99 daniel 5167: * values must match the value of some ID attribute.
5168: *
5169: * [ VC: Entity Name ]
1.102 daniel 5170: * Values of type ENTITY must match the Name production, values
1.140 daniel 5171: * of type ENTITIES must match Names; each Entity Name must match the
1.117 daniel 5172: * name of an unparsed entity declared in the DTD.
1.99 daniel 5173: *
5174: * [ VC: Name Token ]
1.102 daniel 5175: * Values of type NMTOKEN must match the Nmtoken production; values
1.99 daniel 5176: * of type NMTOKENS must match Nmtokens.
5177: *
1.69 daniel 5178: * Returns the attribute type
1.22 daniel 5179: */
1.59 daniel 5180: int
1.66 daniel 5181: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.91 daniel 5182: SHRINK;
1.152 daniel 5183: if ((RAW == 'C') && (NXT(1) == 'D') &&
1.40 daniel 5184: (NXT(2) == 'A') && (NXT(3) == 'T') &&
5185: (NXT(4) == 'A')) {
5186: SKIP(5);
1.66 daniel 5187: return(XML_ATTRIBUTE_CDATA);
1.152 daniel 5188: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.40 daniel 5189: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.97 daniel 5190: (NXT(4) == 'F') && (NXT(5) == 'S')) {
5191: SKIP(6);
5192: return(XML_ATTRIBUTE_IDREFS);
1.152 daniel 5193: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.97 daniel 5194: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.40 daniel 5195: (NXT(4) == 'F')) {
5196: SKIP(5);
1.59 daniel 5197: return(XML_ATTRIBUTE_IDREF);
1.152 daniel 5198: } else if ((RAW == 'I') && (NXT(1) == 'D')) {
1.66 daniel 5199: SKIP(2);
5200: return(XML_ATTRIBUTE_ID);
1.152 daniel 5201: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 5202: (NXT(2) == 'T') && (NXT(3) == 'I') &&
5203: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
5204: SKIP(6);
1.59 daniel 5205: return(XML_ATTRIBUTE_ENTITY);
1.152 daniel 5206: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 5207: (NXT(2) == 'T') && (NXT(3) == 'I') &&
5208: (NXT(4) == 'T') && (NXT(5) == 'I') &&
5209: (NXT(6) == 'E') && (NXT(7) == 'S')) {
5210: SKIP(8);
1.59 daniel 5211: return(XML_ATTRIBUTE_ENTITIES);
1.152 daniel 5212: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.40 daniel 5213: (NXT(2) == 'T') && (NXT(3) == 'O') &&
5214: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.66 daniel 5215: (NXT(6) == 'N') && (NXT(7) == 'S')) {
5216: SKIP(8);
5217: return(XML_ATTRIBUTE_NMTOKENS);
1.152 daniel 5218: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.66 daniel 5219: (NXT(2) == 'T') && (NXT(3) == 'O') &&
5220: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.40 daniel 5221: (NXT(6) == 'N')) {
5222: SKIP(7);
1.59 daniel 5223: return(XML_ATTRIBUTE_NMTOKEN);
1.22 daniel 5224: }
1.66 daniel 5225: return(xmlParseEnumeratedType(ctxt, tree));
1.22 daniel 5226: }
5227:
1.50 daniel 5228: /**
5229: * xmlParseAttributeListDecl:
5230: * @ctxt: an XML parser context
5231: *
5232: * : parse the Attribute list def for an element
1.22 daniel 5233: *
5234: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5235: *
5236: * [53] AttDef ::= S Name S AttType S DefaultDecl
1.50 daniel 5237: *
1.22 daniel 5238: */
1.55 daniel 5239: void
5240: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 5241: xmlChar *elemName;
5242: xmlChar *attrName;
1.103 daniel 5243: xmlEnumerationPtr tree;
1.22 daniel 5244:
1.152 daniel 5245: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 5246: (NXT(2) == 'A') && (NXT(3) == 'T') &&
5247: (NXT(4) == 'T') && (NXT(5) == 'L') &&
5248: (NXT(6) == 'I') && (NXT(7) == 'S') &&
1.59 daniel 5249: (NXT(8) == 'T')) {
1.187 daniel 5250: xmlParserInputPtr input = ctxt->input;
5251:
1.40 daniel 5252: SKIP(9);
1.59 daniel 5253: if (!IS_BLANK(CUR)) {
5254: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5255: ctxt->sax->error(ctxt->userData,
5256: "Space required after '<!ATTLIST'\n");
1.123 daniel 5257: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5258: ctxt->wellFormed = 0;
1.180 daniel 5259: ctxt->disableSAX = 1;
1.59 daniel 5260: }
1.42 daniel 5261: SKIP_BLANKS;
1.59 daniel 5262: elemName = xmlParseName(ctxt);
5263: if (elemName == NULL) {
1.55 daniel 5264: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5265: ctxt->sax->error(ctxt->userData,
5266: "ATTLIST: no name for Element\n");
1.123 daniel 5267: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5268: ctxt->wellFormed = 0;
1.180 daniel 5269: ctxt->disableSAX = 1;
1.22 daniel 5270: return;
5271: }
1.42 daniel 5272: SKIP_BLANKS;
1.152 daniel 5273: while (RAW != '>') {
1.123 daniel 5274: const xmlChar *check = CUR_PTR;
1.59 daniel 5275: int type;
5276: int def;
1.123 daniel 5277: xmlChar *defaultValue = NULL;
1.59 daniel 5278:
1.103 daniel 5279: tree = NULL;
1.59 daniel 5280: attrName = xmlParseName(ctxt);
5281: if (attrName == NULL) {
5282: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5283: ctxt->sax->error(ctxt->userData,
5284: "ATTLIST: no name for Attribute\n");
1.123 daniel 5285: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5286: ctxt->wellFormed = 0;
1.180 daniel 5287: ctxt->disableSAX = 1;
1.59 daniel 5288: break;
5289: }
1.97 daniel 5290: GROW;
1.59 daniel 5291: if (!IS_BLANK(CUR)) {
5292: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5293: ctxt->sax->error(ctxt->userData,
1.59 daniel 5294: "Space required after the attribute name\n");
1.123 daniel 5295: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5296: ctxt->wellFormed = 0;
1.180 daniel 5297: ctxt->disableSAX = 1;
1.170 daniel 5298: if (attrName != NULL)
5299: xmlFree(attrName);
5300: if (defaultValue != NULL)
5301: xmlFree(defaultValue);
1.59 daniel 5302: break;
5303: }
5304: SKIP_BLANKS;
5305:
1.66 daniel 5306: type = xmlParseAttributeType(ctxt, &tree);
1.170 daniel 5307: if (type <= 0) {
5308: if (attrName != NULL)
5309: xmlFree(attrName);
5310: if (defaultValue != NULL)
5311: xmlFree(defaultValue);
5312: break;
5313: }
1.22 daniel 5314:
1.97 daniel 5315: GROW;
1.59 daniel 5316: if (!IS_BLANK(CUR)) {
5317: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5318: ctxt->sax->error(ctxt->userData,
1.59 daniel 5319: "Space required after the attribute type\n");
1.123 daniel 5320: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5321: ctxt->wellFormed = 0;
1.180 daniel 5322: ctxt->disableSAX = 1;
1.170 daniel 5323: if (attrName != NULL)
5324: xmlFree(attrName);
5325: if (defaultValue != NULL)
5326: xmlFree(defaultValue);
5327: if (tree != NULL)
5328: xmlFreeEnumeration(tree);
1.59 daniel 5329: break;
5330: }
1.42 daniel 5331: SKIP_BLANKS;
1.59 daniel 5332:
5333: def = xmlParseDefaultDecl(ctxt, &defaultValue);
1.170 daniel 5334: if (def <= 0) {
5335: if (attrName != NULL)
5336: xmlFree(attrName);
5337: if (defaultValue != NULL)
5338: xmlFree(defaultValue);
5339: if (tree != NULL)
5340: xmlFreeEnumeration(tree);
5341: break;
5342: }
1.59 daniel 5343:
1.97 daniel 5344: GROW;
1.152 daniel 5345: if (RAW != '>') {
1.59 daniel 5346: if (!IS_BLANK(CUR)) {
5347: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5348: ctxt->sax->error(ctxt->userData,
1.59 daniel 5349: "Space required after the attribute default value\n");
1.123 daniel 5350: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5351: ctxt->wellFormed = 0;
1.180 daniel 5352: ctxt->disableSAX = 1;
1.170 daniel 5353: if (attrName != NULL)
5354: xmlFree(attrName);
5355: if (defaultValue != NULL)
5356: xmlFree(defaultValue);
5357: if (tree != NULL)
5358: xmlFreeEnumeration(tree);
1.59 daniel 5359: break;
5360: }
5361: SKIP_BLANKS;
5362: }
1.40 daniel 5363: if (check == CUR_PTR) {
1.55 daniel 5364: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5365: ctxt->sax->error(ctxt->userData,
1.59 daniel 5366: "xmlParseAttributeListDecl: detected internal error\n");
1.123 daniel 5367: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.170 daniel 5368: if (attrName != NULL)
5369: xmlFree(attrName);
5370: if (defaultValue != NULL)
5371: xmlFree(defaultValue);
5372: if (tree != NULL)
5373: xmlFreeEnumeration(tree);
1.22 daniel 5374: break;
5375: }
1.171 daniel 5376: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5377: (ctxt->sax->attributeDecl != NULL))
1.74 daniel 5378: ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
1.66 daniel 5379: type, def, defaultValue, tree);
1.59 daniel 5380: if (attrName != NULL)
1.119 daniel 5381: xmlFree(attrName);
1.59 daniel 5382: if (defaultValue != NULL)
1.119 daniel 5383: xmlFree(defaultValue);
1.97 daniel 5384: GROW;
1.22 daniel 5385: }
1.187 daniel 5386: if (RAW == '>') {
5387: if (input != ctxt->input) {
5388: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5389: ctxt->sax->error(ctxt->userData,
5390: "Attribute list declaration doesn't start and stop in the same entity\n");
5391: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5392: ctxt->wellFormed = 0;
5393: ctxt->disableSAX = 1;
5394: }
1.40 daniel 5395: NEXT;
1.187 daniel 5396: }
1.22 daniel 5397:
1.119 daniel 5398: xmlFree(elemName);
1.22 daniel 5399: }
5400: }
5401:
1.50 daniel 5402: /**
1.61 daniel 5403: * xmlParseElementMixedContentDecl:
5404: * @ctxt: an XML parser context
5405: *
5406: * parse the declaration for a Mixed Element content
5407: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5408: *
5409: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5410: * '(' S? '#PCDATA' S? ')'
5411: *
1.99 daniel 5412: * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5413: *
5414: * [ VC: No Duplicate Types ]
1.117 daniel 5415: * The same name must not appear more than once in a single
5416: * mixed-content declaration.
1.99 daniel 5417: *
1.61 daniel 5418: * returns: the list of the xmlElementContentPtr describing the element choices
5419: */
5420: xmlElementContentPtr
1.62 daniel 5421: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
1.64 daniel 5422: xmlElementContentPtr ret = NULL, cur = NULL, n;
1.123 daniel 5423: xmlChar *elem = NULL;
1.61 daniel 5424:
1.97 daniel 5425: GROW;
1.152 daniel 5426: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 5427: (NXT(2) == 'C') && (NXT(3) == 'D') &&
5428: (NXT(4) == 'A') && (NXT(5) == 'T') &&
5429: (NXT(6) == 'A')) {
5430: SKIP(7);
5431: SKIP_BLANKS;
1.91 daniel 5432: SHRINK;
1.152 daniel 5433: if (RAW == ')') {
1.187 daniel 5434: ctxt->entity = ctxt->input;
1.63 daniel 5435: NEXT;
5436: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
1.152 daniel 5437: if (RAW == '*') {
1.136 daniel 5438: ret->ocur = XML_ELEMENT_CONTENT_MULT;
5439: NEXT;
5440: }
1.63 daniel 5441: return(ret);
5442: }
1.152 daniel 5443: if ((RAW == '(') || (RAW == '|')) {
1.61 daniel 5444: ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
5445: if (ret == NULL) return(NULL);
1.99 daniel 5446: }
1.152 daniel 5447: while (RAW == '|') {
1.64 daniel 5448: NEXT;
1.61 daniel 5449: if (elem == NULL) {
5450: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5451: if (ret == NULL) return(NULL);
5452: ret->c1 = cur;
1.64 daniel 5453: cur = ret;
1.61 daniel 5454: } else {
1.64 daniel 5455: n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5456: if (n == NULL) return(NULL);
5457: n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
5458: cur->c2 = n;
5459: cur = n;
1.119 daniel 5460: xmlFree(elem);
1.61 daniel 5461: }
5462: SKIP_BLANKS;
5463: elem = xmlParseName(ctxt);
5464: if (elem == NULL) {
5465: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5466: ctxt->sax->error(ctxt->userData,
1.61 daniel 5467: "xmlParseElementMixedContentDecl : Name expected\n");
1.123 daniel 5468: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.61 daniel 5469: ctxt->wellFormed = 0;
1.180 daniel 5470: ctxt->disableSAX = 1;
1.61 daniel 5471: xmlFreeElementContent(cur);
5472: return(NULL);
5473: }
5474: SKIP_BLANKS;
1.97 daniel 5475: GROW;
1.61 daniel 5476: }
1.152 daniel 5477: if ((RAW == ')') && (NXT(1) == '*')) {
1.66 daniel 5478: if (elem != NULL) {
1.61 daniel 5479: cur->c2 = xmlNewElementContent(elem,
5480: XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 5481: xmlFree(elem);
1.66 daniel 5482: }
1.65 daniel 5483: ret->ocur = XML_ELEMENT_CONTENT_MULT;
1.187 daniel 5484: ctxt->entity = ctxt->input;
1.64 daniel 5485: SKIP(2);
1.61 daniel 5486: } else {
1.119 daniel 5487: if (elem != NULL) xmlFree(elem);
1.61 daniel 5488: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5489: ctxt->sax->error(ctxt->userData,
1.63 daniel 5490: "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
1.123 daniel 5491: ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
1.61 daniel 5492: ctxt->wellFormed = 0;
1.180 daniel 5493: ctxt->disableSAX = 1;
1.61 daniel 5494: xmlFreeElementContent(ret);
5495: return(NULL);
5496: }
5497:
5498: } else {
5499: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5500: ctxt->sax->error(ctxt->userData,
1.61 daniel 5501: "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
1.123 daniel 5502: ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
1.61 daniel 5503: ctxt->wellFormed = 0;
1.180 daniel 5504: ctxt->disableSAX = 1;
1.61 daniel 5505: }
5506: return(ret);
5507: }
5508:
5509: /**
5510: * xmlParseElementChildrenContentDecl:
1.50 daniel 5511: * @ctxt: an XML parser context
5512: *
1.61 daniel 5513: * parse the declaration for an Element children content
5514: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
1.22 daniel 5515: *
1.61 daniel 5516: *
1.22 daniel 5517: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5518: *
5519: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5520: *
5521: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5522: *
5523: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5524: *
1.99 daniel 5525: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5526: * TODO Parameter-entity replacement text must be properly nested
5527: * with parenthetized groups. That is to say, if either of the
5528: * opening or closing parentheses in a choice, seq, or Mixed
5529: * construct is contained in the replacement text for a parameter
5530: * entity, both must be contained in the same replacement text. For
5531: * interoperability, if a parameter-entity reference appears in a
5532: * choice, seq, or Mixed construct, its replacement text should not
5533: * be empty, and neither the first nor last non-blank character of
5534: * the replacement text should be a connector (| or ,).
5535: *
1.62 daniel 5536: * returns: the tree of xmlElementContentPtr describing the element
1.61 daniel 5537: * hierarchy.
5538: */
5539: xmlElementContentPtr
1.62 daniel 5540: xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
1.63 daniel 5541: xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
1.123 daniel 5542: xmlChar *elem;
5543: xmlChar type = 0;
1.62 daniel 5544:
5545: SKIP_BLANKS;
1.94 daniel 5546: GROW;
1.152 daniel 5547: if (RAW == '(') {
1.63 daniel 5548: /* Recurse on first child */
1.62 daniel 5549: NEXT;
5550: SKIP_BLANKS;
5551: cur = ret = xmlParseElementChildrenContentDecl(ctxt);
5552: SKIP_BLANKS;
1.101 daniel 5553: GROW;
1.62 daniel 5554: } else {
5555: elem = xmlParseName(ctxt);
5556: if (elem == NULL) {
5557: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5558: ctxt->sax->error(ctxt->userData,
1.62 daniel 5559: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.123 daniel 5560: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 5561: ctxt->wellFormed = 0;
1.180 daniel 5562: ctxt->disableSAX = 1;
1.62 daniel 5563: return(NULL);
5564: }
5565: cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.101 daniel 5566: GROW;
1.152 daniel 5567: if (RAW == '?') {
1.104 daniel 5568: cur->ocur = XML_ELEMENT_CONTENT_OPT;
1.62 daniel 5569: NEXT;
1.152 daniel 5570: } else if (RAW == '*') {
1.104 daniel 5571: cur->ocur = XML_ELEMENT_CONTENT_MULT;
1.62 daniel 5572: NEXT;
1.152 daniel 5573: } else if (RAW == '+') {
1.104 daniel 5574: cur->ocur = XML_ELEMENT_CONTENT_PLUS;
1.62 daniel 5575: NEXT;
5576: } else {
1.104 daniel 5577: cur->ocur = XML_ELEMENT_CONTENT_ONCE;
1.62 daniel 5578: }
1.119 daniel 5579: xmlFree(elem);
1.101 daniel 5580: GROW;
1.62 daniel 5581: }
5582: SKIP_BLANKS;
1.91 daniel 5583: SHRINK;
1.152 daniel 5584: while (RAW != ')') {
1.63 daniel 5585: /*
5586: * Each loop we parse one separator and one element.
5587: */
1.152 daniel 5588: if (RAW == ',') {
1.62 daniel 5589: if (type == 0) type = CUR;
5590:
5591: /*
5592: * Detect "Name | Name , Name" error
5593: */
5594: else if (type != CUR) {
5595: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5596: ctxt->sax->error(ctxt->userData,
1.62 daniel 5597: "xmlParseElementChildrenContentDecl : '%c' expected\n",
5598: type);
1.123 daniel 5599: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 5600: ctxt->wellFormed = 0;
1.180 daniel 5601: ctxt->disableSAX = 1;
1.170 daniel 5602: if ((op != NULL) && (op != ret))
5603: xmlFreeElementContent(op);
5604: if ((last != NULL) && (last != ret))
5605: xmlFreeElementContent(last);
5606: if (ret != NULL)
5607: xmlFreeElementContent(ret);
1.62 daniel 5608: return(NULL);
5609: }
1.64 daniel 5610: NEXT;
1.62 daniel 5611:
1.63 daniel 5612: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
5613: if (op == NULL) {
5614: xmlFreeElementContent(ret);
5615: return(NULL);
5616: }
5617: if (last == NULL) {
5618: op->c1 = ret;
1.65 daniel 5619: ret = cur = op;
1.63 daniel 5620: } else {
5621: cur->c2 = op;
5622: op->c1 = last;
5623: cur =op;
1.65 daniel 5624: last = NULL;
1.63 daniel 5625: }
1.152 daniel 5626: } else if (RAW == '|') {
1.62 daniel 5627: if (type == 0) type = CUR;
5628:
5629: /*
1.63 daniel 5630: * Detect "Name , Name | Name" error
1.62 daniel 5631: */
5632: else if (type != CUR) {
5633: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5634: ctxt->sax->error(ctxt->userData,
1.62 daniel 5635: "xmlParseElementChildrenContentDecl : '%c' expected\n",
5636: type);
1.123 daniel 5637: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 5638: ctxt->wellFormed = 0;
1.180 daniel 5639: ctxt->disableSAX = 1;
1.170 daniel 5640: if ((op != NULL) && (op != ret))
5641: xmlFreeElementContent(op);
5642: if ((last != NULL) && (last != ret))
5643: xmlFreeElementContent(last);
5644: if (ret != NULL)
5645: xmlFreeElementContent(ret);
1.62 daniel 5646: return(NULL);
5647: }
1.64 daniel 5648: NEXT;
1.62 daniel 5649:
1.63 daniel 5650: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5651: if (op == NULL) {
1.170 daniel 5652: if ((op != NULL) && (op != ret))
5653: xmlFreeElementContent(op);
5654: if ((last != NULL) && (last != ret))
5655: xmlFreeElementContent(last);
5656: if (ret != NULL)
5657: xmlFreeElementContent(ret);
1.63 daniel 5658: return(NULL);
5659: }
5660: if (last == NULL) {
5661: op->c1 = ret;
1.65 daniel 5662: ret = cur = op;
1.63 daniel 5663: } else {
5664: cur->c2 = op;
5665: op->c1 = last;
5666: cur =op;
1.65 daniel 5667: last = NULL;
1.63 daniel 5668: }
1.62 daniel 5669: } else {
5670: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5671: ctxt->sax->error(ctxt->userData,
1.62 daniel 5672: "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
5673: ctxt->wellFormed = 0;
1.180 daniel 5674: ctxt->disableSAX = 1;
1.123 daniel 5675: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
1.170 daniel 5676: if ((op != NULL) && (op != ret))
5677: xmlFreeElementContent(op);
5678: if ((last != NULL) && (last != ret))
5679: xmlFreeElementContent(last);
5680: if (ret != NULL)
5681: xmlFreeElementContent(ret);
1.62 daniel 5682: return(NULL);
5683: }
1.101 daniel 5684: GROW;
1.62 daniel 5685: SKIP_BLANKS;
1.101 daniel 5686: GROW;
1.152 daniel 5687: if (RAW == '(') {
1.63 daniel 5688: /* Recurse on second child */
1.62 daniel 5689: NEXT;
5690: SKIP_BLANKS;
1.65 daniel 5691: last = xmlParseElementChildrenContentDecl(ctxt);
1.62 daniel 5692: SKIP_BLANKS;
5693: } else {
5694: elem = xmlParseName(ctxt);
5695: if (elem == NULL) {
5696: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5697: ctxt->sax->error(ctxt->userData,
1.122 daniel 5698: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.123 daniel 5699: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 5700: ctxt->wellFormed = 0;
1.180 daniel 5701: ctxt->disableSAX = 1;
1.170 daniel 5702: if ((op != NULL) && (op != ret))
5703: xmlFreeElementContent(op);
5704: if ((last != NULL) && (last != ret))
5705: xmlFreeElementContent(last);
5706: if (ret != NULL)
5707: xmlFreeElementContent(ret);
1.62 daniel 5708: return(NULL);
5709: }
1.65 daniel 5710: last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 5711: xmlFree(elem);
1.152 daniel 5712: if (RAW == '?') {
1.105 daniel 5713: last->ocur = XML_ELEMENT_CONTENT_OPT;
5714: NEXT;
1.152 daniel 5715: } else if (RAW == '*') {
1.105 daniel 5716: last->ocur = XML_ELEMENT_CONTENT_MULT;
5717: NEXT;
1.152 daniel 5718: } else if (RAW == '+') {
1.105 daniel 5719: last->ocur = XML_ELEMENT_CONTENT_PLUS;
5720: NEXT;
5721: } else {
5722: last->ocur = XML_ELEMENT_CONTENT_ONCE;
5723: }
1.63 daniel 5724: }
5725: SKIP_BLANKS;
1.97 daniel 5726: GROW;
1.64 daniel 5727: }
1.65 daniel 5728: if ((cur != NULL) && (last != NULL)) {
5729: cur->c2 = last;
1.62 daniel 5730: }
1.187 daniel 5731: ctxt->entity = ctxt->input;
1.62 daniel 5732: NEXT;
1.152 daniel 5733: if (RAW == '?') {
1.62 daniel 5734: ret->ocur = XML_ELEMENT_CONTENT_OPT;
5735: NEXT;
1.152 daniel 5736: } else if (RAW == '*') {
1.62 daniel 5737: ret->ocur = XML_ELEMENT_CONTENT_MULT;
5738: NEXT;
1.152 daniel 5739: } else if (RAW == '+') {
1.62 daniel 5740: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
5741: NEXT;
5742: }
5743: return(ret);
1.61 daniel 5744: }
5745:
5746: /**
5747: * xmlParseElementContentDecl:
5748: * @ctxt: an XML parser context
5749: * @name: the name of the element being defined.
5750: * @result: the Element Content pointer will be stored here if any
1.22 daniel 5751: *
1.61 daniel 5752: * parse the declaration for an Element content either Mixed or Children,
5753: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5754: *
5755: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1.50 daniel 5756: *
1.61 daniel 5757: * returns: the type of element content XML_ELEMENT_TYPE_xxx
1.22 daniel 5758: */
5759:
1.61 daniel 5760: int
1.123 daniel 5761: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
1.61 daniel 5762: xmlElementContentPtr *result) {
5763:
5764: xmlElementContentPtr tree = NULL;
1.187 daniel 5765: xmlParserInputPtr input = ctxt->input;
1.61 daniel 5766: int res;
5767:
5768: *result = NULL;
5769:
1.152 daniel 5770: if (RAW != '(') {
1.61 daniel 5771: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5772: ctxt->sax->error(ctxt->userData,
1.61 daniel 5773: "xmlParseElementContentDecl : '(' expected\n");
1.123 daniel 5774: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.61 daniel 5775: ctxt->wellFormed = 0;
1.180 daniel 5776: ctxt->disableSAX = 1;
1.61 daniel 5777: return(-1);
5778: }
5779: NEXT;
1.97 daniel 5780: GROW;
1.61 daniel 5781: SKIP_BLANKS;
1.152 daniel 5782: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 5783: (NXT(2) == 'C') && (NXT(3) == 'D') &&
5784: (NXT(4) == 'A') && (NXT(5) == 'T') &&
5785: (NXT(6) == 'A')) {
1.62 daniel 5786: tree = xmlParseElementMixedContentDecl(ctxt);
1.61 daniel 5787: res = XML_ELEMENT_TYPE_MIXED;
5788: } else {
1.62 daniel 5789: tree = xmlParseElementChildrenContentDecl(ctxt);
1.61 daniel 5790: res = XML_ELEMENT_TYPE_ELEMENT;
5791: }
1.187 daniel 5792: if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
5793: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5794: ctxt->sax->error(ctxt->userData,
5795: "Element content declaration doesn't start and stop in the same entity\n");
5796: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5797: ctxt->wellFormed = 0;
5798: ctxt->disableSAX = 1;
5799: }
1.61 daniel 5800: SKIP_BLANKS;
1.63 daniel 5801: /****************************
1.152 daniel 5802: if (RAW != ')') {
1.61 daniel 5803: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5804: ctxt->sax->error(ctxt->userData,
1.61 daniel 5805: "xmlParseElementContentDecl : ')' expected\n");
5806: ctxt->wellFormed = 0;
1.180 daniel 5807: ctxt->disableSAX = 1;
1.61 daniel 5808: return(-1);
5809: }
1.63 daniel 5810: ****************************/
5811: *result = tree;
1.61 daniel 5812: return(res);
1.22 daniel 5813: }
5814:
1.50 daniel 5815: /**
5816: * xmlParseElementDecl:
5817: * @ctxt: an XML parser context
5818: *
5819: * parse an Element declaration.
1.22 daniel 5820: *
5821: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5822: *
1.99 daniel 5823: * [ VC: Unique Element Type Declaration ]
1.117 daniel 5824: * No element type may be declared more than once
1.69 daniel 5825: *
5826: * Returns the type of the element, or -1 in case of error
1.22 daniel 5827: */
1.59 daniel 5828: int
1.55 daniel 5829: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 5830: xmlChar *name;
1.59 daniel 5831: int ret = -1;
1.61 daniel 5832: xmlElementContentPtr content = NULL;
1.22 daniel 5833:
1.97 daniel 5834: GROW;
1.152 daniel 5835: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 5836: (NXT(2) == 'E') && (NXT(3) == 'L') &&
5837: (NXT(4) == 'E') && (NXT(5) == 'M') &&
5838: (NXT(6) == 'E') && (NXT(7) == 'N') &&
1.59 daniel 5839: (NXT(8) == 'T')) {
1.187 daniel 5840: xmlParserInputPtr input = ctxt->input;
5841:
1.40 daniel 5842: SKIP(9);
1.59 daniel 5843: if (!IS_BLANK(CUR)) {
5844: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5845: ctxt->sax->error(ctxt->userData,
1.59 daniel 5846: "Space required after 'ELEMENT'\n");
1.123 daniel 5847: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5848: ctxt->wellFormed = 0;
1.180 daniel 5849: ctxt->disableSAX = 1;
1.59 daniel 5850: }
1.42 daniel 5851: SKIP_BLANKS;
1.22 daniel 5852: name = xmlParseName(ctxt);
5853: if (name == NULL) {
1.55 daniel 5854: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5855: ctxt->sax->error(ctxt->userData,
1.59 daniel 5856: "xmlParseElementDecl: no name for Element\n");
1.123 daniel 5857: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5858: ctxt->wellFormed = 0;
1.180 daniel 5859: ctxt->disableSAX = 1;
1.59 daniel 5860: return(-1);
5861: }
5862: if (!IS_BLANK(CUR)) {
5863: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5864: ctxt->sax->error(ctxt->userData,
1.59 daniel 5865: "Space required after the element name\n");
1.123 daniel 5866: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5867: ctxt->wellFormed = 0;
1.180 daniel 5868: ctxt->disableSAX = 1;
1.22 daniel 5869: }
1.42 daniel 5870: SKIP_BLANKS;
1.152 daniel 5871: if ((RAW == 'E') && (NXT(1) == 'M') &&
1.40 daniel 5872: (NXT(2) == 'P') && (NXT(3) == 'T') &&
5873: (NXT(4) == 'Y')) {
5874: SKIP(5);
1.22 daniel 5875: /*
5876: * Element must always be empty.
5877: */
1.59 daniel 5878: ret = XML_ELEMENT_TYPE_EMPTY;
1.152 daniel 5879: } else if ((RAW == 'A') && (NXT(1) == 'N') &&
1.40 daniel 5880: (NXT(2) == 'Y')) {
5881: SKIP(3);
1.22 daniel 5882: /*
5883: * Element is a generic container.
5884: */
1.59 daniel 5885: ret = XML_ELEMENT_TYPE_ANY;
1.152 daniel 5886: } else if (RAW == '(') {
1.61 daniel 5887: ret = xmlParseElementContentDecl(ctxt, name, &content);
1.22 daniel 5888: } else {
1.98 daniel 5889: /*
5890: * [ WFC: PEs in Internal Subset ] error handling.
5891: */
1.152 daniel 5892: if ((RAW == '%') && (ctxt->external == 0) &&
1.98 daniel 5893: (ctxt->inputNr == 1)) {
5894: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5895: ctxt->sax->error(ctxt->userData,
5896: "PEReference: forbidden within markup decl in internal subset\n");
1.123 daniel 5897: ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
1.98 daniel 5898: } else {
5899: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5900: ctxt->sax->error(ctxt->userData,
5901: "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
1.123 daniel 5902: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.98 daniel 5903: }
1.61 daniel 5904: ctxt->wellFormed = 0;
1.180 daniel 5905: ctxt->disableSAX = 1;
1.119 daniel 5906: if (name != NULL) xmlFree(name);
1.61 daniel 5907: return(-1);
1.22 daniel 5908: }
1.142 daniel 5909:
5910: SKIP_BLANKS;
5911: /*
5912: * Pop-up of finished entities.
5913: */
1.152 daniel 5914: while ((RAW == 0) && (ctxt->inputNr > 1))
1.142 daniel 5915: xmlPopInput(ctxt);
1.42 daniel 5916: SKIP_BLANKS;
1.142 daniel 5917:
1.152 daniel 5918: if (RAW != '>') {
1.55 daniel 5919: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5920: ctxt->sax->error(ctxt->userData,
1.31 daniel 5921: "xmlParseElementDecl: expected '>' at the end\n");
1.123 daniel 5922: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.59 daniel 5923: ctxt->wellFormed = 0;
1.180 daniel 5924: ctxt->disableSAX = 1;
1.61 daniel 5925: } else {
1.187 daniel 5926: if (input != ctxt->input) {
5927: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5928: ctxt->sax->error(ctxt->userData,
5929: "Element declaration doesn't start and stop in the same entity\n");
5930: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5931: ctxt->wellFormed = 0;
5932: ctxt->disableSAX = 1;
5933: }
5934:
1.40 daniel 5935: NEXT;
1.171 daniel 5936: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5937: (ctxt->sax->elementDecl != NULL))
1.76 daniel 5938: ctxt->sax->elementDecl(ctxt->userData, name, ret,
5939: content);
1.61 daniel 5940: }
1.84 daniel 5941: if (content != NULL) {
5942: xmlFreeElementContent(content);
5943: }
1.61 daniel 5944: if (name != NULL) {
1.119 daniel 5945: xmlFree(name);
1.61 daniel 5946: }
1.22 daniel 5947: }
1.59 daniel 5948: return(ret);
1.22 daniel 5949: }
5950:
1.50 daniel 5951: /**
5952: * xmlParseMarkupDecl:
5953: * @ctxt: an XML parser context
5954: *
5955: * parse Markup declarations
1.22 daniel 5956: *
5957: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5958: * NotationDecl | PI | Comment
5959: *
1.98 daniel 5960: * [ VC: Proper Declaration/PE Nesting ]
5961: * TODO Parameter-entity replacement text must be properly nested with
5962: * markup declarations. That is to say, if either the first character
5963: * or the last character of a markup declaration (markupdecl above) is
5964: * contained in the replacement text for a parameter-entity reference,
5965: * both must be contained in the same replacement text.
5966: *
5967: * [ WFC: PEs in Internal Subset ]
5968: * In the internal DTD subset, parameter-entity references can occur
5969: * only where markup declarations can occur, not within markup declarations.
5970: * (This does not apply to references that occur in external parameter
5971: * entities or to the external subset.)
1.22 daniel 5972: */
1.55 daniel 5973: void
5974: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1.94 daniel 5975: GROW;
1.22 daniel 5976: xmlParseElementDecl(ctxt);
5977: xmlParseAttributeListDecl(ctxt);
5978: xmlParseEntityDecl(ctxt);
5979: xmlParseNotationDecl(ctxt);
5980: xmlParsePI(ctxt);
1.114 daniel 5981: xmlParseComment(ctxt);
1.98 daniel 5982: /*
5983: * This is only for internal subset. On external entities,
5984: * the replacement is done before parsing stage
5985: */
5986: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5987: xmlParsePEReference(ctxt);
1.97 daniel 5988: ctxt->instate = XML_PARSER_DTD;
1.22 daniel 5989: }
5990:
1.50 daniel 5991: /**
1.76 daniel 5992: * xmlParseTextDecl:
5993: * @ctxt: an XML parser context
5994: *
5995: * parse an XML declaration header for external entities
5996: *
5997: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
1.176 daniel 5998: *
5999: * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
1.76 daniel 6000: */
6001:
1.172 daniel 6002: void
1.76 daniel 6003: xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 6004: xmlChar *version;
1.76 daniel 6005:
6006: /*
6007: * We know that '<?xml' is here.
6008: */
1.193 daniel 6009: if ((RAW == '<') && (NXT(1) == '?') &&
6010: (NXT(2) == 'x') && (NXT(3) == 'm') &&
6011: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6012: SKIP(5);
6013: } else {
6014: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6015: ctxt->sax->error(ctxt->userData,
6016: "Text declaration '<?xml' required\n");
6017: ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
6018: ctxt->wellFormed = 0;
6019: ctxt->disableSAX = 1;
6020:
6021: return;
6022: }
1.76 daniel 6023:
6024: if (!IS_BLANK(CUR)) {
6025: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6026: ctxt->sax->error(ctxt->userData,
6027: "Space needed after '<?xml'\n");
1.123 daniel 6028: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 6029: ctxt->wellFormed = 0;
1.180 daniel 6030: ctxt->disableSAX = 1;
1.76 daniel 6031: }
6032: SKIP_BLANKS;
6033:
6034: /*
6035: * We may have the VersionInfo here.
6036: */
6037: version = xmlParseVersionInfo(ctxt);
6038: if (version == NULL)
6039: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.172 daniel 6040: ctxt->input->version = version;
1.76 daniel 6041:
6042: /*
6043: * We must have the encoding declaration
6044: */
6045: if (!IS_BLANK(CUR)) {
6046: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6047: ctxt->sax->error(ctxt->userData, "Space needed here\n");
1.123 daniel 6048: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 6049: ctxt->wellFormed = 0;
1.180 daniel 6050: ctxt->disableSAX = 1;
1.76 daniel 6051: }
1.195 daniel 6052: xmlParseEncodingDecl(ctxt);
1.193 daniel 6053: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6054: /*
6055: * The XML REC instructs us to stop parsing right here
6056: */
6057: return;
6058: }
1.76 daniel 6059:
6060: SKIP_BLANKS;
1.152 daniel 6061: if ((RAW == '?') && (NXT(1) == '>')) {
1.76 daniel 6062: SKIP(2);
1.152 daniel 6063: } else if (RAW == '>') {
1.76 daniel 6064: /* Deprecated old WD ... */
6065: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6066: ctxt->sax->error(ctxt->userData,
6067: "XML declaration must end-up with '?>'\n");
1.123 daniel 6068: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 6069: ctxt->wellFormed = 0;
1.180 daniel 6070: ctxt->disableSAX = 1;
1.76 daniel 6071: NEXT;
6072: } else {
6073: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6074: ctxt->sax->error(ctxt->userData,
6075: "parsing XML declaration: '?>' expected\n");
1.123 daniel 6076: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 6077: ctxt->wellFormed = 0;
1.180 daniel 6078: ctxt->disableSAX = 1;
1.76 daniel 6079: MOVETO_ENDTAG(CUR_PTR);
6080: NEXT;
6081: }
6082: }
6083:
6084: /*
6085: * xmlParseConditionalSections
6086: * @ctxt: an XML parser context
6087: *
6088: * TODO : Conditionnal section are not yet supported !
6089: *
6090: * [61] conditionalSect ::= includeSect | ignoreSect
6091: * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6092: * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6093: * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6094: * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6095: */
6096:
6097: void
6098: xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
1.165 daniel 6099: SKIP(3);
6100: SKIP_BLANKS;
1.168 daniel 6101: if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
6102: (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
6103: (NXT(6) == 'E')) {
1.165 daniel 6104: SKIP(7);
1.168 daniel 6105: SKIP_BLANKS;
6106: if (RAW != '[') {
6107: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6108: ctxt->sax->error(ctxt->userData,
6109: "XML conditional section '[' expected\n");
6110: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
6111: ctxt->wellFormed = 0;
1.180 daniel 6112: ctxt->disableSAX = 1;
1.168 daniel 6113: } else {
6114: NEXT;
6115: }
1.165 daniel 6116: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6117: (NXT(2) != '>'))) {
6118: const xmlChar *check = CUR_PTR;
6119: int cons = ctxt->input->consumed;
6120: int tok = ctxt->token;
6121:
6122: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6123: xmlParseConditionalSections(ctxt);
6124: } else if (IS_BLANK(CUR)) {
6125: NEXT;
6126: } else if (RAW == '%') {
6127: xmlParsePEReference(ctxt);
6128: } else
6129: xmlParseMarkupDecl(ctxt);
6130:
6131: /*
6132: * Pop-up of finished entities.
6133: */
6134: while ((RAW == 0) && (ctxt->inputNr > 1))
6135: xmlPopInput(ctxt);
6136:
6137: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
6138: (tok == ctxt->token)) {
6139: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6140: ctxt->sax->error(ctxt->userData,
6141: "Content error in the external subset\n");
6142: ctxt->wellFormed = 0;
1.180 daniel 6143: ctxt->disableSAX = 1;
1.165 daniel 6144: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
6145: break;
6146: }
6147: }
1.168 daniel 6148: } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
6149: (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
1.171 daniel 6150: int state;
6151:
1.168 daniel 6152: SKIP(6);
6153: SKIP_BLANKS;
6154: if (RAW != '[') {
6155: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6156: ctxt->sax->error(ctxt->userData,
6157: "XML conditional section '[' expected\n");
6158: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
6159: ctxt->wellFormed = 0;
1.180 daniel 6160: ctxt->disableSAX = 1;
1.168 daniel 6161: } else {
6162: NEXT;
6163: }
1.171 daniel 6164:
1.143 daniel 6165: /*
1.171 daniel 6166: * Parse up to the end of the conditionnal section
6167: * But disable SAX event generating DTD building in the meantime
1.143 daniel 6168: */
1.171 daniel 6169: state = ctxt->disableSAX;
1.165 daniel 6170: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6171: (NXT(2) != '>'))) {
1.171 daniel 6172: const xmlChar *check = CUR_PTR;
6173: int cons = ctxt->input->consumed;
6174: int tok = ctxt->token;
6175:
6176: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6177: xmlParseConditionalSections(ctxt);
6178: } else if (IS_BLANK(CUR)) {
6179: NEXT;
6180: } else if (RAW == '%') {
6181: xmlParsePEReference(ctxt);
6182: } else
6183: xmlParseMarkupDecl(ctxt);
6184:
1.165 daniel 6185: /*
6186: * Pop-up of finished entities.
6187: */
6188: while ((RAW == 0) && (ctxt->inputNr > 1))
6189: xmlPopInput(ctxt);
1.143 daniel 6190:
1.171 daniel 6191: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
6192: (tok == ctxt->token)) {
6193: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6194: ctxt->sax->error(ctxt->userData,
6195: "Content error in the external subset\n");
6196: ctxt->wellFormed = 0;
1.180 daniel 6197: ctxt->disableSAX = 1;
1.171 daniel 6198: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
6199: break;
6200: }
1.165 daniel 6201: }
1.171 daniel 6202: ctxt->disableSAX = state;
1.168 daniel 6203: } else {
6204: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6205: ctxt->sax->error(ctxt->userData,
6206: "XML conditional section INCLUDE or IGNORE keyword expected\n");
6207: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
6208: ctxt->wellFormed = 0;
1.180 daniel 6209: ctxt->disableSAX = 1;
1.143 daniel 6210: }
6211:
1.152 daniel 6212: if (RAW == 0)
1.143 daniel 6213: SHRINK;
6214:
1.152 daniel 6215: if (RAW == 0) {
1.76 daniel 6216: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6217: ctxt->sax->error(ctxt->userData,
6218: "XML conditional section not closed\n");
1.123 daniel 6219: ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
1.76 daniel 6220: ctxt->wellFormed = 0;
1.180 daniel 6221: ctxt->disableSAX = 1;
1.143 daniel 6222: } else {
6223: SKIP(3);
1.76 daniel 6224: }
6225: }
6226:
6227: /**
1.124 daniel 6228: * xmlParseExternalSubset:
1.76 daniel 6229: * @ctxt: an XML parser context
1.124 daniel 6230: * @ExternalID: the external identifier
6231: * @SystemID: the system identifier (or URL)
1.76 daniel 6232: *
6233: * parse Markup declarations from an external subset
6234: *
6235: * [30] extSubset ::= textDecl? extSubsetDecl
6236: *
6237: * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6238: */
6239: void
1.123 daniel 6240: xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6241: const xmlChar *SystemID) {
1.132 daniel 6242: GROW;
1.152 daniel 6243: if ((RAW == '<') && (NXT(1) == '?') &&
1.76 daniel 6244: (NXT(2) == 'x') && (NXT(3) == 'm') &&
6245: (NXT(4) == 'l')) {
1.172 daniel 6246: xmlParseTextDecl(ctxt);
1.193 daniel 6247: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6248: /*
6249: * The XML REC instructs us to stop parsing right here
6250: */
6251: ctxt->instate = XML_PARSER_EOF;
6252: return;
6253: }
1.76 daniel 6254: }
1.79 daniel 6255: if (ctxt->myDoc == NULL) {
1.116 daniel 6256: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
1.79 daniel 6257: }
6258: if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6259: xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6260:
1.96 daniel 6261: ctxt->instate = XML_PARSER_DTD;
1.101 daniel 6262: ctxt->external = 1;
1.152 daniel 6263: while (((RAW == '<') && (NXT(1) == '?')) ||
6264: ((RAW == '<') && (NXT(1) == '!')) ||
1.164 daniel 6265: IS_BLANK(CUR)) {
1.123 daniel 6266: const xmlChar *check = CUR_PTR;
1.115 daniel 6267: int cons = ctxt->input->consumed;
1.164 daniel 6268: int tok = ctxt->token;
1.115 daniel 6269:
1.152 daniel 6270: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
1.76 daniel 6271: xmlParseConditionalSections(ctxt);
6272: } else if (IS_BLANK(CUR)) {
6273: NEXT;
1.152 daniel 6274: } else if (RAW == '%') {
1.76 daniel 6275: xmlParsePEReference(ctxt);
6276: } else
6277: xmlParseMarkupDecl(ctxt);
1.77 daniel 6278:
6279: /*
6280: * Pop-up of finished entities.
6281: */
1.166 daniel 6282: while ((RAW == 0) && (ctxt->inputNr > 1))
1.77 daniel 6283: xmlPopInput(ctxt);
6284:
1.164 daniel 6285: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
6286: (tok == ctxt->token)) {
1.115 daniel 6287: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6288: ctxt->sax->error(ctxt->userData,
6289: "Content error in the external subset\n");
6290: ctxt->wellFormed = 0;
1.180 daniel 6291: ctxt->disableSAX = 1;
1.123 daniel 6292: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.115 daniel 6293: break;
6294: }
1.76 daniel 6295: }
6296:
1.152 daniel 6297: if (RAW != 0) {
1.76 daniel 6298: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6299: ctxt->sax->error(ctxt->userData,
6300: "Extra content at the end of the document\n");
1.123 daniel 6301: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.76 daniel 6302: ctxt->wellFormed = 0;
1.180 daniel 6303: ctxt->disableSAX = 1;
1.76 daniel 6304: }
6305:
6306: }
6307:
6308: /**
1.77 daniel 6309: * xmlParseReference:
6310: * @ctxt: an XML parser context
6311: *
6312: * parse and handle entity references in content, depending on the SAX
6313: * interface, this may end-up in a call to character() if this is a
1.79 daniel 6314: * CharRef, a predefined entity, if there is no reference() callback.
6315: * or if the parser was asked to switch to that mode.
1.77 daniel 6316: *
6317: * [67] Reference ::= EntityRef | CharRef
6318: */
6319: void
6320: xmlParseReference(xmlParserCtxtPtr ctxt) {
6321: xmlEntityPtr ent;
1.123 daniel 6322: xmlChar *val;
1.152 daniel 6323: if (RAW != '&') return;
1.77 daniel 6324:
1.113 daniel 6325: if (ctxt->inputNr > 1) {
1.123 daniel 6326: xmlChar cur[2] = { '&' , 0 } ;
1.113 daniel 6327:
1.171 daniel 6328: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6329: (!ctxt->disableSAX))
1.113 daniel 6330: ctxt->sax->characters(ctxt->userData, cur, 1);
6331: if (ctxt->token == '&')
6332: ctxt->token = 0;
6333: else {
6334: SKIP(1);
6335: }
6336: return;
6337: }
1.77 daniel 6338: if (NXT(1) == '#') {
1.152 daniel 6339: int i = 0;
1.153 daniel 6340: xmlChar out[10];
6341: int hex = NXT(2);
1.77 daniel 6342: int val = xmlParseCharRef(ctxt);
1.152 daniel 6343:
1.153 daniel 6344: if (ctxt->encoding != NULL) {
6345: /*
6346: * So we are using non-UTF-8 buffers
6347: * Check that the char fit on 8bits, if not
6348: * generate a CharRef.
6349: */
6350: if (val <= 0xFF) {
6351: out[0] = val;
6352: out[1] = 0;
1.171 daniel 6353: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6354: (!ctxt->disableSAX))
1.153 daniel 6355: ctxt->sax->characters(ctxt->userData, out, 1);
6356: } else {
6357: if ((hex == 'x') || (hex == 'X'))
6358: sprintf((char *)out, "#x%X", val);
6359: else
6360: sprintf((char *)out, "#%d", val);
1.171 daniel 6361: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6362: (!ctxt->disableSAX))
1.153 daniel 6363: ctxt->sax->reference(ctxt->userData, out);
6364: }
6365: } else {
6366: /*
6367: * Just encode the value in UTF-8
6368: */
6369: COPY_BUF(0 ,out, i, val);
6370: out[i] = 0;
1.171 daniel 6371: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6372: (!ctxt->disableSAX))
1.153 daniel 6373: ctxt->sax->characters(ctxt->userData, out, i);
6374: }
1.77 daniel 6375: } else {
6376: ent = xmlParseEntityRef(ctxt);
6377: if (ent == NULL) return;
6378: if ((ent->name != NULL) &&
1.159 daniel 6379: (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
1.180 daniel 6380: xmlNodePtr list = NULL;
6381: int ret;
6382:
6383:
6384: /*
6385: * The first reference to the entity trigger a parsing phase
6386: * where the ent->children is filled with the result from
6387: * the parsing.
6388: */
6389: if (ent->children == NULL) {
6390: xmlChar *value;
6391: value = ent->content;
6392:
6393: /*
6394: * Check that this entity is well formed
6395: */
6396: if ((value != NULL) &&
6397: (value[1] == 0) && (value[0] == '<') &&
6398: (!xmlStrcmp(ent->name, BAD_CAST "lt"))) {
6399: /*
6400: * TODO: get definite answer on this !!!
6401: * Lots of entity decls are used to declare a single
6402: * char
6403: * <!ENTITY lt "<">
6404: * Which seems to be valid since
6405: * 2.4: The ampersand character (&) and the left angle
6406: * bracket (<) may appear in their literal form only
6407: * when used ... They are also legal within the literal
6408: * entity value of an internal entity declaration;i
6409: * see "4.3.2 Well-Formed Parsed Entities".
6410: * IMHO 2.4 and 4.3.2 are directly in contradiction.
6411: * Looking at the OASIS test suite and James Clark
6412: * tests, this is broken. However the XML REC uses
6413: * it. Is the XML REC not well-formed ????
6414: * This is a hack to avoid this problem
6415: */
6416: list = xmlNewDocText(ctxt->myDoc, value);
6417: if (list != NULL) {
6418: if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6419: (ent->children == NULL)) {
6420: ent->children = list;
6421: ent->last = list;
6422: list->parent = (xmlNodePtr) ent;
6423: } else {
6424: xmlFreeNodeList(list);
6425: }
6426: } else if (list != NULL) {
6427: xmlFreeNodeList(list);
6428: }
1.181 daniel 6429: } else {
1.180 daniel 6430: /*
6431: * 4.3.2: An internal general parsed entity is well-formed
6432: * if its replacement text matches the production labeled
6433: * content.
6434: */
1.185 daniel 6435: if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6436: ctxt->depth++;
1.180 daniel 6437: ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
1.185 daniel 6438: ctxt->sax, NULL, ctxt->depth,
6439: value, &list);
6440: ctxt->depth--;
6441: } else if (ent->etype ==
6442: XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6443: ctxt->depth++;
1.180 daniel 6444: ret = xmlParseExternalEntity(ctxt->myDoc,
1.185 daniel 6445: ctxt->sax, NULL, ctxt->depth,
6446: ent->SystemID, ent->ExternalID, &list);
6447: ctxt->depth--;
6448: } else {
1.180 daniel 6449: ret = -1;
6450: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6451: ctxt->sax->error(ctxt->userData,
6452: "Internal: invalid entity type\n");
6453: }
1.185 daniel 6454: if (ret == XML_ERR_ENTITY_LOOP) {
6455: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6456: ctxt->sax->error(ctxt->userData,
6457: "Detected entity reference loop\n");
6458: ctxt->wellFormed = 0;
6459: ctxt->disableSAX = 1;
6460: ctxt->errNo = XML_ERR_ENTITY_LOOP;
6461: } else if ((ret == 0) && (list != NULL)) {
1.180 daniel 6462: if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6463: (ent->children == NULL)) {
6464: ent->children = list;
6465: while (list != NULL) {
6466: list->parent = (xmlNodePtr) ent;
6467: if (list->next == NULL)
6468: ent->last = list;
6469: list = list->next;
6470: }
6471: } else {
6472: xmlFreeNodeList(list);
6473: }
6474: } else if (ret > 0) {
6475: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6476: ctxt->sax->error(ctxt->userData,
6477: "Entity value required\n");
6478: ctxt->errNo = ret;
6479: ctxt->wellFormed = 0;
6480: ctxt->disableSAX = 1;
6481: } else if (list != NULL) {
6482: xmlFreeNodeList(list);
6483: }
6484: }
6485: }
1.113 daniel 6486: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
1.171 daniel 6487: (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
1.113 daniel 6488: /*
6489: * Create a node.
6490: */
6491: ctxt->sax->reference(ctxt->userData, ent->name);
6492: return;
6493: } else if (ctxt->replaceEntities) {
6494: xmlParserInputPtr input;
1.79 daniel 6495:
1.113 daniel 6496: input = xmlNewEntityInputStream(ctxt, ent);
6497: xmlPushInput(ctxt, input);
1.167 daniel 6498: if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
6499: (RAW == '<') && (NXT(1) == '?') &&
6500: (NXT(2) == 'x') && (NXT(3) == 'm') &&
6501: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 6502: xmlParseTextDecl(ctxt);
1.193 daniel 6503: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6504: /*
6505: * The XML REC instructs us to stop parsing right here
6506: */
6507: ctxt->instate = XML_PARSER_EOF;
6508: return;
6509: }
1.167 daniel 6510: if (input->standalone) {
6511: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6512: ctxt->sax->error(ctxt->userData,
6513: "external parsed entities cannot be standalone\n");
6514: ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
6515: ctxt->wellFormed = 0;
1.180 daniel 6516: ctxt->disableSAX = 1;
1.167 daniel 6517: }
6518: }
1.179 daniel 6519: /*
6520: * !!! TODO: build the tree under the entity first
6521: * 1234
6522: */
1.113 daniel 6523: return;
6524: }
1.77 daniel 6525: }
6526: val = ent->content;
6527: if (val == NULL) return;
6528: /*
6529: * inline the entity.
6530: */
1.171 daniel 6531: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6532: (!ctxt->disableSAX))
1.77 daniel 6533: ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6534: }
1.24 daniel 6535: }
6536:
1.50 daniel 6537: /**
6538: * xmlParseEntityRef:
6539: * @ctxt: an XML parser context
6540: *
6541: * parse ENTITY references declarations
1.24 daniel 6542: *
6543: * [68] EntityRef ::= '&' Name ';'
1.68 daniel 6544: *
1.98 daniel 6545: * [ WFC: Entity Declared ]
6546: * In a document without any DTD, a document with only an internal DTD
6547: * subset which contains no parameter entity references, or a document
6548: * with "standalone='yes'", the Name given in the entity reference
6549: * must match that in an entity declaration, except that well-formed
6550: * documents need not declare any of the following entities: amp, lt,
6551: * gt, apos, quot. The declaration of a parameter entity must precede
6552: * any reference to it. Similarly, the declaration of a general entity
6553: * must precede any reference to it which appears in a default value in an
6554: * attribute-list declaration. Note that if entities are declared in the
6555: * external subset or in external parameter entities, a non-validating
6556: * processor is not obligated to read and process their declarations;
6557: * for such documents, the rule that an entity must be declared is a
6558: * well-formedness constraint only if standalone='yes'.
6559: *
6560: * [ WFC: Parsed Entity ]
6561: * An entity reference must not contain the name of an unparsed entity
6562: *
1.77 daniel 6563: * Returns the xmlEntityPtr if found, or NULL otherwise.
1.24 daniel 6564: */
1.77 daniel 6565: xmlEntityPtr
1.55 daniel 6566: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1.123 daniel 6567: xmlChar *name;
1.72 daniel 6568: xmlEntityPtr ent = NULL;
1.24 daniel 6569:
1.91 daniel 6570: GROW;
1.111 daniel 6571:
1.152 daniel 6572: if (RAW == '&') {
1.40 daniel 6573: NEXT;
1.24 daniel 6574: name = xmlParseName(ctxt);
6575: if (name == NULL) {
1.55 daniel 6576: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 6577: ctxt->sax->error(ctxt->userData,
6578: "xmlParseEntityRef: no name\n");
1.123 daniel 6579: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 6580: ctxt->wellFormed = 0;
1.180 daniel 6581: ctxt->disableSAX = 1;
1.24 daniel 6582: } else {
1.152 daniel 6583: if (RAW == ';') {
1.40 daniel 6584: NEXT;
1.24 daniel 6585: /*
1.77 daniel 6586: * Ask first SAX for entity resolution, otherwise try the
6587: * predefined set.
6588: */
6589: if (ctxt->sax != NULL) {
6590: if (ctxt->sax->getEntity != NULL)
6591: ent = ctxt->sax->getEntity(ctxt->userData, name);
6592: if (ent == NULL)
6593: ent = xmlGetPredefinedEntity(name);
6594: }
6595: /*
1.98 daniel 6596: * [ WFC: Entity Declared ]
6597: * In a document without any DTD, a document with only an
6598: * internal DTD subset which contains no parameter entity
6599: * references, or a document with "standalone='yes'", the
6600: * Name given in the entity reference must match that in an
6601: * entity declaration, except that well-formed documents
6602: * need not declare any of the following entities: amp, lt,
6603: * gt, apos, quot.
6604: * The declaration of a parameter entity must precede any
6605: * reference to it.
6606: * Similarly, the declaration of a general entity must
6607: * precede any reference to it which appears in a default
6608: * value in an attribute-list declaration. Note that if
6609: * entities are declared in the external subset or in
6610: * external parameter entities, a non-validating processor
6611: * is not obligated to read and process their declarations;
6612: * for such documents, the rule that an entity must be
6613: * declared is a well-formedness constraint only if
6614: * standalone='yes'.
1.59 daniel 6615: */
1.77 daniel 6616: if (ent == NULL) {
1.98 daniel 6617: if ((ctxt->standalone == 1) ||
6618: ((ctxt->hasExternalSubset == 0) &&
6619: (ctxt->hasPErefs == 0))) {
6620: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 6621: ctxt->sax->error(ctxt->userData,
6622: "Entity '%s' not defined\n", name);
1.123 daniel 6623: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.77 daniel 6624: ctxt->wellFormed = 0;
1.180 daniel 6625: ctxt->disableSAX = 1;
1.77 daniel 6626: } else {
1.98 daniel 6627: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6628: ctxt->sax->warning(ctxt->userData,
6629: "Entity '%s' not defined\n", name);
1.123 daniel 6630: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
1.59 daniel 6631: }
1.77 daniel 6632: }
1.59 daniel 6633:
6634: /*
1.98 daniel 6635: * [ WFC: Parsed Entity ]
6636: * An entity reference must not contain the name of an
6637: * unparsed entity
6638: */
1.159 daniel 6639: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.98 daniel 6640: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6641: ctxt->sax->error(ctxt->userData,
6642: "Entity reference to unparsed entity %s\n", name);
1.123 daniel 6643: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 6644: ctxt->wellFormed = 0;
1.180 daniel 6645: ctxt->disableSAX = 1;
1.98 daniel 6646: }
6647:
6648: /*
6649: * [ WFC: No External Entity References ]
6650: * Attribute values cannot contain direct or indirect
6651: * entity references to external entities.
6652: */
6653: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 6654: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.98 daniel 6655: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6656: ctxt->sax->error(ctxt->userData,
6657: "Attribute references external entity '%s'\n", name);
1.123 daniel 6658: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
1.98 daniel 6659: ctxt->wellFormed = 0;
1.180 daniel 6660: ctxt->disableSAX = 1;
1.98 daniel 6661: }
6662: /*
6663: * [ WFC: No < in Attribute Values ]
6664: * The replacement text of any entity referred to directly or
6665: * indirectly in an attribute value (other than "<") must
6666: * not contain a <.
1.59 daniel 6667: */
1.98 daniel 6668: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.116 daniel 6669: (ent != NULL) &&
6670: (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
1.98 daniel 6671: (ent->content != NULL) &&
6672: (xmlStrchr(ent->content, '<'))) {
6673: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6674: ctxt->sax->error(ctxt->userData,
6675: "'<' in entity '%s' is not allowed in attributes values\n", name);
1.123 daniel 6676: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.98 daniel 6677: ctxt->wellFormed = 0;
1.180 daniel 6678: ctxt->disableSAX = 1;
1.98 daniel 6679: }
6680:
6681: /*
6682: * Internal check, no parameter entities here ...
6683: */
6684: else {
1.159 daniel 6685: switch (ent->etype) {
1.59 daniel 6686: case XML_INTERNAL_PARAMETER_ENTITY:
6687: case XML_EXTERNAL_PARAMETER_ENTITY:
6688: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6689: ctxt->sax->error(ctxt->userData,
1.59 daniel 6690: "Attempt to reference the parameter entity '%s'\n", name);
1.123 daniel 6691: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
1.59 daniel 6692: ctxt->wellFormed = 0;
1.180 daniel 6693: ctxt->disableSAX = 1;
6694: break;
6695: default:
1.59 daniel 6696: break;
6697: }
6698: }
6699:
6700: /*
1.98 daniel 6701: * [ WFC: No Recursion ]
1.117 daniel 6702: * TODO A parsed entity must not contain a recursive reference
6703: * to itself, either directly or indirectly.
1.59 daniel 6704: */
1.77 daniel 6705:
1.24 daniel 6706: } else {
1.55 daniel 6707: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6708: ctxt->sax->error(ctxt->userData,
1.59 daniel 6709: "xmlParseEntityRef: expecting ';'\n");
1.123 daniel 6710: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.59 daniel 6711: ctxt->wellFormed = 0;
1.180 daniel 6712: ctxt->disableSAX = 1;
1.24 daniel 6713: }
1.119 daniel 6714: xmlFree(name);
1.24 daniel 6715: }
6716: }
1.77 daniel 6717: return(ent);
1.24 daniel 6718: }
1.135 daniel 6719: /**
6720: * xmlParseStringEntityRef:
6721: * @ctxt: an XML parser context
6722: * @str: a pointer to an index in the string
6723: *
6724: * parse ENTITY references declarations, but this version parses it from
6725: * a string value.
6726: *
6727: * [68] EntityRef ::= '&' Name ';'
6728: *
6729: * [ WFC: Entity Declared ]
6730: * In a document without any DTD, a document with only an internal DTD
6731: * subset which contains no parameter entity references, or a document
6732: * with "standalone='yes'", the Name given in the entity reference
6733: * must match that in an entity declaration, except that well-formed
6734: * documents need not declare any of the following entities: amp, lt,
6735: * gt, apos, quot. The declaration of a parameter entity must precede
6736: * any reference to it. Similarly, the declaration of a general entity
6737: * must precede any reference to it which appears in a default value in an
6738: * attribute-list declaration. Note that if entities are declared in the
6739: * external subset or in external parameter entities, a non-validating
6740: * processor is not obligated to read and process their declarations;
6741: * for such documents, the rule that an entity must be declared is a
6742: * well-formedness constraint only if standalone='yes'.
6743: *
6744: * [ WFC: Parsed Entity ]
6745: * An entity reference must not contain the name of an unparsed entity
6746: *
6747: * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6748: * is updated to the current location in the string.
6749: */
6750: xmlEntityPtr
6751: xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6752: xmlChar *name;
6753: const xmlChar *ptr;
6754: xmlChar cur;
6755: xmlEntityPtr ent = NULL;
6756:
1.156 daniel 6757: if ((str == NULL) || (*str == NULL))
6758: return(NULL);
1.135 daniel 6759: ptr = *str;
6760: cur = *ptr;
6761: if (cur == '&') {
6762: ptr++;
6763: cur = *ptr;
6764: name = xmlParseStringName(ctxt, &ptr);
6765: if (name == NULL) {
6766: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6767: ctxt->sax->error(ctxt->userData,
6768: "xmlParseEntityRef: no name\n");
6769: ctxt->errNo = XML_ERR_NAME_REQUIRED;
6770: ctxt->wellFormed = 0;
1.180 daniel 6771: ctxt->disableSAX = 1;
1.135 daniel 6772: } else {
1.185 daniel 6773: if (*ptr == ';') {
6774: ptr++;
1.135 daniel 6775: /*
6776: * Ask first SAX for entity resolution, otherwise try the
6777: * predefined set.
6778: */
6779: if (ctxt->sax != NULL) {
6780: if (ctxt->sax->getEntity != NULL)
6781: ent = ctxt->sax->getEntity(ctxt->userData, name);
6782: if (ent == NULL)
6783: ent = xmlGetPredefinedEntity(name);
6784: }
6785: /*
6786: * [ WFC: Entity Declared ]
6787: * In a document without any DTD, a document with only an
6788: * internal DTD subset which contains no parameter entity
6789: * references, or a document with "standalone='yes'", the
6790: * Name given in the entity reference must match that in an
6791: * entity declaration, except that well-formed documents
6792: * need not declare any of the following entities: amp, lt,
6793: * gt, apos, quot.
6794: * The declaration of a parameter entity must precede any
6795: * reference to it.
6796: * Similarly, the declaration of a general entity must
6797: * precede any reference to it which appears in a default
6798: * value in an attribute-list declaration. Note that if
6799: * entities are declared in the external subset or in
6800: * external parameter entities, a non-validating processor
6801: * is not obligated to read and process their declarations;
6802: * for such documents, the rule that an entity must be
6803: * declared is a well-formedness constraint only if
6804: * standalone='yes'.
6805: */
6806: if (ent == NULL) {
6807: if ((ctxt->standalone == 1) ||
6808: ((ctxt->hasExternalSubset == 0) &&
6809: (ctxt->hasPErefs == 0))) {
6810: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6811: ctxt->sax->error(ctxt->userData,
6812: "Entity '%s' not defined\n", name);
6813: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6814: ctxt->wellFormed = 0;
1.180 daniel 6815: ctxt->disableSAX = 1;
1.135 daniel 6816: } else {
6817: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6818: ctxt->sax->warning(ctxt->userData,
6819: "Entity '%s' not defined\n", name);
6820: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
6821: }
6822: }
6823:
6824: /*
6825: * [ WFC: Parsed Entity ]
6826: * An entity reference must not contain the name of an
6827: * unparsed entity
6828: */
1.159 daniel 6829: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.135 daniel 6830: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6831: ctxt->sax->error(ctxt->userData,
6832: "Entity reference to unparsed entity %s\n", name);
6833: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
6834: ctxt->wellFormed = 0;
1.180 daniel 6835: ctxt->disableSAX = 1;
1.135 daniel 6836: }
6837:
6838: /*
6839: * [ WFC: No External Entity References ]
6840: * Attribute values cannot contain direct or indirect
6841: * entity references to external entities.
6842: */
6843: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 6844: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.135 daniel 6845: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6846: ctxt->sax->error(ctxt->userData,
6847: "Attribute references external entity '%s'\n", name);
6848: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
6849: ctxt->wellFormed = 0;
1.180 daniel 6850: ctxt->disableSAX = 1;
1.135 daniel 6851: }
6852: /*
6853: * [ WFC: No < in Attribute Values ]
6854: * The replacement text of any entity referred to directly or
6855: * indirectly in an attribute value (other than "<") must
6856: * not contain a <.
6857: */
6858: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6859: (ent != NULL) &&
6860: (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
6861: (ent->content != NULL) &&
6862: (xmlStrchr(ent->content, '<'))) {
6863: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6864: ctxt->sax->error(ctxt->userData,
6865: "'<' in entity '%s' is not allowed in attributes values\n", name);
6866: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
6867: ctxt->wellFormed = 0;
1.180 daniel 6868: ctxt->disableSAX = 1;
1.135 daniel 6869: }
6870:
6871: /*
6872: * Internal check, no parameter entities here ...
6873: */
6874: else {
1.159 daniel 6875: switch (ent->etype) {
1.135 daniel 6876: case XML_INTERNAL_PARAMETER_ENTITY:
6877: case XML_EXTERNAL_PARAMETER_ENTITY:
6878: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6879: ctxt->sax->error(ctxt->userData,
6880: "Attempt to reference the parameter entity '%s'\n", name);
6881: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
6882: ctxt->wellFormed = 0;
1.180 daniel 6883: ctxt->disableSAX = 1;
6884: break;
6885: default:
1.135 daniel 6886: break;
6887: }
6888: }
6889:
6890: /*
6891: * [ WFC: No Recursion ]
6892: * TODO A parsed entity must not contain a recursive reference
6893: * to itself, either directly or indirectly.
6894: */
6895:
6896: } else {
6897: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6898: ctxt->sax->error(ctxt->userData,
6899: "xmlParseEntityRef: expecting ';'\n");
6900: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6901: ctxt->wellFormed = 0;
1.180 daniel 6902: ctxt->disableSAX = 1;
1.135 daniel 6903: }
6904: xmlFree(name);
6905: }
6906: }
1.185 daniel 6907: *str = ptr;
1.135 daniel 6908: return(ent);
6909: }
1.24 daniel 6910:
1.50 daniel 6911: /**
6912: * xmlParsePEReference:
6913: * @ctxt: an XML parser context
6914: *
6915: * parse PEReference declarations
1.77 daniel 6916: * The entity content is handled directly by pushing it's content as
6917: * a new input stream.
1.22 daniel 6918: *
6919: * [69] PEReference ::= '%' Name ';'
1.68 daniel 6920: *
1.98 daniel 6921: * [ WFC: No Recursion ]
6922: * TODO A parsed entity must not contain a recursive
6923: * reference to itself, either directly or indirectly.
6924: *
6925: * [ WFC: Entity Declared ]
6926: * In a document without any DTD, a document with only an internal DTD
6927: * subset which contains no parameter entity references, or a document
6928: * with "standalone='yes'", ... ... The declaration of a parameter
6929: * entity must precede any reference to it...
6930: *
6931: * [ VC: Entity Declared ]
6932: * In a document with an external subset or external parameter entities
6933: * with "standalone='no'", ... ... The declaration of a parameter entity
6934: * must precede any reference to it...
6935: *
6936: * [ WFC: In DTD ]
6937: * Parameter-entity references may only appear in the DTD.
6938: * NOTE: misleading but this is handled.
1.22 daniel 6939: */
1.77 daniel 6940: void
1.55 daniel 6941: xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 6942: xmlChar *name;
1.72 daniel 6943: xmlEntityPtr entity = NULL;
1.50 daniel 6944: xmlParserInputPtr input;
1.22 daniel 6945:
1.152 daniel 6946: if (RAW == '%') {
1.40 daniel 6947: NEXT;
1.22 daniel 6948: name = xmlParseName(ctxt);
6949: if (name == NULL) {
1.55 daniel 6950: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6951: ctxt->sax->error(ctxt->userData,
6952: "xmlParsePEReference: no name\n");
1.123 daniel 6953: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 6954: ctxt->wellFormed = 0;
1.180 daniel 6955: ctxt->disableSAX = 1;
1.22 daniel 6956: } else {
1.152 daniel 6957: if (RAW == ';') {
1.40 daniel 6958: NEXT;
1.98 daniel 6959: if ((ctxt->sax != NULL) &&
6960: (ctxt->sax->getParameterEntity != NULL))
6961: entity = ctxt->sax->getParameterEntity(ctxt->userData,
6962: name);
1.45 daniel 6963: if (entity == NULL) {
1.98 daniel 6964: /*
6965: * [ WFC: Entity Declared ]
6966: * In a document without any DTD, a document with only an
6967: * internal DTD subset which contains no parameter entity
6968: * references, or a document with "standalone='yes'", ...
6969: * ... The declaration of a parameter entity must precede
6970: * any reference to it...
6971: */
6972: if ((ctxt->standalone == 1) ||
6973: ((ctxt->hasExternalSubset == 0) &&
6974: (ctxt->hasPErefs == 0))) {
6975: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6976: ctxt->sax->error(ctxt->userData,
6977: "PEReference: %%%s; not found\n", name);
1.123 daniel 6978: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.98 daniel 6979: ctxt->wellFormed = 0;
1.180 daniel 6980: ctxt->disableSAX = 1;
1.98 daniel 6981: } else {
6982: /*
6983: * [ VC: Entity Declared ]
6984: * In a document with an external subset or external
6985: * parameter entities with "standalone='no'", ...
6986: * ... The declaration of a parameter entity must precede
6987: * any reference to it...
6988: */
6989: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6990: ctxt->sax->warning(ctxt->userData,
6991: "PEReference: %%%s; not found\n", name);
6992: ctxt->valid = 0;
6993: }
1.50 daniel 6994: } else {
1.98 daniel 6995: /*
6996: * Internal checking in case the entity quest barfed
6997: */
1.159 daniel 6998: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6999: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.98 daniel 7000: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7001: ctxt->sax->warning(ctxt->userData,
7002: "Internal: %%%s; is not a parameter entity\n", name);
7003: } else {
1.164 daniel 7004: /*
7005: * TODO !!!
7006: * handle the extra spaces added before and after
7007: * c.f. http://www.w3.org/TR/REC-xml#as-PE
7008: */
1.98 daniel 7009: input = xmlNewEntityInputStream(ctxt, entity);
7010: xmlPushInput(ctxt, input);
1.164 daniel 7011: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7012: (RAW == '<') && (NXT(1) == '?') &&
7013: (NXT(2) == 'x') && (NXT(3) == 'm') &&
7014: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 7015: xmlParseTextDecl(ctxt);
1.193 daniel 7016: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7017: /*
7018: * The XML REC instructs us to stop parsing
7019: * right here
7020: */
7021: ctxt->instate = XML_PARSER_EOF;
7022: xmlFree(name);
7023: return;
7024: }
1.164 daniel 7025: }
7026: if (ctxt->token == 0)
7027: ctxt->token = ' ';
1.98 daniel 7028: }
1.45 daniel 7029: }
1.98 daniel 7030: ctxt->hasPErefs = 1;
1.22 daniel 7031: } else {
1.55 daniel 7032: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7033: ctxt->sax->error(ctxt->userData,
1.59 daniel 7034: "xmlParsePEReference: expecting ';'\n");
1.123 daniel 7035: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.59 daniel 7036: ctxt->wellFormed = 0;
1.180 daniel 7037: ctxt->disableSAX = 1;
1.22 daniel 7038: }
1.119 daniel 7039: xmlFree(name);
1.3 veillard 7040: }
7041: }
7042: }
7043:
1.50 daniel 7044: /**
1.135 daniel 7045: * xmlParseStringPEReference:
7046: * @ctxt: an XML parser context
7047: * @str: a pointer to an index in the string
7048: *
7049: * parse PEReference declarations
7050: *
7051: * [69] PEReference ::= '%' Name ';'
7052: *
7053: * [ WFC: No Recursion ]
7054: * TODO A parsed entity must not contain a recursive
7055: * reference to itself, either directly or indirectly.
7056: *
7057: * [ WFC: Entity Declared ]
7058: * In a document without any DTD, a document with only an internal DTD
7059: * subset which contains no parameter entity references, or a document
7060: * with "standalone='yes'", ... ... The declaration of a parameter
7061: * entity must precede any reference to it...
7062: *
7063: * [ VC: Entity Declared ]
7064: * In a document with an external subset or external parameter entities
7065: * with "standalone='no'", ... ... The declaration of a parameter entity
7066: * must precede any reference to it...
7067: *
7068: * [ WFC: In DTD ]
7069: * Parameter-entity references may only appear in the DTD.
7070: * NOTE: misleading but this is handled.
7071: *
7072: * Returns the string of the entity content.
7073: * str is updated to the current value of the index
7074: */
7075: xmlEntityPtr
7076: xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7077: const xmlChar *ptr;
7078: xmlChar cur;
7079: xmlChar *name;
7080: xmlEntityPtr entity = NULL;
7081:
7082: if ((str == NULL) || (*str == NULL)) return(NULL);
7083: ptr = *str;
7084: cur = *ptr;
7085: if (cur == '%') {
7086: ptr++;
7087: cur = *ptr;
7088: name = xmlParseStringName(ctxt, &ptr);
7089: if (name == NULL) {
7090: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7091: ctxt->sax->error(ctxt->userData,
7092: "xmlParseStringPEReference: no name\n");
7093: ctxt->errNo = XML_ERR_NAME_REQUIRED;
7094: ctxt->wellFormed = 0;
1.180 daniel 7095: ctxt->disableSAX = 1;
1.135 daniel 7096: } else {
7097: cur = *ptr;
7098: if (cur == ';') {
7099: ptr++;
7100: cur = *ptr;
7101: if ((ctxt->sax != NULL) &&
7102: (ctxt->sax->getParameterEntity != NULL))
7103: entity = ctxt->sax->getParameterEntity(ctxt->userData,
7104: name);
7105: if (entity == NULL) {
7106: /*
7107: * [ WFC: Entity Declared ]
7108: * In a document without any DTD, a document with only an
7109: * internal DTD subset which contains no parameter entity
7110: * references, or a document with "standalone='yes'", ...
7111: * ... The declaration of a parameter entity must precede
7112: * any reference to it...
7113: */
7114: if ((ctxt->standalone == 1) ||
7115: ((ctxt->hasExternalSubset == 0) &&
7116: (ctxt->hasPErefs == 0))) {
7117: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7118: ctxt->sax->error(ctxt->userData,
7119: "PEReference: %%%s; not found\n", name);
7120: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
7121: ctxt->wellFormed = 0;
1.180 daniel 7122: ctxt->disableSAX = 1;
1.135 daniel 7123: } else {
7124: /*
7125: * [ VC: Entity Declared ]
7126: * In a document with an external subset or external
7127: * parameter entities with "standalone='no'", ...
7128: * ... The declaration of a parameter entity must
7129: * precede any reference to it...
7130: */
7131: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7132: ctxt->sax->warning(ctxt->userData,
7133: "PEReference: %%%s; not found\n", name);
7134: ctxt->valid = 0;
7135: }
7136: } else {
7137: /*
7138: * Internal checking in case the entity quest barfed
7139: */
1.159 daniel 7140: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7141: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.135 daniel 7142: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7143: ctxt->sax->warning(ctxt->userData,
7144: "Internal: %%%s; is not a parameter entity\n", name);
7145: }
7146: }
7147: ctxt->hasPErefs = 1;
7148: } else {
7149: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7150: ctxt->sax->error(ctxt->userData,
7151: "xmlParseStringPEReference: expecting ';'\n");
7152: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
7153: ctxt->wellFormed = 0;
1.180 daniel 7154: ctxt->disableSAX = 1;
1.135 daniel 7155: }
7156: xmlFree(name);
7157: }
7158: }
7159: *str = ptr;
7160: return(entity);
7161: }
7162:
7163: /**
1.181 daniel 7164: * xmlParseDocTypeDecl:
1.50 daniel 7165: * @ctxt: an XML parser context
7166: *
7167: * parse a DOCTYPE declaration
1.21 daniel 7168: *
1.22 daniel 7169: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7170: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.98 daniel 7171: *
7172: * [ VC: Root Element Type ]
1.99 daniel 7173: * The Name in the document type declaration must match the element
1.98 daniel 7174: * type of the root element.
1.21 daniel 7175: */
7176:
1.55 daniel 7177: void
7178: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.165 daniel 7179: xmlChar *name = NULL;
1.123 daniel 7180: xmlChar *ExternalID = NULL;
7181: xmlChar *URI = NULL;
1.21 daniel 7182:
7183: /*
7184: * We know that '<!DOCTYPE' has been detected.
7185: */
1.40 daniel 7186: SKIP(9);
1.21 daniel 7187:
1.42 daniel 7188: SKIP_BLANKS;
1.21 daniel 7189:
7190: /*
7191: * Parse the DOCTYPE name.
7192: */
7193: name = xmlParseName(ctxt);
7194: if (name == NULL) {
1.55 daniel 7195: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7196: ctxt->sax->error(ctxt->userData,
7197: "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.59 daniel 7198: ctxt->wellFormed = 0;
1.180 daniel 7199: ctxt->disableSAX = 1;
1.123 daniel 7200: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.21 daniel 7201: }
1.165 daniel 7202: ctxt->intSubName = name;
1.21 daniel 7203:
1.42 daniel 7204: SKIP_BLANKS;
1.21 daniel 7205:
7206: /*
1.22 daniel 7207: * Check for SystemID and ExternalID
7208: */
1.67 daniel 7209: URI = xmlParseExternalID(ctxt, &ExternalID, 1);
1.98 daniel 7210:
7211: if ((URI != NULL) || (ExternalID != NULL)) {
7212: ctxt->hasExternalSubset = 1;
7213: }
1.165 daniel 7214: ctxt->extSubURI = URI;
7215: ctxt->extSubSystem = ExternalID;
1.98 daniel 7216:
1.42 daniel 7217: SKIP_BLANKS;
1.36 daniel 7218:
1.76 daniel 7219: /*
1.165 daniel 7220: * Create and update the internal subset.
1.76 daniel 7221: */
1.171 daniel 7222: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7223: (!ctxt->disableSAX))
1.74 daniel 7224: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
1.22 daniel 7225:
7226: /*
1.140 daniel 7227: * Is there any internal subset declarations ?
7228: * they are handled separately in xmlParseInternalSubset()
7229: */
1.152 daniel 7230: if (RAW == '[')
1.140 daniel 7231: return;
7232:
7233: /*
7234: * We should be at the end of the DOCTYPE declaration.
7235: */
1.152 daniel 7236: if (RAW != '>') {
1.140 daniel 7237: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7238: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
7239: ctxt->wellFormed = 0;
1.180 daniel 7240: ctxt->disableSAX = 1;
1.140 daniel 7241: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
7242: }
7243: NEXT;
7244: }
7245:
7246: /**
1.181 daniel 7247: * xmlParseInternalsubset:
1.140 daniel 7248: * @ctxt: an XML parser context
7249: *
7250: * parse the internal subset declaration
7251: *
7252: * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7253: */
7254:
7255: void
7256: xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7257: /*
1.22 daniel 7258: * Is there any DTD definition ?
7259: */
1.152 daniel 7260: if (RAW == '[') {
1.96 daniel 7261: ctxt->instate = XML_PARSER_DTD;
1.40 daniel 7262: NEXT;
1.22 daniel 7263: /*
7264: * Parse the succession of Markup declarations and
7265: * PEReferences.
7266: * Subsequence (markupdecl | PEReference | S)*
7267: */
1.152 daniel 7268: while (RAW != ']') {
1.123 daniel 7269: const xmlChar *check = CUR_PTR;
1.115 daniel 7270: int cons = ctxt->input->consumed;
1.22 daniel 7271:
1.42 daniel 7272: SKIP_BLANKS;
1.22 daniel 7273: xmlParseMarkupDecl(ctxt);
1.50 daniel 7274: xmlParsePEReference(ctxt);
1.22 daniel 7275:
1.115 daniel 7276: /*
7277: * Pop-up of finished entities.
7278: */
1.152 daniel 7279: while ((RAW == 0) && (ctxt->inputNr > 1))
1.115 daniel 7280: xmlPopInput(ctxt);
7281:
1.118 daniel 7282: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
1.55 daniel 7283: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7284: ctxt->sax->error(ctxt->userData,
1.140 daniel 7285: "xmlParseInternalSubset: error detected in Markup declaration\n");
1.59 daniel 7286: ctxt->wellFormed = 0;
1.180 daniel 7287: ctxt->disableSAX = 1;
1.123 daniel 7288: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.22 daniel 7289: break;
7290: }
7291: }
1.152 daniel 7292: if (RAW == ']') NEXT;
1.22 daniel 7293: }
7294:
7295: /*
7296: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 7297: */
1.152 daniel 7298: if (RAW != '>') {
1.55 daniel 7299: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7300: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
1.59 daniel 7301: ctxt->wellFormed = 0;
1.180 daniel 7302: ctxt->disableSAX = 1;
1.123 daniel 7303: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
1.21 daniel 7304: }
1.40 daniel 7305: NEXT;
1.21 daniel 7306: }
7307:
1.50 daniel 7308: /**
7309: * xmlParseAttribute:
7310: * @ctxt: an XML parser context
1.123 daniel 7311: * @value: a xmlChar ** used to store the value of the attribute
1.50 daniel 7312: *
7313: * parse an attribute
1.3 veillard 7314: *
1.22 daniel 7315: * [41] Attribute ::= Name Eq AttValue
7316: *
1.98 daniel 7317: * [ WFC: No External Entity References ]
7318: * Attribute values cannot contain direct or indirect entity references
7319: * to external entities.
7320: *
7321: * [ WFC: No < in Attribute Values ]
7322: * The replacement text of any entity referred to directly or indirectly in
7323: * an attribute value (other than "<") must not contain a <.
7324: *
7325: * [ VC: Attribute Value Type ]
1.117 daniel 7326: * The attribute must have been declared; the value must be of the type
1.99 daniel 7327: * declared for it.
1.98 daniel 7328: *
1.22 daniel 7329: * [25] Eq ::= S? '=' S?
7330: *
1.29 daniel 7331: * With namespace:
7332: *
7333: * [NS 11] Attribute ::= QName Eq AttValue
1.43 daniel 7334: *
7335: * Also the case QName == xmlns:??? is handled independently as a namespace
7336: * definition.
1.69 daniel 7337: *
1.72 daniel 7338: * Returns the attribute name, and the value in *value.
1.3 veillard 7339: */
7340:
1.123 daniel 7341: xmlChar *
7342: xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
7343: xmlChar *name, *val;
1.3 veillard 7344:
1.72 daniel 7345: *value = NULL;
7346: name = xmlParseName(ctxt);
1.22 daniel 7347: if (name == NULL) {
1.55 daniel 7348: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7349: ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
1.59 daniel 7350: ctxt->wellFormed = 0;
1.180 daniel 7351: ctxt->disableSAX = 1;
1.123 daniel 7352: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.52 daniel 7353: return(NULL);
1.3 veillard 7354: }
7355:
7356: /*
1.29 daniel 7357: * read the value
1.3 veillard 7358: */
1.42 daniel 7359: SKIP_BLANKS;
1.152 daniel 7360: if (RAW == '=') {
1.40 daniel 7361: NEXT;
1.42 daniel 7362: SKIP_BLANKS;
1.72 daniel 7363: val = xmlParseAttValue(ctxt);
1.96 daniel 7364: ctxt->instate = XML_PARSER_CONTENT;
1.29 daniel 7365: } else {
1.55 daniel 7366: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7367: ctxt->sax->error(ctxt->userData,
1.59 daniel 7368: "Specification mandate value for attribute %s\n", name);
1.123 daniel 7369: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
1.59 daniel 7370: ctxt->wellFormed = 0;
1.180 daniel 7371: ctxt->disableSAX = 1;
1.170 daniel 7372: xmlFree(name);
1.52 daniel 7373: return(NULL);
1.43 daniel 7374: }
7375:
1.172 daniel 7376: /*
7377: * Check that xml:lang conforms to the specification
7378: */
7379: if (!xmlStrcmp(name, BAD_CAST "xml:lang")) {
7380: if (!xmlCheckLanguageID(val)) {
7381: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7382: ctxt->sax->error(ctxt->userData,
7383: "Invalid value for xml:lang : %s\n", val);
7384: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
7385: ctxt->wellFormed = 0;
1.180 daniel 7386: ctxt->disableSAX = 1;
1.172 daniel 7387: }
7388: }
7389:
1.176 daniel 7390: /*
7391: * Check that xml:space conforms to the specification
7392: */
7393: if (!xmlStrcmp(name, BAD_CAST "xml:space")) {
7394: if (!xmlStrcmp(val, BAD_CAST "default"))
7395: *(ctxt->space) = 0;
7396: else if (!xmlStrcmp(val, BAD_CAST "preserve"))
7397: *(ctxt->space) = 1;
7398: else {
7399: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7400: ctxt->sax->error(ctxt->userData,
7401: "Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
7402: val);
7403: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
7404: ctxt->wellFormed = 0;
1.180 daniel 7405: ctxt->disableSAX = 1;
1.176 daniel 7406: }
7407: }
7408:
1.72 daniel 7409: *value = val;
7410: return(name);
1.3 veillard 7411: }
7412:
1.50 daniel 7413: /**
7414: * xmlParseStartTag:
7415: * @ctxt: an XML parser context
7416: *
7417: * parse a start of tag either for rule element or
7418: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 7419: *
7420: * [40] STag ::= '<' Name (S Attribute)* S? '>'
7421: *
1.98 daniel 7422: * [ WFC: Unique Att Spec ]
7423: * No attribute name may appear more than once in the same start-tag or
7424: * empty-element tag.
7425: *
1.29 daniel 7426: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7427: *
1.98 daniel 7428: * [ WFC: Unique Att Spec ]
7429: * No attribute name may appear more than once in the same start-tag or
7430: * empty-element tag.
7431: *
1.29 daniel 7432: * With namespace:
7433: *
7434: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7435: *
7436: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.83 daniel 7437: *
1.192 daniel 7438: * Returns the element name parsed
1.2 veillard 7439: */
7440:
1.123 daniel 7441: xmlChar *
1.69 daniel 7442: xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 7443: xmlChar *name;
7444: xmlChar *attname;
7445: xmlChar *attvalue;
7446: const xmlChar **atts = NULL;
1.72 daniel 7447: int nbatts = 0;
7448: int maxatts = 0;
7449: int i;
1.2 veillard 7450:
1.152 daniel 7451: if (RAW != '<') return(NULL);
1.40 daniel 7452: NEXT;
1.3 veillard 7453:
1.72 daniel 7454: name = xmlParseName(ctxt);
1.59 daniel 7455: if (name == NULL) {
7456: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7457: ctxt->sax->error(ctxt->userData,
1.59 daniel 7458: "xmlParseStartTag: invalid element name\n");
1.123 daniel 7459: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 7460: ctxt->wellFormed = 0;
1.180 daniel 7461: ctxt->disableSAX = 1;
1.83 daniel 7462: return(NULL);
1.50 daniel 7463: }
7464:
7465: /*
1.3 veillard 7466: * Now parse the attributes, it ends up with the ending
7467: *
7468: * (S Attribute)* S?
7469: */
1.42 daniel 7470: SKIP_BLANKS;
1.91 daniel 7471: GROW;
1.168 daniel 7472:
1.153 daniel 7473: while ((IS_CHAR(RAW)) &&
1.152 daniel 7474: (RAW != '>') &&
7475: ((RAW != '/') || (NXT(1) != '>'))) {
1.123 daniel 7476: const xmlChar *q = CUR_PTR;
1.91 daniel 7477: int cons = ctxt->input->consumed;
1.29 daniel 7478:
1.72 daniel 7479: attname = xmlParseAttribute(ctxt, &attvalue);
7480: if ((attname != NULL) && (attvalue != NULL)) {
7481: /*
1.98 daniel 7482: * [ WFC: Unique Att Spec ]
7483: * No attribute name may appear more than once in the same
7484: * start-tag or empty-element tag.
1.72 daniel 7485: */
7486: for (i = 0; i < nbatts;i += 2) {
7487: if (!xmlStrcmp(atts[i], attname)) {
7488: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 7489: ctxt->sax->error(ctxt->userData,
7490: "Attribute %s redefined\n",
7491: attname);
1.72 daniel 7492: ctxt->wellFormed = 0;
1.180 daniel 7493: ctxt->disableSAX = 1;
1.123 daniel 7494: ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
1.119 daniel 7495: xmlFree(attname);
7496: xmlFree(attvalue);
1.98 daniel 7497: goto failed;
1.72 daniel 7498: }
7499: }
7500:
7501: /*
7502: * Add the pair to atts
7503: */
7504: if (atts == NULL) {
7505: maxatts = 10;
1.123 daniel 7506: atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
1.72 daniel 7507: if (atts == NULL) {
1.86 daniel 7508: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 7509: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 7510: return(NULL);
1.72 daniel 7511: }
1.127 daniel 7512: } else if (nbatts + 4 > maxatts) {
1.72 daniel 7513: maxatts *= 2;
1.123 daniel 7514: atts = (const xmlChar **) xmlRealloc(atts,
7515: maxatts * sizeof(xmlChar *));
1.72 daniel 7516: if (atts == NULL) {
1.86 daniel 7517: fprintf(stderr, "realloc of %ld byte failed\n",
1.123 daniel 7518: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 7519: return(NULL);
1.72 daniel 7520: }
7521: }
7522: atts[nbatts++] = attname;
7523: atts[nbatts++] = attvalue;
7524: atts[nbatts] = NULL;
7525: atts[nbatts + 1] = NULL;
1.176 daniel 7526: } else {
7527: if (attname != NULL)
7528: xmlFree(attname);
7529: if (attvalue != NULL)
7530: xmlFree(attvalue);
1.72 daniel 7531: }
7532:
1.116 daniel 7533: failed:
1.168 daniel 7534:
7535: if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7536: break;
7537: if (!IS_BLANK(RAW)) {
7538: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7539: ctxt->sax->error(ctxt->userData,
7540: "attributes construct error\n");
7541: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7542: ctxt->wellFormed = 0;
1.180 daniel 7543: ctxt->disableSAX = 1;
1.168 daniel 7544: }
1.42 daniel 7545: SKIP_BLANKS;
1.91 daniel 7546: if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
1.55 daniel 7547: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7548: ctxt->sax->error(ctxt->userData,
1.31 daniel 7549: "xmlParseStartTag: problem parsing attributes\n");
1.123 daniel 7550: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 7551: ctxt->wellFormed = 0;
1.180 daniel 7552: ctxt->disableSAX = 1;
1.29 daniel 7553: break;
1.3 veillard 7554: }
1.91 daniel 7555: GROW;
1.3 veillard 7556: }
7557:
1.43 daniel 7558: /*
1.72 daniel 7559: * SAX: Start of Element !
1.43 daniel 7560: */
1.171 daniel 7561: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
7562: (!ctxt->disableSAX))
1.74 daniel 7563: ctxt->sax->startElement(ctxt->userData, name, atts);
1.43 daniel 7564:
1.72 daniel 7565: if (atts != NULL) {
1.123 daniel 7566: for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
1.119 daniel 7567: xmlFree(atts);
1.72 daniel 7568: }
1.83 daniel 7569: return(name);
1.3 veillard 7570: }
7571:
1.50 daniel 7572: /**
7573: * xmlParseEndTag:
7574: * @ctxt: an XML parser context
7575: *
7576: * parse an end of tag
1.27 daniel 7577: *
7578: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 7579: *
7580: * With namespace
7581: *
1.72 daniel 7582: * [NS 9] ETag ::= '</' QName S? '>'
1.7 veillard 7583: */
7584:
1.55 daniel 7585: void
1.140 daniel 7586: xmlParseEndTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 7587: xmlChar *name;
1.140 daniel 7588: xmlChar *oldname;
1.7 veillard 7589:
1.91 daniel 7590: GROW;
1.152 daniel 7591: if ((RAW != '<') || (NXT(1) != '/')) {
1.55 daniel 7592: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7593: ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
1.59 daniel 7594: ctxt->wellFormed = 0;
1.180 daniel 7595: ctxt->disableSAX = 1;
1.123 daniel 7596: ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
1.27 daniel 7597: return;
7598: }
1.40 daniel 7599: SKIP(2);
1.7 veillard 7600:
1.72 daniel 7601: name = xmlParseName(ctxt);
1.7 veillard 7602:
7603: /*
7604: * We should definitely be at the ending "S? '>'" part
7605: */
1.91 daniel 7606: GROW;
1.42 daniel 7607: SKIP_BLANKS;
1.153 daniel 7608: if ((!IS_CHAR(RAW)) || (RAW != '>')) {
1.55 daniel 7609: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7610: ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
1.123 daniel 7611: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.59 daniel 7612: ctxt->wellFormed = 0;
1.180 daniel 7613: ctxt->disableSAX = 1;
1.7 veillard 7614: } else
1.40 daniel 7615: NEXT;
1.7 veillard 7616:
1.72 daniel 7617: /*
1.98 daniel 7618: * [ WFC: Element Type Match ]
7619: * The Name in an element's end-tag must match the element type in the
7620: * start-tag.
7621: *
1.83 daniel 7622: */
1.147 daniel 7623: if ((name == NULL) || (ctxt->name == NULL) ||
7624: (xmlStrcmp(name, ctxt->name))) {
7625: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
7626: if ((name != NULL) && (ctxt->name != NULL)) {
7627: ctxt->sax->error(ctxt->userData,
7628: "Opening and ending tag mismatch: %s and %s\n",
7629: ctxt->name, name);
7630: } else if (ctxt->name != NULL) {
7631: ctxt->sax->error(ctxt->userData,
7632: "Ending tag eror for: %s\n", ctxt->name);
7633: } else {
7634: ctxt->sax->error(ctxt->userData,
7635: "Ending tag error: internal error ???\n");
7636: }
1.122 daniel 7637:
1.147 daniel 7638: }
1.123 daniel 7639: ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
1.83 daniel 7640: ctxt->wellFormed = 0;
1.180 daniel 7641: ctxt->disableSAX = 1;
1.83 daniel 7642: }
7643:
7644: /*
1.72 daniel 7645: * SAX: End of Tag
7646: */
1.171 daniel 7647: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7648: (!ctxt->disableSAX))
1.74 daniel 7649: ctxt->sax->endElement(ctxt->userData, name);
1.72 daniel 7650:
7651: if (name != NULL)
1.119 daniel 7652: xmlFree(name);
1.140 daniel 7653: oldname = namePop(ctxt);
1.176 daniel 7654: spacePop(ctxt);
1.140 daniel 7655: if (oldname != NULL) {
7656: #ifdef DEBUG_STACK
7657: fprintf(stderr,"Close: popped %s\n", oldname);
7658: #endif
7659: xmlFree(oldname);
7660: }
1.7 veillard 7661: return;
7662: }
7663:
1.50 daniel 7664: /**
7665: * xmlParseCDSect:
7666: * @ctxt: an XML parser context
7667: *
7668: * Parse escaped pure raw content.
1.29 daniel 7669: *
7670: * [18] CDSect ::= CDStart CData CDEnd
7671: *
7672: * [19] CDStart ::= '<![CDATA['
7673: *
7674: * [20] Data ::= (Char* - (Char* ']]>' Char*))
7675: *
7676: * [21] CDEnd ::= ']]>'
1.3 veillard 7677: */
1.55 daniel 7678: void
7679: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.135 daniel 7680: xmlChar *buf = NULL;
7681: int len = 0;
1.140 daniel 7682: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 7683: int r, rl;
7684: int s, sl;
7685: int cur, l;
1.3 veillard 7686:
1.106 daniel 7687: if ((NXT(0) == '<') && (NXT(1) == '!') &&
1.40 daniel 7688: (NXT(2) == '[') && (NXT(3) == 'C') &&
7689: (NXT(4) == 'D') && (NXT(5) == 'A') &&
7690: (NXT(6) == 'T') && (NXT(7) == 'A') &&
7691: (NXT(8) == '[')) {
7692: SKIP(9);
1.29 daniel 7693: } else
1.45 daniel 7694: return;
1.109 daniel 7695:
7696: ctxt->instate = XML_PARSER_CDATA_SECTION;
1.152 daniel 7697: r = CUR_CHAR(rl);
7698: if (!IS_CHAR(r)) {
1.55 daniel 7699: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7700: ctxt->sax->error(ctxt->userData,
1.135 daniel 7701: "CData section not finished\n");
1.59 daniel 7702: ctxt->wellFormed = 0;
1.180 daniel 7703: ctxt->disableSAX = 1;
1.123 daniel 7704: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.109 daniel 7705: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 7706: return;
1.3 veillard 7707: }
1.152 daniel 7708: NEXTL(rl);
7709: s = CUR_CHAR(sl);
7710: if (!IS_CHAR(s)) {
1.55 daniel 7711: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7712: ctxt->sax->error(ctxt->userData,
1.135 daniel 7713: "CData section not finished\n");
1.123 daniel 7714: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.59 daniel 7715: ctxt->wellFormed = 0;
1.180 daniel 7716: ctxt->disableSAX = 1;
1.109 daniel 7717: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 7718: return;
1.3 veillard 7719: }
1.152 daniel 7720: NEXTL(sl);
7721: cur = CUR_CHAR(l);
1.135 daniel 7722: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7723: if (buf == NULL) {
7724: fprintf(stderr, "malloc of %d byte failed\n", size);
7725: return;
7726: }
1.108 veillard 7727: while (IS_CHAR(cur) &&
1.110 daniel 7728: ((r != ']') || (s != ']') || (cur != '>'))) {
1.152 daniel 7729: if (len + 5 >= size) {
1.135 daniel 7730: size *= 2;
7731: buf = xmlRealloc(buf, size * sizeof(xmlChar));
7732: if (buf == NULL) {
7733: fprintf(stderr, "realloc of %d byte failed\n", size);
7734: return;
7735: }
7736: }
1.152 daniel 7737: COPY_BUF(rl,buf,len,r);
1.110 daniel 7738: r = s;
1.152 daniel 7739: rl = sl;
1.110 daniel 7740: s = cur;
1.152 daniel 7741: sl = l;
7742: NEXTL(l);
7743: cur = CUR_CHAR(l);
1.3 veillard 7744: }
1.135 daniel 7745: buf[len] = 0;
1.109 daniel 7746: ctxt->instate = XML_PARSER_CONTENT;
1.152 daniel 7747: if (cur != '>') {
1.55 daniel 7748: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7749: ctxt->sax->error(ctxt->userData,
1.135 daniel 7750: "CData section not finished\n%.50s\n", buf);
1.123 daniel 7751: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.59 daniel 7752: ctxt->wellFormed = 0;
1.180 daniel 7753: ctxt->disableSAX = 1;
1.135 daniel 7754: xmlFree(buf);
1.45 daniel 7755: return;
1.3 veillard 7756: }
1.152 daniel 7757: NEXTL(l);
1.16 daniel 7758:
1.45 daniel 7759: /*
1.135 daniel 7760: * Ok the buffer is to be consumed as cdata.
1.45 daniel 7761: */
1.171 daniel 7762: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.107 daniel 7763: if (ctxt->sax->cdataBlock != NULL)
1.135 daniel 7764: ctxt->sax->cdataBlock(ctxt->userData, buf, len);
1.45 daniel 7765: }
1.135 daniel 7766: xmlFree(buf);
1.2 veillard 7767: }
7768:
1.50 daniel 7769: /**
7770: * xmlParseContent:
7771: * @ctxt: an XML parser context
7772: *
7773: * Parse a content:
1.2 veillard 7774: *
1.27 daniel 7775: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 7776: */
7777:
1.55 daniel 7778: void
7779: xmlParseContent(xmlParserCtxtPtr ctxt) {
1.97 daniel 7780: GROW;
1.176 daniel 7781: while (((RAW != 0) || (ctxt->token != 0)) &&
7782: ((RAW != '<') || (NXT(1) != '/'))) {
1.123 daniel 7783: const xmlChar *test = CUR_PTR;
1.91 daniel 7784: int cons = ctxt->input->consumed;
1.123 daniel 7785: xmlChar tok = ctxt->token;
1.27 daniel 7786:
7787: /*
1.152 daniel 7788: * Handle possible processed charrefs.
7789: */
7790: if (ctxt->token != 0) {
7791: xmlParseCharData(ctxt, 0);
7792: }
7793: /*
1.27 daniel 7794: * First case : a Processing Instruction.
7795: */
1.152 daniel 7796: else if ((RAW == '<') && (NXT(1) == '?')) {
1.27 daniel 7797: xmlParsePI(ctxt);
7798: }
1.72 daniel 7799:
1.27 daniel 7800: /*
7801: * Second case : a CDSection
7802: */
1.152 daniel 7803: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7804: (NXT(2) == '[') && (NXT(3) == 'C') &&
7805: (NXT(4) == 'D') && (NXT(5) == 'A') &&
7806: (NXT(6) == 'T') && (NXT(7) == 'A') &&
7807: (NXT(8) == '[')) {
1.45 daniel 7808: xmlParseCDSect(ctxt);
1.27 daniel 7809: }
1.72 daniel 7810:
1.27 daniel 7811: /*
7812: * Third case : a comment
7813: */
1.152 daniel 7814: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7815: (NXT(2) == '-') && (NXT(3) == '-')) {
1.114 daniel 7816: xmlParseComment(ctxt);
1.97 daniel 7817: ctxt->instate = XML_PARSER_CONTENT;
1.27 daniel 7818: }
1.72 daniel 7819:
1.27 daniel 7820: /*
7821: * Fourth case : a sub-element.
7822: */
1.152 daniel 7823: else if (RAW == '<') {
1.72 daniel 7824: xmlParseElement(ctxt);
1.45 daniel 7825: }
1.72 daniel 7826:
1.45 daniel 7827: /*
1.50 daniel 7828: * Fifth case : a reference. If if has not been resolved,
7829: * parsing returns it's Name, create the node
1.45 daniel 7830: */
1.97 daniel 7831:
1.152 daniel 7832: else if (RAW == '&') {
1.77 daniel 7833: xmlParseReference(ctxt);
1.27 daniel 7834: }
1.72 daniel 7835:
1.27 daniel 7836: /*
7837: * Last case, text. Note that References are handled directly.
7838: */
7839: else {
1.45 daniel 7840: xmlParseCharData(ctxt, 0);
1.3 veillard 7841: }
1.14 veillard 7842:
1.91 daniel 7843: GROW;
1.14 veillard 7844: /*
1.45 daniel 7845: * Pop-up of finished entities.
1.14 veillard 7846: */
1.152 daniel 7847: while ((RAW == 0) && (ctxt->inputNr > 1))
1.69 daniel 7848: xmlPopInput(ctxt);
1.135 daniel 7849: SHRINK;
1.45 daniel 7850:
1.113 daniel 7851: if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
7852: (tok == ctxt->token)) {
1.55 daniel 7853: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7854: ctxt->sax->error(ctxt->userData,
1.59 daniel 7855: "detected an error in element content\n");
1.123 daniel 7856: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 7857: ctxt->wellFormed = 0;
1.180 daniel 7858: ctxt->disableSAX = 1;
1.29 daniel 7859: break;
7860: }
1.3 veillard 7861: }
1.2 veillard 7862: }
7863:
1.50 daniel 7864: /**
7865: * xmlParseElement:
7866: * @ctxt: an XML parser context
7867: *
7868: * parse an XML element, this is highly recursive
1.26 daniel 7869: *
7870: * [39] element ::= EmptyElemTag | STag content ETag
7871: *
1.98 daniel 7872: * [ WFC: Element Type Match ]
7873: * The Name in an element's end-tag must match the element type in the
7874: * start-tag.
7875: *
7876: * [ VC: Element Valid ]
1.117 daniel 7877: * An element is valid if there is a declaration matching elementdecl
1.99 daniel 7878: * where the Name matches the element type and one of the following holds:
7879: * - The declaration matches EMPTY and the element has no content.
7880: * - The declaration matches children and the sequence of child elements
7881: * belongs to the language generated by the regular expression in the
7882: * content model, with optional white space (characters matching the
7883: * nonterminal S) between each pair of child elements.
7884: * - The declaration matches Mixed and the content consists of character
7885: * data and child elements whose types match names in the content model.
7886: * - The declaration matches ANY, and the types of any child elements have
7887: * been declared.
1.2 veillard 7888: */
1.26 daniel 7889:
1.72 daniel 7890: void
1.69 daniel 7891: xmlParseElement(xmlParserCtxtPtr ctxt) {
1.123 daniel 7892: const xmlChar *openTag = CUR_PTR;
7893: xmlChar *name;
1.140 daniel 7894: xmlChar *oldname;
1.32 daniel 7895: xmlParserNodeInfo node_info;
1.118 daniel 7896: xmlNodePtr ret;
1.2 veillard 7897:
1.32 daniel 7898: /* Capture start position */
1.118 daniel 7899: if (ctxt->record_info) {
7900: node_info.begin_pos = ctxt->input->consumed +
7901: (CUR_PTR - ctxt->input->base);
7902: node_info.begin_line = ctxt->input->line;
7903: }
1.32 daniel 7904:
1.176 daniel 7905: if (ctxt->spaceNr == 0)
7906: spacePush(ctxt, -1);
7907: else
7908: spacePush(ctxt, *ctxt->space);
7909:
1.83 daniel 7910: name = xmlParseStartTag(ctxt);
7911: if (name == NULL) {
1.176 daniel 7912: spacePop(ctxt);
1.83 daniel 7913: return;
7914: }
1.140 daniel 7915: namePush(ctxt, name);
1.118 daniel 7916: ret = ctxt->node;
1.2 veillard 7917:
7918: /*
1.99 daniel 7919: * [ VC: Root Element Type ]
7920: * The Name in the document type declaration must match the element
7921: * type of the root element.
7922: */
1.105 daniel 7923: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 7924: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.102 daniel 7925: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
1.99 daniel 7926:
7927: /*
1.2 veillard 7928: * Check for an Empty Element.
7929: */
1.152 daniel 7930: if ((RAW == '/') && (NXT(1) == '>')) {
1.40 daniel 7931: SKIP(2);
1.171 daniel 7932: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7933: (!ctxt->disableSAX))
1.83 daniel 7934: ctxt->sax->endElement(ctxt->userData, name);
1.140 daniel 7935: oldname = namePop(ctxt);
1.176 daniel 7936: spacePop(ctxt);
1.140 daniel 7937: if (oldname != NULL) {
7938: #ifdef DEBUG_STACK
7939: fprintf(stderr,"Close: popped %s\n", oldname);
7940: #endif
7941: xmlFree(oldname);
7942: }
1.72 daniel 7943: return;
1.2 veillard 7944: }
1.152 daniel 7945: if (RAW == '>') {
1.91 daniel 7946: NEXT;
7947: } else {
1.55 daniel 7948: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7949: ctxt->sax->error(ctxt->userData,
7950: "Couldn't find end of Start Tag\n%.30s\n",
1.57 daniel 7951: openTag);
1.59 daniel 7952: ctxt->wellFormed = 0;
1.180 daniel 7953: ctxt->disableSAX = 1;
1.123 daniel 7954: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.45 daniel 7955:
7956: /*
7957: * end of parsing of this node.
7958: */
7959: nodePop(ctxt);
1.140 daniel 7960: oldname = namePop(ctxt);
1.176 daniel 7961: spacePop(ctxt);
1.140 daniel 7962: if (oldname != NULL) {
7963: #ifdef DEBUG_STACK
7964: fprintf(stderr,"Close: popped %s\n", oldname);
7965: #endif
7966: xmlFree(oldname);
7967: }
1.118 daniel 7968:
7969: /*
7970: * Capture end position and add node
7971: */
7972: if ( ret != NULL && ctxt->record_info ) {
7973: node_info.end_pos = ctxt->input->consumed +
7974: (CUR_PTR - ctxt->input->base);
7975: node_info.end_line = ctxt->input->line;
7976: node_info.node = ret;
7977: xmlParserAddNodeInfo(ctxt, &node_info);
7978: }
1.72 daniel 7979: return;
1.2 veillard 7980: }
7981:
7982: /*
7983: * Parse the content of the element:
7984: */
1.45 daniel 7985: xmlParseContent(ctxt);
1.153 daniel 7986: if (!IS_CHAR(RAW)) {
1.55 daniel 7987: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7988: ctxt->sax->error(ctxt->userData,
1.57 daniel 7989: "Premature end of data in tag %.30s\n", openTag);
1.59 daniel 7990: ctxt->wellFormed = 0;
1.180 daniel 7991: ctxt->disableSAX = 1;
1.123 daniel 7992: ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
1.45 daniel 7993:
7994: /*
7995: * end of parsing of this node.
7996: */
7997: nodePop(ctxt);
1.140 daniel 7998: oldname = namePop(ctxt);
1.176 daniel 7999: spacePop(ctxt);
1.140 daniel 8000: if (oldname != NULL) {
8001: #ifdef DEBUG_STACK
8002: fprintf(stderr,"Close: popped %s\n", oldname);
8003: #endif
8004: xmlFree(oldname);
8005: }
1.72 daniel 8006: return;
1.2 veillard 8007: }
8008:
8009: /*
1.27 daniel 8010: * parse the end of tag: '</' should be here.
1.2 veillard 8011: */
1.140 daniel 8012: xmlParseEndTag(ctxt);
1.118 daniel 8013:
8014: /*
8015: * Capture end position and add node
8016: */
8017: if ( ret != NULL && ctxt->record_info ) {
8018: node_info.end_pos = ctxt->input->consumed +
8019: (CUR_PTR - ctxt->input->base);
8020: node_info.end_line = ctxt->input->line;
8021: node_info.node = ret;
8022: xmlParserAddNodeInfo(ctxt, &node_info);
8023: }
1.2 veillard 8024: }
8025:
1.50 daniel 8026: /**
8027: * xmlParseVersionNum:
8028: * @ctxt: an XML parser context
8029: *
8030: * parse the XML version value.
1.29 daniel 8031: *
8032: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
1.68 daniel 8033: *
8034: * Returns the string giving the XML version number, or NULL
1.29 daniel 8035: */
1.123 daniel 8036: xmlChar *
1.55 daniel 8037: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.135 daniel 8038: xmlChar *buf = NULL;
8039: int len = 0;
8040: int size = 10;
8041: xmlChar cur;
1.29 daniel 8042:
1.135 daniel 8043: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
8044: if (buf == NULL) {
8045: fprintf(stderr, "malloc of %d byte failed\n", size);
8046: return(NULL);
8047: }
8048: cur = CUR;
1.152 daniel 8049: while (((cur >= 'a') && (cur <= 'z')) ||
8050: ((cur >= 'A') && (cur <= 'Z')) ||
8051: ((cur >= '0') && (cur <= '9')) ||
8052: (cur == '_') || (cur == '.') ||
8053: (cur == ':') || (cur == '-')) {
1.135 daniel 8054: if (len + 1 >= size) {
8055: size *= 2;
8056: buf = xmlRealloc(buf, size * sizeof(xmlChar));
8057: if (buf == NULL) {
8058: fprintf(stderr, "realloc of %d byte failed\n", size);
8059: return(NULL);
8060: }
8061: }
8062: buf[len++] = cur;
8063: NEXT;
8064: cur=CUR;
8065: }
8066: buf[len] = 0;
8067: return(buf);
1.29 daniel 8068: }
8069:
1.50 daniel 8070: /**
8071: * xmlParseVersionInfo:
8072: * @ctxt: an XML parser context
8073: *
8074: * parse the XML version.
1.29 daniel 8075: *
8076: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8077: *
8078: * [25] Eq ::= S? '=' S?
1.50 daniel 8079: *
1.68 daniel 8080: * Returns the version string, e.g. "1.0"
1.29 daniel 8081: */
8082:
1.123 daniel 8083: xmlChar *
1.55 daniel 8084: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
1.123 daniel 8085: xmlChar *version = NULL;
8086: const xmlChar *q;
1.29 daniel 8087:
1.152 daniel 8088: if ((RAW == 'v') && (NXT(1) == 'e') &&
1.40 daniel 8089: (NXT(2) == 'r') && (NXT(3) == 's') &&
8090: (NXT(4) == 'i') && (NXT(5) == 'o') &&
8091: (NXT(6) == 'n')) {
8092: SKIP(7);
1.42 daniel 8093: SKIP_BLANKS;
1.152 daniel 8094: if (RAW != '=') {
1.55 daniel 8095: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8096: ctxt->sax->error(ctxt->userData,
8097: "xmlParseVersionInfo : expected '='\n");
1.59 daniel 8098: ctxt->wellFormed = 0;
1.180 daniel 8099: ctxt->disableSAX = 1;
1.123 daniel 8100: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.31 daniel 8101: return(NULL);
8102: }
1.40 daniel 8103: NEXT;
1.42 daniel 8104: SKIP_BLANKS;
1.152 daniel 8105: if (RAW == '"') {
1.40 daniel 8106: NEXT;
8107: q = CUR_PTR;
1.29 daniel 8108: version = xmlParseVersionNum(ctxt);
1.152 daniel 8109: if (RAW != '"') {
1.55 daniel 8110: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8111: ctxt->sax->error(ctxt->userData,
8112: "String not closed\n%.50s\n", q);
1.59 daniel 8113: ctxt->wellFormed = 0;
1.180 daniel 8114: ctxt->disableSAX = 1;
1.123 daniel 8115: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8116: } else
1.40 daniel 8117: NEXT;
1.152 daniel 8118: } else if (RAW == '\''){
1.40 daniel 8119: NEXT;
8120: q = CUR_PTR;
1.29 daniel 8121: version = xmlParseVersionNum(ctxt);
1.152 daniel 8122: if (RAW != '\'') {
1.55 daniel 8123: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8124: ctxt->sax->error(ctxt->userData,
8125: "String not closed\n%.50s\n", q);
1.123 daniel 8126: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.59 daniel 8127: ctxt->wellFormed = 0;
1.180 daniel 8128: ctxt->disableSAX = 1;
1.55 daniel 8129: } else
1.40 daniel 8130: NEXT;
1.31 daniel 8131: } else {
1.55 daniel 8132: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8133: ctxt->sax->error(ctxt->userData,
1.59 daniel 8134: "xmlParseVersionInfo : expected ' or \"\n");
1.122 daniel 8135: ctxt->wellFormed = 0;
1.180 daniel 8136: ctxt->disableSAX = 1;
1.123 daniel 8137: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.29 daniel 8138: }
8139: }
8140: return(version);
8141: }
8142:
1.50 daniel 8143: /**
8144: * xmlParseEncName:
8145: * @ctxt: an XML parser context
8146: *
8147: * parse the XML encoding name
1.29 daniel 8148: *
8149: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1.50 daniel 8150: *
1.68 daniel 8151: * Returns the encoding name value or NULL
1.29 daniel 8152: */
1.123 daniel 8153: xmlChar *
1.55 daniel 8154: xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.135 daniel 8155: xmlChar *buf = NULL;
8156: int len = 0;
8157: int size = 10;
8158: xmlChar cur;
1.29 daniel 8159:
1.135 daniel 8160: cur = CUR;
8161: if (((cur >= 'a') && (cur <= 'z')) ||
8162: ((cur >= 'A') && (cur <= 'Z'))) {
8163: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
8164: if (buf == NULL) {
8165: fprintf(stderr, "malloc of %d byte failed\n", size);
8166: return(NULL);
8167: }
8168:
8169: buf[len++] = cur;
1.40 daniel 8170: NEXT;
1.135 daniel 8171: cur = CUR;
1.152 daniel 8172: while (((cur >= 'a') && (cur <= 'z')) ||
8173: ((cur >= 'A') && (cur <= 'Z')) ||
8174: ((cur >= '0') && (cur <= '9')) ||
8175: (cur == '.') || (cur == '_') ||
8176: (cur == '-')) {
1.135 daniel 8177: if (len + 1 >= size) {
8178: size *= 2;
8179: buf = xmlRealloc(buf, size * sizeof(xmlChar));
8180: if (buf == NULL) {
8181: fprintf(stderr, "realloc of %d byte failed\n", size);
8182: return(NULL);
8183: }
8184: }
8185: buf[len++] = cur;
8186: NEXT;
8187: cur = CUR;
8188: if (cur == 0) {
8189: SHRINK;
8190: GROW;
8191: cur = CUR;
8192: }
8193: }
8194: buf[len] = 0;
1.29 daniel 8195: } else {
1.55 daniel 8196: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8197: ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
1.59 daniel 8198: ctxt->wellFormed = 0;
1.180 daniel 8199: ctxt->disableSAX = 1;
1.123 daniel 8200: ctxt->errNo = XML_ERR_ENCODING_NAME;
1.29 daniel 8201: }
1.135 daniel 8202: return(buf);
1.29 daniel 8203: }
8204:
1.50 daniel 8205: /**
8206: * xmlParseEncodingDecl:
8207: * @ctxt: an XML parser context
8208: *
8209: * parse the XML encoding declaration
1.29 daniel 8210: *
8211: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
1.50 daniel 8212: *
8213: * TODO: this should setup the conversion filters.
8214: *
1.68 daniel 8215: * Returns the encoding value or NULL
1.29 daniel 8216: */
8217:
1.123 daniel 8218: xmlChar *
1.55 daniel 8219: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 8220: xmlChar *encoding = NULL;
8221: const xmlChar *q;
1.29 daniel 8222:
1.42 daniel 8223: SKIP_BLANKS;
1.152 daniel 8224: if ((RAW == 'e') && (NXT(1) == 'n') &&
1.40 daniel 8225: (NXT(2) == 'c') && (NXT(3) == 'o') &&
8226: (NXT(4) == 'd') && (NXT(5) == 'i') &&
8227: (NXT(6) == 'n') && (NXT(7) == 'g')) {
8228: SKIP(8);
1.42 daniel 8229: SKIP_BLANKS;
1.152 daniel 8230: if (RAW != '=') {
1.55 daniel 8231: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8232: ctxt->sax->error(ctxt->userData,
8233: "xmlParseEncodingDecl : expected '='\n");
1.59 daniel 8234: ctxt->wellFormed = 0;
1.180 daniel 8235: ctxt->disableSAX = 1;
1.123 daniel 8236: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.31 daniel 8237: return(NULL);
8238: }
1.40 daniel 8239: NEXT;
1.42 daniel 8240: SKIP_BLANKS;
1.152 daniel 8241: if (RAW == '"') {
1.40 daniel 8242: NEXT;
8243: q = CUR_PTR;
1.29 daniel 8244: encoding = xmlParseEncName(ctxt);
1.152 daniel 8245: if (RAW != '"') {
1.55 daniel 8246: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8247: ctxt->sax->error(ctxt->userData,
8248: "String not closed\n%.50s\n", q);
1.59 daniel 8249: ctxt->wellFormed = 0;
1.180 daniel 8250: ctxt->disableSAX = 1;
1.123 daniel 8251: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8252: } else
1.40 daniel 8253: NEXT;
1.152 daniel 8254: } else if (RAW == '\''){
1.40 daniel 8255: NEXT;
8256: q = CUR_PTR;
1.29 daniel 8257: encoding = xmlParseEncName(ctxt);
1.152 daniel 8258: if (RAW != '\'') {
1.55 daniel 8259: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8260: ctxt->sax->error(ctxt->userData,
8261: "String not closed\n%.50s\n", q);
1.59 daniel 8262: ctxt->wellFormed = 0;
1.180 daniel 8263: ctxt->disableSAX = 1;
1.123 daniel 8264: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8265: } else
1.40 daniel 8266: NEXT;
1.152 daniel 8267: } else if (RAW == '"'){
1.55 daniel 8268: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8269: ctxt->sax->error(ctxt->userData,
1.59 daniel 8270: "xmlParseEncodingDecl : expected ' or \"\n");
8271: ctxt->wellFormed = 0;
1.180 daniel 8272: ctxt->disableSAX = 1;
1.123 daniel 8273: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.29 daniel 8274: }
1.193 daniel 8275: if (encoding != NULL) {
8276: xmlCharEncoding enc;
8277: xmlCharEncodingHandlerPtr handler;
8278:
1.195 daniel 8279: if (ctxt->input->encoding != NULL)
8280: xmlFree((xmlChar *) ctxt->input->encoding);
8281: ctxt->input->encoding = encoding;
8282:
1.193 daniel 8283: enc = xmlParseCharEncoding((const char *) encoding);
8284: /*
8285: * registered set of known encodings
8286: */
8287: if (enc != XML_CHAR_ENCODING_ERROR) {
8288: xmlSwitchEncoding(ctxt, enc);
8289: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8290: xmlFree(encoding);
8291: return(NULL);
8292: }
8293: } else {
8294: /*
8295: * fallback for unknown encodings
8296: */
8297: handler = xmlFindCharEncodingHandler((const char *) encoding);
8298: if (handler != NULL) {
8299: xmlSwitchToEncoding(ctxt, handler);
8300: } else {
8301: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
8302: xmlFree(encoding);
8303: return(NULL);
8304: }
8305: }
8306: }
1.29 daniel 8307: }
8308: return(encoding);
8309: }
8310:
1.50 daniel 8311: /**
8312: * xmlParseSDDecl:
8313: * @ctxt: an XML parser context
8314: *
8315: * parse the XML standalone declaration
1.29 daniel 8316: *
8317: * [32] SDDecl ::= S 'standalone' Eq
8318: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
1.99 daniel 8319: *
8320: * [ VC: Standalone Document Declaration ]
8321: * TODO The standalone document declaration must have the value "no"
8322: * if any external markup declarations contain declarations of:
8323: * - attributes with default values, if elements to which these
8324: * attributes apply appear in the document without specifications
8325: * of values for these attributes, or
8326: * - entities (other than amp, lt, gt, apos, quot), if references
8327: * to those entities appear in the document, or
8328: * - attributes with values subject to normalization, where the
8329: * attribute appears in the document with a value which will change
8330: * as a result of normalization, or
8331: * - element types with element content, if white space occurs directly
8332: * within any instance of those types.
1.68 daniel 8333: *
8334: * Returns 1 if standalone, 0 otherwise
1.29 daniel 8335: */
8336:
1.55 daniel 8337: int
8338: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 8339: int standalone = -1;
8340:
1.42 daniel 8341: SKIP_BLANKS;
1.152 daniel 8342: if ((RAW == 's') && (NXT(1) == 't') &&
1.40 daniel 8343: (NXT(2) == 'a') && (NXT(3) == 'n') &&
8344: (NXT(4) == 'd') && (NXT(5) == 'a') &&
8345: (NXT(6) == 'l') && (NXT(7) == 'o') &&
8346: (NXT(8) == 'n') && (NXT(9) == 'e')) {
8347: SKIP(10);
1.81 daniel 8348: SKIP_BLANKS;
1.152 daniel 8349: if (RAW != '=') {
1.55 daniel 8350: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8351: ctxt->sax->error(ctxt->userData,
1.59 daniel 8352: "XML standalone declaration : expected '='\n");
1.123 daniel 8353: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.59 daniel 8354: ctxt->wellFormed = 0;
1.180 daniel 8355: ctxt->disableSAX = 1;
1.32 daniel 8356: return(standalone);
8357: }
1.40 daniel 8358: NEXT;
1.42 daniel 8359: SKIP_BLANKS;
1.152 daniel 8360: if (RAW == '\''){
1.40 daniel 8361: NEXT;
1.152 daniel 8362: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 8363: standalone = 0;
1.40 daniel 8364: SKIP(2);
1.152 daniel 8365: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 8366: (NXT(2) == 's')) {
1.29 daniel 8367: standalone = 1;
1.40 daniel 8368: SKIP(3);
1.29 daniel 8369: } else {
1.55 daniel 8370: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8371: ctxt->sax->error(ctxt->userData,
8372: "standalone accepts only 'yes' or 'no'\n");
1.123 daniel 8373: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.59 daniel 8374: ctxt->wellFormed = 0;
1.180 daniel 8375: ctxt->disableSAX = 1;
1.29 daniel 8376: }
1.152 daniel 8377: if (RAW != '\'') {
1.55 daniel 8378: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8379: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.123 daniel 8380: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.59 daniel 8381: ctxt->wellFormed = 0;
1.180 daniel 8382: ctxt->disableSAX = 1;
1.55 daniel 8383: } else
1.40 daniel 8384: NEXT;
1.152 daniel 8385: } else if (RAW == '"'){
1.40 daniel 8386: NEXT;
1.152 daniel 8387: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 8388: standalone = 0;
1.40 daniel 8389: SKIP(2);
1.152 daniel 8390: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 8391: (NXT(2) == 's')) {
1.29 daniel 8392: standalone = 1;
1.40 daniel 8393: SKIP(3);
1.29 daniel 8394: } else {
1.55 daniel 8395: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8396: ctxt->sax->error(ctxt->userData,
1.59 daniel 8397: "standalone accepts only 'yes' or 'no'\n");
1.123 daniel 8398: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.59 daniel 8399: ctxt->wellFormed = 0;
1.180 daniel 8400: ctxt->disableSAX = 1;
1.29 daniel 8401: }
1.152 daniel 8402: if (RAW != '"') {
1.55 daniel 8403: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8404: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 8405: ctxt->wellFormed = 0;
1.180 daniel 8406: ctxt->disableSAX = 1;
1.123 daniel 8407: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8408: } else
1.40 daniel 8409: NEXT;
1.37 daniel 8410: } else {
1.55 daniel 8411: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8412: ctxt->sax->error(ctxt->userData,
8413: "Standalone value not found\n");
1.59 daniel 8414: ctxt->wellFormed = 0;
1.180 daniel 8415: ctxt->disableSAX = 1;
1.123 daniel 8416: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.37 daniel 8417: }
1.29 daniel 8418: }
8419: return(standalone);
8420: }
8421:
1.50 daniel 8422: /**
8423: * xmlParseXMLDecl:
8424: * @ctxt: an XML parser context
8425: *
8426: * parse an XML declaration header
1.29 daniel 8427: *
8428: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 8429: */
8430:
1.55 daniel 8431: void
8432: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 8433: xmlChar *version;
1.1 veillard 8434:
8435: /*
1.19 daniel 8436: * We know that '<?xml' is here.
1.1 veillard 8437: */
1.40 daniel 8438: SKIP(5);
1.1 veillard 8439:
1.153 daniel 8440: if (!IS_BLANK(RAW)) {
1.59 daniel 8441: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8442: ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
1.123 daniel 8443: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 8444: ctxt->wellFormed = 0;
1.180 daniel 8445: ctxt->disableSAX = 1;
1.59 daniel 8446: }
1.42 daniel 8447: SKIP_BLANKS;
1.1 veillard 8448:
8449: /*
1.29 daniel 8450: * We should have the VersionInfo here.
1.1 veillard 8451: */
1.29 daniel 8452: version = xmlParseVersionInfo(ctxt);
8453: if (version == NULL)
1.45 daniel 8454: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.72 daniel 8455: ctxt->version = xmlStrdup(version);
1.119 daniel 8456: xmlFree(version);
1.29 daniel 8457:
8458: /*
8459: * We may have the encoding declaration
8460: */
1.153 daniel 8461: if (!IS_BLANK(RAW)) {
1.152 daniel 8462: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 8463: SKIP(2);
8464: return;
8465: }
8466: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8467: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.123 daniel 8468: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 8469: ctxt->wellFormed = 0;
1.180 daniel 8470: ctxt->disableSAX = 1;
1.59 daniel 8471: }
1.195 daniel 8472: xmlParseEncodingDecl(ctxt);
1.193 daniel 8473: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8474: /*
8475: * The XML REC instructs us to stop parsing right here
8476: */
8477: return;
8478: }
1.1 veillard 8479:
8480: /*
1.29 daniel 8481: * We may have the standalone status.
1.1 veillard 8482: */
1.164 daniel 8483: if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
1.152 daniel 8484: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 8485: SKIP(2);
8486: return;
8487: }
8488: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8489: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 8490: ctxt->wellFormed = 0;
1.180 daniel 8491: ctxt->disableSAX = 1;
1.123 daniel 8492: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 8493: }
8494: SKIP_BLANKS;
1.167 daniel 8495: ctxt->input->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 8496:
1.42 daniel 8497: SKIP_BLANKS;
1.152 daniel 8498: if ((RAW == '?') && (NXT(1) == '>')) {
1.40 daniel 8499: SKIP(2);
1.152 daniel 8500: } else if (RAW == '>') {
1.31 daniel 8501: /* Deprecated old WD ... */
1.55 daniel 8502: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8503: ctxt->sax->error(ctxt->userData,
8504: "XML declaration must end-up with '?>'\n");
1.59 daniel 8505: ctxt->wellFormed = 0;
1.180 daniel 8506: ctxt->disableSAX = 1;
1.123 daniel 8507: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.40 daniel 8508: NEXT;
1.29 daniel 8509: } else {
1.55 daniel 8510: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8511: ctxt->sax->error(ctxt->userData,
8512: "parsing XML declaration: '?>' expected\n");
1.59 daniel 8513: ctxt->wellFormed = 0;
1.180 daniel 8514: ctxt->disableSAX = 1;
1.123 daniel 8515: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.40 daniel 8516: MOVETO_ENDTAG(CUR_PTR);
8517: NEXT;
1.29 daniel 8518: }
1.1 veillard 8519: }
8520:
1.50 daniel 8521: /**
8522: * xmlParseMisc:
8523: * @ctxt: an XML parser context
8524: *
8525: * parse an XML Misc* optionnal field.
1.21 daniel 8526: *
1.22 daniel 8527: * [27] Misc ::= Comment | PI | S
1.1 veillard 8528: */
8529:
1.55 daniel 8530: void
8531: xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.152 daniel 8532: while (((RAW == '<') && (NXT(1) == '?')) ||
8533: ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 8534: (NXT(2) == '-') && (NXT(3) == '-')) ||
8535: IS_BLANK(CUR)) {
1.152 daniel 8536: if ((RAW == '<') && (NXT(1) == '?')) {
1.16 daniel 8537: xmlParsePI(ctxt);
1.40 daniel 8538: } else if (IS_BLANK(CUR)) {
8539: NEXT;
1.1 veillard 8540: } else
1.114 daniel 8541: xmlParseComment(ctxt);
1.1 veillard 8542: }
8543: }
8544:
1.50 daniel 8545: /**
1.181 daniel 8546: * xmlParseDocument:
1.50 daniel 8547: * @ctxt: an XML parser context
8548: *
8549: * parse an XML document (and build a tree if using the standard SAX
8550: * interface).
1.21 daniel 8551: *
1.22 daniel 8552: * [1] document ::= prolog element Misc*
1.29 daniel 8553: *
8554: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.50 daniel 8555: *
1.68 daniel 8556: * Returns 0, -1 in case of error. the parser context is augmented
1.50 daniel 8557: * as a result of the parsing.
1.1 veillard 8558: */
8559:
1.55 daniel 8560: int
8561: xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.156 daniel 8562: xmlChar start[4];
8563: xmlCharEncoding enc;
8564:
1.45 daniel 8565: xmlDefaultSAXHandlerInit();
8566:
1.91 daniel 8567: GROW;
8568:
1.14 veillard 8569: /*
1.44 daniel 8570: * SAX: beginning of the document processing.
8571: */
1.72 daniel 8572: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
1.74 daniel 8573: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
1.44 daniel 8574:
1.156 daniel 8575: /*
8576: * Get the 4 first bytes and decode the charset
8577: * if enc != XML_CHAR_ENCODING_NONE
8578: * plug some encoding conversion routines.
8579: */
8580: start[0] = RAW;
8581: start[1] = NXT(1);
8582: start[2] = NXT(2);
8583: start[3] = NXT(3);
8584: enc = xmlDetectCharEncoding(start, 4);
8585: if (enc != XML_CHAR_ENCODING_NONE) {
8586: xmlSwitchEncoding(ctxt, enc);
8587: }
8588:
1.1 veillard 8589:
1.59 daniel 8590: if (CUR == 0) {
8591: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8592: ctxt->sax->error(ctxt->userData, "Document is empty\n");
1.123 daniel 8593: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 8594: ctxt->wellFormed = 0;
1.180 daniel 8595: ctxt->disableSAX = 1;
1.59 daniel 8596: }
1.1 veillard 8597:
8598: /*
8599: * Check for the XMLDecl in the Prolog.
8600: */
1.91 daniel 8601: GROW;
1.152 daniel 8602: if ((RAW == '<') && (NXT(1) == '?') &&
1.40 daniel 8603: (NXT(2) == 'x') && (NXT(3) == 'm') &&
1.142 daniel 8604: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.196 ! daniel 8605:
! 8606: /*
! 8607: * Note that we will switch encoding on the fly.
! 8608: */
1.19 daniel 8609: xmlParseXMLDecl(ctxt);
1.193 daniel 8610: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8611: /*
8612: * The XML REC instructs us to stop parsing right here
8613: */
8614: return(-1);
8615: }
1.167 daniel 8616: ctxt->standalone = ctxt->input->standalone;
1.42 daniel 8617: SKIP_BLANKS;
1.1 veillard 8618: } else {
1.72 daniel 8619: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.1 veillard 8620: }
1.171 daniel 8621: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
1.74 daniel 8622: ctxt->sax->startDocument(ctxt->userData);
1.1 veillard 8623:
8624: /*
8625: * The Misc part of the Prolog
8626: */
1.91 daniel 8627: GROW;
1.16 daniel 8628: xmlParseMisc(ctxt);
1.1 veillard 8629:
8630: /*
1.29 daniel 8631: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 8632: * (doctypedecl Misc*)?
8633: */
1.91 daniel 8634: GROW;
1.152 daniel 8635: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 8636: (NXT(2) == 'D') && (NXT(3) == 'O') &&
8637: (NXT(4) == 'C') && (NXT(5) == 'T') &&
8638: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
8639: (NXT(8) == 'E')) {
1.165 daniel 8640:
1.166 daniel 8641: ctxt->inSubset = 1;
1.22 daniel 8642: xmlParseDocTypeDecl(ctxt);
1.152 daniel 8643: if (RAW == '[') {
1.140 daniel 8644: ctxt->instate = XML_PARSER_DTD;
8645: xmlParseInternalSubset(ctxt);
8646: }
1.165 daniel 8647:
8648: /*
8649: * Create and update the external subset.
8650: */
1.166 daniel 8651: ctxt->inSubset = 2;
1.171 daniel 8652: if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8653: (!ctxt->disableSAX))
1.165 daniel 8654: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8655: ctxt->extSubSystem, ctxt->extSubURI);
1.166 daniel 8656: ctxt->inSubset = 0;
1.165 daniel 8657:
8658:
1.96 daniel 8659: ctxt->instate = XML_PARSER_PROLOG;
1.22 daniel 8660: xmlParseMisc(ctxt);
1.21 daniel 8661: }
8662:
8663: /*
8664: * Time to start parsing the tree itself
1.1 veillard 8665: */
1.91 daniel 8666: GROW;
1.152 daniel 8667: if (RAW != '<') {
1.59 daniel 8668: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8669: ctxt->sax->error(ctxt->userData,
1.151 daniel 8670: "Start tag expected, '<' not found\n");
1.140 daniel 8671: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 8672: ctxt->wellFormed = 0;
1.180 daniel 8673: ctxt->disableSAX = 1;
1.140 daniel 8674: ctxt->instate = XML_PARSER_EOF;
8675: } else {
8676: ctxt->instate = XML_PARSER_CONTENT;
8677: xmlParseElement(ctxt);
8678: ctxt->instate = XML_PARSER_EPILOG;
8679:
8680:
8681: /*
8682: * The Misc part at the end
8683: */
8684: xmlParseMisc(ctxt);
8685:
1.152 daniel 8686: if (RAW != 0) {
1.140 daniel 8687: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8688: ctxt->sax->error(ctxt->userData,
8689: "Extra content at the end of the document\n");
8690: ctxt->wellFormed = 0;
1.180 daniel 8691: ctxt->disableSAX = 1;
1.140 daniel 8692: ctxt->errNo = XML_ERR_DOCUMENT_END;
8693: }
8694: ctxt->instate = XML_PARSER_EOF;
1.59 daniel 8695: }
8696:
1.44 daniel 8697: /*
8698: * SAX: end of the document processing.
8699: */
1.171 daniel 8700: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8701: (!ctxt->disableSAX))
1.74 daniel 8702: ctxt->sax->endDocument(ctxt->userData);
1.151 daniel 8703:
1.59 daniel 8704: if (! ctxt->wellFormed) return(-1);
1.16 daniel 8705: return(0);
8706: }
8707:
1.98 daniel 8708: /************************************************************************
8709: * *
1.128 daniel 8710: * Progressive parsing interfaces *
8711: * *
8712: ************************************************************************/
8713:
8714: /**
8715: * xmlParseLookupSequence:
8716: * @ctxt: an XML parser context
8717: * @first: the first char to lookup
1.140 daniel 8718: * @next: the next char to lookup or zero
8719: * @third: the next char to lookup or zero
1.128 daniel 8720: *
1.140 daniel 8721: * Try to find if a sequence (first, next, third) or just (first next) or
8722: * (first) is available in the input stream.
8723: * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8724: * to avoid rescanning sequences of bytes, it DOES change the state of the
8725: * parser, do not use liberally.
1.128 daniel 8726: *
1.140 daniel 8727: * Returns the index to the current parsing point if the full sequence
8728: * is available, -1 otherwise.
1.128 daniel 8729: */
8730: int
1.140 daniel 8731: xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8732: xmlChar next, xmlChar third) {
8733: int base, len;
8734: xmlParserInputPtr in;
8735: const xmlChar *buf;
8736:
8737: in = ctxt->input;
8738: if (in == NULL) return(-1);
8739: base = in->cur - in->base;
8740: if (base < 0) return(-1);
8741: if (ctxt->checkIndex > base)
8742: base = ctxt->checkIndex;
8743: if (in->buf == NULL) {
8744: buf = in->base;
8745: len = in->length;
8746: } else {
8747: buf = in->buf->buffer->content;
8748: len = in->buf->buffer->use;
8749: }
8750: /* take into account the sequence length */
8751: if (third) len -= 2;
8752: else if (next) len --;
8753: for (;base < len;base++) {
8754: if (buf[base] == first) {
8755: if (third != 0) {
8756: if ((buf[base + 1] != next) ||
8757: (buf[base + 2] != third)) continue;
8758: } else if (next != 0) {
8759: if (buf[base + 1] != next) continue;
8760: }
8761: ctxt->checkIndex = 0;
8762: #ifdef DEBUG_PUSH
8763: if (next == 0)
8764: fprintf(stderr, "PP: lookup '%c' found at %d\n",
8765: first, base);
8766: else if (third == 0)
8767: fprintf(stderr, "PP: lookup '%c%c' found at %d\n",
8768: first, next, base);
8769: else
8770: fprintf(stderr, "PP: lookup '%c%c%c' found at %d\n",
8771: first, next, third, base);
8772: #endif
8773: return(base - (in->cur - in->base));
8774: }
8775: }
8776: ctxt->checkIndex = base;
8777: #ifdef DEBUG_PUSH
8778: if (next == 0)
8779: fprintf(stderr, "PP: lookup '%c' failed\n", first);
8780: else if (third == 0)
8781: fprintf(stderr, "PP: lookup '%c%c' failed\n", first, next);
8782: else
8783: fprintf(stderr, "PP: lookup '%c%c%c' failed\n", first, next, third);
8784: #endif
8785: return(-1);
1.128 daniel 8786: }
8787:
8788: /**
1.143 daniel 8789: * xmlParseTryOrFinish:
1.128 daniel 8790: * @ctxt: an XML parser context
1.143 daniel 8791: * @terminate: last chunk indicator
1.128 daniel 8792: *
8793: * Try to progress on parsing
8794: *
8795: * Returns zero if no parsing was possible
8796: */
8797: int
1.143 daniel 8798: xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
1.128 daniel 8799: int ret = 0;
1.140 daniel 8800: int avail;
8801: xmlChar cur, next;
8802:
8803: #ifdef DEBUG_PUSH
8804: switch (ctxt->instate) {
8805: case XML_PARSER_EOF:
8806: fprintf(stderr, "PP: try EOF\n"); break;
8807: case XML_PARSER_START:
8808: fprintf(stderr, "PP: try START\n"); break;
8809: case XML_PARSER_MISC:
8810: fprintf(stderr, "PP: try MISC\n");break;
8811: case XML_PARSER_COMMENT:
8812: fprintf(stderr, "PP: try COMMENT\n");break;
8813: case XML_PARSER_PROLOG:
8814: fprintf(stderr, "PP: try PROLOG\n");break;
8815: case XML_PARSER_START_TAG:
8816: fprintf(stderr, "PP: try START_TAG\n");break;
8817: case XML_PARSER_CONTENT:
8818: fprintf(stderr, "PP: try CONTENT\n");break;
8819: case XML_PARSER_CDATA_SECTION:
8820: fprintf(stderr, "PP: try CDATA_SECTION\n");break;
8821: case XML_PARSER_END_TAG:
8822: fprintf(stderr, "PP: try END_TAG\n");break;
8823: case XML_PARSER_ENTITY_DECL:
8824: fprintf(stderr, "PP: try ENTITY_DECL\n");break;
8825: case XML_PARSER_ENTITY_VALUE:
8826: fprintf(stderr, "PP: try ENTITY_VALUE\n");break;
8827: case XML_PARSER_ATTRIBUTE_VALUE:
8828: fprintf(stderr, "PP: try ATTRIBUTE_VALUE\n");break;
8829: case XML_PARSER_DTD:
8830: fprintf(stderr, "PP: try DTD\n");break;
8831: case XML_PARSER_EPILOG:
8832: fprintf(stderr, "PP: try EPILOG\n");break;
8833: case XML_PARSER_PI:
8834: fprintf(stderr, "PP: try PI\n");break;
8835: }
8836: #endif
1.128 daniel 8837:
8838: while (1) {
1.140 daniel 8839: /*
8840: * Pop-up of finished entities.
8841: */
1.152 daniel 8842: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 8843: xmlPopInput(ctxt);
8844:
1.184 daniel 8845: if (ctxt->input ==NULL) break;
8846: if (ctxt->input->buf == NULL)
8847: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8848: else
1.184 daniel 8849: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8850: if (avail < 1)
8851: goto done;
1.128 daniel 8852: switch (ctxt->instate) {
8853: case XML_PARSER_EOF:
1.140 daniel 8854: /*
8855: * Document parsing is done !
8856: */
8857: goto done;
8858: case XML_PARSER_START:
8859: /*
8860: * Very first chars read from the document flow.
8861: */
1.184 daniel 8862: cur = ctxt->input->cur[0];
1.140 daniel 8863: if (IS_BLANK(cur)) {
8864: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8865: ctxt->sax->setDocumentLocator(ctxt->userData,
8866: &xmlDefaultSAXLocator);
8867: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8868: ctxt->sax->error(ctxt->userData,
8869: "Extra spaces at the beginning of the document are not allowed\n");
8870: ctxt->errNo = XML_ERR_DOCUMENT_START;
8871: ctxt->wellFormed = 0;
1.180 daniel 8872: ctxt->disableSAX = 1;
1.140 daniel 8873: SKIP_BLANKS;
8874: ret++;
1.184 daniel 8875: if (ctxt->input->buf == NULL)
8876: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8877: else
1.184 daniel 8878: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8879: }
8880: if (avail < 2)
8881: goto done;
8882:
1.184 daniel 8883: cur = ctxt->input->cur[0];
8884: next = ctxt->input->cur[1];
1.140 daniel 8885: if (cur == 0) {
8886: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8887: ctxt->sax->setDocumentLocator(ctxt->userData,
8888: &xmlDefaultSAXLocator);
8889: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8890: ctxt->sax->error(ctxt->userData, "Document is empty\n");
8891: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8892: ctxt->wellFormed = 0;
1.180 daniel 8893: ctxt->disableSAX = 1;
1.140 daniel 8894: ctxt->instate = XML_PARSER_EOF;
8895: #ifdef DEBUG_PUSH
8896: fprintf(stderr, "PP: entering EOF\n");
8897: #endif
8898: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8899: ctxt->sax->endDocument(ctxt->userData);
8900: goto done;
8901: }
8902: if ((cur == '<') && (next == '?')) {
8903: /* PI or XML decl */
8904: if (avail < 5) return(ret);
1.143 daniel 8905: if ((!terminate) &&
8906: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8907: return(ret);
8908: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8909: ctxt->sax->setDocumentLocator(ctxt->userData,
8910: &xmlDefaultSAXLocator);
1.184 daniel 8911: if ((ctxt->input->cur[2] == 'x') &&
8912: (ctxt->input->cur[3] == 'm') &&
8913: (ctxt->input->cur[4] == 'l') &&
8914: (IS_BLANK(ctxt->input->cur[5]))) {
1.140 daniel 8915: ret += 5;
8916: #ifdef DEBUG_PUSH
8917: fprintf(stderr, "PP: Parsing XML Decl\n");
8918: #endif
8919: xmlParseXMLDecl(ctxt);
1.193 daniel 8920: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8921: /*
8922: * The XML REC instructs us to stop parsing right
8923: * here
8924: */
8925: ctxt->instate = XML_PARSER_EOF;
8926: return(0);
8927: }
1.167 daniel 8928: ctxt->standalone = ctxt->input->standalone;
1.164 daniel 8929: if ((ctxt->encoding == NULL) &&
8930: (ctxt->input->encoding != NULL))
8931: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1.171 daniel 8932: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8933: (!ctxt->disableSAX))
1.140 daniel 8934: ctxt->sax->startDocument(ctxt->userData);
8935: ctxt->instate = XML_PARSER_MISC;
8936: #ifdef DEBUG_PUSH
8937: fprintf(stderr, "PP: entering MISC\n");
8938: #endif
8939: } else {
8940: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 daniel 8941: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8942: (!ctxt->disableSAX))
1.140 daniel 8943: ctxt->sax->startDocument(ctxt->userData);
8944: ctxt->instate = XML_PARSER_MISC;
8945: #ifdef DEBUG_PUSH
8946: fprintf(stderr, "PP: entering MISC\n");
8947: #endif
8948: }
8949: } else {
8950: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8951: ctxt->sax->setDocumentLocator(ctxt->userData,
8952: &xmlDefaultSAXLocator);
8953: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 daniel 8954: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8955: (!ctxt->disableSAX))
1.140 daniel 8956: ctxt->sax->startDocument(ctxt->userData);
8957: ctxt->instate = XML_PARSER_MISC;
8958: #ifdef DEBUG_PUSH
8959: fprintf(stderr, "PP: entering MISC\n");
8960: #endif
8961: }
8962: break;
8963: case XML_PARSER_MISC:
8964: SKIP_BLANKS;
1.184 daniel 8965: if (ctxt->input->buf == NULL)
8966: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8967: else
1.184 daniel 8968: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8969: if (avail < 2)
8970: goto done;
1.184 daniel 8971: cur = ctxt->input->cur[0];
8972: next = ctxt->input->cur[1];
1.140 daniel 8973: if ((cur == '<') && (next == '?')) {
1.143 daniel 8974: if ((!terminate) &&
8975: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8976: goto done;
8977: #ifdef DEBUG_PUSH
8978: fprintf(stderr, "PP: Parsing PI\n");
8979: #endif
8980: xmlParsePI(ctxt);
8981: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 8982: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 8983: if ((!terminate) &&
8984: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 8985: goto done;
8986: #ifdef DEBUG_PUSH
8987: fprintf(stderr, "PP: Parsing Comment\n");
8988: #endif
8989: xmlParseComment(ctxt);
8990: ctxt->instate = XML_PARSER_MISC;
8991: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 8992: (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
8993: (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
8994: (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
8995: (ctxt->input->cur[8] == 'E')) {
1.143 daniel 8996: if ((!terminate) &&
8997: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 8998: goto done;
8999: #ifdef DEBUG_PUSH
9000: fprintf(stderr, "PP: Parsing internal subset\n");
9001: #endif
1.166 daniel 9002: ctxt->inSubset = 1;
1.140 daniel 9003: xmlParseDocTypeDecl(ctxt);
1.152 daniel 9004: if (RAW == '[') {
1.140 daniel 9005: ctxt->instate = XML_PARSER_DTD;
9006: #ifdef DEBUG_PUSH
9007: fprintf(stderr, "PP: entering DTD\n");
9008: #endif
9009: } else {
1.166 daniel 9010: /*
9011: * Create and update the external subset.
9012: */
9013: ctxt->inSubset = 2;
1.171 daniel 9014: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 9015: (ctxt->sax->externalSubset != NULL))
9016: ctxt->sax->externalSubset(ctxt->userData,
9017: ctxt->intSubName, ctxt->extSubSystem,
9018: ctxt->extSubURI);
9019: ctxt->inSubset = 0;
1.140 daniel 9020: ctxt->instate = XML_PARSER_PROLOG;
9021: #ifdef DEBUG_PUSH
9022: fprintf(stderr, "PP: entering PROLOG\n");
9023: #endif
9024: }
9025: } else if ((cur == '<') && (next == '!') &&
9026: (avail < 9)) {
9027: goto done;
9028: } else {
9029: ctxt->instate = XML_PARSER_START_TAG;
9030: #ifdef DEBUG_PUSH
9031: fprintf(stderr, "PP: entering START_TAG\n");
9032: #endif
9033: }
9034: break;
1.128 daniel 9035: case XML_PARSER_PROLOG:
1.140 daniel 9036: SKIP_BLANKS;
1.184 daniel 9037: if (ctxt->input->buf == NULL)
9038: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9039: else
1.184 daniel 9040: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9041: if (avail < 2)
9042: goto done;
1.184 daniel 9043: cur = ctxt->input->cur[0];
9044: next = ctxt->input->cur[1];
1.140 daniel 9045: if ((cur == '<') && (next == '?')) {
1.143 daniel 9046: if ((!terminate) &&
9047: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 9048: goto done;
9049: #ifdef DEBUG_PUSH
9050: fprintf(stderr, "PP: Parsing PI\n");
9051: #endif
9052: xmlParsePI(ctxt);
9053: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 9054: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 9055: if ((!terminate) &&
9056: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 9057: goto done;
9058: #ifdef DEBUG_PUSH
9059: fprintf(stderr, "PP: Parsing Comment\n");
9060: #endif
9061: xmlParseComment(ctxt);
9062: ctxt->instate = XML_PARSER_PROLOG;
9063: } else if ((cur == '<') && (next == '!') &&
9064: (avail < 4)) {
9065: goto done;
9066: } else {
9067: ctxt->instate = XML_PARSER_START_TAG;
9068: #ifdef DEBUG_PUSH
9069: fprintf(stderr, "PP: entering START_TAG\n");
9070: #endif
9071: }
9072: break;
9073: case XML_PARSER_EPILOG:
9074: SKIP_BLANKS;
1.184 daniel 9075: if (ctxt->input->buf == NULL)
9076: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9077: else
1.184 daniel 9078: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9079: if (avail < 2)
9080: goto done;
1.184 daniel 9081: cur = ctxt->input->cur[0];
9082: next = ctxt->input->cur[1];
1.140 daniel 9083: if ((cur == '<') && (next == '?')) {
1.143 daniel 9084: if ((!terminate) &&
9085: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 9086: goto done;
9087: #ifdef DEBUG_PUSH
9088: fprintf(stderr, "PP: Parsing PI\n");
9089: #endif
9090: xmlParsePI(ctxt);
9091: ctxt->instate = XML_PARSER_EPILOG;
9092: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 9093: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 9094: if ((!terminate) &&
9095: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 9096: goto done;
9097: #ifdef DEBUG_PUSH
9098: fprintf(stderr, "PP: Parsing Comment\n");
9099: #endif
9100: xmlParseComment(ctxt);
9101: ctxt->instate = XML_PARSER_EPILOG;
9102: } else if ((cur == '<') && (next == '!') &&
9103: (avail < 4)) {
9104: goto done;
9105: } else {
9106: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9107: ctxt->sax->error(ctxt->userData,
9108: "Extra content at the end of the document\n");
9109: ctxt->wellFormed = 0;
1.180 daniel 9110: ctxt->disableSAX = 1;
1.140 daniel 9111: ctxt->errNo = XML_ERR_DOCUMENT_END;
9112: ctxt->instate = XML_PARSER_EOF;
9113: #ifdef DEBUG_PUSH
9114: fprintf(stderr, "PP: entering EOF\n");
9115: #endif
1.171 daniel 9116: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9117: (!ctxt->disableSAX))
1.140 daniel 9118: ctxt->sax->endDocument(ctxt->userData);
9119: goto done;
9120: }
9121: break;
9122: case XML_PARSER_START_TAG: {
9123: xmlChar *name, *oldname;
9124:
1.184 daniel 9125: if ((avail < 2) && (ctxt->inputNr == 1))
1.140 daniel 9126: goto done;
1.184 daniel 9127: cur = ctxt->input->cur[0];
1.140 daniel 9128: if (cur != '<') {
9129: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9130: ctxt->sax->error(ctxt->userData,
9131: "Start tag expect, '<' not found\n");
9132: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
9133: ctxt->wellFormed = 0;
1.180 daniel 9134: ctxt->disableSAX = 1;
1.140 daniel 9135: ctxt->instate = XML_PARSER_EOF;
9136: #ifdef DEBUG_PUSH
9137: fprintf(stderr, "PP: entering EOF\n");
9138: #endif
1.171 daniel 9139: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9140: (!ctxt->disableSAX))
1.140 daniel 9141: ctxt->sax->endDocument(ctxt->userData);
9142: goto done;
9143: }
1.143 daniel 9144: if ((!terminate) &&
9145: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 9146: goto done;
1.176 daniel 9147: if (ctxt->spaceNr == 0)
9148: spacePush(ctxt, -1);
9149: else
9150: spacePush(ctxt, *ctxt->space);
1.140 daniel 9151: name = xmlParseStartTag(ctxt);
9152: if (name == NULL) {
1.176 daniel 9153: spacePop(ctxt);
1.140 daniel 9154: ctxt->instate = XML_PARSER_EOF;
9155: #ifdef DEBUG_PUSH
9156: fprintf(stderr, "PP: entering EOF\n");
9157: #endif
1.171 daniel 9158: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9159: (!ctxt->disableSAX))
1.140 daniel 9160: ctxt->sax->endDocument(ctxt->userData);
9161: goto done;
9162: }
9163: namePush(ctxt, xmlStrdup(name));
9164:
9165: /*
9166: * [ VC: Root Element Type ]
9167: * The Name in the document type declaration must match
9168: * the element type of the root element.
9169: */
9170: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 9171: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.140 daniel 9172: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9173:
9174: /*
9175: * Check for an Empty Element.
9176: */
1.152 daniel 9177: if ((RAW == '/') && (NXT(1) == '>')) {
1.140 daniel 9178: SKIP(2);
1.171 daniel 9179: if ((ctxt->sax != NULL) &&
9180: (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
1.140 daniel 9181: ctxt->sax->endElement(ctxt->userData, name);
9182: xmlFree(name);
9183: oldname = namePop(ctxt);
1.176 daniel 9184: spacePop(ctxt);
1.140 daniel 9185: if (oldname != NULL) {
9186: #ifdef DEBUG_STACK
9187: fprintf(stderr,"Close: popped %s\n", oldname);
9188: #endif
9189: xmlFree(oldname);
9190: }
9191: if (ctxt->name == NULL) {
9192: ctxt->instate = XML_PARSER_EPILOG;
9193: #ifdef DEBUG_PUSH
9194: fprintf(stderr, "PP: entering EPILOG\n");
9195: #endif
9196: } else {
9197: ctxt->instate = XML_PARSER_CONTENT;
9198: #ifdef DEBUG_PUSH
9199: fprintf(stderr, "PP: entering CONTENT\n");
9200: #endif
9201: }
9202: break;
9203: }
1.152 daniel 9204: if (RAW == '>') {
1.140 daniel 9205: NEXT;
9206: } else {
9207: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9208: ctxt->sax->error(ctxt->userData,
9209: "Couldn't find end of Start Tag %s\n",
9210: name);
9211: ctxt->wellFormed = 0;
1.180 daniel 9212: ctxt->disableSAX = 1;
1.140 daniel 9213: ctxt->errNo = XML_ERR_GT_REQUIRED;
9214:
9215: /*
9216: * end of parsing of this node.
9217: */
9218: nodePop(ctxt);
9219: oldname = namePop(ctxt);
1.176 daniel 9220: spacePop(ctxt);
1.140 daniel 9221: if (oldname != NULL) {
9222: #ifdef DEBUG_STACK
9223: fprintf(stderr,"Close: popped %s\n", oldname);
9224: #endif
9225: xmlFree(oldname);
9226: }
9227: }
9228: xmlFree(name);
9229: ctxt->instate = XML_PARSER_CONTENT;
9230: #ifdef DEBUG_PUSH
9231: fprintf(stderr, "PP: entering CONTENT\n");
9232: #endif
9233: break;
9234: }
1.128 daniel 9235: case XML_PARSER_CONTENT:
1.140 daniel 9236: /*
9237: * Handle preparsed entities and charRef
9238: */
9239: if (ctxt->token != 0) {
9240: xmlChar cur[2] = { 0 , 0 } ;
9241:
9242: cur[0] = (xmlChar) ctxt->token;
1.171 daniel 9243: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9244: (ctxt->sax->characters != NULL))
1.140 daniel 9245: ctxt->sax->characters(ctxt->userData, cur, 1);
9246: ctxt->token = 0;
9247: }
1.184 daniel 9248: if ((avail < 2) && (ctxt->inputNr == 1))
1.140 daniel 9249: goto done;
1.184 daniel 9250: cur = ctxt->input->cur[0];
9251: next = ctxt->input->cur[1];
1.140 daniel 9252: if ((cur == '<') && (next == '?')) {
1.143 daniel 9253: if ((!terminate) &&
9254: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 9255: goto done;
9256: #ifdef DEBUG_PUSH
9257: fprintf(stderr, "PP: Parsing PI\n");
9258: #endif
9259: xmlParsePI(ctxt);
9260: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 9261: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 9262: if ((!terminate) &&
9263: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 9264: goto done;
9265: #ifdef DEBUG_PUSH
9266: fprintf(stderr, "PP: Parsing Comment\n");
9267: #endif
9268: xmlParseComment(ctxt);
9269: ctxt->instate = XML_PARSER_CONTENT;
1.184 daniel 9270: } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9271: (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
9272: (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
9273: (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
9274: (ctxt->input->cur[8] == '[')) {
1.140 daniel 9275: SKIP(9);
9276: ctxt->instate = XML_PARSER_CDATA_SECTION;
9277: #ifdef DEBUG_PUSH
9278: fprintf(stderr, "PP: entering CDATA_SECTION\n");
9279: #endif
9280: break;
9281: } else if ((cur == '<') && (next == '!') &&
9282: (avail < 9)) {
9283: goto done;
9284: } else if ((cur == '<') && (next == '/')) {
9285: ctxt->instate = XML_PARSER_END_TAG;
9286: #ifdef DEBUG_PUSH
9287: fprintf(stderr, "PP: entering END_TAG\n");
9288: #endif
9289: break;
9290: } else if (cur == '<') {
9291: ctxt->instate = XML_PARSER_START_TAG;
9292: #ifdef DEBUG_PUSH
9293: fprintf(stderr, "PP: entering START_TAG\n");
9294: #endif
9295: break;
9296: } else if (cur == '&') {
1.143 daniel 9297: if ((!terminate) &&
9298: (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
1.140 daniel 9299: goto done;
9300: #ifdef DEBUG_PUSH
9301: fprintf(stderr, "PP: Parsing Reference\n");
9302: #endif
9303: /* TODO: check generation of subtrees if noent !!! */
9304: xmlParseReference(ctxt);
9305: } else {
1.156 daniel 9306: /* TODO Avoid the extra copy, handle directly !!! */
1.140 daniel 9307: /*
1.181 daniel 9308: * Goal of the following test is:
1.140 daniel 9309: * - minimize calls to the SAX 'character' callback
9310: * when they are mergeable
9311: * - handle a problem for isBlank when we only parse
9312: * a sequence of blank chars and the next one is
9313: * not available to check against '<' presence.
9314: * - tries to homogenize the differences in SAX
9315: * callbacks between the push and pull versions
9316: * of the parser.
9317: */
9318: if ((ctxt->inputNr == 1) &&
9319: (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
1.143 daniel 9320: if ((!terminate) &&
9321: (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
1.140 daniel 9322: goto done;
9323: }
9324: ctxt->checkIndex = 0;
9325: #ifdef DEBUG_PUSH
9326: fprintf(stderr, "PP: Parsing char data\n");
9327: #endif
9328: xmlParseCharData(ctxt, 0);
9329: }
9330: /*
9331: * Pop-up of finished entities.
9332: */
1.152 daniel 9333: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 9334: xmlPopInput(ctxt);
9335: break;
9336: case XML_PARSER_CDATA_SECTION: {
9337: /*
9338: * The Push mode need to have the SAX callback for
9339: * cdataBlock merge back contiguous callbacks.
9340: */
9341: int base;
9342:
9343: base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9344: if (base < 0) {
9345: if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
1.171 daniel 9346: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.140 daniel 9347: if (ctxt->sax->cdataBlock != NULL)
1.184 daniel 9348: ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
1.140 daniel 9349: XML_PARSER_BIG_BUFFER_SIZE);
9350: }
9351: SKIP(XML_PARSER_BIG_BUFFER_SIZE);
9352: ctxt->checkIndex = 0;
9353: }
9354: goto done;
9355: } else {
1.171 daniel 9356: if ((ctxt->sax != NULL) && (base > 0) &&
9357: (!ctxt->disableSAX)) {
1.140 daniel 9358: if (ctxt->sax->cdataBlock != NULL)
9359: ctxt->sax->cdataBlock(ctxt->userData,
1.184 daniel 9360: ctxt->input->cur, base);
1.140 daniel 9361: }
9362: SKIP(base + 3);
9363: ctxt->checkIndex = 0;
9364: ctxt->instate = XML_PARSER_CONTENT;
9365: #ifdef DEBUG_PUSH
9366: fprintf(stderr, "PP: entering CONTENT\n");
9367: #endif
9368: }
9369: break;
9370: }
1.141 daniel 9371: case XML_PARSER_END_TAG:
1.140 daniel 9372: if (avail < 2)
9373: goto done;
1.143 daniel 9374: if ((!terminate) &&
9375: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 9376: goto done;
9377: xmlParseEndTag(ctxt);
9378: if (ctxt->name == NULL) {
9379: ctxt->instate = XML_PARSER_EPILOG;
9380: #ifdef DEBUG_PUSH
9381: fprintf(stderr, "PP: entering EPILOG\n");
9382: #endif
9383: } else {
9384: ctxt->instate = XML_PARSER_CONTENT;
9385: #ifdef DEBUG_PUSH
9386: fprintf(stderr, "PP: entering CONTENT\n");
9387: #endif
9388: }
9389: break;
9390: case XML_PARSER_DTD: {
9391: /*
9392: * Sorry but progressive parsing of the internal subset
9393: * is not expected to be supported. We first check that
9394: * the full content of the internal subset is available and
9395: * the parsing is launched only at that point.
9396: * Internal subset ends up with "']' S? '>'" in an unescaped
9397: * section and not in a ']]>' sequence which are conditional
9398: * sections (whoever argued to keep that crap in XML deserve
9399: * a place in hell !).
9400: */
9401: int base, i;
9402: xmlChar *buf;
9403: xmlChar quote = 0;
9404:
1.184 daniel 9405: base = ctxt->input->cur - ctxt->input->base;
1.140 daniel 9406: if (base < 0) return(0);
9407: if (ctxt->checkIndex > base)
9408: base = ctxt->checkIndex;
1.184 daniel 9409: buf = ctxt->input->buf->buffer->content;
9410: for (;base < ctxt->input->buf->buffer->use;base++) {
1.140 daniel 9411: if (quote != 0) {
9412: if (buf[base] == quote)
9413: quote = 0;
9414: continue;
9415: }
9416: if (buf[base] == '"') {
9417: quote = '"';
9418: continue;
9419: }
9420: if (buf[base] == '\'') {
9421: quote = '\'';
9422: continue;
9423: }
9424: if (buf[base] == ']') {
1.184 daniel 9425: if (base +1 >= ctxt->input->buf->buffer->use)
1.140 daniel 9426: break;
9427: if (buf[base + 1] == ']') {
9428: /* conditional crap, skip both ']' ! */
9429: base++;
9430: continue;
9431: }
1.184 daniel 9432: for (i = 0;base + i < ctxt->input->buf->buffer->use;i++) {
1.140 daniel 9433: if (buf[base + i] == '>')
9434: goto found_end_int_subset;
9435: }
9436: break;
9437: }
9438: }
9439: /*
9440: * We didn't find the end of the Internal subset
9441: */
9442: if (quote == 0)
9443: ctxt->checkIndex = base;
9444: #ifdef DEBUG_PUSH
9445: if (next == 0)
9446: fprintf(stderr, "PP: lookup of int subset end filed\n");
9447: #endif
9448: goto done;
9449:
9450: found_end_int_subset:
9451: xmlParseInternalSubset(ctxt);
1.166 daniel 9452: ctxt->inSubset = 2;
1.171 daniel 9453: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 9454: (ctxt->sax->externalSubset != NULL))
9455: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9456: ctxt->extSubSystem, ctxt->extSubURI);
9457: ctxt->inSubset = 0;
1.140 daniel 9458: ctxt->instate = XML_PARSER_PROLOG;
9459: ctxt->checkIndex = 0;
9460: #ifdef DEBUG_PUSH
9461: fprintf(stderr, "PP: entering PROLOG\n");
9462: #endif
9463: break;
9464: }
9465: case XML_PARSER_COMMENT:
9466: fprintf(stderr, "PP: internal error, state == COMMENT\n");
9467: ctxt->instate = XML_PARSER_CONTENT;
9468: #ifdef DEBUG_PUSH
9469: fprintf(stderr, "PP: entering CONTENT\n");
9470: #endif
9471: break;
9472: case XML_PARSER_PI:
9473: fprintf(stderr, "PP: internal error, state == PI\n");
9474: ctxt->instate = XML_PARSER_CONTENT;
9475: #ifdef DEBUG_PUSH
9476: fprintf(stderr, "PP: entering CONTENT\n");
9477: #endif
9478: break;
1.128 daniel 9479: case XML_PARSER_ENTITY_DECL:
1.140 daniel 9480: fprintf(stderr, "PP: internal error, state == ENTITY_DECL\n");
9481: ctxt->instate = XML_PARSER_DTD;
9482: #ifdef DEBUG_PUSH
9483: fprintf(stderr, "PP: entering DTD\n");
9484: #endif
9485: break;
1.128 daniel 9486: case XML_PARSER_ENTITY_VALUE:
1.140 daniel 9487: fprintf(stderr, "PP: internal error, state == ENTITY_VALUE\n");
9488: ctxt->instate = XML_PARSER_CONTENT;
9489: #ifdef DEBUG_PUSH
9490: fprintf(stderr, "PP: entering DTD\n");
9491: #endif
9492: break;
1.128 daniel 9493: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 9494: fprintf(stderr, "PP: internal error, state == ATTRIBUTE_VALUE\n");
1.168 daniel 9495: ctxt->instate = XML_PARSER_START_TAG;
9496: #ifdef DEBUG_PUSH
9497: fprintf(stderr, "PP: entering START_TAG\n");
9498: #endif
9499: break;
9500: case XML_PARSER_SYSTEM_LITERAL:
9501: fprintf(stderr, "PP: internal error, state == SYSTEM_LITERAL\n");
1.140 daniel 9502: ctxt->instate = XML_PARSER_START_TAG;
9503: #ifdef DEBUG_PUSH
9504: fprintf(stderr, "PP: entering START_TAG\n");
9505: #endif
9506: break;
1.128 daniel 9507: }
9508: }
1.140 daniel 9509: done:
9510: #ifdef DEBUG_PUSH
9511: fprintf(stderr, "PP: done %d\n", ret);
9512: #endif
1.128 daniel 9513: return(ret);
9514: }
9515:
9516: /**
1.143 daniel 9517: * xmlParseTry:
9518: * @ctxt: an XML parser context
9519: *
9520: * Try to progress on parsing
9521: *
9522: * Returns zero if no parsing was possible
9523: */
9524: int
9525: xmlParseTry(xmlParserCtxtPtr ctxt) {
9526: return(xmlParseTryOrFinish(ctxt, 0));
9527: }
9528:
9529: /**
1.128 daniel 9530: * xmlParseChunk:
9531: * @ctxt: an XML parser context
9532: * @chunk: an char array
9533: * @size: the size in byte of the chunk
9534: * @terminate: last chunk indicator
9535: *
9536: * Parse a Chunk of memory
9537: *
9538: * Returns zero if no error, the xmlParserErrors otherwise.
9539: */
1.140 daniel 9540: int
1.128 daniel 9541: xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
9542: int terminate) {
1.132 daniel 9543: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
1.140 daniel 9544: (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
9545: int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9546: int cur = ctxt->input->cur - ctxt->input->base;
9547:
1.132 daniel 9548: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
1.140 daniel 9549: ctxt->input->base = ctxt->input->buf->buffer->content + base;
9550: ctxt->input->cur = ctxt->input->base + cur;
9551: #ifdef DEBUG_PUSH
9552: fprintf(stderr, "PP: pushed %d\n", size);
9553: #endif
9554:
1.150 daniel 9555: if ((terminate) || (ctxt->input->buf->buffer->use > 80))
9556: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 9557: } else if (ctxt->instate != XML_PARSER_EOF)
1.143 daniel 9558: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 9559: if (terminate) {
1.151 daniel 9560: /*
9561: * Grab the encoding if it was added on-the-fly
9562: */
9563: if ((ctxt->encoding != NULL) && (ctxt->myDoc != NULL) &&
9564: (ctxt->myDoc->encoding == NULL)) {
9565: ctxt->myDoc->encoding = ctxt->encoding;
9566: ctxt->encoding = NULL;
9567: }
9568:
9569: /*
9570: * Check for termination
9571: */
1.140 daniel 9572: if ((ctxt->instate != XML_PARSER_EOF) &&
9573: (ctxt->instate != XML_PARSER_EPILOG)) {
9574: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9575: ctxt->sax->error(ctxt->userData,
9576: "Extra content at the end of the document\n");
9577: ctxt->wellFormed = 0;
1.180 daniel 9578: ctxt->disableSAX = 1;
1.140 daniel 9579: ctxt->errNo = XML_ERR_DOCUMENT_END;
9580: }
9581: if (ctxt->instate != XML_PARSER_EOF) {
1.171 daniel 9582: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9583: (!ctxt->disableSAX))
1.140 daniel 9584: ctxt->sax->endDocument(ctxt->userData);
9585: }
9586: ctxt->instate = XML_PARSER_EOF;
1.128 daniel 9587: }
9588: return((xmlParserErrors) ctxt->errNo);
9589: }
9590:
9591: /************************************************************************
9592: * *
1.98 daniel 9593: * I/O front end functions to the parser *
9594: * *
9595: ************************************************************************/
9596:
1.50 daniel 9597: /**
1.181 daniel 9598: * xmlCreatePushParserCtxt:
1.140 daniel 9599: * @sax: a SAX handler
9600: * @user_data: The user data returned on SAX callbacks
9601: * @chunk: a pointer to an array of chars
9602: * @size: number of chars in the array
9603: * @filename: an optional file name or URI
9604: *
9605: * Create a parser context for using the XML parser in push mode
9606: * To allow content encoding detection, @size should be >= 4
9607: * The value of @filename is used for fetching external entities
9608: * and error/warning reports.
9609: *
9610: * Returns the new parser context or NULL
9611: */
9612: xmlParserCtxtPtr
9613: xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9614: const char *chunk, int size, const char *filename) {
9615: xmlParserCtxtPtr ctxt;
9616: xmlParserInputPtr inputStream;
9617: xmlParserInputBufferPtr buf;
9618: xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9619:
9620: /*
1.156 daniel 9621: * plug some encoding conversion routines
1.140 daniel 9622: */
9623: if ((chunk != NULL) && (size >= 4))
1.156 daniel 9624: enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
1.140 daniel 9625:
9626: buf = xmlAllocParserInputBuffer(enc);
9627: if (buf == NULL) return(NULL);
9628:
9629: ctxt = xmlNewParserCtxt();
9630: if (ctxt == NULL) {
9631: xmlFree(buf);
9632: return(NULL);
9633: }
9634: if (sax != NULL) {
9635: if (ctxt->sax != &xmlDefaultSAXHandler)
9636: xmlFree(ctxt->sax);
9637: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9638: if (ctxt->sax == NULL) {
9639: xmlFree(buf);
9640: xmlFree(ctxt);
9641: return(NULL);
9642: }
9643: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9644: if (user_data != NULL)
9645: ctxt->userData = user_data;
9646: }
9647: if (filename == NULL) {
9648: ctxt->directory = NULL;
9649: } else {
9650: ctxt->directory = xmlParserGetDirectory(filename);
9651: }
9652:
9653: inputStream = xmlNewInputStream(ctxt);
9654: if (inputStream == NULL) {
9655: xmlFreeParserCtxt(ctxt);
9656: return(NULL);
9657: }
9658:
9659: if (filename == NULL)
9660: inputStream->filename = NULL;
9661: else
9662: inputStream->filename = xmlMemStrdup(filename);
9663: inputStream->buf = buf;
9664: inputStream->base = inputStream->buf->buffer->content;
9665: inputStream->cur = inputStream->buf->buffer->content;
1.156 daniel 9666: if (enc != XML_CHAR_ENCODING_NONE) {
9667: xmlSwitchEncoding(ctxt, enc);
9668: }
1.140 daniel 9669:
9670: inputPush(ctxt, inputStream);
9671:
9672: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9673: (ctxt->input->buf != NULL)) {
9674: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9675: #ifdef DEBUG_PUSH
9676: fprintf(stderr, "PP: pushed %d\n", size);
9677: #endif
9678: }
1.190 daniel 9679:
9680: return(ctxt);
9681: }
9682:
9683: /**
9684: * xmlCreateIOParserCtxt:
9685: * @sax: a SAX handler
9686: * @user_data: The user data returned on SAX callbacks
9687: * @ioread: an I/O read function
9688: * @ioclose: an I/O close function
9689: * @ioctx: an I/O handler
9690: * @enc: the charset encoding if known
9691: *
9692: * Create a parser context for using the XML parser with an existing
9693: * I/O stream
9694: *
9695: * Returns the new parser context or NULL
9696: */
9697: xmlParserCtxtPtr
9698: xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9699: xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9700: void *ioctx, xmlCharEncoding enc) {
9701: xmlParserCtxtPtr ctxt;
9702: xmlParserInputPtr inputStream;
9703: xmlParserInputBufferPtr buf;
9704:
9705: buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9706: if (buf == NULL) return(NULL);
9707:
9708: ctxt = xmlNewParserCtxt();
9709: if (ctxt == NULL) {
9710: xmlFree(buf);
9711: return(NULL);
9712: }
9713: if (sax != NULL) {
9714: if (ctxt->sax != &xmlDefaultSAXHandler)
9715: xmlFree(ctxt->sax);
9716: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9717: if (ctxt->sax == NULL) {
9718: xmlFree(buf);
9719: xmlFree(ctxt);
9720: return(NULL);
9721: }
9722: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9723: if (user_data != NULL)
9724: ctxt->userData = user_data;
9725: }
9726:
9727: inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9728: if (inputStream == NULL) {
9729: xmlFreeParserCtxt(ctxt);
9730: return(NULL);
9731: }
9732: inputPush(ctxt, inputStream);
1.140 daniel 9733:
9734: return(ctxt);
9735: }
9736:
9737: /**
1.181 daniel 9738: * xmlCreateDocParserCtxt:
1.123 daniel 9739: * @cur: a pointer to an array of xmlChar
1.50 daniel 9740: *
1.192 daniel 9741: * Creates a parser context for an XML in-memory document.
1.69 daniel 9742: *
9743: * Returns the new parser context or NULL
1.16 daniel 9744: */
1.69 daniel 9745: xmlParserCtxtPtr
1.123 daniel 9746: xmlCreateDocParserCtxt(xmlChar *cur) {
1.16 daniel 9747: xmlParserCtxtPtr ctxt;
1.40 daniel 9748: xmlParserInputPtr input;
1.16 daniel 9749:
1.97 daniel 9750: ctxt = xmlNewParserCtxt();
1.16 daniel 9751: if (ctxt == NULL) {
9752: return(NULL);
9753: }
1.96 daniel 9754: input = xmlNewInputStream(ctxt);
1.40 daniel 9755: if (input == NULL) {
1.97 daniel 9756: xmlFreeParserCtxt(ctxt);
1.40 daniel 9757: return(NULL);
9758: }
9759:
9760: input->base = cur;
9761: input->cur = cur;
9762:
9763: inputPush(ctxt, input);
1.69 daniel 9764: return(ctxt);
9765: }
9766:
9767: /**
1.181 daniel 9768: * xmlSAXParseDoc:
1.69 daniel 9769: * @sax: the SAX handler block
1.123 daniel 9770: * @cur: a pointer to an array of xmlChar
1.69 daniel 9771: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9772: * documents
9773: *
9774: * parse an XML in-memory document and build a tree.
9775: * It use the given SAX function block to handle the parsing callback.
9776: * If sax is NULL, fallback to the default DOM tree building routines.
9777: *
9778: * Returns the resulting document tree
9779: */
9780:
9781: xmlDocPtr
1.123 daniel 9782: xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
1.69 daniel 9783: xmlDocPtr ret;
9784: xmlParserCtxtPtr ctxt;
9785:
9786: if (cur == NULL) return(NULL);
1.16 daniel 9787:
9788:
1.69 daniel 9789: ctxt = xmlCreateDocParserCtxt(cur);
9790: if (ctxt == NULL) return(NULL);
1.74 daniel 9791: if (sax != NULL) {
9792: ctxt->sax = sax;
9793: ctxt->userData = NULL;
9794: }
1.69 daniel 9795:
1.16 daniel 9796: xmlParseDocument(ctxt);
1.72 daniel 9797: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 9798: else {
9799: ret = NULL;
1.72 daniel 9800: xmlFreeDoc(ctxt->myDoc);
9801: ctxt->myDoc = NULL;
1.59 daniel 9802: }
1.86 daniel 9803: if (sax != NULL)
9804: ctxt->sax = NULL;
1.69 daniel 9805: xmlFreeParserCtxt(ctxt);
1.16 daniel 9806:
1.1 veillard 9807: return(ret);
9808: }
9809:
1.50 daniel 9810: /**
1.181 daniel 9811: * xmlParseDoc:
1.123 daniel 9812: * @cur: a pointer to an array of xmlChar
1.55 daniel 9813: *
9814: * parse an XML in-memory document and build a tree.
9815: *
1.68 daniel 9816: * Returns the resulting document tree
1.55 daniel 9817: */
9818:
1.69 daniel 9819: xmlDocPtr
1.123 daniel 9820: xmlParseDoc(xmlChar *cur) {
1.59 daniel 9821: return(xmlSAXParseDoc(NULL, cur, 0));
1.76 daniel 9822: }
9823:
9824: /**
1.181 daniel 9825: * xmlSAXParseDTD:
1.76 daniel 9826: * @sax: the SAX handler block
9827: * @ExternalID: a NAME* containing the External ID of the DTD
9828: * @SystemID: a NAME* containing the URL to the DTD
9829: *
9830: * Load and parse an external subset.
9831: *
9832: * Returns the resulting xmlDtdPtr or NULL in case of error.
9833: */
9834:
9835: xmlDtdPtr
1.123 daniel 9836: xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9837: const xmlChar *SystemID) {
1.76 daniel 9838: xmlDtdPtr ret = NULL;
9839: xmlParserCtxtPtr ctxt;
1.83 daniel 9840: xmlParserInputPtr input = NULL;
1.76 daniel 9841: xmlCharEncoding enc;
9842:
9843: if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9844:
1.97 daniel 9845: ctxt = xmlNewParserCtxt();
1.76 daniel 9846: if (ctxt == NULL) {
9847: return(NULL);
9848: }
9849:
9850: /*
9851: * Set-up the SAX context
9852: */
9853: if (ctxt == NULL) return(NULL);
9854: if (sax != NULL) {
1.93 veillard 9855: if (ctxt->sax != NULL)
1.119 daniel 9856: xmlFree(ctxt->sax);
1.76 daniel 9857: ctxt->sax = sax;
9858: ctxt->userData = NULL;
9859: }
9860:
9861: /*
9862: * Ask the Entity resolver to load the damn thing
9863: */
9864:
9865: if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9866: input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9867: if (input == NULL) {
1.86 daniel 9868: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 9869: xmlFreeParserCtxt(ctxt);
9870: return(NULL);
9871: }
9872:
9873: /*
1.156 daniel 9874: * plug some encoding conversion routines here.
1.76 daniel 9875: */
9876: xmlPushInput(ctxt, input);
1.156 daniel 9877: enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
1.76 daniel 9878: xmlSwitchEncoding(ctxt, enc);
9879:
1.95 veillard 9880: if (input->filename == NULL)
1.156 daniel 9881: input->filename = (char *) xmlStrdup(SystemID);
1.76 daniel 9882: input->line = 1;
9883: input->col = 1;
9884: input->base = ctxt->input->cur;
9885: input->cur = ctxt->input->cur;
9886: input->free = NULL;
9887:
9888: /*
9889: * let's parse that entity knowing it's an external subset.
9890: */
1.191 daniel 9891: ctxt->inSubset = 2;
9892: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9893: ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9894: ExternalID, SystemID);
1.79 daniel 9895: xmlParseExternalSubset(ctxt, ExternalID, SystemID);
1.76 daniel 9896:
9897: if (ctxt->myDoc != NULL) {
9898: if (ctxt->wellFormed) {
1.191 daniel 9899: ret = ctxt->myDoc->extSubset;
9900: ctxt->myDoc->extSubset = NULL;
1.76 daniel 9901: } else {
9902: ret = NULL;
9903: }
9904: xmlFreeDoc(ctxt->myDoc);
9905: ctxt->myDoc = NULL;
9906: }
1.86 daniel 9907: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 9908: xmlFreeParserCtxt(ctxt);
9909:
9910: return(ret);
9911: }
9912:
9913: /**
1.181 daniel 9914: * xmlParseDTD:
1.76 daniel 9915: * @ExternalID: a NAME* containing the External ID of the DTD
9916: * @SystemID: a NAME* containing the URL to the DTD
9917: *
9918: * Load and parse an external subset.
9919: *
9920: * Returns the resulting xmlDtdPtr or NULL in case of error.
9921: */
9922:
9923: xmlDtdPtr
1.123 daniel 9924: xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
1.76 daniel 9925: return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
1.59 daniel 9926: }
9927:
9928: /**
1.181 daniel 9929: * xmlSAXParseBalancedChunk:
1.144 daniel 9930: * @ctx: an XML parser context (possibly NULL)
9931: * @sax: the SAX handler bloc (possibly NULL)
9932: * @user_data: The user data returned on SAX callbacks (possibly NULL)
9933: * @input: a parser input stream
9934: * @enc: the encoding
9935: *
9936: * Parse a well-balanced chunk of an XML document
9937: * The user has to provide SAX callback block whose routines will be
9938: * called by the parser
9939: * The allowed sequence for the Well Balanced Chunk is the one defined by
9940: * the content production in the XML grammar:
9941: *
9942: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9943: *
1.176 daniel 9944: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
1.144 daniel 9945: * the error code otherwise
9946: */
9947:
9948: int
9949: xmlSAXParseBalancedChunk(xmlParserCtxtPtr ctx, xmlSAXHandlerPtr sax,
9950: void *user_data, xmlParserInputPtr input,
9951: xmlCharEncoding enc) {
9952: xmlParserCtxtPtr ctxt;
9953: int ret;
9954:
9955: if (input == NULL) return(-1);
9956:
9957: if (ctx != NULL)
9958: ctxt = ctx;
9959: else {
9960: ctxt = xmlNewParserCtxt();
9961: if (ctxt == NULL)
9962: return(-1);
9963: if (sax == NULL)
9964: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9965: }
9966:
9967: /*
9968: * Set-up the SAX context
9969: */
9970: if (sax != NULL) {
9971: if (ctxt->sax != NULL)
9972: xmlFree(ctxt->sax);
9973: ctxt->sax = sax;
9974: ctxt->userData = user_data;
9975: }
9976:
9977: /*
9978: * plug some encoding conversion routines here.
9979: */
9980: xmlPushInput(ctxt, input);
9981: if (enc != XML_CHAR_ENCODING_NONE)
9982: xmlSwitchEncoding(ctxt, enc);
9983:
9984: /*
9985: * let's parse that entity knowing it's an external subset.
9986: */
9987: xmlParseContent(ctxt);
9988: ret = ctxt->errNo;
9989:
9990: if (ctx == NULL) {
9991: if (sax != NULL)
9992: ctxt->sax = NULL;
9993: else
9994: xmlFreeDoc(ctxt->myDoc);
9995: xmlFreeParserCtxt(ctxt);
9996: }
9997: return(ret);
9998: }
9999:
10000: /**
1.181 daniel 10001: * xmlParseExternalEntity:
10002: * @doc: the document the chunk pertains to
10003: * @sax: the SAX handler bloc (possibly NULL)
10004: * @user_data: The user data returned on SAX callbacks (possibly NULL)
1.185 daniel 10005: * @depth: Used for loop detection, use 0
1.181 daniel 10006: * @URL: the URL for the entity to load
10007: * @ID: the System ID for the entity to load
10008: * @list: the return value for the set of parsed nodes
10009: *
10010: * Parse an external general entity
10011: * An external general parsed entity is well-formed if it matches the
10012: * production labeled extParsedEnt.
10013: *
10014: * [78] extParsedEnt ::= TextDecl? content
10015: *
10016: * Returns 0 if the entity is well formed, -1 in case of args problem and
10017: * the parser error code otherwise
10018: */
10019:
10020: int
10021: xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
1.185 daniel 10022: int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
1.181 daniel 10023: xmlParserCtxtPtr ctxt;
10024: xmlDocPtr newDoc;
10025: xmlSAXHandlerPtr oldsax = NULL;
10026: int ret = 0;
10027:
1.185 daniel 10028: if (depth > 40) {
10029: return(XML_ERR_ENTITY_LOOP);
10030: }
10031:
10032:
1.181 daniel 10033:
10034: if (list != NULL)
10035: *list = NULL;
10036: if ((URL == NULL) && (ID == NULL))
10037: return(-1);
10038:
10039:
10040: ctxt = xmlCreateEntityParserCtxt(URL, ID, doc->URL);
10041: if (ctxt == NULL) return(-1);
10042: ctxt->userData = ctxt;
10043: if (sax != NULL) {
10044: oldsax = ctxt->sax;
10045: ctxt->sax = sax;
10046: if (user_data != NULL)
10047: ctxt->userData = user_data;
10048: }
10049: newDoc = xmlNewDoc(BAD_CAST "1.0");
10050: if (newDoc == NULL) {
10051: xmlFreeParserCtxt(ctxt);
10052: return(-1);
10053: }
10054: if (doc != NULL) {
10055: newDoc->intSubset = doc->intSubset;
10056: newDoc->extSubset = doc->extSubset;
10057: }
10058: if (doc->URL != NULL) {
10059: newDoc->URL = xmlStrdup(doc->URL);
10060: }
10061: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10062: if (newDoc->children == NULL) {
10063: if (sax != NULL)
10064: ctxt->sax = oldsax;
10065: xmlFreeParserCtxt(ctxt);
10066: newDoc->intSubset = NULL;
10067: newDoc->extSubset = NULL;
10068: xmlFreeDoc(newDoc);
10069: return(-1);
10070: }
10071: nodePush(ctxt, newDoc->children);
10072: if (doc == NULL) {
10073: ctxt->myDoc = newDoc;
10074: } else {
10075: ctxt->myDoc = doc;
10076: newDoc->children->doc = doc;
10077: }
10078:
10079: /*
10080: * Parse a possible text declaration first
10081: */
10082: GROW;
10083: if ((RAW == '<') && (NXT(1) == '?') &&
10084: (NXT(2) == 'x') && (NXT(3) == 'm') &&
10085: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
10086: xmlParseTextDecl(ctxt);
10087: }
10088:
10089: /*
10090: * Doing validity checking on chunk doesn't make sense
10091: */
10092: ctxt->instate = XML_PARSER_CONTENT;
10093: ctxt->validate = 0;
1.185 daniel 10094: ctxt->depth = depth;
1.181 daniel 10095:
10096: xmlParseContent(ctxt);
10097:
10098: if ((RAW == '<') && (NXT(1) == '/')) {
10099: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10100: ctxt->sax->error(ctxt->userData,
10101: "chunk is not well balanced\n");
10102: ctxt->wellFormed = 0;
10103: ctxt->disableSAX = 1;
10104: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10105: } else if (RAW != 0) {
10106: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10107: ctxt->sax->error(ctxt->userData,
10108: "extra content at the end of well balanced chunk\n");
10109: ctxt->wellFormed = 0;
10110: ctxt->disableSAX = 1;
10111: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10112: }
10113: if (ctxt->node != newDoc->children) {
10114: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10115: ctxt->sax->error(ctxt->userData,
10116: "chunk is not well balanced\n");
10117: ctxt->wellFormed = 0;
10118: ctxt->disableSAX = 1;
10119: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10120: }
10121:
10122: if (!ctxt->wellFormed) {
10123: if (ctxt->errNo == 0)
10124: ret = 1;
10125: else
10126: ret = ctxt->errNo;
10127: } else {
10128: if (list != NULL) {
10129: xmlNodePtr cur;
10130:
10131: /*
10132: * Return the newly created nodeset after unlinking it from
10133: * they pseudo parent.
10134: */
10135: cur = newDoc->children->children;
10136: *list = cur;
10137: while (cur != NULL) {
10138: cur->parent = NULL;
10139: cur = cur->next;
10140: }
10141: newDoc->children->children = NULL;
10142: }
10143: ret = 0;
10144: }
10145: if (sax != NULL)
10146: ctxt->sax = oldsax;
10147: xmlFreeParserCtxt(ctxt);
10148: newDoc->intSubset = NULL;
10149: newDoc->extSubset = NULL;
10150: xmlFreeDoc(newDoc);
10151:
10152: return(ret);
10153: }
10154:
10155: /**
10156: * xmlParseBalancedChunk:
1.176 daniel 10157: * @doc: the document the chunk pertains to
10158: * @sax: the SAX handler bloc (possibly NULL)
10159: * @user_data: The user data returned on SAX callbacks (possibly NULL)
1.185 daniel 10160: * @depth: Used for loop detection, use 0
1.176 daniel 10161: * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10162: * @list: the return value for the set of parsed nodes
10163: *
10164: * Parse a well-balanced chunk of an XML document
10165: * called by the parser
10166: * The allowed sequence for the Well Balanced Chunk is the one defined by
10167: * the content production in the XML grammar:
1.144 daniel 10168: *
1.175 daniel 10169: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10170: *
1.176 daniel 10171: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10172: * the parser error code otherwise
1.144 daniel 10173: */
10174:
1.175 daniel 10175: int
10176: xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
1.185 daniel 10177: void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
1.176 daniel 10178: xmlParserCtxtPtr ctxt;
1.175 daniel 10179: xmlDocPtr newDoc;
1.181 daniel 10180: xmlSAXHandlerPtr oldsax = NULL;
1.175 daniel 10181: int size;
1.176 daniel 10182: int ret = 0;
1.175 daniel 10183:
1.185 daniel 10184: if (depth > 40) {
10185: return(XML_ERR_ENTITY_LOOP);
10186: }
10187:
1.175 daniel 10188:
1.176 daniel 10189: if (list != NULL)
10190: *list = NULL;
10191: if (string == NULL)
10192: return(-1);
10193:
10194: size = xmlStrlen(string);
10195:
1.183 daniel 10196: ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
1.176 daniel 10197: if (ctxt == NULL) return(-1);
10198: ctxt->userData = ctxt;
1.175 daniel 10199: if (sax != NULL) {
1.176 daniel 10200: oldsax = ctxt->sax;
10201: ctxt->sax = sax;
10202: if (user_data != NULL)
10203: ctxt->userData = user_data;
1.175 daniel 10204: }
10205: newDoc = xmlNewDoc(BAD_CAST "1.0");
1.176 daniel 10206: if (newDoc == NULL) {
10207: xmlFreeParserCtxt(ctxt);
10208: return(-1);
10209: }
1.175 daniel 10210: if (doc != NULL) {
10211: newDoc->intSubset = doc->intSubset;
10212: newDoc->extSubset = doc->extSubset;
10213: }
1.176 daniel 10214: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10215: if (newDoc->children == NULL) {
10216: if (sax != NULL)
10217: ctxt->sax = oldsax;
10218: xmlFreeParserCtxt(ctxt);
10219: newDoc->intSubset = NULL;
10220: newDoc->extSubset = NULL;
10221: xmlFreeDoc(newDoc);
10222: return(-1);
10223: }
10224: nodePush(ctxt, newDoc->children);
10225: if (doc == NULL) {
10226: ctxt->myDoc = newDoc;
10227: } else {
10228: ctxt->myDoc = doc;
10229: newDoc->children->doc = doc;
10230: }
10231: ctxt->instate = XML_PARSER_CONTENT;
1.185 daniel 10232: ctxt->depth = depth;
1.176 daniel 10233:
10234: /*
10235: * Doing validity checking on chunk doesn't make sense
10236: */
10237: ctxt->validate = 0;
10238:
1.175 daniel 10239: xmlParseContent(ctxt);
1.176 daniel 10240:
10241: if ((RAW == '<') && (NXT(1) == '/')) {
10242: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10243: ctxt->sax->error(ctxt->userData,
10244: "chunk is not well balanced\n");
10245: ctxt->wellFormed = 0;
1.180 daniel 10246: ctxt->disableSAX = 1;
1.176 daniel 10247: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10248: } else if (RAW != 0) {
10249: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10250: ctxt->sax->error(ctxt->userData,
10251: "extra content at the end of well balanced chunk\n");
10252: ctxt->wellFormed = 0;
1.180 daniel 10253: ctxt->disableSAX = 1;
1.176 daniel 10254: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10255: }
10256: if (ctxt->node != newDoc->children) {
10257: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10258: ctxt->sax->error(ctxt->userData,
10259: "chunk is not well balanced\n");
10260: ctxt->wellFormed = 0;
1.180 daniel 10261: ctxt->disableSAX = 1;
1.176 daniel 10262: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10263: }
1.175 daniel 10264:
1.176 daniel 10265: if (!ctxt->wellFormed) {
10266: if (ctxt->errNo == 0)
10267: ret = 1;
10268: else
10269: ret = ctxt->errNo;
10270: } else {
10271: if (list != NULL) {
10272: xmlNodePtr cur;
1.175 daniel 10273:
1.176 daniel 10274: /*
10275: * Return the newly created nodeset after unlinking it from
10276: * they pseudo parent.
10277: */
10278: cur = newDoc->children->children;
10279: *list = cur;
10280: while (cur != NULL) {
10281: cur->parent = NULL;
10282: cur = cur->next;
10283: }
10284: newDoc->children->children = NULL;
10285: }
10286: ret = 0;
1.175 daniel 10287: }
1.176 daniel 10288: if (sax != NULL)
10289: ctxt->sax = oldsax;
1.175 daniel 10290: xmlFreeParserCtxt(ctxt);
10291: newDoc->intSubset = NULL;
10292: newDoc->extSubset = NULL;
1.176 daniel 10293: xmlFreeDoc(newDoc);
1.175 daniel 10294:
1.176 daniel 10295: return(ret);
1.144 daniel 10296: }
10297:
10298: /**
1.181 daniel 10299: * xmlParseBalancedChunkFile:
1.144 daniel 10300: * @doc: the document the chunk pertains to
10301: *
10302: * Parse a well-balanced chunk of an XML document contained in a file
10303: *
10304: * Returns the resulting list of nodes resulting from the parsing,
10305: * they are not added to @node
10306: */
10307:
10308: xmlNodePtr
10309: xmlParseBalancedChunkFile(xmlDocPtr doc, xmlNodePtr node) {
1.156 daniel 10310: /* TODO !!! */
10311: return(NULL);
1.144 daniel 10312: }
10313:
10314: /**
1.181 daniel 10315: * xmlRecoverDoc:
1.123 daniel 10316: * @cur: a pointer to an array of xmlChar
1.59 daniel 10317: *
10318: * parse an XML in-memory document and build a tree.
10319: * In the case the document is not Well Formed, a tree is built anyway
10320: *
1.68 daniel 10321: * Returns the resulting document tree
1.59 daniel 10322: */
10323:
1.69 daniel 10324: xmlDocPtr
1.123 daniel 10325: xmlRecoverDoc(xmlChar *cur) {
1.59 daniel 10326: return(xmlSAXParseDoc(NULL, cur, 1));
1.55 daniel 10327: }
10328:
10329: /**
1.181 daniel 10330: * xmlCreateEntityParserCtxt:
10331: * @URL: the entity URL
10332: * @ID: the entity PUBLIC ID
10333: * @base: a posible base for the target URI
10334: *
10335: * Create a parser context for an external entity
10336: * Automatic support for ZLIB/Compress compressed document is provided
10337: * by default if found at compile-time.
10338: *
10339: * Returns the new parser context or NULL
10340: */
10341: xmlParserCtxtPtr
10342: xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
10343: const xmlChar *base) {
10344: xmlParserCtxtPtr ctxt;
10345: xmlParserInputPtr inputStream;
10346: char *directory = NULL;
10347:
10348: ctxt = xmlNewParserCtxt();
10349: if (ctxt == NULL) {
10350: return(NULL);
10351: }
10352:
1.182 daniel 10353: inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
1.181 daniel 10354: if (inputStream == NULL) {
10355: xmlFreeParserCtxt(ctxt);
10356: return(NULL);
10357: }
10358:
10359: inputPush(ctxt, inputStream);
10360:
10361: if ((ctxt->directory == NULL) && (directory == NULL))
1.182 daniel 10362: directory = xmlParserGetDirectory((char *)URL);
1.181 daniel 10363: if ((ctxt->directory == NULL) && (directory != NULL))
10364: ctxt->directory = directory;
10365:
10366: return(ctxt);
10367: }
10368:
10369: /**
10370: * xmlCreateFileParserCtxt:
1.50 daniel 10371: * @filename: the filename
10372: *
1.69 daniel 10373: * Create a parser context for a file content.
10374: * Automatic support for ZLIB/Compress compressed document is provided
10375: * by default if found at compile-time.
1.50 daniel 10376: *
1.69 daniel 10377: * Returns the new parser context or NULL
1.9 httpng 10378: */
1.69 daniel 10379: xmlParserCtxtPtr
10380: xmlCreateFileParserCtxt(const char *filename)
10381: {
10382: xmlParserCtxtPtr ctxt;
1.40 daniel 10383: xmlParserInputPtr inputStream;
1.91 daniel 10384: xmlParserInputBufferPtr buf;
1.111 daniel 10385: char *directory = NULL;
1.9 httpng 10386:
1.91 daniel 10387: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
10388: if (buf == NULL) return(NULL);
1.9 httpng 10389:
1.97 daniel 10390: ctxt = xmlNewParserCtxt();
1.16 daniel 10391: if (ctxt == NULL) {
10392: return(NULL);
10393: }
1.97 daniel 10394:
1.96 daniel 10395: inputStream = xmlNewInputStream(ctxt);
1.40 daniel 10396: if (inputStream == NULL) {
1.97 daniel 10397: xmlFreeParserCtxt(ctxt);
1.40 daniel 10398: return(NULL);
10399: }
10400:
1.119 daniel 10401: inputStream->filename = xmlMemStrdup(filename);
1.91 daniel 10402: inputStream->buf = buf;
10403: inputStream->base = inputStream->buf->buffer->content;
10404: inputStream->cur = inputStream->buf->buffer->content;
1.16 daniel 10405:
1.40 daniel 10406: inputPush(ctxt, inputStream);
1.110 daniel 10407: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 10408: directory = xmlParserGetDirectory(filename);
10409: if ((ctxt->directory == NULL) && (directory != NULL))
1.110 daniel 10410: ctxt->directory = directory;
1.106 daniel 10411:
1.69 daniel 10412: return(ctxt);
10413: }
10414:
10415: /**
1.181 daniel 10416: * xmlSAXParseFile:
1.69 daniel 10417: * @sax: the SAX handler block
10418: * @filename: the filename
10419: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10420: * documents
10421: *
10422: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10423: * compressed document is provided by default if found at compile-time.
10424: * It use the given SAX function block to handle the parsing callback.
10425: * If sax is NULL, fallback to the default DOM tree building routines.
10426: *
10427: * Returns the resulting document tree
10428: */
10429:
1.79 daniel 10430: xmlDocPtr
10431: xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
1.69 daniel 10432: int recovery) {
10433: xmlDocPtr ret;
10434: xmlParserCtxtPtr ctxt;
1.111 daniel 10435: char *directory = NULL;
1.69 daniel 10436:
10437: ctxt = xmlCreateFileParserCtxt(filename);
10438: if (ctxt == NULL) return(NULL);
1.74 daniel 10439: if (sax != NULL) {
1.93 veillard 10440: if (ctxt->sax != NULL)
1.119 daniel 10441: xmlFree(ctxt->sax);
1.74 daniel 10442: ctxt->sax = sax;
10443: ctxt->userData = NULL;
10444: }
1.106 daniel 10445:
1.110 daniel 10446: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 10447: directory = xmlParserGetDirectory(filename);
10448: if ((ctxt->directory == NULL) && (directory != NULL))
1.156 daniel 10449: ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
1.16 daniel 10450:
10451: xmlParseDocument(ctxt);
1.40 daniel 10452:
1.72 daniel 10453: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 10454: else {
10455: ret = NULL;
1.72 daniel 10456: xmlFreeDoc(ctxt->myDoc);
10457: ctxt->myDoc = NULL;
1.59 daniel 10458: }
1.86 daniel 10459: if (sax != NULL)
10460: ctxt->sax = NULL;
1.69 daniel 10461: xmlFreeParserCtxt(ctxt);
1.20 daniel 10462:
10463: return(ret);
10464: }
10465:
1.55 daniel 10466: /**
1.181 daniel 10467: * xmlParseFile:
1.55 daniel 10468: * @filename: the filename
10469: *
10470: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10471: * compressed document is provided by default if found at compile-time.
10472: *
1.68 daniel 10473: * Returns the resulting document tree
1.55 daniel 10474: */
10475:
1.79 daniel 10476: xmlDocPtr
10477: xmlParseFile(const char *filename) {
1.59 daniel 10478: return(xmlSAXParseFile(NULL, filename, 0));
10479: }
10480:
10481: /**
1.181 daniel 10482: * xmlRecoverFile:
1.59 daniel 10483: * @filename: the filename
10484: *
10485: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10486: * compressed document is provided by default if found at compile-time.
10487: * In the case the document is not Well Formed, a tree is built anyway
10488: *
1.68 daniel 10489: * Returns the resulting document tree
1.59 daniel 10490: */
10491:
1.79 daniel 10492: xmlDocPtr
10493: xmlRecoverFile(const char *filename) {
1.59 daniel 10494: return(xmlSAXParseFile(NULL, filename, 1));
1.55 daniel 10495: }
1.32 daniel 10496:
1.50 daniel 10497: /**
1.181 daniel 10498: * xmlCreateMemoryParserCtxt:
10499: * @buffer: a pointer to a zero terminated char array
10500: * @size: the size of the array (without the trailing 0)
1.50 daniel 10501: *
1.69 daniel 10502: * Create a parser context for an XML in-memory document.
1.50 daniel 10503: *
1.69 daniel 10504: * Returns the new parser context or NULL
1.20 daniel 10505: */
1.69 daniel 10506: xmlParserCtxtPtr
10507: xmlCreateMemoryParserCtxt(char *buffer, int size) {
1.20 daniel 10508: xmlParserCtxtPtr ctxt;
1.40 daniel 10509: xmlParserInputPtr input;
10510:
1.179 daniel 10511: if (buffer[size] != 0)
1.181 daniel 10512: return(NULL);
1.40 daniel 10513:
1.97 daniel 10514: ctxt = xmlNewParserCtxt();
1.181 daniel 10515: if (ctxt == NULL)
1.20 daniel 10516: return(NULL);
1.97 daniel 10517:
1.96 daniel 10518: input = xmlNewInputStream(ctxt);
1.40 daniel 10519: if (input == NULL) {
1.97 daniel 10520: xmlFreeParserCtxt(ctxt);
1.40 daniel 10521: return(NULL);
10522: }
1.20 daniel 10523:
1.40 daniel 10524: input->filename = NULL;
10525: input->line = 1;
10526: input->col = 1;
1.96 daniel 10527: input->buf = NULL;
1.91 daniel 10528: input->consumed = 0;
1.75 daniel 10529:
1.116 daniel 10530: input->base = BAD_CAST buffer;
10531: input->cur = BAD_CAST buffer;
1.69 daniel 10532: input->free = NULL;
1.20 daniel 10533:
1.40 daniel 10534: inputPush(ctxt, input);
1.69 daniel 10535: return(ctxt);
10536: }
10537:
10538: /**
1.181 daniel 10539: * xmlSAXParseMemory:
1.69 daniel 10540: * @sax: the SAX handler block
10541: * @buffer: an pointer to a char array
1.127 daniel 10542: * @size: the size of the array
10543: * @recovery: work in recovery mode, i.e. tries to read not Well Formed
1.69 daniel 10544: * documents
10545: *
10546: * parse an XML in-memory block and use the given SAX function block
10547: * to handle the parsing callback. If sax is NULL, fallback to the default
10548: * DOM tree building routines.
10549: *
10550: * Returns the resulting document tree
10551: */
10552: xmlDocPtr
10553: xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
10554: xmlDocPtr ret;
10555: xmlParserCtxtPtr ctxt;
10556:
10557: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10558: if (ctxt == NULL) return(NULL);
1.74 daniel 10559: if (sax != NULL) {
10560: ctxt->sax = sax;
10561: ctxt->userData = NULL;
10562: }
1.20 daniel 10563:
10564: xmlParseDocument(ctxt);
1.40 daniel 10565:
1.72 daniel 10566: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 10567: else {
10568: ret = NULL;
1.72 daniel 10569: xmlFreeDoc(ctxt->myDoc);
10570: ctxt->myDoc = NULL;
1.59 daniel 10571: }
1.86 daniel 10572: if (sax != NULL)
10573: ctxt->sax = NULL;
1.69 daniel 10574: xmlFreeParserCtxt(ctxt);
1.16 daniel 10575:
1.9 httpng 10576: return(ret);
1.17 daniel 10577: }
10578:
1.55 daniel 10579: /**
1.181 daniel 10580: * xmlParseMemory:
1.68 daniel 10581: * @buffer: an pointer to a char array
1.55 daniel 10582: * @size: the size of the array
10583: *
10584: * parse an XML in-memory block and build a tree.
10585: *
1.68 daniel 10586: * Returns the resulting document tree
1.55 daniel 10587: */
10588:
10589: xmlDocPtr xmlParseMemory(char *buffer, int size) {
1.59 daniel 10590: return(xmlSAXParseMemory(NULL, buffer, size, 0));
10591: }
10592:
10593: /**
1.181 daniel 10594: * xmlRecoverMemory:
1.68 daniel 10595: * @buffer: an pointer to a char array
1.59 daniel 10596: * @size: the size of the array
10597: *
10598: * parse an XML in-memory block and build a tree.
10599: * In the case the document is not Well Formed, a tree is built anyway
10600: *
1.68 daniel 10601: * Returns the resulting document tree
1.59 daniel 10602: */
10603:
10604: xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
10605: return(xmlSAXParseMemory(NULL, buffer, size, 1));
1.17 daniel 10606: }
10607:
10608:
1.50 daniel 10609: /**
10610: * xmlSetupParserForBuffer:
10611: * @ctxt: an XML parser context
1.123 daniel 10612: * @buffer: a xmlChar * buffer
1.50 daniel 10613: * @filename: a file name
10614: *
1.19 daniel 10615: * Setup the parser context to parse a new buffer; Clears any prior
10616: * contents from the parser context. The buffer parameter must not be
10617: * NULL, but the filename parameter can be
10618: */
1.55 daniel 10619: void
1.123 daniel 10620: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
1.17 daniel 10621: const char* filename)
10622: {
1.96 daniel 10623: xmlParserInputPtr input;
1.40 daniel 10624:
1.96 daniel 10625: input = xmlNewInputStream(ctxt);
10626: if (input == NULL) {
10627: perror("malloc");
1.119 daniel 10628: xmlFree(ctxt);
1.145 daniel 10629: return;
1.96 daniel 10630: }
10631:
10632: xmlClearParserCtxt(ctxt);
10633: if (filename != NULL)
1.119 daniel 10634: input->filename = xmlMemStrdup(filename);
1.96 daniel 10635: input->base = buffer;
10636: input->cur = buffer;
10637: inputPush(ctxt, input);
1.17 daniel 10638: }
10639:
1.123 daniel 10640: /**
10641: * xmlSAXUserParseFile:
10642: * @sax: a SAX handler
10643: * @user_data: The user data returned on SAX callbacks
10644: * @filename: a file name
10645: *
10646: * parse an XML file and call the given SAX handler routines.
10647: * Automatic support for ZLIB/Compress compressed document is provided
10648: *
10649: * Returns 0 in case of success or a error number otherwise
10650: */
1.131 daniel 10651: int
10652: xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10653: const char *filename) {
1.123 daniel 10654: int ret = 0;
10655: xmlParserCtxtPtr ctxt;
10656:
10657: ctxt = xmlCreateFileParserCtxt(filename);
10658: if (ctxt == NULL) return -1;
1.134 daniel 10659: if (ctxt->sax != &xmlDefaultSAXHandler)
10660: xmlFree(ctxt->sax);
1.123 daniel 10661: ctxt->sax = sax;
1.140 daniel 10662: if (user_data != NULL)
10663: ctxt->userData = user_data;
1.123 daniel 10664:
10665: xmlParseDocument(ctxt);
10666:
10667: if (ctxt->wellFormed)
10668: ret = 0;
10669: else {
10670: if (ctxt->errNo != 0)
10671: ret = ctxt->errNo;
10672: else
10673: ret = -1;
10674: }
10675: if (sax != NULL)
10676: ctxt->sax = NULL;
10677: xmlFreeParserCtxt(ctxt);
10678:
10679: return ret;
10680: }
10681:
10682: /**
10683: * xmlSAXUserParseMemory:
10684: * @sax: a SAX handler
10685: * @user_data: The user data returned on SAX callbacks
10686: * @buffer: an in-memory XML document input
1.127 daniel 10687: * @size: the length of the XML document in bytes
1.123 daniel 10688: *
10689: * A better SAX parsing routine.
10690: * parse an XML in-memory buffer and call the given SAX handler routines.
10691: *
10692: * Returns 0 in case of success or a error number otherwise
10693: */
10694: int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
10695: char *buffer, int size) {
10696: int ret = 0;
10697: xmlParserCtxtPtr ctxt;
10698:
10699: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10700: if (ctxt == NULL) return -1;
10701: ctxt->sax = sax;
10702: ctxt->userData = user_data;
10703:
10704: xmlParseDocument(ctxt);
10705:
10706: if (ctxt->wellFormed)
10707: ret = 0;
10708: else {
10709: if (ctxt->errNo != 0)
10710: ret = ctxt->errNo;
10711: else
10712: ret = -1;
10713: }
10714: if (sax != NULL)
10715: ctxt->sax = NULL;
10716: xmlFreeParserCtxt(ctxt);
10717:
10718: return ret;
10719: }
10720:
1.32 daniel 10721:
1.98 daniel 10722: /************************************************************************
10723: * *
1.127 daniel 10724: * Miscellaneous *
1.98 daniel 10725: * *
10726: ************************************************************************/
10727:
/**
 * xmlCleanupParser:
 *
 * Cleanup function for the XML parser: runs the cleanup routines of the
 * character encoding handlers and of the predefined entities, in that
 * order, to reclaim parsing related global memory.
 * It doesn't deallocate any document related memory. Calling this
 * function should not prevent reusing the parser.
 */

void
xmlCleanupParser(void)
{
    /* Registered character encoding handlers first ... */
    xmlCleanupCharEncodingHandlers();

    /* ... then the table of predefined entities. */
    xmlCleanupPredefinedEntities();
}
1.98 daniel 10742:
1.50 daniel 10743: /**
10744: * xmlParserFindNodeInfo:
10745: * @ctxt: an XML parser context
10746: * @node: an XML node within the tree
10747: *
10748: * Find the parser node info struct for a given node
10749: *
1.68 daniel 10750: * Returns an xmlParserNodeInfo block pointer or NULL
1.32 daniel 10751: */
10752: const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
10753: const xmlNode* node)
10754: {
10755: unsigned long pos;
10756:
10757: /* Find position where node should be at */
10758: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
10759: if ( ctx->node_seq.buffer[pos].node == node )
10760: return &ctx->node_seq.buffer[pos];
10761: else
10762: return NULL;
10763: }
10764:
10765:
1.50 daniel 10766: /**
1.181 daniel 10767: * xmlInitNodeInfoSeq:
1.50 daniel 10768: * @seq: a node info sequence pointer
10769: *
10770: * -- Initialize (set to initial state) node info sequence
1.32 daniel 10771: */
1.55 daniel 10772: void
10773: xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 10774: {
10775: seq->length = 0;
10776: seq->maximum = 0;
10777: seq->buffer = NULL;
10778: }
10779:
1.50 daniel 10780: /**
1.181 daniel 10781: * xmlClearNodeInfoSeq:
1.50 daniel 10782: * @seq: a node info sequence pointer
10783: *
10784: * -- Clear (release memory and reinitialize) node
1.32 daniel 10785: * info sequence
10786: */
1.55 daniel 10787: void
10788: xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 10789: {
10790: if ( seq->buffer != NULL )
1.119 daniel 10791: xmlFree(seq->buffer);
1.32 daniel 10792: xmlInitNodeInfoSeq(seq);
10793: }
10794:
10795:
1.50 daniel 10796: /**
10797: * xmlParserFindNodeInfoIndex:
10798: * @seq: a node info sequence pointer
10799: * @node: an XML node pointer
10800: *
10801: *
1.32 daniel 10802: * xmlParserFindNodeInfoIndex : Find the index that the info record for
10803: * the given node is or should be at in a sorted sequence
1.68 daniel 10804: *
10805: * Returns a long indicating the position of the record
1.32 daniel 10806: */
10807: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
10808: const xmlNode* node)
10809: {
10810: unsigned long upper, lower, middle;
10811: int found = 0;
10812:
10813: /* Do a binary search for the key */
10814: lower = 1;
10815: upper = seq->length;
10816: middle = 0;
10817: while ( lower <= upper && !found) {
10818: middle = lower + (upper - lower) / 2;
10819: if ( node == seq->buffer[middle - 1].node )
10820: found = 1;
10821: else if ( node < seq->buffer[middle - 1].node )
10822: upper = middle - 1;
10823: else
10824: lower = middle + 1;
10825: }
10826:
10827: /* Return position */
10828: if ( middle == 0 || seq->buffer[middle - 1].node < node )
10829: return middle;
10830: else
10831: return middle - 1;
10832: }
10833:
10834:
1.50 daniel 10835: /**
10836: * xmlParserAddNodeInfo:
10837: * @ctxt: an XML parser context
1.68 daniel 10838: * @info: a node info sequence pointer
1.50 daniel 10839: *
10840: * Insert node info record into the sorted sequence
1.32 daniel 10841: */
1.55 daniel 10842: void
10843: xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
1.68 daniel 10844: const xmlParserNodeInfo* info)
1.32 daniel 10845: {
10846: unsigned long pos;
10847: static unsigned int block_size = 5;
10848:
10849: /* Find pos and check to see if node is already in the sequence */
1.55 daniel 10850: pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
10851: if ( pos < ctxt->node_seq.length
10852: && ctxt->node_seq.buffer[pos].node == info->node ) {
10853: ctxt->node_seq.buffer[pos] = *info;
1.32 daniel 10854: }
10855:
10856: /* Otherwise, we need to add new node to buffer */
10857: else {
10858: /* Expand buffer by 5 if needed */
1.55 daniel 10859: if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
1.32 daniel 10860: xmlParserNodeInfo* tmp_buffer;
1.55 daniel 10861: unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
10862: *(ctxt->node_seq.maximum + block_size));
1.32 daniel 10863:
1.55 daniel 10864: if ( ctxt->node_seq.buffer == NULL )
1.119 daniel 10865: tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
1.32 daniel 10866: else
1.119 daniel 10867: tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
1.32 daniel 10868:
10869: if ( tmp_buffer == NULL ) {
1.55 daniel 10870: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 10871: ctxt->sax->error(ctxt->userData, "Out of memory\n");
1.123 daniel 10872: ctxt->errNo = XML_ERR_NO_MEMORY;
1.32 daniel 10873: return;
10874: }
1.55 daniel 10875: ctxt->node_seq.buffer = tmp_buffer;
10876: ctxt->node_seq.maximum += block_size;
1.32 daniel 10877: }
10878:
10879: /* If position is not at end, move elements out of the way */
1.55 daniel 10880: if ( pos != ctxt->node_seq.length ) {
1.32 daniel 10881: unsigned long i;
10882:
1.55 daniel 10883: for ( i = ctxt->node_seq.length; i > pos; i-- )
10884: ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
1.32 daniel 10885: }
10886:
10887: /* Copy element and increase length */
1.55 daniel 10888: ctxt->node_seq.buffer[pos] = *info;
10889: ctxt->node_seq.length++;
1.32 daniel 10890: }
10891: }
1.77 daniel 10892:
1.98 daniel 10893:
10894: /**
1.181 daniel 10895: * xmlSubstituteEntitiesDefault:
1.98 daniel 10896: * @val: int 0 or 1
10897: *
10898: * Set and return the previous value for default entity support.
10899: * Initially the parser always keep entity references instead of substituting
10900: * entity values in the output. This function has to be used to change the
10901: * default parser behaviour
10902: * SAX::subtituteEntities() has to be used for changing that on a file by
10903: * file basis.
10904: *
10905: * Returns the last value for 0 for no substitution, 1 for substitution.
10906: */
10907:
10908: int
10909: xmlSubstituteEntitiesDefault(int val) {
10910: int old = xmlSubstituteEntitiesDefaultValue;
10911:
10912: xmlSubstituteEntitiesDefaultValue = val;
1.180 daniel 10913: return(old);
10914: }
10915:
10916: /**
10917: * xmlKeepBlanksDefault:
10918: * @val: int 0 or 1
10919: *
10920: * Set and return the previous value for default blanks text nodes support.
10921: * The 1.x version of the parser used an heuristic to try to detect
10922: * ignorable white spaces. As a result the SAX callback was generating
10923: * ignorableWhitespace() callbacks instead of characters() one, and when
10924: * using the DOM output text nodes containing those blanks were not generated.
10925: * The 2.x and later version will switch to the XML standard way and
10926: * ignorableWhitespace() are only generated when running the parser in
10927: * validating mode and when the current element doesn't allow CDATA or
10928: * mixed content.
10929: * This function is provided as a way to force the standard behaviour
10930: * on 1.X libs and to switch back to the old mode for compatibility when
10931: * running 1.X client code on 2.X . Upgrade of 1.X code should be done
10932: * by using xmlIsBlankNode() commodity function to detect the "empty"
10933: * nodes generated.
10934: * This value also affect autogeneration of indentation when saving code
10935: * if blanks sections are kept, indentation is not generated.
10936: *
10937: * Returns the last value for 0 for no substitution, 1 for substitution.
10938: */
10939:
10940: int
10941: xmlKeepBlanksDefault(int val) {
10942: int old = xmlKeepBlanksDefaultValue;
10943:
10944: xmlKeepBlanksDefaultValue = val;
10945: xmlIndentTreeOutput = !val;
1.98 daniel 10946: return(old);
10947: }
1.77 daniel 10948:
Webmaster