Annotation of XML/parser.c, revision 1.201
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.15 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.60 daniel 6: * Daniel.Veillard@w3.org
1.1 veillard 7: */
8:
1.26 daniel 9: #ifdef WIN32
1.138 daniel 10: #include "win32config.h"
1.26 daniel 11: #else
1.121 daniel 12: #include "config.h"
1.26 daniel 13: #endif
1.121 daniel 14:
1.1 veillard 15: #include <stdio.h>
1.121 daniel 16: #include <string.h> /* for memset() only */
17: #ifdef HAVE_CTYPE_H
1.1 veillard 18: #include <ctype.h>
1.121 daniel 19: #endif
20: #ifdef HAVE_STDLIB_H
1.50 daniel 21: #include <stdlib.h>
1.121 daniel 22: #endif
23: #ifdef HAVE_SYS_STAT_H
1.9 httpng 24: #include <sys/stat.h>
1.121 daniel 25: #endif
1.9 httpng 26: #ifdef HAVE_FCNTL_H
27: #include <fcntl.h>
28: #endif
1.10 httpng 29: #ifdef HAVE_UNISTD_H
30: #include <unistd.h>
31: #endif
1.20 daniel 32: #ifdef HAVE_ZLIB_H
33: #include <zlib.h>
34: #endif
1.1 veillard 35:
1.188 daniel 36: #include <libxml/xmlmemory.h>
37: #include <libxml/tree.h>
38: #include <libxml/parser.h>
39: #include <libxml/entities.h>
40: #include <libxml/encoding.h>
41: #include <libxml/valid.h>
42: #include <libxml/parserInternals.h>
43: #include <libxml/xmlIO.h>
1.193 daniel 44: #include <libxml/uri.h>
1.122 daniel 45: #include "xml-error.h"
1.1 veillard 46:
/* Two buffer-size constants; their users are not visible in this part of the file. */
1.140 daniel 47: #define XML_PARSER_BIG_BUFFER_SIZE 1000
48: #define XML_PARSER_BUFFER_SIZE 100
49:
/* Global flag, initialised to 1. NOTE(review): presumably the default for emitting warnings - confirm against its users. */
1.160 daniel 50: int xmlGetWarningsDefaultValue = 1;
1.86 daniel 51:
1.139 daniel 52: /*
53: * List of XML prefixed PI allowed by W3C specs
54: */
55:
/* NULL-terminated array of PI target names; currently holds only "xml-stylesheet". */
56: const char *xmlW3CPIs[] = {
57: "xml-stylesheet",
58: NULL
59: };
1.91 daniel 60:
/* Forward declarations for functions used by the macros and functions below. */
1.151 daniel 61: void xmlParserHandleReference(xmlParserCtxtPtr ctxt);
62: void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
63: xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
64: const xmlChar **str);
1.200 daniel 65:
66: /*
67: * Version handling
68: */
/* Version string of the library as compiled, taken from the LIBXML_VERSION_STRING macro. */
69: const char *xmlParserVersion = LIBXML_VERSION_STRING;
70:
71: /*
72: * xmlCheckVersion:
73: * @version: the include version number
74: *
75: * check the compiled lib version against the include one.
76: * This can warn or immediately kill the application
77: */
78: void
79: xmlCheckVersion(int version) {
80: int myversion = LIBXML_VERSION;
81:
82: if ((myversion / 10000) != (version / 10000)) {
83: fprintf(stderr,
84: "Fatal: program compiled against libxml %d using libxml %d\n",
85: (version / 10000), (myversion / 10000));
86: exit(1);
87: }
88: if ((myversion / 100) < (version / 100)) {
89: fprintf(stderr,
90: "Warning: program compiled against libxml %d using older %d\n",
91: (version / 100), (myversion / 100));
92: }
93: }
94:
95:
1.91 daniel 96: /************************************************************************
97: * *
98: * Input handling functions for progressive parsing *
99: * *
100: ************************************************************************/
101:
102: /* #define DEBUG_INPUT */
1.140 daniel 103: /* #define DEBUG_STACK */
104: /* #define DEBUG_PUSH */
105:
1.91 daniel 106:
/*
 * INPUT_CHUNK is the length hint passed to xmlParserInputGrow() by the
 * macros below, and the threshold used by xmlParserInputGrow() and
 * xmlParserInputShrink() to decide whether enough data is buffered.
 */
1.110 daniel 107: #define INPUT_CHUNK 250
108: /* we need to keep enough input to show errors in context */
/* LINE_LEN is the number of already-consumed xmlChars xmlParserInputShrink() keeps in the buffer. */
109: #define LINE_LEN 80
1.91 daniel 110:
#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/*
 * check_buffer:
 * @in: an XML parser input whose buf and buf->buffer are non-NULL
 *
 * Debug helper, only compiled when DEBUG_INPUT is defined.  Reports on
 * stderr when the base/cur pointers of @in are inconsistent with the
 * underlying buffer, then dumps the buffer state.
 *
 * Pointers are printed with %p and the cur offset as a long: casting a
 * pointer to int and printing a ptrdiff_t with %d is not portable to
 * platforms where pointers are wider than int.
 */
void check_buffer(xmlParserInputPtr in) {
    if (in->base != in->buf->buffer->content) {
        fprintf(stderr, "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        fprintf(stderr, "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + in->buf->buffer->use) {
        fprintf(stderr, "xmlParserInput: cur > base + use problem\n");
    }
    fprintf(stderr, "buffer %p : content %p, cur %ld, use %d, size %d\n",
            (void *) in, (void *) in->buf->buffer->content,
            (long) (in->cur - in->base),
            (int) in->buf->buffer->use, (int) in->buf->buffer->size);
}

#else
#define CHECK_BUFFER(in)
#endif
132:
1.91 daniel 133:
134: /**
135: * xmlParserInputRead:
136: * @in: an XML parser input
137: * @len: an indicative size for the lookahead
138: *
139: * This function refresh the input for the parser. It doesn't try to
140: * preserve pointers to the input buffer, and discard already read data
141: *
1.123 daniel 142: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
1.91 daniel 143: * end of this entity
144: */
145: int
146: xmlParserInputRead(xmlParserInputPtr in, int len) {
147: int ret;
148: int used;
149: int index;
150:
151: #ifdef DEBUG_INPUT
152: fprintf(stderr, "Read\n");
153: #endif
154: if (in->buf == NULL) return(-1);
155: if (in->base == NULL) return(-1);
156: if (in->cur == NULL) return(-1);
157: if (in->buf->buffer == NULL) return(-1);
158:
159: CHECK_BUFFER(in);
160:
161: used = in->cur - in->buf->buffer->content;
162: ret = xmlBufferShrink(in->buf->buffer, used);
163: if (ret > 0) {
164: in->cur -= ret;
165: in->consumed += ret;
166: }
167: ret = xmlParserInputBufferRead(in->buf, len);
168: if (in->base != in->buf->buffer->content) {
169: /*
170: * the buffer has been realloced
171: */
172: index = in->cur - in->base;
173: in->base = in->buf->buffer->content;
174: in->cur = &in->buf->buffer->content[index];
175: }
176:
177: CHECK_BUFFER(in);
178:
179: return(ret);
180: }
181:
182: /**
183: * xmlParserInputGrow:
184: * @in: an XML parser input
185: * @len: an indicative size for the lookahead
186: *
187: * This function increase the input for the parser. It tries to
188: * preserve pointers to the input buffer, and keep already read data
189: *
1.123 daniel 190: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
1.91 daniel 191: * end of this entity
192: */
193: int
194: xmlParserInputGrow(xmlParserInputPtr in, int len) {
195: int ret;
196: int index;
197:
198: #ifdef DEBUG_INPUT
199: fprintf(stderr, "Grow\n");
200: #endif
201: if (in->buf == NULL) return(-1);
202: if (in->base == NULL) return(-1);
203: if (in->cur == NULL) return(-1);
204: if (in->buf->buffer == NULL) return(-1);
205:
206: CHECK_BUFFER(in);
207:
208: index = in->cur - in->base;
209: if (in->buf->buffer->use > index + INPUT_CHUNK) {
210:
211: CHECK_BUFFER(in);
212:
213: return(0);
214: }
1.189 daniel 215: if (in->buf->readcallback != NULL)
1.140 daniel 216: ret = xmlParserInputBufferGrow(in->buf, len);
217: else
218: return(0);
1.135 daniel 219:
220: /*
221: * NOTE : in->base may be a "dandling" i.e. freed pointer in this
222: * block, but we use it really as an integer to do some
223: * pointer arithmetic. Insure will raise it as a bug but in
224: * that specific case, that's not !
225: */
1.91 daniel 226: if (in->base != in->buf->buffer->content) {
227: /*
228: * the buffer has been realloced
229: */
230: index = in->cur - in->base;
231: in->base = in->buf->buffer->content;
232: in->cur = &in->buf->buffer->content[index];
233: }
234:
235: CHECK_BUFFER(in);
236:
237: return(ret);
238: }
239:
240: /**
241: * xmlParserInputShrink:
242: * @in: an XML parser input
243: *
244: * This function removes used input for the parser.
245: */
246: void
247: xmlParserInputShrink(xmlParserInputPtr in) {
248: int used;
249: int ret;
250: int index;
251:
252: #ifdef DEBUG_INPUT
253: fprintf(stderr, "Shrink\n");
254: #endif
255: if (in->buf == NULL) return;
256: if (in->base == NULL) return;
257: if (in->cur == NULL) return;
258: if (in->buf->buffer == NULL) return;
259:
260: CHECK_BUFFER(in);
261:
262: used = in->cur - in->buf->buffer->content;
263: if (used > INPUT_CHUNK) {
1.110 daniel 264: ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1.91 daniel 265: if (ret > 0) {
266: in->cur -= ret;
267: in->consumed += ret;
268: }
269: }
270:
271: CHECK_BUFFER(in);
272:
273: if (in->buf->buffer->use > INPUT_CHUNK) {
274: return;
275: }
276: xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
277: if (in->base != in->buf->buffer->content) {
278: /*
279: * the buffer has been realloced
280: */
281: index = in->cur - in->base;
282: in->base = in->buf->buffer->content;
283: in->cur = &in->buf->buffer->content[index];
284: }
285:
286: CHECK_BUFFER(in);
287: }
288:
1.45 daniel 289: /************************************************************************
290: * *
291: * Parser stacks related functions and macros *
292: * *
293: ************************************************************************/
1.79 daniel 294:
/*
 * Global defaults, initialised to 0, 0 and 1 respectively.
 * NOTE(review): judging by the names they are presumably copied into new
 * parser contexts - that code is not in this part of the file; confirm.
 */
295: int xmlSubstituteEntitiesDefaultValue = 0;
1.100 daniel 296: int xmlDoValidityCheckingDefaultValue = 0;
1.180 daniel 297: int xmlKeepBlanksDefaultValue = 1;
/* Forward declaration; the definition is not in this part of the file. */
1.135 daniel 298: xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
299: const xmlChar ** str);
1.79 daniel 300:
1.1 veillard 301: /*
1.40 daniel 302: * Generic function for accessing stacks in the Parser Context
1.1 veillard 303: */
304:
1.140 daniel 305: #define PUSH_AND_POP(scope, type, name) \
306: scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
1.31 daniel 307: if (ctxt->name##Nr >= ctxt->name##Max) { \
308: ctxt->name##Max *= 2; \
1.119 daniel 309: ctxt->name##Tab = (void *) xmlRealloc(ctxt->name##Tab, \
1.40 daniel 310: ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
311: if (ctxt->name##Tab == NULL) { \
1.31 daniel 312: fprintf(stderr, "realloc failed !\n"); \
1.145 daniel 313: return(0); \
1.31 daniel 314: } \
315: } \
1.40 daniel 316: ctxt->name##Tab[ctxt->name##Nr] = value; \
317: ctxt->name = value; \
318: return(ctxt->name##Nr++); \
1.31 daniel 319: } \
1.140 daniel 320: scope type name##Pop(xmlParserCtxtPtr ctxt) { \
1.69 daniel 321: type ret; \
1.40 daniel 322: if (ctxt->name##Nr <= 0) return(0); \
323: ctxt->name##Nr--; \
1.50 daniel 324: if (ctxt->name##Nr > 0) \
325: ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
326: else \
327: ctxt->name = NULL; \
1.69 daniel 328: ret = ctxt->name##Tab[ctxt->name##Nr]; \
329: ctxt->name##Tab[ctxt->name##Nr] = 0; \
330: return(ret); \
1.31 daniel 331: } \
332:
1.140 daniel 333: PUSH_AND_POP(extern, xmlParserInputPtr, input)
334: PUSH_AND_POP(extern, xmlNodePtr, node)
335: PUSH_AND_POP(extern, xmlChar*, name)
1.40 daniel 336:
1.176 daniel 337: int spacePush(xmlParserCtxtPtr ctxt, int val) {
338: if (ctxt->spaceNr >= ctxt->spaceMax) {
339: ctxt->spaceMax *= 2;
340: ctxt->spaceTab = (void *) xmlRealloc(ctxt->spaceTab,
341: ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
342: if (ctxt->spaceTab == NULL) {
343: fprintf(stderr, "realloc failed !\n");
344: return(0);
345: }
346: }
347: ctxt->spaceTab[ctxt->spaceNr] = val;
348: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
349: return(ctxt->spaceNr++);
350: }
351:
352: int spacePop(xmlParserCtxtPtr ctxt) {
353: int ret;
354: if (ctxt->spaceNr <= 0) return(0);
355: ctxt->spaceNr--;
356: if (ctxt->spaceNr > 0)
357: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
358: else
359: ctxt->space = NULL;
360: ret = ctxt->spaceTab[ctxt->spaceNr];
361: ctxt->spaceTab[ctxt->spaceNr] = -1;
362: return(ret);
363: }
364:
1.55 daniel 365: /*
366: * Macros for accessing the content. Those should be used only by the parser,
367: * and not exported.
368: *
369: * Dirty macros, i.e. one need to make assumption on the context to use them
370: *
1.123 daniel 371: * CUR_PTR return the current pointer to the xmlChar to be parsed.
1.151 daniel 372: * To be used with extreme caution since operations consuming
373: * characters may move the input buffer to a different location !
1.123 daniel 374: * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1.152 daniel 375: * in ISO-Latin or UTF-8.
1.151 daniel 376: * This should be used internally by the parser
1.55 daniel 377: * only to compare to ASCII values otherwise it would break when
378: * running with UTF-8 encoding.
1.123 daniel 379: * NXT(n) returns the n'th next xmlChar. Same as CUR it should be used only
1.55 daniel 380: * to compare on ASCII based substring.
1.123 daniel 381: * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1.55 daniel 382: * strings within the parser.
383: *
1.77 daniel 384: * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1.55 daniel 385: *
386: * NEXT Skip to the next character, this does the proper decoding
387: * in UTF-8 mode. It also pop-up unfinished entities on the fly.
1.77 daniel 388: * COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
1.155 daniel 389: * CUR_CHAR Return the current char as an int as well as its length.
1.55 daniel 390: */
1.45 daniel 391:
/*
 * All macros below expect a variable named ctxt (the parser context) to be
 * in scope at the point of use.
 */
/* RAW: the xmlChar at the current input position, or -1 while ctxt->token is non-zero. */
1.152 daniel 392: #define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
/* CUR: ctxt->token when it is non-zero, otherwise the xmlChar at the current input position. */
1.97 daniel 393: #define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
/* NXT(val): the xmlChar val positions after the current one; no bounds check is done. */
1.55 daniel 394: #define NXT(val) ctxt->input->cur[(val)]
/* CUR_PTR: the current input pointer itself (an lvalue). */
395: #define CUR_PTR ctxt->input->cur
1.154 daniel 396:
/*
 * SKIP(val): advance the input pointer and ctxt->nbChars by val, then hand
 * a leading '%' or '&' to the PE-reference / reference handlers, and pop
 * the input when it is exhausted and cannot be grown.
 * Expands to several statements and is not wrapped in do { } while (0):
 * under an unbraced if/else only the first statement would be conditional.
 */
1.164 daniel 397: #define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val); \
398: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
1.168 daniel 399: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); \
400: if ((*ctxt->input->cur == 0) && \
401: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
402: xmlPopInput(ctxt)
1.164 daniel 403:
/*
 * SHRINK: call xmlParserInputShrink() on the current input, then pop the
 * input when it is exhausted and cannot be grown.  Multi-statement, same
 * caveat as SKIP.
 */
1.97 daniel 404: #define SHRINK xmlParserInputShrink(ctxt->input); \
405: if ((*ctxt->input->cur == 0) && \
406: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
407: xmlPopInput(ctxt)
408:
/*
 * GROW: call xmlParserInputGrow() on the current input with INPUT_CHUNK,
 * then pop the input when it is exhausted and cannot be grown.
 * Multi-statement, same caveat as SKIP.
 */
409: #define GROW xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
410: if ((*ctxt->input->cur == 0) && \
411: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
412: xmlPopInput(ctxt)
1.55 daniel 413:
/* SKIP_BLANKS: call xmlSkipBlankChars(); the expansion already ends with ';'. */
1.155 daniel 414: #define SKIP_BLANKS xmlSkipBlankChars(ctxt);
1.154 daniel 415:
/* NEXT: call xmlNextChar(); the expansion already ends with ';'. */
1.151 daniel 416: #define NEXT xmlNextChar(ctxt);
1.154 daniel 417:
/*
 * NEXTL(l): update line/col for the current xmlChar, clear ctxt->token,
 * advance the input pointer by l, then hand a leading '%' or '&' to the
 * PE-reference / reference handlers.  Unlike SKIP it does not update
 * ctxt->nbChars and does not pop exhausted inputs.  Multi-statement, same
 * caveat as SKIP; the argument l is not parenthesised in the expansion.
 */
1.153 daniel 418: #define NEXTL(l) \
419: if (*(ctxt->input->cur) == '\n') { \
420: ctxt->input->line++; ctxt->input->col = 1; \
421: } else ctxt->input->col++; \
1.154 daniel 422: ctxt->token = 0; ctxt->input->cur += l; \
423: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
424: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
425:
/*
 * CUR_CHAR(l) / CUR_SCHAR(s, l): call xmlCurrentChar() / xmlStringCurrentChar(),
 * storing the length in the int variable l (its address is taken).
 * Both expansions already end with ';'.
 */
1.152 daniel 426: #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l);
1.162 daniel 427: #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l);
1.154 daniel 428:
/*
 * COPY_BUF(l,b,i,v): append the char v to b at index i, advancing i.
 * When l is 1 the value is stored as a single xmlChar, otherwise
 * xmlCopyChar() does the encoding.  Expands to a bare if/else, so it must
 * not be used as the body of an unbraced if that has its own else.
 */
1.152 daniel 429: #define COPY_BUF(l,b,i,v) \
430: if (l == 1) b[i++] = (xmlChar) v; \
431: else i += xmlCopyChar(l,&b[i],v);
1.151 daniel 432:
433: /**
434: * xmlNextChar:
435: * @ctxt: the XML parser context
436: *
437: * Skip to the next char input char.
438: */
1.55 daniel 439:
1.151 daniel 440: void
441: xmlNextChar(xmlParserCtxtPtr ctxt) {
1.201 ! daniel 442: if (ctxt->instate == XML_PARSER_EOF)
! 443: return;
! 444:
1.176 daniel 445: /*
446: * TODO: 2.11 End-of-Line Handling
447: * the literal two-character sequence "#xD#xA" or a standalone
448: * literal #xD, an XML processor must pass to the application
449: * the single character #xA.
450: */
1.151 daniel 451: if (ctxt->token != 0) ctxt->token = 0;
452: else {
453: if ((*ctxt->input->cur == 0) &&
454: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
455: (ctxt->instate != XML_PARSER_COMMENT)) {
456: /*
457: * If we are at the end of the current entity and
458: * the context allows it, we pop consumed entities
459: * automatically.
460: * TODO: the auto closing should be blocked in other cases
461: */
462: xmlPopInput(ctxt);
463: } else {
464: if (*(ctxt->input->cur) == '\n') {
465: ctxt->input->line++; ctxt->input->col = 1;
466: } else ctxt->input->col++;
1.198 daniel 467: if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1.151 daniel 468: /*
469: * We are supposed to handle UTF8, check it's valid
470: * From rfc2044: encoding of the Unicode values on UTF-8:
471: *
472: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
473: * 0000 0000-0000 007F 0xxxxxxx
474: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
475: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
476: *
1.160 daniel 477: * Check for the 0x110000 limit too
1.151 daniel 478: */
479: const unsigned char *cur = ctxt->input->cur;
480: unsigned char c;
1.91 daniel 481:
1.151 daniel 482: c = *cur;
483: if (c & 0x80) {
484: if (cur[1] == 0)
485: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
486: if ((cur[1] & 0xc0) != 0x80)
487: goto encoding_error;
488: if ((c & 0xe0) == 0xe0) {
489: unsigned int val;
490:
491: if (cur[2] == 0)
492: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
493: if ((cur[2] & 0xc0) != 0x80)
494: goto encoding_error;
495: if ((c & 0xf0) == 0xf0) {
496: if (cur[3] == 0)
497: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
498: if (((c & 0xf8) != 0xf0) ||
499: ((cur[3] & 0xc0) != 0x80))
500: goto encoding_error;
501: /* 4-byte code */
502: ctxt->input->cur += 4;
503: val = (cur[0] & 0x7) << 18;
504: val |= (cur[1] & 0x3f) << 12;
505: val |= (cur[2] & 0x3f) << 6;
506: val |= cur[3] & 0x3f;
507: } else {
508: /* 3-byte code */
509: ctxt->input->cur += 3;
510: val = (cur[0] & 0xf) << 12;
511: val |= (cur[1] & 0x3f) << 6;
512: val |= cur[2] & 0x3f;
513: }
514: if (((val > 0xd7ff) && (val < 0xe000)) ||
515: ((val > 0xfffd) && (val < 0x10000)) ||
1.160 daniel 516: (val >= 0x110000)) {
1.151 daniel 517: if ((ctxt->sax != NULL) &&
518: (ctxt->sax->error != NULL))
519: ctxt->sax->error(ctxt->userData,
1.196 daniel 520: "Char 0x%X out of allowed range\n", val);
1.151 daniel 521: ctxt->errNo = XML_ERR_INVALID_ENCODING;
522: ctxt->wellFormed = 0;
1.180 daniel 523: ctxt->disableSAX = 1;
1.151 daniel 524: }
525: } else
526: /* 2-byte code */
527: ctxt->input->cur += 2;
528: } else
529: /* 1-byte code */
530: ctxt->input->cur++;
531: } else {
532: /*
533: * Assume it's a fixed lenght encoding (1) with
534: * a compatibke encoding for the ASCII set, since
535: * XML constructs only use < 128 chars
536: */
537: ctxt->input->cur++;
538: }
539: ctxt->nbChars++;
540: if (*ctxt->input->cur == 0)
541: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
542: }
543: }
1.154 daniel 544: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
545: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
1.168 daniel 546: if ((*ctxt->input->cur == 0) &&
547: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
548: xmlPopInput(ctxt);
1.151 daniel 549: return;
550: encoding_error:
551: /*
552: * If we detect an UTF8 error that probably mean that the
553: * input encoding didn't get properly advertized in the
554: * declaration header. Report the error and switch the encoding
555: * to ISO-Latin-1 (if you don't like this policy, just declare the
556: * encoding !)
557: */
1.198 daniel 558: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1.151 daniel 559: ctxt->sax->error(ctxt->userData,
560: "Input is not proper UTF-8, indicate encoding !\n");
1.198 daniel 561: ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
562: ctxt->input->cur[0], ctxt->input->cur[1],
563: ctxt->input->cur[2], ctxt->input->cur[3]);
564: }
1.151 daniel 565: ctxt->errNo = XML_ERR_INVALID_ENCODING;
566:
1.198 daniel 567: ctxt->charset = XML_CHAR_ENCODING_8859_1;
1.151 daniel 568: ctxt->input->cur++;
569: return;
570: }
1.42 daniel 571:
1.152 daniel 572: /**
573: * xmlCurrentChar:
574: * @ctxt: the XML parser context
575: * @len: pointer to the length of the char read
576: *
577: * The current char value, if using UTF-8 this may actaully span multiple
1.180 daniel 578: * bytes in the input buffer. Implement the end of line normalization:
579: * 2.11 End-of-Line Handling
580: * Wherever an external parsed entity or the literal entity value
581: * of an internal parsed entity contains either the literal two-character
582: * sequence "#xD#xA" or a standalone literal #xD, an XML processor
583: * must pass to the application the single character #xA.
584: * This behavior can conveniently be produced by normalizing all
585: * line breaks to #xA on input, before parsing.)
1.152 daniel 586: *
587: * Returns the current char value and its lenght
588: */
589:
590: int
591: xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1.201 ! daniel 592: if (ctxt->instate == XML_PARSER_EOF)
! 593: return(0);
! 594:
1.152 daniel 595: if (ctxt->token != 0) {
596: *len = 0;
597: return(ctxt->token);
598: }
1.198 daniel 599: if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1.152 daniel 600: /*
601: * We are supposed to handle UTF8, check it's valid
602: * From rfc2044: encoding of the Unicode values on UTF-8:
603: *
604: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
605: * 0000 0000-0000 007F 0xxxxxxx
606: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
607: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
608: *
1.160 daniel 609: * Check for the 0x110000 limit too
1.152 daniel 610: */
611: const unsigned char *cur = ctxt->input->cur;
612: unsigned char c;
613: unsigned int val;
614:
615: c = *cur;
616: if (c & 0x80) {
617: if (cur[1] == 0)
618: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
619: if ((cur[1] & 0xc0) != 0x80)
620: goto encoding_error;
621: if ((c & 0xe0) == 0xe0) {
622:
623: if (cur[2] == 0)
624: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
625: if ((cur[2] & 0xc0) != 0x80)
626: goto encoding_error;
627: if ((c & 0xf0) == 0xf0) {
628: if (cur[3] == 0)
629: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
630: if (((c & 0xf8) != 0xf0) ||
631: ((cur[3] & 0xc0) != 0x80))
632: goto encoding_error;
633: /* 4-byte code */
634: *len = 4;
635: val = (cur[0] & 0x7) << 18;
636: val |= (cur[1] & 0x3f) << 12;
637: val |= (cur[2] & 0x3f) << 6;
638: val |= cur[3] & 0x3f;
639: } else {
640: /* 3-byte code */
641: *len = 3;
642: val = (cur[0] & 0xf) << 12;
643: val |= (cur[1] & 0x3f) << 6;
644: val |= cur[2] & 0x3f;
645: }
646: } else {
647: /* 2-byte code */
648: *len = 2;
649: val = (cur[0] & 0x1f) << 6;
1.168 daniel 650: val |= cur[1] & 0x3f;
1.152 daniel 651: }
652: if (!IS_CHAR(val)) {
653: if ((ctxt->sax != NULL) &&
654: (ctxt->sax->error != NULL))
655: ctxt->sax->error(ctxt->userData,
1.196 daniel 656: "Char 0x%X out of allowed range\n", val);
1.152 daniel 657: ctxt->errNo = XML_ERR_INVALID_ENCODING;
658: ctxt->wellFormed = 0;
1.180 daniel 659: ctxt->disableSAX = 1;
1.152 daniel 660: }
661: return(val);
662: } else {
663: /* 1-byte code */
664: *len = 1;
1.180 daniel 665: if (*ctxt->input->cur == 0xD) {
666: if (ctxt->input->cur[1] == 0xA) {
667: ctxt->nbChars++;
668: ctxt->input->cur++;
669: }
670: return(0xA);
671: }
1.152 daniel 672: return((int) *ctxt->input->cur);
673: }
674: }
675: /*
676: * Assume it's a fixed lenght encoding (1) with
677: * a compatibke encoding for the ASCII set, since
678: * XML constructs only use < 128 chars
679: */
680: *len = 1;
1.180 daniel 681: if (*ctxt->input->cur == 0xD) {
682: if (ctxt->input->cur[1] == 0xA) {
683: ctxt->nbChars++;
684: ctxt->input->cur++;
685: }
686: return(0xA);
687: }
1.152 daniel 688: return((int) *ctxt->input->cur);
689: encoding_error:
690: /*
691: * If we detect an UTF8 error that probably mean that the
692: * input encoding didn't get properly advertized in the
693: * declaration header. Report the error and switch the encoding
694: * to ISO-Latin-1 (if you don't like this policy, just declare the
695: * encoding !)
696: */
1.198 daniel 697: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1.152 daniel 698: ctxt->sax->error(ctxt->userData,
699: "Input is not proper UTF-8, indicate encoding !\n");
1.198 daniel 700: ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
701: ctxt->input->cur[0], ctxt->input->cur[1],
702: ctxt->input->cur[2], ctxt->input->cur[3]);
703: }
1.152 daniel 704: ctxt->errNo = XML_ERR_INVALID_ENCODING;
705:
1.198 daniel 706: ctxt->charset = XML_CHAR_ENCODING_8859_1;
1.152 daniel 707: *len = 1;
708: return((int) *ctxt->input->cur);
709: }
710:
711: /**
1.162 daniel 712: * xmlStringCurrentChar:
713: * @ctxt: the XML parser context
714: * @cur: pointer to the beginning of the char
715: * @len: pointer to the length of the char read
716: *
717: * The current char value, if using UTF-8 this may actaully span multiple
718: * bytes in the input buffer.
719: *
720: * Returns the current char value and its lenght
721: */
722:
723: int
724: xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
1.198 daniel 725: if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1.162 daniel 726: /*
727: * We are supposed to handle UTF8, check it's valid
728: * From rfc2044: encoding of the Unicode values on UTF-8:
729: *
730: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
731: * 0000 0000-0000 007F 0xxxxxxx
732: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
733: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
734: *
735: * Check for the 0x110000 limit too
736: */
737: unsigned char c;
738: unsigned int val;
739:
740: c = *cur;
741: if (c & 0x80) {
742: if ((cur[1] & 0xc0) != 0x80)
743: goto encoding_error;
744: if ((c & 0xe0) == 0xe0) {
745:
746: if ((cur[2] & 0xc0) != 0x80)
747: goto encoding_error;
748: if ((c & 0xf0) == 0xf0) {
749: if (((c & 0xf8) != 0xf0) ||
750: ((cur[3] & 0xc0) != 0x80))
751: goto encoding_error;
752: /* 4-byte code */
753: *len = 4;
754: val = (cur[0] & 0x7) << 18;
755: val |= (cur[1] & 0x3f) << 12;
756: val |= (cur[2] & 0x3f) << 6;
757: val |= cur[3] & 0x3f;
758: } else {
759: /* 3-byte code */
760: *len = 3;
761: val = (cur[0] & 0xf) << 12;
762: val |= (cur[1] & 0x3f) << 6;
763: val |= cur[2] & 0x3f;
764: }
765: } else {
766: /* 2-byte code */
767: *len = 2;
768: val = (cur[0] & 0x1f) << 6;
769: val |= cur[2] & 0x3f;
770: }
771: if (!IS_CHAR(val)) {
772: if ((ctxt->sax != NULL) &&
773: (ctxt->sax->error != NULL))
774: ctxt->sax->error(ctxt->userData,
1.196 daniel 775: "Char 0x%X out of allowed range\n", val);
1.162 daniel 776: ctxt->errNo = XML_ERR_INVALID_ENCODING;
777: ctxt->wellFormed = 0;
1.180 daniel 778: ctxt->disableSAX = 1;
1.162 daniel 779: }
780: return(val);
781: } else {
782: /* 1-byte code */
783: *len = 1;
784: return((int) *cur);
785: }
786: }
787: /*
788: * Assume it's a fixed lenght encoding (1) with
789: * a compatibke encoding for the ASCII set, since
790: * XML constructs only use < 128 chars
791: */
792: *len = 1;
793: return((int) *cur);
794: encoding_error:
795: /*
796: * If we detect an UTF8 error that probably mean that the
797: * input encoding didn't get properly advertized in the
798: * declaration header. Report the error and switch the encoding
799: * to ISO-Latin-1 (if you don't like this policy, just declare the
800: * encoding !)
801: */
1.198 daniel 802: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1.162 daniel 803: ctxt->sax->error(ctxt->userData,
804: "Input is not proper UTF-8, indicate encoding !\n");
1.198 daniel 805: ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
806: ctxt->input->cur[0], ctxt->input->cur[1],
807: ctxt->input->cur[2], ctxt->input->cur[3]);
808: }
1.162 daniel 809: ctxt->errNo = XML_ERR_INVALID_ENCODING;
810:
811: *len = 1;
812: return((int) *cur);
813: }
814:
815: /**
1.152 daniel 816: * xmlCopyChar:
817: * @len: pointer to the length of the char read (or zero)
818: * @array: pointer to an arry of xmlChar
819: * @val: the char value
820: *
821: * append the char value in the array
822: *
823: * Returns the number of xmlChar written
824: */
825:
826: int
827: xmlCopyChar(int len, xmlChar *out, int val) {
828: /*
829: * We are supposed to handle UTF8, check it's valid
830: * From rfc2044: encoding of the Unicode values on UTF-8:
831: *
832: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
833: * 0000 0000-0000 007F 0xxxxxxx
834: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
835: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
836: */
837: if (len == 0) {
838: if (val < 0) len = 0;
1.160 daniel 839: else if (val < 0x80) len = 1;
840: else if (val < 0x800) len = 2;
841: else if (val < 0x10000) len = 3;
842: else if (val < 0x110000) len = 4;
1.152 daniel 843: if (len == 0) {
844: fprintf(stderr, "Internal error, xmlCopyChar 0x%X out of bound\n",
845: val);
846: return(0);
847: }
848: }
849: if (len > 1) {
850: int bits;
851:
852: if (val < 0x80) { *out++= val; bits= -6; }
853: else if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
854: else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6; }
855: else { *out++= (val >> 18) | 0xF0; bits= 12; }
856:
857: for ( ; bits >= 0; bits-= 6)
858: *out++= ((val >> bits) & 0x3F) | 0x80 ;
859:
860: return(len);
861: }
862: *out = (xmlChar) val;
863: return(1);
1.155 daniel 864: }
865:
866: /**
867: * xmlSkipBlankChars:
868: * @ctxt: the XML parser context
869: *
870: * skip all blanks character found at that point in the input streams.
871: * It pops up finished entities in the process if allowable at that point.
872: *
873: * Returns the number of space chars skipped
874: */
875:
876: int
877: xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
878: int cur, res = 0;
879:
880: do {
881: cur = CUR;
882: while (IS_BLANK(cur)) {
883: NEXT;
884: cur = CUR;
885: res++;
886: }
887: while ((cur == 0) && (ctxt->inputNr > 1) &&
888: (ctxt->instate != XML_PARSER_COMMENT)) {
889: xmlPopInput(ctxt);
890: cur = CUR;
891: }
892: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
893: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
894: } while (IS_BLANK(cur));
895: return(res);
1.152 daniel 896: }
897:
1.97 daniel 898: /************************************************************************
899: * *
900: * Commodity functions to handle entities processing *
901: * *
902: ************************************************************************/
1.40 daniel 903:
1.50 daniel 904: /**
905: * xmlPopInput:
906: * @ctxt: an XML parser context
907: *
1.40 daniel 908: * xmlPopInput: the current input pointed by ctxt->input came to an end
909: * pop it and return the next char.
1.45 daniel 910: *
1.123 daniel 911: * Returns the current xmlChar in the parser context
1.40 daniel 912: */
1.123 daniel 913: xmlChar
1.55 daniel 914: xmlPopInput(xmlParserCtxtPtr ctxt) {
1.40 daniel 915: if (ctxt->inputNr == 1) return(0); /* End of main Input */
1.69 daniel 916: xmlFreeInputStream(inputPop(ctxt));
1.97 daniel 917: if ((*ctxt->input->cur == 0) &&
918: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
919: return(xmlPopInput(ctxt));
1.40 daniel 920: return(CUR);
921: }
922:
1.50 daniel 923: /**
924: * xmlPushInput:
925: * @ctxt: an XML parser context
926: * @input: an XML parser input fragment (entity, XML fragment ...).
927: *
1.40 daniel 928: * xmlPushInput: switch to a new input stream which is stacked on top
929: * of the previous one(s).
930: */
1.55 daniel 931: void
932: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1.40 daniel 933: if (input == NULL) return;
934: inputPush(ctxt, input);
1.164 daniel 935: GROW;
1.40 daniel 936: }
937:
1.50 daniel 938: /**
1.69 daniel 939: * xmlFreeInputStream:
1.127 daniel 940: * @input: an xmlParserInputPtr
1.69 daniel 941: *
942: * Free up an input stream.
943: */
944: void
945: xmlFreeInputStream(xmlParserInputPtr input) {
946: if (input == NULL) return;
947:
1.119 daniel 948: if (input->filename != NULL) xmlFree((char *) input->filename);
949: if (input->directory != NULL) xmlFree((char *) input->directory);
1.164 daniel 950: if (input->encoding != NULL) xmlFree((char *) input->encoding);
1.165 daniel 951: if (input->version != NULL) xmlFree((char *) input->version);
1.69 daniel 952: if ((input->free != NULL) && (input->base != NULL))
1.123 daniel 953: input->free((xmlChar *) input->base);
1.93 veillard 954: if (input->buf != NULL)
955: xmlFreeParserInputBuffer(input->buf);
1.69 daniel 956: memset(input, -1, sizeof(xmlParserInput));
1.119 daniel 957: xmlFree(input);
1.69 daniel 958: }
959:
960: /**
1.96 daniel 961: * xmlNewInputStream:
962: * @ctxt: an XML parser context
963: *
964: * Create a new input stream structure
965: * Returns the new input stream or NULL
966: */
967: xmlParserInputPtr
968: xmlNewInputStream(xmlParserCtxtPtr ctxt) {
969: xmlParserInputPtr input;
970:
1.119 daniel 971: input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1.96 daniel 972: if (input == NULL) {
1.190 daniel 973: if (ctxt != NULL) {
974: ctxt->errNo = XML_ERR_NO_MEMORY;
975: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
976: ctxt->sax->error(ctxt->userData,
977: "malloc: couldn't allocate a new input stream\n");
978: ctxt->errNo = XML_ERR_NO_MEMORY;
979: }
1.96 daniel 980: return(NULL);
981: }
1.165 daniel 982: memset(input, 0, sizeof(xmlParserInput));
1.96 daniel 983: input->line = 1;
984: input->col = 1;
1.167 daniel 985: input->standalone = -1;
1.96 daniel 986: return(input);
987: }
988:
989: /**
1.190 daniel 990: * xmlNewIOInputStream:
991: * @ctxt: an XML parser context
992: * @input: an I/O Input
993: * @enc: the charset encoding if known
994: *
995: * Create a new input stream structure encapsulating the @input into
996: * a stream suitable for the parser.
997: *
998: * Returns the new input stream or NULL
999: */
1000: xmlParserInputPtr
1001: xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1002: xmlCharEncoding enc) {
1003: xmlParserInputPtr inputStream;
1004:
1005: inputStream = xmlNewInputStream(ctxt);
1006: if (inputStream == NULL) {
1007: return(NULL);
1008: }
1009: inputStream->filename = NULL;
1010: inputStream->buf = input;
1011: inputStream->base = inputStream->buf->buffer->content;
1012: inputStream->cur = inputStream->buf->buffer->content;
1013: if (enc != XML_CHAR_ENCODING_NONE) {
1014: xmlSwitchEncoding(ctxt, enc);
1015: }
1016:
1017: return(inputStream);
1018: }
1019:
1020: /**
1.50 daniel 1021: * xmlNewEntityInputStream:
1022: * @ctxt: an XML parser context
1023: * @entity: an Entity pointer
1024: *
1.82 daniel 1025: * Create a new input stream based on an xmlEntityPtr
1.113 daniel 1026: *
1027: * Returns the new input stream or NULL
1.45 daniel 1028: */
1.50 daniel 1029: xmlParserInputPtr
1030: xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 1031: xmlParserInputPtr input;
1032:
1033: if (entity == NULL) {
1.123 daniel 1034: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 1035: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1036: ctxt->sax->error(ctxt->userData,
1.45 daniel 1037: "internal: xmlNewEntityInputStream entity = NULL\n");
1.123 daniel 1038: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.50 daniel 1039: return(NULL);
1.45 daniel 1040: }
1041: if (entity->content == NULL) {
1.159 daniel 1042: switch (entity->etype) {
1.113 daniel 1043: case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1.123 daniel 1044: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.113 daniel 1045: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1046: ctxt->sax->error(ctxt->userData,
1047: "xmlNewEntityInputStream unparsed entity !\n");
1048: break;
1049: case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1050: case XML_EXTERNAL_PARAMETER_ENTITY:
1.116 daniel 1051: return(xmlLoadExternalEntity((char *) entity->SystemID,
1.142 daniel 1052: (char *) entity->ExternalID, ctxt));
1.113 daniel 1053: case XML_INTERNAL_GENERAL_ENTITY:
1054: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1055: ctxt->sax->error(ctxt->userData,
1056: "Internal entity %s without content !\n", entity->name);
1057: break;
1058: case XML_INTERNAL_PARAMETER_ENTITY:
1.123 daniel 1059: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.113 daniel 1060: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1061: ctxt->sax->error(ctxt->userData,
1062: "Internal parameter entity %s without content !\n", entity->name);
1063: break;
1064: case XML_INTERNAL_PREDEFINED_ENTITY:
1.123 daniel 1065: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.113 daniel 1066: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1067: ctxt->sax->error(ctxt->userData,
1068: "Predefined entity %s without content !\n", entity->name);
1069: break;
1070: }
1.50 daniel 1071: return(NULL);
1.45 daniel 1072: }
1.96 daniel 1073: input = xmlNewInputStream(ctxt);
1.45 daniel 1074: if (input == NULL) {
1.50 daniel 1075: return(NULL);
1.45 daniel 1076: }
1.156 daniel 1077: input->filename = (char *) entity->SystemID;
1.45 daniel 1078: input->base = entity->content;
1079: input->cur = entity->content;
1.140 daniel 1080: input->length = entity->length;
1.50 daniel 1081: return(input);
1.45 daniel 1082: }
1083:
1.59 daniel 1084: /**
1085: * xmlNewStringInputStream:
1086: * @ctxt: an XML parser context
1.96 daniel 1087: * @buffer: an memory buffer
1.59 daniel 1088: *
1089: * Create a new input stream based on a memory buffer.
1.68 daniel 1090: * Returns the new input stream
1.59 daniel 1091: */
1092: xmlParserInputPtr
1.123 daniel 1093: xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1.59 daniel 1094: xmlParserInputPtr input;
1095:
1.96 daniel 1096: if (buffer == NULL) {
1.123 daniel 1097: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 1098: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1099: ctxt->sax->error(ctxt->userData,
1.59 daniel 1100: "internal: xmlNewStringInputStream string = NULL\n");
1101: return(NULL);
1102: }
1.96 daniel 1103: input = xmlNewInputStream(ctxt);
1.59 daniel 1104: if (input == NULL) {
1105: return(NULL);
1106: }
1.96 daniel 1107: input->base = buffer;
1108: input->cur = buffer;
1.140 daniel 1109: input->length = xmlStrlen(buffer);
1.59 daniel 1110: return(input);
1111: }
1112:
1.76 daniel 1113: /**
1114: * xmlNewInputFromFile:
1115: * @ctxt: an XML parser context
1116: * @filename: the filename to use as entity
1117: *
1118: * Create a new input stream based on a file.
1119: *
1120: * Returns the new input stream or NULL in case of error
1121: */
1122: xmlParserInputPtr
1.79 daniel 1123: xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1.91 daniel 1124: xmlParserInputBufferPtr buf;
1.76 daniel 1125: xmlParserInputPtr inputStream;
1.111 daniel 1126: char *directory = NULL;
1.76 daniel 1127:
1.96 daniel 1128: if (ctxt == NULL) return(NULL);
1.91 daniel 1129: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1.94 daniel 1130: if (buf == NULL) {
1.140 daniel 1131: char name[XML_PARSER_BIG_BUFFER_SIZE];
1.106 daniel 1132:
1.94 daniel 1133: if ((ctxt->input != NULL) && (ctxt->input->directory != NULL)) {
1134: #ifdef WIN32
1135: sprintf(name, "%s\\%s", ctxt->input->directory, filename);
1136: #else
1137: sprintf(name, "%s/%s", ctxt->input->directory, filename);
1138: #endif
1139: buf = xmlParserInputBufferCreateFilename(name,
1140: XML_CHAR_ENCODING_NONE);
1.106 daniel 1141: if (buf != NULL)
1.142 daniel 1142: directory = xmlParserGetDirectory(name);
1.106 daniel 1143: }
1144: if ((buf == NULL) && (ctxt->directory != NULL)) {
1145: #ifdef WIN32
1146: sprintf(name, "%s\\%s", ctxt->directory, filename);
1147: #else
1148: sprintf(name, "%s/%s", ctxt->directory, filename);
1149: #endif
1150: buf = xmlParserInputBufferCreateFilename(name,
1151: XML_CHAR_ENCODING_NONE);
1152: if (buf != NULL)
1.142 daniel 1153: directory = xmlParserGetDirectory(name);
1.106 daniel 1154: }
1155: if (buf == NULL)
1.94 daniel 1156: return(NULL);
1157: }
1158: if (directory == NULL)
1159: directory = xmlParserGetDirectory(filename);
1.76 daniel 1160:
1.96 daniel 1161: inputStream = xmlNewInputStream(ctxt);
1.76 daniel 1162: if (inputStream == NULL) {
1.119 daniel 1163: if (directory != NULL) xmlFree((char *) directory);
1.76 daniel 1164: return(NULL);
1165: }
1166:
1.119 daniel 1167: inputStream->filename = xmlMemStrdup(filename);
1.94 daniel 1168: inputStream->directory = directory;
1.91 daniel 1169: inputStream->buf = buf;
1.76 daniel 1170:
1.91 daniel 1171: inputStream->base = inputStream->buf->buffer->content;
1172: inputStream->cur = inputStream->buf->buffer->content;
1.106 daniel 1173: if ((ctxt->directory == NULL) && (directory != NULL))
1.134 daniel 1174: ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
1.76 daniel 1175: return(inputStream);
1176: }
1177:
1.77 daniel 1178: /************************************************************************
1179: * *
1.97 daniel 1180: * Commodity functions to handle parser contexts *
1181: * *
1182: ************************************************************************/
1183:
1184: /**
1185: * xmlInitParserCtxt:
1186: * @ctxt: an XML parser context
1187: *
1188: * Initialize a parser context
1189: */
1190:
1191: void
1192: xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1193: {
1194: xmlSAXHandler *sax;
1195:
1.168 daniel 1196: xmlDefaultSAXHandlerInit();
1197:
1.119 daniel 1198: sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
1.97 daniel 1199: if (sax == NULL) {
1200: fprintf(stderr, "xmlInitParserCtxt: out of memory\n");
1201: }
1.180 daniel 1202: memset(sax, 0, sizeof(xmlSAXHandler));
1.97 daniel 1203:
1204: /* Allocate the Input stack */
1.119 daniel 1205: ctxt->inputTab = (xmlParserInputPtr *) xmlMalloc(5 * sizeof(xmlParserInputPtr));
1.97 daniel 1206: ctxt->inputNr = 0;
1207: ctxt->inputMax = 5;
1208: ctxt->input = NULL;
1.165 daniel 1209:
1.97 daniel 1210: ctxt->version = NULL;
1211: ctxt->encoding = NULL;
1212: ctxt->standalone = -1;
1.98 daniel 1213: ctxt->hasExternalSubset = 0;
1214: ctxt->hasPErefs = 0;
1.97 daniel 1215: ctxt->html = 0;
1.98 daniel 1216: ctxt->external = 0;
1.140 daniel 1217: ctxt->instate = XML_PARSER_START;
1.97 daniel 1218: ctxt->token = 0;
1.106 daniel 1219: ctxt->directory = NULL;
1.97 daniel 1220:
1221: /* Allocate the Node stack */
1.119 daniel 1222: ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
1.97 daniel 1223: ctxt->nodeNr = 0;
1224: ctxt->nodeMax = 10;
1225: ctxt->node = NULL;
1226:
1.140 daniel 1227: /* Allocate the Name stack */
1228: ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
1229: ctxt->nameNr = 0;
1230: ctxt->nameMax = 10;
1231: ctxt->name = NULL;
1232:
1.176 daniel 1233: /* Allocate the space stack */
1234: ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
1235: ctxt->spaceNr = 1;
1236: ctxt->spaceMax = 10;
1237: ctxt->spaceTab[0] = -1;
1238: ctxt->space = &ctxt->spaceTab[0];
1239:
1.160 daniel 1240: if (sax == NULL) {
1241: ctxt->sax = &xmlDefaultSAXHandler;
1242: } else {
1.97 daniel 1243: ctxt->sax = sax;
1244: memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
1245: }
1246: ctxt->userData = ctxt;
1247: ctxt->myDoc = NULL;
1248: ctxt->wellFormed = 1;
1.99 daniel 1249: ctxt->valid = 1;
1.100 daniel 1250: ctxt->validate = xmlDoValidityCheckingDefaultValue;
1.179 daniel 1251: ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
1.100 daniel 1252: ctxt->vctxt.userData = ctxt;
1.149 daniel 1253: if (ctxt->validate) {
1254: ctxt->vctxt.error = xmlParserValidityError;
1.160 daniel 1255: if (xmlGetWarningsDefaultValue == 0)
1256: ctxt->vctxt.warning = NULL;
1257: else
1258: ctxt->vctxt.warning = xmlParserValidityWarning;
1.180 daniel 1259: /* Allocate the Node stack */
1260: ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
1261: ctxt->vctxt.nodeNr = 0;
1262: ctxt->vctxt.nodeMax = 4;
1263: ctxt->vctxt.node = NULL;
1.149 daniel 1264: } else {
1265: ctxt->vctxt.error = NULL;
1266: ctxt->vctxt.warning = NULL;
1267: }
1.97 daniel 1268: ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1269: ctxt->record_info = 0;
1.135 daniel 1270: ctxt->nbChars = 0;
1.140 daniel 1271: ctxt->checkIndex = 0;
1.180 daniel 1272: ctxt->inSubset = 0;
1.140 daniel 1273: ctxt->errNo = XML_ERR_OK;
1.185 daniel 1274: ctxt->depth = 0;
1.198 daniel 1275: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.97 daniel 1276: xmlInitNodeInfoSeq(&ctxt->node_seq);
1277: }
1278:
1279: /**
1280: * xmlFreeParserCtxt:
1281: * @ctxt: an XML parser context
1282: *
1283: * Free all the memory used by a parser context. However the parsed
1284: * document in ctxt->myDoc is not freed.
1285: */
1286:
1287: void
1288: xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
1289: {
1290: xmlParserInputPtr input;
1.140 daniel 1291: xmlChar *oldname;
1.97 daniel 1292:
1293: if (ctxt == NULL) return;
1294:
1295: while ((input = inputPop(ctxt)) != NULL) {
1296: xmlFreeInputStream(input);
1297: }
1.140 daniel 1298: while ((oldname = namePop(ctxt)) != NULL) {
1299: xmlFree(oldname);
1300: }
1.176 daniel 1301: if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
1.140 daniel 1302: if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
1.119 daniel 1303: if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
1304: if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
1305: if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
1306: if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
1.165 daniel 1307: if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
1308: if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
1309: if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
1.180 daniel 1310: if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
1.97 daniel 1311: if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
1.119 daniel 1312: xmlFree(ctxt->sax);
1313: if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
1314: xmlFree(ctxt);
1.97 daniel 1315: }
1316:
1317: /**
1318: * xmlNewParserCtxt:
1319: *
1320: * Allocate and initialize a new parser context.
1321: *
1322: * Returns the xmlParserCtxtPtr or NULL
1323: */
1324:
1325: xmlParserCtxtPtr
1326: xmlNewParserCtxt()
1327: {
1328: xmlParserCtxtPtr ctxt;
1329:
1.119 daniel 1330: ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
1.97 daniel 1331: if (ctxt == NULL) {
1332: fprintf(stderr, "xmlNewParserCtxt : cannot allocate context\n");
1333: perror("malloc");
1334: return(NULL);
1335: }
1.165 daniel 1336: memset(ctxt, 0, sizeof(xmlParserCtxt));
1.97 daniel 1337: xmlInitParserCtxt(ctxt);
1338: return(ctxt);
1339: }
1340:
1341: /**
1342: * xmlClearParserCtxt:
1343: * @ctxt: an XML parser context
1344: *
1345: * Clear (release owned resources) and reinitialize a parser context
1346: */
1347:
1348: void
1349: xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1350: {
1351: xmlClearNodeInfoSeq(&ctxt->node_seq);
1352: xmlInitParserCtxt(ctxt);
1353: }
1354:
1355: /************************************************************************
1356: * *
1.77 daniel 1357: * Commodity functions to handle entities *
1358: * *
1359: ************************************************************************/
1360:
1.174 daniel 1361: /**
1362: * xmlCheckEntity:
1363: * @ctxt: an XML parser context
1364: * @content: the entity content string
1365: *
1366: * Parse an entity content and checks the WF constraints
1367: *
1368: */
1369:
1370: void
1371: xmlCheckEntity(xmlParserCtxtPtr ctxt, const xmlChar *content) {
1372: }
1.97 daniel 1373:
1374: /**
1375: * xmlParseCharRef:
1376: * @ctxt: an XML parser context
1377: *
1378: * parse Reference declarations
1379: *
1380: * [66] CharRef ::= '&#' [0-9]+ ';' |
1381: * '&#x' [0-9a-fA-F]+ ';'
1382: *
1.98 daniel 1383: * [ WFC: Legal Character ]
1384: * Characters referred to using character references must match the
1385: * production for Char.
1386: *
1.135 daniel 1387: * Returns the value parsed (as an int), 0 in case of error
1.77 daniel 1388: */
1.97 daniel 1389: int
1390: xmlParseCharRef(xmlParserCtxtPtr ctxt) {
1391: int val = 0;
1392:
1.111 daniel 1393: if (ctxt->token != 0) {
1394: val = ctxt->token;
1395: ctxt->token = 0;
1396: return(val);
1397: }
1.152 daniel 1398: if ((RAW == '&') && (NXT(1) == '#') &&
1.97 daniel 1399: (NXT(2) == 'x')) {
1400: SKIP(3);
1.152 daniel 1401: while (RAW != ';') {
1402: if ((RAW >= '0') && (RAW <= '9'))
1.97 daniel 1403: val = val * 16 + (CUR - '0');
1.152 daniel 1404: else if ((RAW >= 'a') && (RAW <= 'f'))
1.97 daniel 1405: val = val * 16 + (CUR - 'a') + 10;
1.152 daniel 1406: else if ((RAW >= 'A') && (RAW <= 'F'))
1.97 daniel 1407: val = val * 16 + (CUR - 'A') + 10;
1408: else {
1.123 daniel 1409: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1.97 daniel 1410: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1411: ctxt->sax->error(ctxt->userData,
1412: "xmlParseCharRef: invalid hexadecimal value\n");
1413: ctxt->wellFormed = 0;
1.180 daniel 1414: ctxt->disableSAX = 1;
1.97 daniel 1415: val = 0;
1416: break;
1417: }
1418: NEXT;
1419: }
1.164 daniel 1420: if (RAW == ';') {
1421: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1422: ctxt->nbChars ++;
1423: ctxt->input->cur++;
1424: }
1.152 daniel 1425: } else if ((RAW == '&') && (NXT(1) == '#')) {
1.97 daniel 1426: SKIP(2);
1.152 daniel 1427: while (RAW != ';') {
1428: if ((RAW >= '0') && (RAW <= '9'))
1.97 daniel 1429: val = val * 10 + (CUR - '0');
1430: else {
1.123 daniel 1431: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1.97 daniel 1432: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1433: ctxt->sax->error(ctxt->userData,
1434: "xmlParseCharRef: invalid decimal value\n");
1435: ctxt->wellFormed = 0;
1.180 daniel 1436: ctxt->disableSAX = 1;
1.97 daniel 1437: val = 0;
1438: break;
1439: }
1440: NEXT;
1441: }
1.164 daniel 1442: if (RAW == ';') {
1443: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1444: ctxt->nbChars ++;
1445: ctxt->input->cur++;
1446: }
1.97 daniel 1447: } else {
1.123 daniel 1448: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1.97 daniel 1449: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 1450: ctxt->sax->error(ctxt->userData,
1451: "xmlParseCharRef: invalid value\n");
1.97 daniel 1452: ctxt->wellFormed = 0;
1.180 daniel 1453: ctxt->disableSAX = 1;
1.97 daniel 1454: }
1.98 daniel 1455:
1.97 daniel 1456: /*
1.98 daniel 1457: * [ WFC: Legal Character ]
1458: * Characters referred to using character references must match the
1459: * production for Char.
1.97 daniel 1460: */
1461: if (IS_CHAR(val)) {
1462: return(val);
1463: } else {
1.123 daniel 1464: ctxt->errNo = XML_ERR_INVALID_CHAR;
1.97 daniel 1465: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.123 daniel 1466: ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
1.97 daniel 1467: val);
1468: ctxt->wellFormed = 0;
1.180 daniel 1469: ctxt->disableSAX = 1;
1.97 daniel 1470: }
1471: return(0);
1.77 daniel 1472: }
1473:
1.96 daniel 1474: /**
1.135 daniel 1475: * xmlParseStringCharRef:
1476: * @ctxt: an XML parser context
1477: * @str: a pointer to an index in the string
1478: *
1479: * parse Reference declarations, variant parsing from a string rather
1480: * than an an input flow.
1481: *
1482: * [66] CharRef ::= '&#' [0-9]+ ';' |
1483: * '&#x' [0-9a-fA-F]+ ';'
1484: *
1485: * [ WFC: Legal Character ]
1486: * Characters referred to using character references must match the
1487: * production for Char.
1488: *
1489: * Returns the value parsed (as an int), 0 in case of error, str will be
1490: * updated to the current value of the index
1491: */
1492: int
1493: xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1494: const xmlChar *ptr;
1495: xmlChar cur;
1496: int val = 0;
1497:
1498: if ((str == NULL) || (*str == NULL)) return(0);
1499: ptr = *str;
1500: cur = *ptr;
1.137 daniel 1501: if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1.135 daniel 1502: ptr += 3;
1503: cur = *ptr;
1504: while (cur != ';') {
1505: if ((cur >= '0') && (cur <= '9'))
1506: val = val * 16 + (cur - '0');
1507: else if ((cur >= 'a') && (cur <= 'f'))
1508: val = val * 16 + (cur - 'a') + 10;
1509: else if ((cur >= 'A') && (cur <= 'F'))
1510: val = val * 16 + (cur - 'A') + 10;
1511: else {
1512: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1513: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1514: ctxt->sax->error(ctxt->userData,
1.198 daniel 1515: "xmlParseStringCharRef: invalid hexadecimal value\n");
1.135 daniel 1516: ctxt->wellFormed = 0;
1.180 daniel 1517: ctxt->disableSAX = 1;
1.135 daniel 1518: val = 0;
1519: break;
1520: }
1521: ptr++;
1522: cur = *ptr;
1523: }
1524: if (cur == ';')
1525: ptr++;
1.145 daniel 1526: } else if ((cur == '&') && (ptr[1] == '#')){
1.135 daniel 1527: ptr += 2;
1528: cur = *ptr;
1529: while (cur != ';') {
1530: if ((cur >= '0') && (cur <= '9'))
1531: val = val * 10 + (cur - '0');
1532: else {
1533: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1534: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1535: ctxt->sax->error(ctxt->userData,
1.198 daniel 1536: "xmlParseStringCharRef: invalid decimal value\n");
1.135 daniel 1537: ctxt->wellFormed = 0;
1.180 daniel 1538: ctxt->disableSAX = 1;
1.135 daniel 1539: val = 0;
1540: break;
1541: }
1542: ptr++;
1543: cur = *ptr;
1544: }
1545: if (cur == ';')
1546: ptr++;
1547: } else {
1548: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1549: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1550: ctxt->sax->error(ctxt->userData,
1551: "xmlParseCharRef: invalid value\n");
1552: ctxt->wellFormed = 0;
1.180 daniel 1553: ctxt->disableSAX = 1;
1.135 daniel 1554: return(0);
1555: }
1556: *str = ptr;
1557:
1558: /*
1559: * [ WFC: Legal Character ]
1560: * Characters referred to using character references must match the
1561: * production for Char.
1562: */
1563: if (IS_CHAR(val)) {
1564: return(val);
1565: } else {
1566: ctxt->errNo = XML_ERR_INVALID_CHAR;
1567: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1568: ctxt->sax->error(ctxt->userData,
1569: "CharRef: invalid xmlChar value %d\n", val);
1570: ctxt->wellFormed = 0;
1.180 daniel 1571: ctxt->disableSAX = 1;
1.135 daniel 1572: }
1573: return(0);
1574: }
1575:
1576: /**
1.96 daniel 1577: * xmlParserHandleReference:
1578: * @ctxt: the parser context
1579: *
1.97 daniel 1580: * [67] Reference ::= EntityRef | CharRef
1581: *
1.96 daniel 1582: * [68] EntityRef ::= '&' Name ';'
1583: *
1.98 daniel 1584: * [ WFC: Entity Declared ]
1585: * the Name given in the entity reference must match that in an entity
1586: * declaration, except that well-formed documents need not declare any
1587: * of the following entities: amp, lt, gt, apos, quot.
1588: *
1589: * [ WFC: Parsed Entity ]
1590: * An entity reference must not contain the name of an unparsed entity
1591: *
1.97 daniel 1592: * [66] CharRef ::= '&#' [0-9]+ ';' |
1593: * '&#x' [0-9a-fA-F]+ ';'
1594: *
1.96 daniel 1595: * A PEReference may have been detectect in the current input stream
1596: * the handling is done accordingly to
1597: * http://www.w3.org/TR/REC-xml#entproc
1598: */
1599: void
1600: xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
1.97 daniel 1601: xmlParserInputPtr input;
1.123 daniel 1602: xmlChar *name;
1.97 daniel 1603: xmlEntityPtr ent = NULL;
1604:
1.126 daniel 1605: if (ctxt->token != 0) {
1606: return;
1607: }
1.152 daniel 1608: if (RAW != '&') return;
1.97 daniel 1609: GROW;
1.152 daniel 1610: if ((RAW == '&') && (NXT(1) == '#')) {
1.97 daniel 1611: switch(ctxt->instate) {
1.140 daniel 1612: case XML_PARSER_ENTITY_DECL:
1613: case XML_PARSER_PI:
1.109 daniel 1614: case XML_PARSER_CDATA_SECTION:
1.140 daniel 1615: case XML_PARSER_COMMENT:
1.168 daniel 1616: case XML_PARSER_SYSTEM_LITERAL:
1.140 daniel 1617: /* we just ignore it there */
1618: return;
1619: case XML_PARSER_START_TAG:
1.109 daniel 1620: return;
1.140 daniel 1621: case XML_PARSER_END_TAG:
1.97 daniel 1622: return;
1623: case XML_PARSER_EOF:
1.123 daniel 1624: ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
1.97 daniel 1625: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1626: ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
1627: ctxt->wellFormed = 0;
1.180 daniel 1628: ctxt->disableSAX = 1;
1.97 daniel 1629: return;
1630: case XML_PARSER_PROLOG:
1.140 daniel 1631: case XML_PARSER_START:
1632: case XML_PARSER_MISC:
1.123 daniel 1633: ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
1.97 daniel 1634: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1635: ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
1636: ctxt->wellFormed = 0;
1.180 daniel 1637: ctxt->disableSAX = 1;
1.97 daniel 1638: return;
1639: case XML_PARSER_EPILOG:
1.123 daniel 1640: ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
1.97 daniel 1641: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1642: ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
1643: ctxt->wellFormed = 0;
1.180 daniel 1644: ctxt->disableSAX = 1;
1.97 daniel 1645: return;
1646: case XML_PARSER_DTD:
1.123 daniel 1647: ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
1.97 daniel 1648: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1649: ctxt->sax->error(ctxt->userData,
1650: "CharRef are forbiden in DTDs!\n");
1651: ctxt->wellFormed = 0;
1.180 daniel 1652: ctxt->disableSAX = 1;
1.97 daniel 1653: return;
1654: case XML_PARSER_ENTITY_VALUE:
1655: /*
1656: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1657: * substitution here since we need the literal
1.97 daniel 1658: * entity value to be able to save the internal
1659: * subset of the document.
1660: * This will be handled by xmlDecodeEntities
1661: */
1662: return;
1663: case XML_PARSER_CONTENT:
1664: case XML_PARSER_ATTRIBUTE_VALUE:
1665: ctxt->token = xmlParseCharRef(ctxt);
1666: return;
1667: }
1668: return;
1669: }
1670:
1671: switch(ctxt->instate) {
1.109 daniel 1672: case XML_PARSER_CDATA_SECTION:
1673: return;
1.140 daniel 1674: case XML_PARSER_PI:
1.97 daniel 1675: case XML_PARSER_COMMENT:
1.168 daniel 1676: case XML_PARSER_SYSTEM_LITERAL:
1677: case XML_PARSER_CONTENT:
1.97 daniel 1678: return;
1.140 daniel 1679: case XML_PARSER_START_TAG:
1680: return;
1681: case XML_PARSER_END_TAG:
1682: return;
1.97 daniel 1683: case XML_PARSER_EOF:
1.123 daniel 1684: ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
1.97 daniel 1685: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1686: ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
1687: ctxt->wellFormed = 0;
1.180 daniel 1688: ctxt->disableSAX = 1;
1.97 daniel 1689: return;
1690: case XML_PARSER_PROLOG:
1.140 daniel 1691: case XML_PARSER_START:
1692: case XML_PARSER_MISC:
1.123 daniel 1693: ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
1.97 daniel 1694: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1695: ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
1696: ctxt->wellFormed = 0;
1.180 daniel 1697: ctxt->disableSAX = 1;
1.97 daniel 1698: return;
1699: case XML_PARSER_EPILOG:
1.123 daniel 1700: ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
1.97 daniel 1701: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1702: ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
1703: ctxt->wellFormed = 0;
1.180 daniel 1704: ctxt->disableSAX = 1;
1.97 daniel 1705: return;
1706: case XML_PARSER_ENTITY_VALUE:
1707: /*
1708: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1709: * substitution here since we need the literal
1.97 daniel 1710: * entity value to be able to save the internal
1711: * subset of the document.
1712: * This will be handled by xmlDecodeEntities
1713: */
1714: return;
1715: case XML_PARSER_ATTRIBUTE_VALUE:
1716: /*
1717: * NOTE: in the case of attributes values, we don't do the
1718: * substitution here unless we are in a mode where
1719: * the parser is explicitely asked to substitute
1720: * entities. The SAX callback is called with values
1721: * without entity substitution.
1722: * This will then be handled by xmlDecodeEntities
1723: */
1.113 daniel 1724: return;
1.97 daniel 1725: case XML_PARSER_ENTITY_DECL:
1726: /*
1727: * we just ignore it there
1728: * the substitution will be done once the entity is referenced
1729: */
1730: return;
1731: case XML_PARSER_DTD:
1.123 daniel 1732: ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
1.97 daniel 1733: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1734: ctxt->sax->error(ctxt->userData,
1735: "Entity references are forbiden in DTDs!\n");
1736: ctxt->wellFormed = 0;
1.180 daniel 1737: ctxt->disableSAX = 1;
1.97 daniel 1738: return;
1739: }
1740:
1741: NEXT;
1742: name = xmlScanName(ctxt);
1743: if (name == NULL) {
1.123 daniel 1744: ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
1.97 daniel 1745: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1746: ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
1747: ctxt->wellFormed = 0;
1.180 daniel 1748: ctxt->disableSAX = 1;
1.97 daniel 1749: ctxt->token = '&';
1750: return;
1751: }
1752: if (NXT(xmlStrlen(name)) != ';') {
1.123 daniel 1753: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.97 daniel 1754: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1755: ctxt->sax->error(ctxt->userData,
1756: "Entity reference: ';' expected\n");
1757: ctxt->wellFormed = 0;
1.180 daniel 1758: ctxt->disableSAX = 1;
1.97 daniel 1759: ctxt->token = '&';
1.119 daniel 1760: xmlFree(name);
1.97 daniel 1761: return;
1762: }
1763: SKIP(xmlStrlen(name) + 1);
1764: if (ctxt->sax != NULL) {
1765: if (ctxt->sax->getEntity != NULL)
1766: ent = ctxt->sax->getEntity(ctxt->userData, name);
1767: }
1.98 daniel 1768:
1769: /*
1770: * [ WFC: Entity Declared ]
1771: * the Name given in the entity reference must match that in an entity
1772: * declaration, except that well-formed documents need not declare any
1773: * of the following entities: amp, lt, gt, apos, quot.
1774: */
1.97 daniel 1775: if (ent == NULL)
1776: ent = xmlGetPredefinedEntity(name);
1777: if (ent == NULL) {
1.123 daniel 1778: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.97 daniel 1779: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1780: ctxt->sax->error(ctxt->userData,
1.98 daniel 1781: "Entity reference: entity %s not declared\n",
1782: name);
1.97 daniel 1783: ctxt->wellFormed = 0;
1.180 daniel 1784: ctxt->disableSAX = 1;
1.119 daniel 1785: xmlFree(name);
1.97 daniel 1786: return;
1787: }
1.98 daniel 1788:
1789: /*
1790: * [ WFC: Parsed Entity ]
1791: * An entity reference must not contain the name of an unparsed entity
1792: */
1.159 daniel 1793: if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.123 daniel 1794: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 1795: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1796: ctxt->sax->error(ctxt->userData,
1797: "Entity reference to unparsed entity %s\n", name);
1798: ctxt->wellFormed = 0;
1.180 daniel 1799: ctxt->disableSAX = 1;
1.98 daniel 1800: }
1801:
1.159 daniel 1802: if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
1.97 daniel 1803: ctxt->token = ent->content[0];
1.119 daniel 1804: xmlFree(name);
1.97 daniel 1805: return;
1806: }
1807: input = xmlNewEntityInputStream(ctxt, ent);
1808: xmlPushInput(ctxt, input);
1.119 daniel 1809: xmlFree(name);
1.96 daniel 1810: return;
1811: }
1812:
1813: /**
1814: * xmlParserHandlePEReference:
1815: * @ctxt: the parser context
1816: *
1817: * [69] PEReference ::= '%' Name ';'
1818: *
1.98 daniel 1819: * [ WFC: No Recursion ]
1820: * TODO A parsed entity must not contain a recursive
1821: * reference to itself, either directly or indirectly.
1822: *
1823: * [ WFC: Entity Declared ]
1824: * In a document without any DTD, a document with only an internal DTD
1825: * subset which contains no parameter entity references, or a document
1826: * with "standalone='yes'", ... ... The declaration of a parameter
1827: * entity must precede any reference to it...
1828: *
1829: * [ VC: Entity Declared ]
1830: * In a document with an external subset or external parameter entities
1831: * with "standalone='no'", ... ... The declaration of a parameter entity
1832: * must precede any reference to it...
1833: *
1834: * [ WFC: In DTD ]
1835: * Parameter-entity references may only appear in the DTD.
1836: * NOTE: misleading but this is handled.
1837: *
1838: * A PEReference may have been detected in the current input stream
1.96 daniel 1839: * the handling is done accordingly to
1840: * http://www.w3.org/TR/REC-xml#entproc
1841: * i.e.
1842: * - Included in literal in entity values
1843: * - Included as Parameter Entity reference within DTDs
1844: */
1845: void
1846: xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 1847: xmlChar *name;
1.96 daniel 1848: xmlEntityPtr entity = NULL;
1849: xmlParserInputPtr input;
1850:
1.126 daniel 1851: if (ctxt->token != 0) {
1852: return;
1853: }
1.152 daniel 1854: if (RAW != '%') return;
1.96 daniel 1855: switch(ctxt->instate) {
1.109 daniel 1856: case XML_PARSER_CDATA_SECTION:
1857: return;
1.97 daniel 1858: case XML_PARSER_COMMENT:
1859: return;
1.140 daniel 1860: case XML_PARSER_START_TAG:
1861: return;
1862: case XML_PARSER_END_TAG:
1863: return;
1.96 daniel 1864: case XML_PARSER_EOF:
1.123 daniel 1865: ctxt->errNo = XML_ERR_PEREF_AT_EOF;
1.96 daniel 1866: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1867: ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
1868: ctxt->wellFormed = 0;
1.180 daniel 1869: ctxt->disableSAX = 1;
1.96 daniel 1870: return;
1871: case XML_PARSER_PROLOG:
1.140 daniel 1872: case XML_PARSER_START:
1873: case XML_PARSER_MISC:
1.123 daniel 1874: ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
1.96 daniel 1875: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1876: ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
1877: ctxt->wellFormed = 0;
1.180 daniel 1878: ctxt->disableSAX = 1;
1.96 daniel 1879: return;
1.97 daniel 1880: case XML_PARSER_ENTITY_DECL:
1.96 daniel 1881: case XML_PARSER_CONTENT:
1882: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 1883: case XML_PARSER_PI:
1.168 daniel 1884: case XML_PARSER_SYSTEM_LITERAL:
1.96 daniel 1885: /* we just ignore it there */
1886: return;
1887: case XML_PARSER_EPILOG:
1.123 daniel 1888: ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
1.96 daniel 1889: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.97 daniel 1890: ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
1.96 daniel 1891: ctxt->wellFormed = 0;
1.180 daniel 1892: ctxt->disableSAX = 1;
1.96 daniel 1893: return;
1.97 daniel 1894: case XML_PARSER_ENTITY_VALUE:
1895: /*
1896: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1897: * substitution here since we need the literal
1.97 daniel 1898: * entity value to be able to save the internal
1899: * subset of the document.
1900: * This will be handled by xmlDecodeEntities
1901: */
1902: return;
1.96 daniel 1903: case XML_PARSER_DTD:
1.98 daniel 1904: /*
1905: * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1906: * In the internal DTD subset, parameter-entity references
1907: * can occur only where markup declarations can occur, not
1908: * within markup declarations.
1909: * In that case this is handled in xmlParseMarkupDecl
1910: */
1911: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1912: return;
1.96 daniel 1913: }
1914:
1915: NEXT;
1916: name = xmlParseName(ctxt);
1917: if (name == NULL) {
1.123 daniel 1918: ctxt->errNo = XML_ERR_PEREF_NO_NAME;
1.96 daniel 1919: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1920: ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
1921: ctxt->wellFormed = 0;
1.180 daniel 1922: ctxt->disableSAX = 1;
1.96 daniel 1923: } else {
1.152 daniel 1924: if (RAW == ';') {
1.96 daniel 1925: NEXT;
1.98 daniel 1926: if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1927: entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.96 daniel 1928: if (entity == NULL) {
1.98 daniel 1929:
1930: /*
1931: * [ WFC: Entity Declared ]
1932: * In a document without any DTD, a document with only an
1933: * internal DTD subset which contains no parameter entity
1934: * references, or a document with "standalone='yes'", ...
1935: * ... The declaration of a parameter entity must precede
1936: * any reference to it...
1937: */
1938: if ((ctxt->standalone == 1) ||
1939: ((ctxt->hasExternalSubset == 0) &&
1940: (ctxt->hasPErefs == 0))) {
1941: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1942: ctxt->sax->error(ctxt->userData,
1943: "PEReference: %%%s; not found\n", name);
1944: ctxt->wellFormed = 0;
1.180 daniel 1945: ctxt->disableSAX = 1;
1.98 daniel 1946: } else {
1947: /*
1948: * [ VC: Entity Declared ]
1949: * In a document with an external subset or external
1950: * parameter entities with "standalone='no'", ...
1951: * ... The declaration of a parameter entity must precede
1952: * any reference to it...
1953: */
1954: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1955: ctxt->sax->warning(ctxt->userData,
1956: "PEReference: %%%s; not found\n", name);
1957: ctxt->valid = 0;
1958: }
1.96 daniel 1959: } else {
1.159 daniel 1960: if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1961: (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
1.96 daniel 1962: /*
1.156 daniel 1963: * TODO !!! handle the extra spaces added before and after
1.96 daniel 1964: * c.f. http://www.w3.org/TR/REC-xml#as-PE
1965: */
1966: input = xmlNewEntityInputStream(ctxt, entity);
1967: xmlPushInput(ctxt, input);
1.164 daniel 1968: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
1969: (RAW == '<') && (NXT(1) == '?') &&
1970: (NXT(2) == 'x') && (NXT(3) == 'm') &&
1971: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 1972: xmlParseTextDecl(ctxt);
1.164 daniel 1973: }
1974: if (ctxt->token == 0)
1975: ctxt->token = ' ';
1.96 daniel 1976: } else {
1977: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1978: ctxt->sax->error(ctxt->userData,
1979: "xmlHandlePEReference: %s is not a parameter entity\n",
1980: name);
1981: ctxt->wellFormed = 0;
1.180 daniel 1982: ctxt->disableSAX = 1;
1.96 daniel 1983: }
1984: }
1985: } else {
1.123 daniel 1986: ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
1.96 daniel 1987: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1988: ctxt->sax->error(ctxt->userData,
1989: "xmlHandlePEReference: expecting ';'\n");
1990: ctxt->wellFormed = 0;
1.180 daniel 1991: ctxt->disableSAX = 1;
1.96 daniel 1992: }
1.119 daniel 1993: xmlFree(name);
1.97 daniel 1994: }
1995: }
1996:
/*
 * Macro used to grow the current buffer.
 *
 * Doubles <buffer>_size and reallocates <buffer> accordingly. The macro
 * expects two variables to be in scope: the xmlChar * named by its
 * argument and an int named <argument>_size.
 *
 * On allocation failure the original block is released (the result of
 * xmlRealloc is kept in a temporary so the old pointer is not lost) and
 * the *enclosing function* returns NULL.
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_tmp;						\
    buffer##_size *= 2;							\
    buffer##_tmp = (xmlChar *)						\
        xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));		\
    if (buffer##_tmp == NULL) {						\
        perror("realloc failed");					\
        xmlFree(buffer);						\
        return(NULL);							\
    }									\
    buffer = buffer##_tmp;						\
}
1.77 daniel 2009:
2010: /**
2011: * xmlDecodeEntities:
2012: * @ctxt: the parser context
2013: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2014: * @len: the len to decode (in bytes !), -1 for no size limit
1.123 daniel 2015: * @end: an end marker xmlChar, 0 if none
2016: * @end2: an end marker xmlChar, 0 if none
2017: * @end3: an end marker xmlChar, 0 if none
1.77 daniel 2018: *
2019: * [67] Reference ::= EntityRef | CharRef
2020: *
2021: * [69] PEReference ::= '%' Name ';'
2022: *
2023: * Returns A newly allocated string with the substitution done. The caller
2024: * must deallocate it !
2025: */
1.123 daniel 2026: xmlChar *
1.77 daniel 2027: xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
1.123 daniel 2028: xmlChar end, xmlChar end2, xmlChar end3) {
2029: xmlChar *buffer = NULL;
1.78 daniel 2030: int buffer_size = 0;
1.161 daniel 2031: int nbchars = 0;
1.78 daniel 2032:
1.123 daniel 2033: xmlChar *current = NULL;
1.77 daniel 2034: xmlEntityPtr ent;
2035: unsigned int max = (unsigned int) len;
1.161 daniel 2036: int c,l;
1.77 daniel 2037:
1.185 daniel 2038: if (ctxt->depth > 40) {
2039: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2040: ctxt->sax->error(ctxt->userData,
2041: "Detected entity reference loop\n");
2042: ctxt->wellFormed = 0;
2043: ctxt->disableSAX = 1;
2044: ctxt->errNo = XML_ERR_ENTITY_LOOP;
2045: return(NULL);
2046: }
2047:
1.77 daniel 2048: /*
2049: * allocate a translation buffer.
2050: */
1.140 daniel 2051: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.123 daniel 2052: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
1.77 daniel 2053: if (buffer == NULL) {
2054: perror("xmlDecodeEntities: malloc failed");
2055: return(NULL);
2056: }
2057:
1.78 daniel 2058: /*
2059: * Ok loop until we reach one of the ending char or a size limit.
2060: */
1.161 daniel 2061: c = CUR_CHAR(l);
2062: while ((nbchars < max) && (c != end) &&
2063: (c != end2) && (c != end3)) {
1.77 daniel 2064:
1.161 daniel 2065: if (c == 0) break;
2066: if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
1.98 daniel 2067: int val = xmlParseCharRef(ctxt);
1.161 daniel 2068: COPY_BUF(0,buffer,nbchars,val);
2069: NEXTL(l);
2070: } else if ((c == '&') && (ctxt->token != '&') &&
2071: (what & XML_SUBSTITUTE_REF)) {
1.98 daniel 2072: ent = xmlParseEntityRef(ctxt);
2073: if ((ent != NULL) &&
2074: (ctxt->replaceEntities != 0)) {
2075: current = ent->content;
2076: while (*current != 0) {
1.161 daniel 2077: buffer[nbchars++] = *current++;
2078: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.98 daniel 2079: growBuffer(buffer);
1.77 daniel 2080: }
2081: }
1.98 daniel 2082: } else if (ent != NULL) {
1.123 daniel 2083: const xmlChar *cur = ent->name;
1.98 daniel 2084:
1.161 daniel 2085: buffer[nbchars++] = '&';
2086: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.98 daniel 2087: growBuffer(buffer);
2088: }
1.161 daniel 2089: while (*cur != 0) {
2090: buffer[nbchars++] = *cur++;
2091: }
2092: buffer[nbchars++] = ';';
1.77 daniel 2093: }
1.161 daniel 2094: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1.97 daniel 2095: /*
1.77 daniel 2096: * a PEReference induce to switch the entity flow,
2097: * we break here to flush the current set of chars
2098: * parsed if any. We will be called back later.
1.97 daniel 2099: */
1.91 daniel 2100: if (nbchars != 0) break;
1.77 daniel 2101:
2102: xmlParsePEReference(ctxt);
1.79 daniel 2103:
1.97 daniel 2104: /*
1.79 daniel 2105: * Pop-up of finished entities.
1.97 daniel 2106: */
1.152 daniel 2107: while ((RAW == 0) && (ctxt->inputNr > 1))
1.79 daniel 2108: xmlPopInput(ctxt);
2109:
1.98 daniel 2110: break;
1.77 daniel 2111: } else {
1.161 daniel 2112: COPY_BUF(l,buffer,nbchars,c);
2113: NEXTL(l);
2114: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.86 daniel 2115: growBuffer(buffer);
2116: }
1.77 daniel 2117: }
1.161 daniel 2118: c = CUR_CHAR(l);
1.77 daniel 2119: }
1.161 daniel 2120: buffer[nbchars++] = 0;
1.77 daniel 2121: return(buffer);
2122: }
2123:
1.135 daniel 2124: /**
2125: * xmlStringDecodeEntities:
2126: * @ctxt: the parser context
2127: * @str: the input string
2128: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2129: * @end: an end marker xmlChar, 0 if none
2130: * @end2: an end marker xmlChar, 0 if none
2131: * @end3: an end marker xmlChar, 0 if none
2132: *
2133: * [67] Reference ::= EntityRef | CharRef
2134: *
2135: * [69] PEReference ::= '%' Name ';'
2136: *
2137: * Returns A newly allocated string with the substitution done. The caller
2138: * must deallocate it !
2139: */
2140: xmlChar *
2141: xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2142: xmlChar end, xmlChar end2, xmlChar end3) {
2143: xmlChar *buffer = NULL;
2144: int buffer_size = 0;
2145:
2146: xmlChar *current = NULL;
2147: xmlEntityPtr ent;
1.176 daniel 2148: int c,l;
2149: int nbchars = 0;
1.135 daniel 2150:
1.185 daniel 2151: if (ctxt->depth > 40) {
2152: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2153: ctxt->sax->error(ctxt->userData,
2154: "Detected entity reference loop\n");
2155: ctxt->wellFormed = 0;
2156: ctxt->disableSAX = 1;
2157: ctxt->errNo = XML_ERR_ENTITY_LOOP;
2158: return(NULL);
2159: }
2160:
1.135 daniel 2161: /*
2162: * allocate a translation buffer.
2163: */
1.140 daniel 2164: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.135 daniel 2165: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2166: if (buffer == NULL) {
2167: perror("xmlDecodeEntities: malloc failed");
2168: return(NULL);
2169: }
2170:
2171: /*
2172: * Ok loop until we reach one of the ending char or a size limit.
2173: */
1.176 daniel 2174: c = CUR_SCHAR(str, l);
2175: while ((c != 0) && (c != end) && (c != end2) && (c != end3)) {
1.135 daniel 2176:
1.176 daniel 2177: if (c == 0) break;
2178: if ((c == '&') && (str[1] == '#')) {
1.135 daniel 2179: int val = xmlParseStringCharRef(ctxt, &str);
1.176 daniel 2180: if (val != 0) {
2181: COPY_BUF(0,buffer,nbchars,val);
2182: }
2183: } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1.135 daniel 2184: ent = xmlParseStringEntityRef(ctxt, &str);
1.185 daniel 2185: if ((ent != NULL) && (ent->content != NULL)) {
2186: xmlChar *rep;
2187:
2188: ctxt->depth++;
2189: rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2190: 0, 0, 0);
2191: ctxt->depth--;
2192: if (rep != NULL) {
2193: current = rep;
2194: while (*current != 0) {
2195: buffer[nbchars++] = *current++;
2196: if (nbchars >
2197: buffer_size - XML_PARSER_BUFFER_SIZE) {
2198: growBuffer(buffer);
2199: }
1.135 daniel 2200: }
1.185 daniel 2201: xmlFree(rep);
1.135 daniel 2202: }
2203: } else if (ent != NULL) {
2204: int i = xmlStrlen(ent->name);
2205: const xmlChar *cur = ent->name;
2206:
1.176 daniel 2207: buffer[nbchars++] = '&';
2208: if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 2209: growBuffer(buffer);
2210: }
2211: for (;i > 0;i--)
1.176 daniel 2212: buffer[nbchars++] = *cur++;
2213: buffer[nbchars++] = ';';
1.135 daniel 2214: }
1.176 daniel 2215: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1.135 daniel 2216: ent = xmlParseStringPEReference(ctxt, &str);
2217: if (ent != NULL) {
1.185 daniel 2218: xmlChar *rep;
2219:
2220: ctxt->depth++;
2221: rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2222: 0, 0, 0);
2223: ctxt->depth--;
2224: if (rep != NULL) {
2225: current = rep;
2226: while (*current != 0) {
2227: buffer[nbchars++] = *current++;
2228: if (nbchars >
2229: buffer_size - XML_PARSER_BUFFER_SIZE) {
2230: growBuffer(buffer);
2231: }
1.135 daniel 2232: }
1.185 daniel 2233: xmlFree(rep);
1.135 daniel 2234: }
2235: }
2236: } else {
1.176 daniel 2237: COPY_BUF(l,buffer,nbchars,c);
2238: str += l;
2239: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 2240: growBuffer(buffer);
2241: }
2242: }
1.176 daniel 2243: c = CUR_SCHAR(str, l);
1.135 daniel 2244: }
1.176 daniel 2245: buffer[nbchars++] = 0;
1.135 daniel 2246: return(buffer);
2247: }
2248:
1.1 veillard 2249:
1.28 daniel 2250: /************************************************************************
2251: * *
1.75 daniel 2252: * Commodity functions to handle encodings *
2253: * *
2254: ************************************************************************/
2255:
1.172 daniel 2256: /*
2257: * xmlCheckLanguageID
2258: * @lang: pointer to the string value
2259: *
2260: * Checks that the value conforms to the LanguageID production:
2261: *
2262: * [33] LanguageID ::= Langcode ('-' Subcode)*
2263: * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2264: * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2265: * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2266: * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2267: * [38] Subcode ::= ([a-z] | [A-Z])+
2268: *
2269: * Returns 1 if correct 0 otherwise
2270: **/
2271: int
2272: xmlCheckLanguageID(const xmlChar *lang) {
2273: const xmlChar *cur = lang;
2274:
2275: if (cur == NULL)
2276: return(0);
2277: if (((cur[0] == 'i') && (cur[1] == '-')) ||
2278: ((cur[0] == 'I') && (cur[1] == '-'))) {
2279: /*
2280: * IANA code
2281: */
2282: cur += 2;
2283: while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2284: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2285: cur++;
2286: } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2287: ((cur[0] == 'X') && (cur[1] == '-'))) {
2288: /*
2289: * User code
2290: */
2291: cur += 2;
2292: while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2293: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2294: cur++;
2295: } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2296: ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2297: /*
2298: * ISO639
2299: */
2300: cur++;
2301: if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2302: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2303: cur++;
2304: else
2305: return(0);
2306: } else
2307: return(0);
2308: while (cur[0] != 0) {
2309: if (cur[0] != '-')
2310: return(0);
2311: cur++;
2312: if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2313: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2314: cur++;
2315: else
2316: return(0);
2317: while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2318: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2319: cur++;
2320: }
2321: return(1);
2322: }
2323:
1.75 daniel 2324: /**
2325: * xmlSwitchEncoding:
2326: * @ctxt: the parser context
1.124 daniel 2327: * @enc: the encoding value (number)
1.75 daniel 2328: *
2329: * change the input functions when discovering the character encoding
2330: * of a given entity.
1.193 daniel 2331: *
2332: * Returns 0 in case of success, -1 otherwise
1.75 daniel 2333: */
1.193 daniel 2334: int
1.75 daniel 2335: xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
2336: {
1.156 daniel 2337: xmlCharEncodingHandlerPtr handler;
2338:
1.193 daniel 2339: switch (enc) {
2340: case XML_CHAR_ENCODING_ERROR:
2341: ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
2342: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2343: ctxt->sax->error(ctxt->userData, "encoding unknown\n");
2344: ctxt->wellFormed = 0;
2345: ctxt->disableSAX = 1;
2346: break;
2347: case XML_CHAR_ENCODING_NONE:
2348: /* let's assume it's UTF-8 without the XML decl */
1.198 daniel 2349: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2350: return(0);
2351: case XML_CHAR_ENCODING_UTF8:
2352: /* default encoding, no conversion should be needed */
1.198 daniel 2353: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2354: return(0);
2355: default:
2356: break;
2357: }
1.156 daniel 2358: handler = xmlGetCharEncodingHandler(enc);
1.193 daniel 2359: if (handler == NULL) {
2360: /*
2361: * Default handlers.
2362: */
2363: switch (enc) {
2364: case XML_CHAR_ENCODING_ERROR:
2365: ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
2366: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2367: ctxt->sax->error(ctxt->userData, "encoding unknown\n");
2368: ctxt->wellFormed = 0;
2369: ctxt->disableSAX = 1;
1.198 daniel 2370: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2371: break;
2372: case XML_CHAR_ENCODING_NONE:
2373: /* let's assume it's UTF-8 without the XML decl */
1.198 daniel 2374: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2375: return(0);
2376: case XML_CHAR_ENCODING_UTF8:
2377: /* default encoding, no conversion should be needed */
1.198 daniel 2378: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2379: return(0);
2380: case XML_CHAR_ENCODING_UTF16LE:
2381: break;
2382: case XML_CHAR_ENCODING_UTF16BE:
2383: break;
2384: case XML_CHAR_ENCODING_UCS4LE:
2385: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2386: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2387: ctxt->sax->error(ctxt->userData,
2388: "char encoding USC4 little endian not supported\n");
2389: break;
2390: case XML_CHAR_ENCODING_UCS4BE:
2391: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2392: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2393: ctxt->sax->error(ctxt->userData,
2394: "char encoding USC4 big endian not supported\n");
2395: break;
2396: case XML_CHAR_ENCODING_EBCDIC:
2397: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2398: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2399: ctxt->sax->error(ctxt->userData,
2400: "char encoding EBCDIC not supported\n");
2401: break;
2402: case XML_CHAR_ENCODING_UCS4_2143:
2403: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2404: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2405: ctxt->sax->error(ctxt->userData,
2406: "char encoding UCS4 2143 not supported\n");
2407: break;
2408: case XML_CHAR_ENCODING_UCS4_3412:
2409: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2410: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2411: ctxt->sax->error(ctxt->userData,
2412: "char encoding UCS4 3412 not supported\n");
2413: break;
2414: case XML_CHAR_ENCODING_UCS2:
2415: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2416: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2417: ctxt->sax->error(ctxt->userData,
2418: "char encoding UCS2 not supported\n");
2419: break;
2420: case XML_CHAR_ENCODING_8859_1:
2421: case XML_CHAR_ENCODING_8859_2:
2422: case XML_CHAR_ENCODING_8859_3:
2423: case XML_CHAR_ENCODING_8859_4:
2424: case XML_CHAR_ENCODING_8859_5:
2425: case XML_CHAR_ENCODING_8859_6:
2426: case XML_CHAR_ENCODING_8859_7:
2427: case XML_CHAR_ENCODING_8859_8:
2428: case XML_CHAR_ENCODING_8859_9:
1.195 daniel 2429: /*
2430: * Keep the internal content in the document encoding
2431: */
2432: if ((ctxt->inputNr == 1) &&
2433: (ctxt->encoding == NULL) &&
2434: (ctxt->input->encoding != NULL)) {
2435: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
2436: }
1.198 daniel 2437: ctxt->charset = enc;
1.195 daniel 2438: return(0);
1.193 daniel 2439: case XML_CHAR_ENCODING_2022_JP:
2440: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2441: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2442: ctxt->sax->error(ctxt->userData,
2443: "char encoding ISO-2022-JPnot supported\n");
2444: break;
2445: case XML_CHAR_ENCODING_SHIFT_JIS:
2446: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2447: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2448: ctxt->sax->error(ctxt->userData,
2449: "char encoding Shift_JIS not supported\n");
2450: break;
2451: case XML_CHAR_ENCODING_EUC_JP:
2452: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2453: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2454: ctxt->sax->error(ctxt->userData,
2455: "char encoding EUC-JPnot supported\n");
2456: break;
2457: }
2458: }
2459: if (handler == NULL)
2460: return(-1);
1.198 daniel 2461: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2462: return(xmlSwitchToEncoding(ctxt, handler));
2463: }
2464:
2465: /**
2466: * xmlSwitchToEncoding:
2467: * @ctxt: the parser context
2468: * @handler: the encoding handler
2469: *
2470: * change the input functions when discovering the character encoding
2471: * of a given entity.
2472: *
2473: * Returns 0 in case of success, -1 otherwise
2474: */
2475: int
2476: xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
2477: {
1.194 daniel 2478: int nbchars;
2479:
1.156 daniel 2480: if (handler != NULL) {
2481: if (ctxt->input != NULL) {
2482: if (ctxt->input->buf != NULL) {
2483: if (ctxt->input->buf->encoder != NULL) {
1.193 daniel 2484: if (ctxt->input->buf->encoder == handler)
2485: return(0);
1.197 daniel 2486: /*
2487: * Note: this is a bit dangerous, but that's what it
2488: * takes to use nearly compatible signature for different
2489: * encodings.
2490: */
2491: xmlCharEncCloseFunc(ctxt->input->buf->encoder);
2492: ctxt->input->buf->encoder = handler;
2493: return(0);
1.156 daniel 2494: }
2495: ctxt->input->buf->encoder = handler;
2496:
2497: /*
1.194 daniel 2498: * Is there already some content down the pipe to convert ?
1.156 daniel 2499: */
2500: if ((ctxt->input->buf->buffer != NULL) &&
2501: (ctxt->input->buf->buffer->use > 0)) {
2502: int processed;
2503:
2504: /*
2505: * Specific handling of the Byte Order Mark for
2506: * UTF-16
2507: */
1.195 daniel 2508: if ((handler->name != NULL) &&
2509: (!strcmp(handler->name, "UTF-16LE")) &&
1.156 daniel 2510: (ctxt->input->cur[0] == 0xFF) &&
2511: (ctxt->input->cur[1] == 0xFE)) {
1.194 daniel 2512: ctxt->input->cur += 2;
1.156 daniel 2513: }
1.195 daniel 2514: if ((handler->name != NULL) &&
2515: (!strcmp(handler->name, "UTF-16BE")) &&
1.156 daniel 2516: (ctxt->input->cur[0] == 0xFE) &&
2517: (ctxt->input->cur[1] == 0xFF)) {
1.194 daniel 2518: ctxt->input->cur += 2;
1.156 daniel 2519: }
2520:
2521: /*
1.194 daniel 2522: * Shring the current input buffer.
2523: * Move it as the raw buffer and create a new input buffer
1.156 daniel 2524: */
2525: processed = ctxt->input->cur - ctxt->input->base;
1.194 daniel 2526: xmlBufferShrink(ctxt->input->buf->buffer, processed);
2527: ctxt->input->buf->raw = ctxt->input->buf->buffer;
2528: ctxt->input->buf->buffer = xmlBufferCreate();
2529:
2530: /*
1.197 daniel 2531: * convert just enough to get
2532: * '<?xml version="1.0" encoding="xxx"?>'
2533: * parsed with the autodetected encoding
2534: * into the parser reading buffer.
1.194 daniel 2535: */
1.197 daniel 2536: nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
2537: ctxt->input->buf->buffer,
2538: ctxt->input->buf->raw);
1.194 daniel 2539: if (nbchars < 0) {
2540: fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
2541: return(-1);
1.156 daniel 2542: }
1.194 daniel 2543: ctxt->input->base =
2544: ctxt->input->cur = ctxt->input->buf->buffer->content;
1.156 daniel 2545: }
1.193 daniel 2546: return(0);
1.156 daniel 2547: } else {
2548: if (ctxt->input->length == 0) {
2549: /*
2550: * When parsing a static memory array one must know the
2551: * size to be able to convert the buffer.
2552: */
2553: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2554: ctxt->sax->error(ctxt->userData,
2555: "xmlSwitchEncoding : no input\n");
1.193 daniel 2556: return(-1);
1.156 daniel 2557: } else {
1.194 daniel 2558: int processed;
2559:
2560: /*
2561: * Shring the current input buffer.
2562: * Move it as the raw buffer and create a new input buffer
2563: */
2564: processed = ctxt->input->cur - ctxt->input->base;
2565: ctxt->input->buf->raw = xmlBufferCreate();
2566: xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
2567: ctxt->input->length - processed);
2568: ctxt->input->buf->buffer = xmlBufferCreate();
1.156 daniel 2569:
2570: /*
1.194 daniel 2571: * convert as much as possible of the raw input
2572: * to the parser reading buffer.
2573: */
2574: nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
2575: ctxt->input->buf->buffer,
2576: ctxt->input->buf->raw);
2577: if (nbchars < 0) {
2578: fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
2579: return(-1);
1.156 daniel 2580: }
1.194 daniel 2581:
1.156 daniel 2582: /*
2583: * Conversion succeeded, get rid of the old buffer
2584: */
2585: if ((ctxt->input->free != NULL) &&
2586: (ctxt->input->base != NULL))
2587: ctxt->input->free((xmlChar *) ctxt->input->base);
1.194 daniel 2588: ctxt->input->base =
2589: ctxt->input->cur = ctxt->input->buf->buffer->content;
1.156 daniel 2590: }
2591: }
2592: } else {
2593: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2594: ctxt->sax->error(ctxt->userData,
2595: "xmlSwitchEncoding : no input\n");
1.193 daniel 2596: return(-1);
1.156 daniel 2597: }
1.195 daniel 2598: /*
2599: * The parsing is now done in UTF8 natively
2600: */
1.198 daniel 2601: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2602: } else
2603: return(-1);
2604: return(0);
1.156 daniel 2605:
1.75 daniel 2606: }
2607:
2608: /************************************************************************
2609: * *
1.123 daniel 2610: * Commodity functions to handle xmlChars *
1.28 daniel 2611: * *
2612: ************************************************************************/
2613:
1.50 daniel 2614: /**
2615: * xmlStrndup:
1.123 daniel 2616: * @cur: the input xmlChar *
1.50 daniel 2617: * @len: the len of @cur
2618: *
1.123 daniel 2619: * a strndup for array of xmlChar's
1.68 daniel 2620: *
1.123 daniel 2621: * Returns a new xmlChar * or NULL
1.1 veillard 2622: */
1.123 daniel 2623: xmlChar *
2624: xmlStrndup(const xmlChar *cur, int len) {
1.135 daniel 2625: xmlChar *ret;
2626:
2627: if ((cur == NULL) || (len < 0)) return(NULL);
2628: ret = xmlMalloc((len + 1) * sizeof(xmlChar));
1.1 veillard 2629: if (ret == NULL) {
1.86 daniel 2630: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 2631: (len + 1) * (long)sizeof(xmlChar));
1.1 veillard 2632: return(NULL);
2633: }
1.123 daniel 2634: memcpy(ret, cur, len * sizeof(xmlChar));
1.1 veillard 2635: ret[len] = 0;
2636: return(ret);
2637: }
2638:
1.50 daniel 2639: /**
2640: * xmlStrdup:
1.123 daniel 2641: * @cur: the input xmlChar *
1.50 daniel 2642: *
1.152 daniel 2643: * a strdup for array of xmlChar's. Since they are supposed to be
2644: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2645: * a termination mark of '0'.
1.68 daniel 2646: *
1.123 daniel 2647: * Returns a new xmlChar * or NULL
1.1 veillard 2648: */
1.123 daniel 2649: xmlChar *
2650: xmlStrdup(const xmlChar *cur) {
2651: const xmlChar *p = cur;
1.1 veillard 2652:
1.135 daniel 2653: if (cur == NULL) return(NULL);
1.152 daniel 2654: while (*p != 0) p++;
1.1 veillard 2655: return(xmlStrndup(cur, p - cur));
2656: }
2657:
1.50 daniel 2658: /**
2659: * xmlCharStrndup:
2660: * @cur: the input char *
2661: * @len: the len of @cur
2662: *
1.123 daniel 2663: * a strndup for char's to xmlChar's
1.68 daniel 2664: *
1.123 daniel 2665: * Returns a new xmlChar * or NULL
1.45 daniel 2666: */
2667:
1.123 daniel 2668: xmlChar *
1.55 daniel 2669: xmlCharStrndup(const char *cur, int len) {
1.45 daniel 2670: int i;
1.135 daniel 2671: xmlChar *ret;
2672:
2673: if ((cur == NULL) || (len < 0)) return(NULL);
2674: ret = xmlMalloc((len + 1) * sizeof(xmlChar));
1.45 daniel 2675: if (ret == NULL) {
1.86 daniel 2676: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 2677: (len + 1) * (long)sizeof(xmlChar));
1.45 daniel 2678: return(NULL);
2679: }
2680: for (i = 0;i < len;i++)
1.123 daniel 2681: ret[i] = (xmlChar) cur[i];
1.45 daniel 2682: ret[len] = 0;
2683: return(ret);
2684: }
2685:
1.50 daniel 2686: /**
2687: * xmlCharStrdup:
2688: * @cur: the input char *
2689: * @len: the len of @cur
2690: *
1.123 daniel 2691: * a strdup for char's to xmlChar's
1.68 daniel 2692: *
1.123 daniel 2693: * Returns a new xmlChar * or NULL
1.45 daniel 2694: */
2695:
1.123 daniel 2696: xmlChar *
1.55 daniel 2697: xmlCharStrdup(const char *cur) {
1.45 daniel 2698: const char *p = cur;
2699:
1.135 daniel 2700: if (cur == NULL) return(NULL);
1.45 daniel 2701: while (*p != '\0') p++;
2702: return(xmlCharStrndup(cur, p - cur));
2703: }
2704:
1.50 daniel 2705: /**
2706: * xmlStrcmp:
1.123 daniel 2707: * @str1: the first xmlChar *
2708: * @str2: the second xmlChar *
1.50 daniel 2709: *
1.123 daniel 2710: * a strcmp for xmlChar's
1.68 daniel 2711: *
2712: * Returns the integer result of the comparison
1.14 veillard 2713: */
2714:
1.55 daniel 2715: int
1.123 daniel 2716: xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1.14 veillard 2717: register int tmp;
2718:
1.135 daniel 2719: if ((str1 == NULL) && (str2 == NULL)) return(0);
2720: if (str1 == NULL) return(-1);
2721: if (str2 == NULL) return(1);
1.14 veillard 2722: do {
2723: tmp = *str1++ - *str2++;
2724: if (tmp != 0) return(tmp);
2725: } while ((*str1 != 0) && (*str2 != 0));
2726: return (*str1 - *str2);
2727: }
2728:
1.50 daniel 2729: /**
2730: * xmlStrncmp:
1.123 daniel 2731: * @str1: the first xmlChar *
2732: * @str2: the second xmlChar *
1.50 daniel 2733: * @len: the max comparison length
2734: *
1.123 daniel 2735: * a strncmp for xmlChar's
1.68 daniel 2736: *
2737: * Returns the integer result of the comparison
1.14 veillard 2738: */
2739:
1.55 daniel 2740: int
1.123 daniel 2741: xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1.14 veillard 2742: register int tmp;
2743:
2744: if (len <= 0) return(0);
1.135 daniel 2745: if ((str1 == NULL) && (str2 == NULL)) return(0);
2746: if (str1 == NULL) return(-1);
2747: if (str2 == NULL) return(1);
1.14 veillard 2748: do {
2749: tmp = *str1++ - *str2++;
2750: if (tmp != 0) return(tmp);
2751: len--;
2752: if (len <= 0) return(0);
2753: } while ((*str1 != 0) && (*str2 != 0));
2754: return (*str1 - *str2);
2755: }
2756:
1.50 daniel 2757: /**
2758: * xmlStrchr:
1.123 daniel 2759: * @str: the xmlChar * array
2760: * @val: the xmlChar to search
1.50 daniel 2761: *
1.123 daniel 2762: * a strchr for xmlChar's
1.68 daniel 2763: *
1.123 daniel 2764: * Returns the xmlChar * for the first occurence or NULL.
1.14 veillard 2765: */
2766:
1.123 daniel 2767: const xmlChar *
2768: xmlStrchr(const xmlChar *str, xmlChar val) {
1.135 daniel 2769: if (str == NULL) return(NULL);
1.14 veillard 2770: while (*str != 0) {
1.123 daniel 2771: if (*str == val) return((xmlChar *) str);
1.14 veillard 2772: str++;
2773: }
2774: return(NULL);
1.89 daniel 2775: }
2776:
2777: /**
2778: * xmlStrstr:
1.123 daniel 2779: * @str: the xmlChar * array (haystack)
2780: * @val: the xmlChar to search (needle)
1.89 daniel 2781: *
1.123 daniel 2782: * a strstr for xmlChar's
1.89 daniel 2783: *
1.123 daniel 2784: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 2785: */
2786:
1.123 daniel 2787: const xmlChar *
2788: xmlStrstr(const xmlChar *str, xmlChar *val) {
1.89 daniel 2789: int n;
2790:
2791: if (str == NULL) return(NULL);
2792: if (val == NULL) return(NULL);
2793: n = xmlStrlen(val);
2794:
2795: if (n == 0) return(str);
2796: while (*str != 0) {
2797: if (*str == *val) {
1.123 daniel 2798: if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1.89 daniel 2799: }
2800: str++;
2801: }
2802: return(NULL);
2803: }
2804:
2805: /**
2806: * xmlStrsub:
1.123 daniel 2807: * @str: the xmlChar * array (haystack)
1.89 daniel 2808: * @start: the index of the first char (zero based)
2809: * @len: the length of the substring
2810: *
2811: * Extract a substring of a given string
2812: *
1.123 daniel 2813: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 2814: */
2815:
1.123 daniel 2816: xmlChar *
2817: xmlStrsub(const xmlChar *str, int start, int len) {
1.89 daniel 2818: int i;
2819:
2820: if (str == NULL) return(NULL);
2821: if (start < 0) return(NULL);
1.90 daniel 2822: if (len < 0) return(NULL);
1.89 daniel 2823:
2824: for (i = 0;i < start;i++) {
2825: if (*str == 0) return(NULL);
2826: str++;
2827: }
2828: if (*str == 0) return(NULL);
2829: return(xmlStrndup(str, len));
1.14 veillard 2830: }
1.28 daniel 2831:
1.50 daniel 2832: /**
2833: * xmlStrlen:
1.123 daniel 2834: * @str: the xmlChar * array
1.50 daniel 2835: *
1.127 daniel 2836: * length of a xmlChar's string
1.68 daniel 2837: *
1.123 daniel 2838: * Returns the number of xmlChar contained in the ARRAY.
1.45 daniel 2839: */
2840:
1.55 daniel 2841: int
1.123 daniel 2842: xmlStrlen(const xmlChar *str) {
1.45 daniel 2843: int len = 0;
2844:
2845: if (str == NULL) return(0);
2846: while (*str != 0) {
2847: str++;
2848: len++;
2849: }
2850: return(len);
2851: }
2852:
1.50 daniel 2853: /**
2854: * xmlStrncat:
1.123 daniel 2855: * @cur: the original xmlChar * array
2856: * @add: the xmlChar * array added
1.50 daniel 2857: * @len: the length of @add
2858: *
1.123 daniel 2859: * a strncat for array of xmlChar's
1.68 daniel 2860: *
1.123 daniel 2861: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 2862: */
2863:
1.123 daniel 2864: xmlChar *
2865: xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1.45 daniel 2866: int size;
1.123 daniel 2867: xmlChar *ret;
1.45 daniel 2868:
2869: if ((add == NULL) || (len == 0))
2870: return(cur);
2871: if (cur == NULL)
2872: return(xmlStrndup(add, len));
2873:
2874: size = xmlStrlen(cur);
1.123 daniel 2875: ret = xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1.45 daniel 2876: if (ret == NULL) {
1.86 daniel 2877: fprintf(stderr, "xmlStrncat: realloc of %ld byte failed\n",
1.123 daniel 2878: (size + len + 1) * (long)sizeof(xmlChar));
1.45 daniel 2879: return(cur);
2880: }
1.123 daniel 2881: memcpy(&ret[size], add, len * sizeof(xmlChar));
1.45 daniel 2882: ret[size + len] = 0;
2883: return(ret);
2884: }
2885:
1.50 daniel 2886: /**
2887: * xmlStrcat:
1.123 daniel 2888: * @cur: the original xmlChar * array
2889: * @add: the xmlChar * array added
1.50 daniel 2890: *
1.152 daniel 2891: * a strcat for array of xmlChar's. Since they are supposed to be
2892: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2893: * a termination mark of '0'.
1.68 daniel 2894: *
1.123 daniel 2895: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 2896: */
1.123 daniel 2897: xmlChar *
2898: xmlStrcat(xmlChar *cur, const xmlChar *add) {
2899: const xmlChar *p = add;
1.45 daniel 2900:
2901: if (add == NULL) return(cur);
2902: if (cur == NULL)
2903: return(xmlStrdup(add));
2904:
1.152 daniel 2905: while (*p != 0) p++;
1.45 daniel 2906: return(xmlStrncat(cur, add, p - add));
2907: }
2908:
2909: /************************************************************************
2910: * *
2911: * Commodity functions, cleanup needed ? *
2912: * *
2913: ************************************************************************/
2914:
1.50 daniel 2915: /**
2916: * areBlanks:
2917: * @ctxt: an XML parser context
1.123 daniel 2918: * @str: a xmlChar *
1.50 daniel 2919: * @len: the size of @str
2920: *
1.45 daniel 2921: * Is this a sequence of blank chars that one can ignore ?
1.50 daniel 2922: *
1.68 daniel 2923: * Returns 1 if ignorable 0 otherwise.
1.45 daniel 2924: */
2925:
1.123 daniel 2926: static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1.104 daniel 2927: int i, ret;
1.45 daniel 2928: xmlNodePtr lastChild;
2929:
1.176 daniel 2930: /*
2931: * Check for xml:space value.
2932: */
2933: if (*(ctxt->space) == 1)
2934: return(0);
2935:
2936: /*
2937: * Check that the string is made of blanks
2938: */
1.45 daniel 2939: for (i = 0;i < len;i++)
2940: if (!(IS_BLANK(str[i]))) return(0);
2941:
1.176 daniel 2942: /*
2943: * Look if the element is mixed content in the Dtd if available
2944: */
1.104 daniel 2945: if (ctxt->myDoc != NULL) {
2946: ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2947: if (ret == 0) return(1);
2948: if (ret == 1) return(0);
2949: }
1.176 daniel 2950:
1.104 daniel 2951: /*
1.176 daniel 2952: * Otherwise, heuristic :-\
1.104 daniel 2953: */
1.179 daniel 2954: if (ctxt->keepBlanks)
2955: return(0);
2956: if (RAW != '<') return(0);
2957: if (ctxt->node == NULL) return(0);
2958: if ((ctxt->node->children == NULL) &&
2959: (RAW == '<') && (NXT(1) == '/')) return(0);
2960:
1.45 daniel 2961: lastChild = xmlGetLastChild(ctxt->node);
2962: if (lastChild == NULL) {
2963: if (ctxt->node->content != NULL) return(0);
2964: } else if (xmlNodeIsText(lastChild))
2965: return(0);
1.157 daniel 2966: else if ((ctxt->node->children != NULL) &&
2967: (xmlNodeIsText(ctxt->node->children)))
1.104 daniel 2968: return(0);
1.45 daniel 2969: return(1);
2970: }
2971:
1.50 daniel 2972: /**
2973: * xmlHandleEntity:
2974: * @ctxt: an XML parser context
2975: * @entity: an XML entity pointer.
2976: *
2977: * Default handling of defined entities, when should we define a new input
1.45 daniel 2978: * stream ? When do we just handle that as a set of chars ?
1.99 daniel 2979: *
2980: * OBSOLETE: to be removed at some point.
1.45 daniel 2981: */
2982:
1.55 daniel 2983: void
2984: xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 2985: int len;
1.50 daniel 2986: xmlParserInputPtr input;
1.45 daniel 2987:
2988: if (entity->content == NULL) {
1.123 daniel 2989: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 2990: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2991: ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
1.45 daniel 2992: entity->name);
1.59 daniel 2993: ctxt->wellFormed = 0;
1.180 daniel 2994: ctxt->disableSAX = 1;
1.45 daniel 2995: return;
2996: }
2997: len = xmlStrlen(entity->content);
2998: if (len <= 2) goto handle_as_char;
2999:
3000: /*
3001: * Redefine its content as an input stream.
3002: */
1.50 daniel 3003: input = xmlNewEntityInputStream(ctxt, entity);
3004: xmlPushInput(ctxt, input);
1.45 daniel 3005: return;
3006:
3007: handle_as_char:
3008: /*
3009: * Just handle the content as a set of chars.
3010: */
1.171 daniel 3011: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3012: (ctxt->sax->characters != NULL))
1.74 daniel 3013: ctxt->sax->characters(ctxt->userData, entity->content, len);
1.45 daniel 3014:
3015: }
3016:
3017: /*
3018: * Forward definition for recursive behaviour.
3019: */
1.77 daniel 3020: void xmlParsePEReference(xmlParserCtxtPtr ctxt);
3021: void xmlParseReference(xmlParserCtxtPtr ctxt);
1.45 daniel 3022:
1.28 daniel 3023: /************************************************************************
3024: * *
3025: * Extra stuff for namespace support *
3026: * Relates to http://www.w3.org/TR/WD-xml-names *
3027: * *
3028: ************************************************************************/
3029:
1.50 daniel 3030: /**
3031: * xmlNamespaceParseNCName:
3032: * @ctxt: an XML parser context
3033: *
3034: * parse an XML namespace name.
1.28 daniel 3035: *
3036: * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
3037: *
3038: * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3039: * CombiningChar | Extender
1.68 daniel 3040: *
3041: * Returns the namespace name or NULL
1.28 daniel 3042: */
3043:
1.123 daniel 3044: xmlChar *
1.55 daniel 3045: xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
1.152 daniel 3046: xmlChar buf[XML_MAX_NAMELEN + 5];
3047: int len = 0, l;
3048: int cur = CUR_CHAR(l);
1.28 daniel 3049:
1.156 daniel 3050: /* load first the value of the char !!! */
1.152 daniel 3051: if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
1.28 daniel 3052:
1.152 daniel 3053: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
3054: (cur == '.') || (cur == '-') ||
3055: (cur == '_') ||
3056: (IS_COMBINING(cur)) ||
3057: (IS_EXTENDER(cur))) {
3058: COPY_BUF(l,buf,len,cur);
3059: NEXTL(l);
3060: cur = CUR_CHAR(l);
1.91 daniel 3061: if (len >= XML_MAX_NAMELEN) {
3062: fprintf(stderr,
3063: "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
1.152 daniel 3064: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
3065: (cur == '.') || (cur == '-') ||
3066: (cur == '_') ||
3067: (IS_COMBINING(cur)) ||
3068: (IS_EXTENDER(cur))) {
3069: NEXTL(l);
3070: cur = CUR_CHAR(l);
3071: }
1.91 daniel 3072: break;
3073: }
3074: }
3075: return(xmlStrndup(buf, len));
1.28 daniel 3076: }
3077:
1.50 daniel 3078: /**
3079: * xmlNamespaceParseQName:
3080: * @ctxt: an XML parser context
1.123 daniel 3081: * @prefix: a xmlChar **
1.50 daniel 3082: *
3083: * parse an XML qualified name
1.28 daniel 3084: *
3085: * [NS 5] QName ::= (Prefix ':')? LocalPart
3086: *
3087: * [NS 6] Prefix ::= NCName
3088: *
3089: * [NS 7] LocalPart ::= NCName
1.68 daniel 3090: *
1.127 daniel 3091: * Returns the local part, and prefix is updated
1.50 daniel 3092: * to get the Prefix if any.
1.28 daniel 3093: */
3094:
1.123 daniel 3095: xmlChar *
3096: xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, xmlChar **prefix) {
3097: xmlChar *ret = NULL;
1.28 daniel 3098:
3099: *prefix = NULL;
3100: ret = xmlNamespaceParseNCName(ctxt);
1.152 daniel 3101: if (RAW == ':') {
1.28 daniel 3102: *prefix = ret;
1.40 daniel 3103: NEXT;
1.28 daniel 3104: ret = xmlNamespaceParseNCName(ctxt);
3105: }
3106:
3107: return(ret);
3108: }
3109:
1.50 daniel 3110: /**
1.72 daniel 3111: * xmlSplitQName:
1.162 daniel 3112: * @ctxt: an XML parser context
1.72 daniel 3113: * @name: an XML parser context
1.123 daniel 3114: * @prefix: a xmlChar **
1.72 daniel 3115: *
3116: * parse an XML qualified name string
3117: *
3118: * [NS 5] QName ::= (Prefix ':')? LocalPart
3119: *
3120: * [NS 6] Prefix ::= NCName
3121: *
3122: * [NS 7] LocalPart ::= NCName
3123: *
1.127 daniel 3124: * Returns the local part, and prefix is updated
1.72 daniel 3125: * to get the Prefix if any.
3126: */
3127:
1.123 daniel 3128: xmlChar *
1.162 daniel 3129: xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
3130: xmlChar buf[XML_MAX_NAMELEN + 5];
3131: int len = 0;
1.123 daniel 3132: xmlChar *ret = NULL;
3133: const xmlChar *cur = name;
1.162 daniel 3134: int c,l;
1.72 daniel 3135:
3136: *prefix = NULL;
1.113 daniel 3137:
3138: /* xml: prefix is not really a namespace */
3139: if ((cur[0] == 'x') && (cur[1] == 'm') &&
3140: (cur[2] == 'l') && (cur[3] == ':'))
3141: return(xmlStrdup(name));
3142:
1.162 daniel 3143: /* nasty but valid */
3144: if (cur[0] == ':')
3145: return(xmlStrdup(name));
3146:
3147: c = CUR_SCHAR(cur, l);
3148: if (!IS_LETTER(c) && (c != '_')) return(NULL);
1.72 daniel 3149:
1.162 daniel 3150: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3151: (c == '.') || (c == '-') ||
3152: (c == '_') ||
3153: (IS_COMBINING(c)) ||
3154: (IS_EXTENDER(c))) {
3155: COPY_BUF(l,buf,len,c);
3156: cur += l;
3157: c = CUR_SCHAR(cur, l);
3158: }
1.72 daniel 3159:
1.162 daniel 3160: ret = xmlStrndup(buf, len);
1.72 daniel 3161:
1.162 daniel 3162: if (c == ':') {
3163: cur += l;
1.163 daniel 3164: c = CUR_SCHAR(cur, l);
1.162 daniel 3165: if (!IS_LETTER(c) && (c != '_')) return(ret);
1.72 daniel 3166: *prefix = ret;
1.162 daniel 3167: len = 0;
1.72 daniel 3168:
1.162 daniel 3169: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3170: (c == '.') || (c == '-') ||
3171: (c == '_') ||
3172: (IS_COMBINING(c)) ||
3173: (IS_EXTENDER(c))) {
3174: COPY_BUF(l,buf,len,c);
3175: cur += l;
3176: c = CUR_SCHAR(cur, l);
3177: }
1.72 daniel 3178:
1.162 daniel 3179: ret = xmlStrndup(buf, len);
1.72 daniel 3180: }
3181:
3182: return(ret);
3183: }
3184: /**
1.50 daniel 3185: * xmlNamespaceParseNSDef:
3186: * @ctxt: an XML parser context
3187: *
3188: * parse a namespace prefix declaration
1.28 daniel 3189: *
3190: * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
3191: *
3192: * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
1.68 daniel 3193: *
3194: * Returns the namespace name
1.28 daniel 3195: */
3196:
1.123 daniel 3197: xmlChar *
1.55 daniel 3198: xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
1.123 daniel 3199: xmlChar *name = NULL;
1.28 daniel 3200:
1.152 daniel 3201: if ((RAW == 'x') && (NXT(1) == 'm') &&
1.40 daniel 3202: (NXT(2) == 'l') && (NXT(3) == 'n') &&
3203: (NXT(4) == 's')) {
3204: SKIP(5);
1.152 daniel 3205: if (RAW == ':') {
1.40 daniel 3206: NEXT;
1.28 daniel 3207: name = xmlNamespaceParseNCName(ctxt);
3208: }
3209: }
1.39 daniel 3210: return(name);
1.28 daniel 3211: }
3212:
1.50 daniel 3213: /**
3214: * xmlParseQuotedString:
3215: * @ctxt: an XML parser context
3216: *
1.45 daniel 3217: * [OLD] Parse and return a string between quotes or doublequotes
1.110 daniel 3218: * To be removed at next drop of binary compatibility
1.68 daniel 3219: *
3220: * Returns the string parser or NULL.
1.45 daniel 3221: */
1.123 daniel 3222: xmlChar *
1.55 daniel 3223: xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
1.135 daniel 3224: xmlChar *buf = NULL;
1.152 daniel 3225: int len = 0,l;
1.140 daniel 3226: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3227: int c;
1.45 daniel 3228:
1.135 daniel 3229: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3230: if (buf == NULL) {
3231: fprintf(stderr, "malloc of %d byte failed\n", size);
3232: return(NULL);
3233: }
1.152 daniel 3234: if (RAW == '"') {
1.45 daniel 3235: NEXT;
1.152 daniel 3236: c = CUR_CHAR(l);
1.135 daniel 3237: while (IS_CHAR(c) && (c != '"')) {
1.152 daniel 3238: if (len + 5 >= size) {
1.135 daniel 3239: size *= 2;
3240: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3241: if (buf == NULL) {
3242: fprintf(stderr, "realloc of %d byte failed\n", size);
3243: return(NULL);
3244: }
3245: }
1.152 daniel 3246: COPY_BUF(l,buf,len,c);
3247: NEXTL(l);
3248: c = CUR_CHAR(l);
1.135 daniel 3249: }
3250: if (c != '"') {
1.123 daniel 3251: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 3252: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3253: ctxt->sax->error(ctxt->userData,
3254: "String not closed \"%.50s\"\n", buf);
1.59 daniel 3255: ctxt->wellFormed = 0;
1.180 daniel 3256: ctxt->disableSAX = 1;
1.55 daniel 3257: } else {
1.45 daniel 3258: NEXT;
3259: }
1.152 daniel 3260: } else if (RAW == '\''){
1.45 daniel 3261: NEXT;
1.135 daniel 3262: c = CUR;
3263: while (IS_CHAR(c) && (c != '\'')) {
3264: if (len + 1 >= size) {
3265: size *= 2;
3266: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3267: if (buf == NULL) {
3268: fprintf(stderr, "realloc of %d byte failed\n", size);
3269: return(NULL);
3270: }
3271: }
3272: buf[len++] = c;
3273: NEXT;
3274: c = CUR;
3275: }
1.152 daniel 3276: if (RAW != '\'') {
1.123 daniel 3277: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 3278: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3279: ctxt->sax->error(ctxt->userData,
3280: "String not closed \"%.50s\"\n", buf);
1.59 daniel 3281: ctxt->wellFormed = 0;
1.180 daniel 3282: ctxt->disableSAX = 1;
1.55 daniel 3283: } else {
1.45 daniel 3284: NEXT;
3285: }
3286: }
1.135 daniel 3287: return(buf);
1.45 daniel 3288: }
3289:
1.50 daniel 3290: /**
3291: * xmlParseNamespace:
3292: * @ctxt: an XML parser context
3293: *
1.45 daniel 3294: * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
3295: *
3296: * This is what the older xml-name Working Draft specified, a bunch of
3297: * other stuff may still rely on it, so support is still here as
1.127 daniel 3298: * if it was declared on the root of the Tree:-(
1.110 daniel 3299: *
3300: * To be removed at next drop of binary compatibility
1.45 daniel 3301: */
3302:
1.55 daniel 3303: void
3304: xmlParseNamespace(xmlParserCtxtPtr ctxt) {
1.123 daniel 3305: xmlChar *href = NULL;
3306: xmlChar *prefix = NULL;
1.45 daniel 3307: int garbage = 0;
3308:
3309: /*
3310: * We just skipped "namespace" or "xml:namespace"
3311: */
3312: SKIP_BLANKS;
3313:
1.153 daniel 3314: while (IS_CHAR(RAW) && (RAW != '>')) {
1.45 daniel 3315: /*
3316: * We can have "ns" or "prefix" attributes
3317: * Old encoding as 'href' or 'AS' attributes is still supported
3318: */
1.152 daniel 3319: if ((RAW == 'n') && (NXT(1) == 's')) {
1.45 daniel 3320: garbage = 0;
3321: SKIP(2);
3322: SKIP_BLANKS;
3323:
1.152 daniel 3324: if (RAW != '=') continue;
1.45 daniel 3325: NEXT;
3326: SKIP_BLANKS;
3327:
3328: href = xmlParseQuotedString(ctxt);
3329: SKIP_BLANKS;
1.152 daniel 3330: } else if ((RAW == 'h') && (NXT(1) == 'r') &&
1.45 daniel 3331: (NXT(2) == 'e') && (NXT(3) == 'f')) {
3332: garbage = 0;
3333: SKIP(4);
3334: SKIP_BLANKS;
3335:
1.152 daniel 3336: if (RAW != '=') continue;
1.45 daniel 3337: NEXT;
3338: SKIP_BLANKS;
3339:
3340: href = xmlParseQuotedString(ctxt);
3341: SKIP_BLANKS;
1.152 daniel 3342: } else if ((RAW == 'p') && (NXT(1) == 'r') &&
1.45 daniel 3343: (NXT(2) == 'e') && (NXT(3) == 'f') &&
3344: (NXT(4) == 'i') && (NXT(5) == 'x')) {
3345: garbage = 0;
3346: SKIP(6);
3347: SKIP_BLANKS;
3348:
1.152 daniel 3349: if (RAW != '=') continue;
1.45 daniel 3350: NEXT;
3351: SKIP_BLANKS;
3352:
3353: prefix = xmlParseQuotedString(ctxt);
3354: SKIP_BLANKS;
1.152 daniel 3355: } else if ((RAW == 'A') && (NXT(1) == 'S')) {
1.45 daniel 3356: garbage = 0;
3357: SKIP(2);
3358: SKIP_BLANKS;
3359:
1.152 daniel 3360: if (RAW != '=') continue;
1.45 daniel 3361: NEXT;
3362: SKIP_BLANKS;
3363:
3364: prefix = xmlParseQuotedString(ctxt);
3365: SKIP_BLANKS;
1.152 daniel 3366: } else if ((RAW == '?') && (NXT(1) == '>')) {
1.45 daniel 3367: garbage = 0;
1.91 daniel 3368: NEXT;
1.45 daniel 3369: } else {
3370: /*
3371: * Found garbage when parsing the namespace
3372: */
1.122 daniel 3373: if (!garbage) {
1.55 daniel 3374: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3375: ctxt->sax->error(ctxt->userData,
3376: "xmlParseNamespace found garbage\n");
3377: }
1.123 daniel 3378: ctxt->errNo = XML_ERR_NS_DECL_ERROR;
1.59 daniel 3379: ctxt->wellFormed = 0;
1.180 daniel 3380: ctxt->disableSAX = 1;
1.45 daniel 3381: NEXT;
3382: }
3383: }
3384:
3385: MOVETO_ENDTAG(CUR_PTR);
3386: NEXT;
3387:
3388: /*
3389: * Register the DTD.
1.72 daniel 3390: if (href != NULL)
3391: if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
1.74 daniel 3392: ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
1.45 daniel 3393: */
3394:
1.119 daniel 3395: if (prefix != NULL) xmlFree(prefix);
3396: if (href != NULL) xmlFree(href);
1.45 daniel 3397: }
3398:
1.28 daniel 3399: /************************************************************************
3400: * *
3401: * The parser itself *
3402: * Relates to http://www.w3.org/TR/REC-xml *
3403: * *
3404: ************************************************************************/
1.14 veillard 3405:
1.50 daniel 3406: /**
1.97 daniel 3407: * xmlScanName:
3408: * @ctxt: an XML parser context
3409: *
3410: * Trickery: parse an XML name but without consuming the input flow
3411: * Needed for rollback cases.
3412: *
3413: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3414: * CombiningChar | Extender
3415: *
3416: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3417: *
3418: * [6] Names ::= Name (S Name)*
3419: *
3420: * Returns the Name parsed or NULL
3421: */
3422:
1.123 daniel 3423: xmlChar *
1.97 daniel 3424: xmlScanName(xmlParserCtxtPtr ctxt) {
1.123 daniel 3425: xmlChar buf[XML_MAX_NAMELEN];
1.97 daniel 3426: int len = 0;
3427:
3428: GROW;
1.152 daniel 3429: if (!IS_LETTER(RAW) && (RAW != '_') &&
3430: (RAW != ':')) {
1.97 daniel 3431: return(NULL);
3432: }
3433:
3434: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
3435: (NXT(len) == '.') || (NXT(len) == '-') ||
3436: (NXT(len) == '_') || (NXT(len) == ':') ||
3437: (IS_COMBINING(NXT(len))) ||
3438: (IS_EXTENDER(NXT(len)))) {
3439: buf[len] = NXT(len);
3440: len++;
3441: if (len >= XML_MAX_NAMELEN) {
3442: fprintf(stderr,
3443: "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3444: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
3445: (NXT(len) == '.') || (NXT(len) == '-') ||
3446: (NXT(len) == '_') || (NXT(len) == ':') ||
3447: (IS_COMBINING(NXT(len))) ||
3448: (IS_EXTENDER(NXT(len))))
3449: len++;
3450: break;
3451: }
3452: }
3453: return(xmlStrndup(buf, len));
3454: }
3455:
3456: /**
1.50 daniel 3457: * xmlParseName:
3458: * @ctxt: an XML parser context
3459: *
3460: * parse an XML name.
1.22 daniel 3461: *
3462: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3463: * CombiningChar | Extender
3464: *
3465: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3466: *
3467: * [6] Names ::= Name (S Name)*
1.68 daniel 3468: *
3469: * Returns the Name parsed or NULL
1.1 veillard 3470: */
3471:
1.123 daniel 3472: xmlChar *
1.55 daniel 3473: xmlParseName(xmlParserCtxtPtr ctxt) {
1.160 daniel 3474: xmlChar buf[XML_MAX_NAMELEN + 5];
3475: int len = 0, l;
3476: int c;
1.1 veillard 3477:
1.91 daniel 3478: GROW;
1.160 daniel 3479: c = CUR_CHAR(l);
1.190 daniel 3480: if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3481: (!IS_LETTER(c) && (c != '_') &&
3482: (c != ':'))) {
1.91 daniel 3483: return(NULL);
3484: }
1.40 daniel 3485:
1.190 daniel 3486: while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3487: ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3488: (c == '.') || (c == '-') ||
3489: (c == '_') || (c == ':') ||
3490: (IS_COMBINING(c)) ||
3491: (IS_EXTENDER(c)))) {
1.160 daniel 3492: COPY_BUF(l,buf,len,c);
3493: NEXTL(l);
3494: c = CUR_CHAR(l);
1.91 daniel 3495: if (len >= XML_MAX_NAMELEN) {
3496: fprintf(stderr,
3497: "xmlParseName: reached XML_MAX_NAMELEN limit\n");
1.160 daniel 3498: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3499: (c == '.') || (c == '-') ||
3500: (c == '_') || (c == ':') ||
3501: (IS_COMBINING(c)) ||
3502: (IS_EXTENDER(c))) {
3503: NEXTL(l);
3504: c = CUR_CHAR(l);
1.97 daniel 3505: }
1.91 daniel 3506: break;
3507: }
3508: }
3509: return(xmlStrndup(buf, len));
1.22 daniel 3510: }
3511:
1.50 daniel 3512: /**
1.135 daniel 3513: * xmlParseStringName:
3514: * @ctxt: an XML parser context
3515: * @str: a pointer to an index in the string
3516: *
3517: * parse an XML name.
3518: *
3519: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3520: * CombiningChar | Extender
3521: *
3522: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3523: *
3524: * [6] Names ::= Name (S Name)*
3525: *
3526: * Returns the Name parsed or NULL. The str pointer
3527: * is updated to the current location in the string.
3528: */
3529:
3530: xmlChar *
3531: xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1.176 daniel 3532: xmlChar buf[XML_MAX_NAMELEN + 5];
3533: const xmlChar *cur = *str;
3534: int len = 0, l;
3535: int c;
1.135 daniel 3536:
1.176 daniel 3537: c = CUR_SCHAR(cur, l);
3538: if (!IS_LETTER(c) && (c != '_') &&
3539: (c != ':')) {
1.135 daniel 3540: return(NULL);
3541: }
3542:
1.176 daniel 3543: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3544: (c == '.') || (c == '-') ||
3545: (c == '_') || (c == ':') ||
3546: (IS_COMBINING(c)) ||
3547: (IS_EXTENDER(c))) {
3548: COPY_BUF(l,buf,len,c);
3549: cur += l;
3550: c = CUR_SCHAR(cur, l);
3551: if (len >= XML_MAX_NAMELEN) {
3552: fprintf(stderr,
3553: "xmlParseName: reached XML_MAX_NAMELEN limit\n");
3554: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3555: (c == '.') || (c == '-') ||
3556: (c == '_') || (c == ':') ||
3557: (IS_COMBINING(c)) ||
3558: (IS_EXTENDER(c))) {
3559: cur += l;
3560: c = CUR_SCHAR(cur, l);
3561: }
3562: break;
3563: }
1.135 daniel 3564: }
1.176 daniel 3565: *str = cur;
3566: return(xmlStrndup(buf, len));
1.135 daniel 3567: }
3568:
3569: /**
1.50 daniel 3570: * xmlParseNmtoken:
3571: * @ctxt: an XML parser context
3572: *
3573: * parse an XML Nmtoken.
1.22 daniel 3574: *
3575: * [7] Nmtoken ::= (NameChar)+
3576: *
3577: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1.68 daniel 3578: *
3579: * Returns the Nmtoken parsed or NULL
1.22 daniel 3580: */
3581:
1.123 daniel 3582: xmlChar *
1.55 daniel 3583: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1.123 daniel 3584: xmlChar buf[XML_MAX_NAMELEN];
1.91 daniel 3585: int len = 0;
1.160 daniel 3586: int c,l;
1.22 daniel 3587:
1.91 daniel 3588: GROW;
1.160 daniel 3589: c = CUR_CHAR(l);
3590: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3591: (c == '.') || (c == '-') ||
3592: (c == '_') || (c == ':') ||
3593: (IS_COMBINING(c)) ||
3594: (IS_EXTENDER(c))) {
3595: COPY_BUF(l,buf,len,c);
3596: NEXTL(l);
3597: c = CUR_CHAR(l);
1.91 daniel 3598: if (len >= XML_MAX_NAMELEN) {
3599: fprintf(stderr,
3600: "xmlParseNmtoken: reached XML_MAX_NAMELEN limit\n");
1.160 daniel 3601: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3602: (c == '.') || (c == '-') ||
3603: (c == '_') || (c == ':') ||
3604: (IS_COMBINING(c)) ||
3605: (IS_EXTENDER(c))) {
3606: NEXTL(l);
3607: c = CUR_CHAR(l);
3608: }
1.91 daniel 3609: break;
3610: }
3611: }
1.168 daniel 3612: if (len == 0)
3613: return(NULL);
1.91 daniel 3614: return(xmlStrndup(buf, len));
1.1 veillard 3615: }
3616:
1.50 daniel 3617: /**
3618: * xmlParseEntityValue:
3619: * @ctxt: an XML parser context
1.78 daniel 3620: * @orig: if non-NULL store a copy of the original entity value
1.50 daniel 3621: *
3622: * parse a value for ENTITY decl.
1.24 daniel 3623: *
3624: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3625: * "'" ([^%&'] | PEReference | Reference)* "'"
1.68 daniel 3626: *
1.78 daniel 3627: * Returns the EntityValue parsed with reference substitued or NULL
1.24 daniel 3628: */
3629:
1.123 daniel 3630: xmlChar *
3631: xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1.135 daniel 3632: xmlChar *buf = NULL;
3633: int len = 0;
1.140 daniel 3634: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3635: int c, l;
1.135 daniel 3636: xmlChar stop;
1.123 daniel 3637: xmlChar *ret = NULL;
1.176 daniel 3638: const xmlChar *cur = NULL;
1.98 daniel 3639: xmlParserInputPtr input;
1.24 daniel 3640:
1.152 daniel 3641: if (RAW == '"') stop = '"';
3642: else if (RAW == '\'') stop = '\'';
1.135 daniel 3643: else {
3644: ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
3645: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3646: ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
3647: ctxt->wellFormed = 0;
1.180 daniel 3648: ctxt->disableSAX = 1;
1.135 daniel 3649: return(NULL);
3650: }
3651: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3652: if (buf == NULL) {
3653: fprintf(stderr, "malloc of %d byte failed\n", size);
3654: return(NULL);
3655: }
1.94 daniel 3656:
1.135 daniel 3657: /*
3658: * The content of the entity definition is copied in a buffer.
3659: */
1.94 daniel 3660:
1.135 daniel 3661: ctxt->instate = XML_PARSER_ENTITY_VALUE;
3662: input = ctxt->input;
3663: GROW;
3664: NEXT;
1.152 daniel 3665: c = CUR_CHAR(l);
1.135 daniel 3666: /*
3667: * NOTE: 4.4.5 Included in Literal
3668: * When a parameter entity reference appears in a literal entity
3669: * value, ... a single or double quote character in the replacement
3670: * text is always treated as a normal data character and will not
3671: * terminate the literal.
3672: * In practice it means we stop the loop only when back at parsing
3673: * the initial entity and the quote is found
3674: */
3675: while (IS_CHAR(c) && ((c != stop) || (ctxt->input != input))) {
1.152 daniel 3676: if (len + 5 >= size) {
1.135 daniel 3677: size *= 2;
3678: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3679: if (buf == NULL) {
3680: fprintf(stderr, "realloc of %d byte failed\n", size);
3681: return(NULL);
1.94 daniel 3682: }
1.79 daniel 3683: }
1.152 daniel 3684: COPY_BUF(l,buf,len,c);
3685: NEXTL(l);
1.98 daniel 3686: /*
1.135 daniel 3687: * Pop-up of finished entities.
1.98 daniel 3688: */
1.152 daniel 3689: while ((RAW == 0) && (ctxt->inputNr > 1))
1.135 daniel 3690: xmlPopInput(ctxt);
1.152 daniel 3691:
3692: c = CUR_CHAR(l);
1.135 daniel 3693: if (c == 0) {
1.94 daniel 3694: GROW;
1.152 daniel 3695: c = CUR_CHAR(l);
1.79 daniel 3696: }
1.135 daniel 3697: }
3698: buf[len] = 0;
3699:
3700: /*
1.176 daniel 3701: * Raise problem w.r.t. '&' and '%' being used in non-entities
3702: * reference constructs. Note Charref will be handled in
3703: * xmlStringDecodeEntities()
3704: */
3705: cur = buf;
3706: while (*cur != 0) {
3707: if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3708: xmlChar *name;
3709: xmlChar tmp = *cur;
3710:
3711: cur++;
3712: name = xmlParseStringName(ctxt, &cur);
3713: if ((name == NULL) || (*cur != ';')) {
3714: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3715: ctxt->sax->error(ctxt->userData,
3716: "EntityValue: '%c' forbidden except for entities references\n",
3717: tmp);
3718: ctxt->wellFormed = 0;
1.180 daniel 3719: ctxt->disableSAX = 1;
1.176 daniel 3720: ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
3721: }
3722: if ((ctxt->inSubset == 1) && (tmp == '%')) {
3723: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3724: ctxt->sax->error(ctxt->userData,
3725: "EntityValue: PEReferences forbidden in internal subset\n",
3726: tmp);
3727: ctxt->wellFormed = 0;
1.180 daniel 3728: ctxt->disableSAX = 1;
1.176 daniel 3729: ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
3730: }
3731: if (name != NULL)
3732: xmlFree(name);
3733: }
3734: cur++;
3735: }
3736:
3737: /*
1.135 daniel 3738: * Then PEReference entities are substituted.
3739: */
3740: if (c != stop) {
3741: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.55 daniel 3742: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3743: ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
1.59 daniel 3744: ctxt->wellFormed = 0;
1.180 daniel 3745: ctxt->disableSAX = 1;
1.170 daniel 3746: xmlFree(buf);
1.135 daniel 3747: } else {
3748: NEXT;
3749: /*
3750: * NOTE: 4.4.7 Bypassed
3751: * When a general entity reference appears in the EntityValue in
3752: * an entity declaration, it is bypassed and left as is.
1.176 daniel 3753: * so XML_SUBSTITUTE_REF is not set here.
1.135 daniel 3754: */
3755: ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3756: 0, 0, 0);
3757: if (orig != NULL)
3758: *orig = buf;
3759: else
3760: xmlFree(buf);
1.24 daniel 3761: }
3762:
3763: return(ret);
3764: }
3765:
1.50 daniel 3766: /**
3767: * xmlParseAttValue:
3768: * @ctxt: an XML parser context
3769: *
3770: * parse a value for an attribute
1.78 daniel 3771: * Note: the parser won't do substitution of entities here, this
1.113 daniel 3772: * will be handled later in xmlStringGetNodeList
1.29 daniel 3773: *
3774: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3775: * "'" ([^<&'] | Reference)* "'"
1.68 daniel 3776: *
1.129 daniel 3777: * 3.3.3 Attribute-Value Normalization:
3778: * Before the value of an attribute is passed to the application or
3779: * checked for validity, the XML processor must normalize it as follows:
3780: * - a character reference is processed by appending the referenced
3781: * character to the attribute value
3782: * - an entity reference is processed by recursively processing the
3783: * replacement text of the entity
3784: * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3785: * appending #x20 to the normalized value, except that only a single
3786: * #x20 is appended for a "#xD#xA" sequence that is part of an external
3787: * parsed entity or the literal entity value of an internal parsed entity
3788: * - other characters are processed by appending them to the normalized value
1.130 daniel 3789: * If the declared value is not CDATA, then the XML processor must further
3790: * process the normalized attribute value by discarding any leading and
3791: * trailing space (#x20) characters, and by replacing sequences of space
3792: * (#x20) characters by a single space (#x20) character.
3793: * All attributes for which no declaration has been read should be treated
3794: * by a non-validating parser as if declared CDATA.
1.129 daniel 3795: *
3796: * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
1.29 daniel 3797: */
3798:
1.123 daniel 3799: xmlChar *
1.55 daniel 3800: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.129 daniel 3801: xmlChar limit = 0;
1.198 daniel 3802: xmlChar *buf = NULL;
3803: int len = 0;
3804: int buf_size = 0;
3805: int c, l;
1.129 daniel 3806: xmlChar *current = NULL;
3807: xmlEntityPtr ent;
3808:
1.29 daniel 3809:
1.91 daniel 3810: SHRINK;
1.151 daniel 3811: if (NXT(0) == '"') {
1.96 daniel 3812: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.129 daniel 3813: limit = '"';
1.40 daniel 3814: NEXT;
1.151 daniel 3815: } else if (NXT(0) == '\'') {
1.129 daniel 3816: limit = '\'';
1.96 daniel 3817: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.40 daniel 3818: NEXT;
1.29 daniel 3819: } else {
1.123 daniel 3820: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
1.55 daniel 3821: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3822: ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
1.59 daniel 3823: ctxt->wellFormed = 0;
1.180 daniel 3824: ctxt->disableSAX = 1;
1.129 daniel 3825: return(NULL);
1.29 daniel 3826: }
3827:
1.129 daniel 3828: /*
3829: * allocate a translation buffer.
3830: */
1.198 daniel 3831: buf_size = XML_PARSER_BUFFER_SIZE;
3832: buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
3833: if (buf == NULL) {
1.129 daniel 3834: perror("xmlParseAttValue: malloc failed");
3835: return(NULL);
3836: }
3837:
3838: /*
3839: * Ok loop until we reach one of the ending char or a size limit.
3840: */
1.198 daniel 3841: c = CUR_CHAR(l);
3842: while (((NXT(0) != limit) && (c != '<')) || (ctxt->token != 0)) {
3843: if (c == 0) break;
3844: if ((c == '&') && (NXT(1) == '#')) {
1.129 daniel 3845: int val = xmlParseCharRef(ctxt);
1.198 daniel 3846: COPY_BUF(l,buf,len,val);
3847: NEXTL(l);
3848: } else if (c == '&') {
1.129 daniel 3849: ent = xmlParseEntityRef(ctxt);
3850: if ((ent != NULL) &&
3851: (ctxt->replaceEntities != 0)) {
1.185 daniel 3852: xmlChar *rep;
3853:
1.186 daniel 3854: if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3855: rep = xmlStringDecodeEntities(ctxt, ent->content,
1.185 daniel 3856: XML_SUBSTITUTE_REF, 0, 0, 0);
1.186 daniel 3857: if (rep != NULL) {
3858: current = rep;
3859: while (*current != 0) {
1.198 daniel 3860: buf[len++] = *current++;
3861: if (len > buf_size - 10) {
3862: growBuffer(buf);
1.186 daniel 3863: }
1.185 daniel 3864: }
1.186 daniel 3865: xmlFree(rep);
1.129 daniel 3866: }
1.186 daniel 3867: } else {
3868: if (ent->content != NULL)
1.198 daniel 3869: buf[len++] = ent->content[0];
1.129 daniel 3870: }
3871: } else if (ent != NULL) {
3872: int i = xmlStrlen(ent->name);
3873: const xmlChar *cur = ent->name;
3874:
1.186 daniel 3875: /*
3876: * This may look absurd but is needed to detect
3877: * entities problems
3878: */
3879: if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3880: xmlChar *rep;
3881: rep = xmlStringDecodeEntities(ctxt, ent->content,
3882: XML_SUBSTITUTE_REF, 0, 0, 0);
3883: if (rep != NULL)
3884: xmlFree(rep);
3885: }
3886:
3887: /*
3888: * Just output the reference
3889: */
1.198 daniel 3890: buf[len++] = '&';
3891: if (len > buf_size - i - 10) {
3892: growBuffer(buf);
1.129 daniel 3893: }
3894: for (;i > 0;i--)
1.198 daniel 3895: buf[len++] = *cur++;
3896: buf[len++] = ';';
1.129 daniel 3897: }
3898: } else {
1.198 daniel 3899: if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
3900: COPY_BUF(l,buf,len,0x20);
3901: if (len > buf_size - 10) {
3902: growBuffer(buf);
1.129 daniel 3903: }
3904: } else {
1.198 daniel 3905: COPY_BUF(l,buf,len,c);
3906: if (len > buf_size - 10) {
3907: growBuffer(buf);
1.129 daniel 3908: }
3909: }
1.198 daniel 3910: NEXTL(l);
1.129 daniel 3911: }
1.198 daniel 3912: GROW;
3913: c = CUR_CHAR(l);
1.129 daniel 3914: }
1.198 daniel 3915: buf[len++] = 0;
1.152 daniel 3916: if (RAW == '<') {
1.129 daniel 3917: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3918: ctxt->sax->error(ctxt->userData,
3919: "Unescaped '<' not allowed in attributes values\n");
3920: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
3921: ctxt->wellFormed = 0;
1.180 daniel 3922: ctxt->disableSAX = 1;
1.152 daniel 3923: } else if (RAW != limit) {
1.129 daniel 3924: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3925: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
3926: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
3927: ctxt->wellFormed = 0;
1.180 daniel 3928: ctxt->disableSAX = 1;
1.129 daniel 3929: } else
3930: NEXT;
1.198 daniel 3931: return(buf);
1.29 daniel 3932: }
3933:
1.50 daniel 3934: /**
3935: * xmlParseSystemLiteral:
3936: * @ctxt: an XML parser context
3937: *
3938: * parse an XML Literal
1.21 daniel 3939: *
1.22 daniel 3940: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.68 daniel 3941: *
3942: * Returns the SystemLiteral parsed or NULL
1.21 daniel 3943: */
3944:
1.123 daniel 3945: xmlChar *
1.55 daniel 3946: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 3947: xmlChar *buf = NULL;
3948: int len = 0;
1.140 daniel 3949: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3950: int cur, l;
1.135 daniel 3951: xmlChar stop;
1.168 daniel 3952: int state = ctxt->instate;
1.21 daniel 3953:
1.91 daniel 3954: SHRINK;
1.152 daniel 3955: if (RAW == '"') {
1.40 daniel 3956: NEXT;
1.135 daniel 3957: stop = '"';
1.152 daniel 3958: } else if (RAW == '\'') {
1.40 daniel 3959: NEXT;
1.135 daniel 3960: stop = '\'';
1.21 daniel 3961: } else {
1.55 daniel 3962: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3963: ctxt->sax->error(ctxt->userData,
3964: "SystemLiteral \" or ' expected\n");
1.123 daniel 3965: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.59 daniel 3966: ctxt->wellFormed = 0;
1.180 daniel 3967: ctxt->disableSAX = 1;
1.135 daniel 3968: return(NULL);
1.21 daniel 3969: }
3970:
1.135 daniel 3971: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3972: if (buf == NULL) {
3973: fprintf(stderr, "malloc of %d byte failed\n", size);
3974: return(NULL);
3975: }
1.168 daniel 3976: ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
1.152 daniel 3977: cur = CUR_CHAR(l);
1.135 daniel 3978: while ((IS_CHAR(cur)) && (cur != stop)) {
1.152 daniel 3979: if (len + 5 >= size) {
1.135 daniel 3980: size *= 2;
3981: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3982: if (buf == NULL) {
3983: fprintf(stderr, "realloc of %d byte failed\n", size);
1.168 daniel 3984: ctxt->instate = state;
1.135 daniel 3985: return(NULL);
3986: }
3987: }
1.152 daniel 3988: COPY_BUF(l,buf,len,cur);
3989: NEXTL(l);
3990: cur = CUR_CHAR(l);
1.135 daniel 3991: if (cur == 0) {
3992: GROW;
3993: SHRINK;
1.152 daniel 3994: cur = CUR_CHAR(l);
1.135 daniel 3995: }
3996: }
3997: buf[len] = 0;
1.168 daniel 3998: ctxt->instate = state;
1.135 daniel 3999: if (!IS_CHAR(cur)) {
4000: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4001: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
4002: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
4003: ctxt->wellFormed = 0;
1.180 daniel 4004: ctxt->disableSAX = 1;
1.135 daniel 4005: } else {
4006: NEXT;
4007: }
4008: return(buf);
1.21 daniel 4009: }
4010:
1.50 daniel 4011: /**
4012: * xmlParsePubidLiteral:
4013: * @ctxt: an XML parser context
1.21 daniel 4014: *
1.50 daniel 4015: * parse an XML public literal
1.68 daniel 4016: *
4017: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4018: *
4019: * Returns the PubidLiteral parsed or NULL.
1.21 daniel 4020: */
4021:
1.123 daniel 4022: xmlChar *
1.55 daniel 4023: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 4024: xmlChar *buf = NULL;
4025: int len = 0;
1.140 daniel 4026: int size = XML_PARSER_BUFFER_SIZE;
1.135 daniel 4027: xmlChar cur;
4028: xmlChar stop;
1.125 daniel 4029:
1.91 daniel 4030: SHRINK;
1.152 daniel 4031: if (RAW == '"') {
1.40 daniel 4032: NEXT;
1.135 daniel 4033: stop = '"';
1.152 daniel 4034: } else if (RAW == '\'') {
1.40 daniel 4035: NEXT;
1.135 daniel 4036: stop = '\'';
1.21 daniel 4037: } else {
1.55 daniel 4038: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4039: ctxt->sax->error(ctxt->userData,
4040: "SystemLiteral \" or ' expected\n");
1.123 daniel 4041: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.59 daniel 4042: ctxt->wellFormed = 0;
1.180 daniel 4043: ctxt->disableSAX = 1;
1.135 daniel 4044: return(NULL);
4045: }
4046: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4047: if (buf == NULL) {
4048: fprintf(stderr, "malloc of %d byte failed\n", size);
4049: return(NULL);
4050: }
4051: cur = CUR;
4052: while ((IS_PUBIDCHAR(cur)) && (cur != stop)) {
4053: if (len + 1 >= size) {
4054: size *= 2;
4055: buf = xmlRealloc(buf, size * sizeof(xmlChar));
4056: if (buf == NULL) {
4057: fprintf(stderr, "realloc of %d byte failed\n", size);
4058: return(NULL);
4059: }
4060: }
4061: buf[len++] = cur;
4062: NEXT;
4063: cur = CUR;
4064: if (cur == 0) {
4065: GROW;
4066: SHRINK;
4067: cur = CUR;
4068: }
4069: }
4070: buf[len] = 0;
4071: if (cur != stop) {
4072: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4073: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
4074: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
4075: ctxt->wellFormed = 0;
1.180 daniel 4076: ctxt->disableSAX = 1;
1.135 daniel 4077: } else {
4078: NEXT;
1.21 daniel 4079: }
1.135 daniel 4080: return(buf);
1.21 daniel 4081: }
4082:
1.50 daniel 4083: /**
4084: * xmlParseCharData:
4085: * @ctxt: an XML parser context
4086: * @cdata: int indicating whether we are within a CDATA section
4087: *
4088: * parse a CharData section.
4089: * if we are within a CDATA section ']]>' marks an end of section.
1.27 daniel 4090: *
1.151 daniel 4091: * The right angle bracket (>) may be represented using the string ">",
4092: * and must, for compatibility, be escaped using ">" or a character
4093: * reference when it appears in the string "]]>" in content, when that
4094: * string is not marking the end of a CDATA section.
4095: *
1.27 daniel 4096: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4097: */
4098:
1.55 daniel 4099: void
4100: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1.152 daniel 4101: xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
1.91 daniel 4102: int nbchar = 0;
1.152 daniel 4103: int cur, l;
1.27 daniel 4104:
1.91 daniel 4105: SHRINK;
1.152 daniel 4106: cur = CUR_CHAR(l);
1.190 daniel 4107: while (((cur != '<') || (ctxt->token == '<')) &&
4108: ((cur != '&') || (ctxt->token == '&')) &&
4109: (IS_CHAR(cur))) {
1.97 daniel 4110: if ((cur == ']') && (NXT(1) == ']') &&
1.59 daniel 4111: (NXT(2) == '>')) {
4112: if (cdata) break;
4113: else {
4114: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.151 daniel 4115: ctxt->sax->error(ctxt->userData,
1.59 daniel 4116: "Sequence ']]>' not allowed in content\n");
1.123 daniel 4117: ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
1.151 daniel 4118: /* Should this be relaxed ??? I see a "must here */
4119: ctxt->wellFormed = 0;
1.180 daniel 4120: ctxt->disableSAX = 1;
1.59 daniel 4121: }
4122: }
1.152 daniel 4123: COPY_BUF(l,buf,nbchar,cur);
4124: if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
1.91 daniel 4125: /*
4126: * Ok the segment is to be consumed as chars.
4127: */
1.171 daniel 4128: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 4129: if (areBlanks(ctxt, buf, nbchar)) {
4130: if (ctxt->sax->ignorableWhitespace != NULL)
4131: ctxt->sax->ignorableWhitespace(ctxt->userData,
4132: buf, nbchar);
4133: } else {
4134: if (ctxt->sax->characters != NULL)
4135: ctxt->sax->characters(ctxt->userData, buf, nbchar);
4136: }
4137: }
4138: nbchar = 0;
4139: }
1.152 daniel 4140: NEXTL(l);
4141: cur = CUR_CHAR(l);
1.27 daniel 4142: }
1.91 daniel 4143: if (nbchar != 0) {
4144: /*
4145: * Ok the segment is to be consumed as chars.
4146: */
1.171 daniel 4147: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 4148: if (areBlanks(ctxt, buf, nbchar)) {
4149: if (ctxt->sax->ignorableWhitespace != NULL)
4150: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4151: } else {
4152: if (ctxt->sax->characters != NULL)
4153: ctxt->sax->characters(ctxt->userData, buf, nbchar);
4154: }
4155: }
1.45 daniel 4156: }
1.27 daniel 4157: }
4158:
1.50 daniel 4159: /**
4160: * xmlParseExternalID:
4161: * @ctxt: an XML parser context
1.123 daniel 4162: * @publicID: a xmlChar** receiving PubidLiteral
1.67 daniel 4163: * @strict: indicate whether we should restrict parsing to only
4164: * production [75], see NOTE below
1.50 daniel 4165: *
1.67 daniel 4166: * Parse an External ID or a Public ID
4167: *
4168: * NOTE: Productions [75] and [83] interract badly since [75] can generate
4169: * 'PUBLIC' S PubidLiteral S SystemLiteral
1.22 daniel 4170: *
4171: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4172: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1.67 daniel 4173: *
4174: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4175: *
1.68 daniel 4176: * Returns the function returns SystemLiteral and in the second
1.67 daniel 4177: * case publicID receives PubidLiteral, is strict is off
4178: * it is possible to return NULL and have publicID set.
1.22 daniel 4179: */
4180:
1.123 daniel 4181: xmlChar *
4182: xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4183: xmlChar *URI = NULL;
1.22 daniel 4184:
1.91 daniel 4185: SHRINK;
1.152 daniel 4186: if ((RAW == 'S') && (NXT(1) == 'Y') &&
1.40 daniel 4187: (NXT(2) == 'S') && (NXT(3) == 'T') &&
4188: (NXT(4) == 'E') && (NXT(5) == 'M')) {
4189: SKIP(6);
1.59 daniel 4190: if (!IS_BLANK(CUR)) {
4191: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4192: ctxt->sax->error(ctxt->userData,
1.59 daniel 4193: "Space required after 'SYSTEM'\n");
1.123 daniel 4194: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4195: ctxt->wellFormed = 0;
1.180 daniel 4196: ctxt->disableSAX = 1;
1.59 daniel 4197: }
1.42 daniel 4198: SKIP_BLANKS;
1.39 daniel 4199: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 4200: if (URI == NULL) {
1.55 daniel 4201: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4202: ctxt->sax->error(ctxt->userData,
1.39 daniel 4203: "xmlParseExternalID: SYSTEM, no URI\n");
1.123 daniel 4204: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.59 daniel 4205: ctxt->wellFormed = 0;
1.180 daniel 4206: ctxt->disableSAX = 1;
1.59 daniel 4207: }
1.152 daniel 4208: } else if ((RAW == 'P') && (NXT(1) == 'U') &&
1.40 daniel 4209: (NXT(2) == 'B') && (NXT(3) == 'L') &&
4210: (NXT(4) == 'I') && (NXT(5) == 'C')) {
4211: SKIP(6);
1.59 daniel 4212: if (!IS_BLANK(CUR)) {
4213: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4214: ctxt->sax->error(ctxt->userData,
1.59 daniel 4215: "Space required after 'PUBLIC'\n");
1.123 daniel 4216: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4217: ctxt->wellFormed = 0;
1.180 daniel 4218: ctxt->disableSAX = 1;
1.59 daniel 4219: }
1.42 daniel 4220: SKIP_BLANKS;
1.39 daniel 4221: *publicID = xmlParsePubidLiteral(ctxt);
1.59 daniel 4222: if (*publicID == NULL) {
1.55 daniel 4223: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4224: ctxt->sax->error(ctxt->userData,
1.39 daniel 4225: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.123 daniel 4226: ctxt->errNo = XML_ERR_PUBID_REQUIRED;
1.59 daniel 4227: ctxt->wellFormed = 0;
1.180 daniel 4228: ctxt->disableSAX = 1;
1.59 daniel 4229: }
1.67 daniel 4230: if (strict) {
4231: /*
4232: * We don't handle [83] so "S SystemLiteral" is required.
4233: */
4234: if (!IS_BLANK(CUR)) {
4235: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4236: ctxt->sax->error(ctxt->userData,
1.67 daniel 4237: "Space required after the Public Identifier\n");
1.123 daniel 4238: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 4239: ctxt->wellFormed = 0;
1.180 daniel 4240: ctxt->disableSAX = 1;
1.67 daniel 4241: }
4242: } else {
4243: /*
4244: * We handle [83] so we return immediately, if
4245: * "S SystemLiteral" is not detected. From a purely parsing
4246: * point of view that's a nice mess.
4247: */
1.135 daniel 4248: const xmlChar *ptr;
4249: GROW;
4250:
4251: ptr = CUR_PTR;
1.67 daniel 4252: if (!IS_BLANK(*ptr)) return(NULL);
4253:
4254: while (IS_BLANK(*ptr)) ptr++;
1.173 daniel 4255: if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
1.59 daniel 4256: }
1.42 daniel 4257: SKIP_BLANKS;
1.39 daniel 4258: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 4259: if (URI == NULL) {
1.55 daniel 4260: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4261: ctxt->sax->error(ctxt->userData,
1.39 daniel 4262: "xmlParseExternalID: PUBLIC, no URI\n");
1.123 daniel 4263: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.59 daniel 4264: ctxt->wellFormed = 0;
1.180 daniel 4265: ctxt->disableSAX = 1;
1.59 daniel 4266: }
1.22 daniel 4267: }
1.39 daniel 4268: return(URI);
1.22 daniel 4269: }
4270:
1.50 daniel 4271: /**
4272: * xmlParseComment:
1.69 daniel 4273: * @ctxt: an XML parser context
1.50 daniel 4274: *
1.3 veillard 4275: * Skip an XML (SGML) comment <!-- .... -->
1.38 daniel 4276: * The spec says that "For compatibility, the string "--" (double-hyphen)
4277: * must not occur within comments. "
1.22 daniel 4278: *
4279: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 4280: */
1.72 daniel 4281: void
1.114 daniel 4282: xmlParseComment(xmlParserCtxtPtr ctxt) {
1.135 daniel 4283: xmlChar *buf = NULL;
1.195 daniel 4284: int len;
1.140 daniel 4285: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 4286: int q, ql;
4287: int r, rl;
4288: int cur, l;
1.140 daniel 4289: xmlParserInputState state;
1.187 daniel 4290: xmlParserInputPtr input = ctxt->input;
1.3 veillard 4291:
4292: /*
1.22 daniel 4293: * Check that there is a comment right here.
1.3 veillard 4294: */
1.152 daniel 4295: if ((RAW != '<') || (NXT(1) != '!') ||
1.72 daniel 4296: (NXT(2) != '-') || (NXT(3) != '-')) return;
1.3 veillard 4297:
1.140 daniel 4298: state = ctxt->instate;
1.97 daniel 4299: ctxt->instate = XML_PARSER_COMMENT;
1.91 daniel 4300: SHRINK;
1.40 daniel 4301: SKIP(4);
1.135 daniel 4302: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4303: if (buf == NULL) {
4304: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 4305: ctxt->instate = state;
1.135 daniel 4306: return;
4307: }
1.152 daniel 4308: q = CUR_CHAR(ql);
4309: NEXTL(ql);
4310: r = CUR_CHAR(rl);
4311: NEXTL(rl);
4312: cur = CUR_CHAR(l);
1.195 daniel 4313: len = 0;
1.135 daniel 4314: while (IS_CHAR(cur) &&
4315: ((cur != '>') ||
4316: (r != '-') || (q != '-'))) {
1.195 daniel 4317: if ((r == '-') && (q == '-') && (len > 1)) {
1.55 daniel 4318: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4319: ctxt->sax->error(ctxt->userData,
1.38 daniel 4320: "Comment must not contain '--' (double-hyphen)`\n");
1.123 daniel 4321: ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
1.59 daniel 4322: ctxt->wellFormed = 0;
1.180 daniel 4323: ctxt->disableSAX = 1;
1.59 daniel 4324: }
1.152 daniel 4325: if (len + 5 >= size) {
1.135 daniel 4326: size *= 2;
4327: buf = xmlRealloc(buf, size * sizeof(xmlChar));
4328: if (buf == NULL) {
4329: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 4330: ctxt->instate = state;
1.135 daniel 4331: return;
4332: }
4333: }
1.152 daniel 4334: COPY_BUF(ql,buf,len,q);
1.135 daniel 4335: q = r;
1.152 daniel 4336: ql = rl;
1.135 daniel 4337: r = cur;
1.152 daniel 4338: rl = l;
4339: NEXTL(l);
4340: cur = CUR_CHAR(l);
1.135 daniel 4341: if (cur == 0) {
4342: SHRINK;
4343: GROW;
1.152 daniel 4344: cur = CUR_CHAR(l);
1.135 daniel 4345: }
1.3 veillard 4346: }
1.135 daniel 4347: buf[len] = 0;
4348: if (!IS_CHAR(cur)) {
1.55 daniel 4349: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4350: ctxt->sax->error(ctxt->userData,
1.135 daniel 4351: "Comment not terminated \n<!--%.50s\n", buf);
1.123 daniel 4352: ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
1.59 daniel 4353: ctxt->wellFormed = 0;
1.180 daniel 4354: ctxt->disableSAX = 1;
1.178 daniel 4355: xmlFree(buf);
1.3 veillard 4356: } else {
1.187 daniel 4357: if (input != ctxt->input) {
4358: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4359: ctxt->sax->error(ctxt->userData,
4360: "Comment doesn't start and stop in the same entity\n");
4361: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4362: ctxt->wellFormed = 0;
4363: ctxt->disableSAX = 1;
4364: }
1.40 daniel 4365: NEXT;
1.171 daniel 4366: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4367: (!ctxt->disableSAX))
1.135 daniel 4368: ctxt->sax->comment(ctxt->userData, buf);
4369: xmlFree(buf);
1.3 veillard 4370: }
1.140 daniel 4371: ctxt->instate = state;
1.3 veillard 4372: }
4373:
1.50 daniel 4374: /**
4375: * xmlParsePITarget:
4376: * @ctxt: an XML parser context
4377: *
4378: * parse the name of a PI
1.22 daniel 4379: *
4380: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1.68 daniel 4381: *
4382: * Returns the PITarget name or NULL
1.22 daniel 4383: */
4384:
1.123 daniel 4385: xmlChar *
1.55 daniel 4386: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1.123 daniel 4387: xmlChar *name;
1.22 daniel 4388:
4389: name = xmlParseName(ctxt);
1.139 daniel 4390: if ((name != NULL) &&
1.22 daniel 4391: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 4392: ((name[1] == 'm') || (name[1] == 'M')) &&
4393: ((name[2] == 'l') || (name[2] == 'L'))) {
1.139 daniel 4394: int i;
1.177 daniel 4395: if ((name[0] == 'x') && (name[1] == 'm') &&
4396: (name[2] == 'l') && (name[3] == 0)) {
1.151 daniel 4397: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4398: ctxt->sax->error(ctxt->userData,
4399: "XML declaration allowed only at the start of the document\n");
4400: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
4401: ctxt->wellFormed = 0;
1.180 daniel 4402: ctxt->disableSAX = 1;
1.151 daniel 4403: return(name);
4404: } else if (name[3] == 0) {
4405: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4406: ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
4407: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
4408: ctxt->wellFormed = 0;
1.180 daniel 4409: ctxt->disableSAX = 1;
1.151 daniel 4410: return(name);
4411: }
1.139 daniel 4412: for (i = 0;;i++) {
4413: if (xmlW3CPIs[i] == NULL) break;
4414: if (!xmlStrcmp(name, (const xmlChar *)xmlW3CPIs[i]))
4415: return(name);
4416: }
4417: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
4418: ctxt->sax->warning(ctxt->userData,
1.122 daniel 4419: "xmlParsePItarget: invalid name prefix 'xml'\n");
1.123 daniel 4420: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
1.122 daniel 4421: }
1.22 daniel 4422: }
4423: return(name);
4424: }
4425:
1.50 daniel 4426: /**
4427: * xmlParsePI:
4428: * @ctxt: an XML parser context
4429: *
4430: * parse an XML Processing Instruction.
1.22 daniel 4431: *
4432: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.68 daniel 4433: *
1.69 daniel 4434: * The processing is transfered to SAX once parsed.
1.3 veillard 4435: */
4436:
1.55 daniel 4437: void
4438: xmlParsePI(xmlParserCtxtPtr ctxt) {
1.135 daniel 4439: xmlChar *buf = NULL;
4440: int len = 0;
1.140 daniel 4441: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 4442: int cur, l;
1.123 daniel 4443: xmlChar *target;
1.140 daniel 4444: xmlParserInputState state;
1.22 daniel 4445:
1.152 daniel 4446: if ((RAW == '<') && (NXT(1) == '?')) {
1.187 daniel 4447: xmlParserInputPtr input = ctxt->input;
1.140 daniel 4448: state = ctxt->instate;
4449: ctxt->instate = XML_PARSER_PI;
1.3 veillard 4450: /*
4451: * this is a Processing Instruction.
4452: */
1.40 daniel 4453: SKIP(2);
1.91 daniel 4454: SHRINK;
1.3 veillard 4455:
4456: /*
1.22 daniel 4457: * Parse the target name and check for special support like
4458: * namespace.
1.3 veillard 4459: */
1.22 daniel 4460: target = xmlParsePITarget(ctxt);
4461: if (target != NULL) {
1.156 daniel 4462: if ((RAW == '?') && (NXT(1) == '>')) {
1.187 daniel 4463: if (input != ctxt->input) {
4464: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4465: ctxt->sax->error(ctxt->userData,
4466: "PI declaration doesn't start and stop in the same entity\n");
4467: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4468: ctxt->wellFormed = 0;
4469: ctxt->disableSAX = 1;
4470: }
1.156 daniel 4471: SKIP(2);
4472:
4473: /*
4474: * SAX: PI detected.
4475: */
1.171 daniel 4476: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.156 daniel 4477: (ctxt->sax->processingInstruction != NULL))
4478: ctxt->sax->processingInstruction(ctxt->userData,
4479: target, NULL);
4480: ctxt->instate = state;
1.170 daniel 4481: xmlFree(target);
1.156 daniel 4482: return;
4483: }
1.135 daniel 4484: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4485: if (buf == NULL) {
4486: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 4487: ctxt->instate = state;
1.135 daniel 4488: return;
4489: }
4490: cur = CUR;
4491: if (!IS_BLANK(cur)) {
1.114 daniel 4492: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4493: ctxt->sax->error(ctxt->userData,
4494: "xmlParsePI: PI %s space expected\n", target);
1.123 daniel 4495: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.114 daniel 4496: ctxt->wellFormed = 0;
1.180 daniel 4497: ctxt->disableSAX = 1;
1.114 daniel 4498: }
4499: SKIP_BLANKS;
1.152 daniel 4500: cur = CUR_CHAR(l);
1.135 daniel 4501: while (IS_CHAR(cur) &&
4502: ((cur != '?') || (NXT(1) != '>'))) {
1.152 daniel 4503: if (len + 5 >= size) {
1.135 daniel 4504: size *= 2;
4505: buf = xmlRealloc(buf, size * sizeof(xmlChar));
4506: if (buf == NULL) {
4507: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 4508: ctxt->instate = state;
1.135 daniel 4509: return;
4510: }
4511: }
1.152 daniel 4512: COPY_BUF(l,buf,len,cur);
4513: NEXTL(l);
4514: cur = CUR_CHAR(l);
1.135 daniel 4515: if (cur == 0) {
4516: SHRINK;
4517: GROW;
1.152 daniel 4518: cur = CUR_CHAR(l);
1.135 daniel 4519: }
4520: }
4521: buf[len] = 0;
1.152 daniel 4522: if (cur != '?') {
1.72 daniel 4523: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4524: ctxt->sax->error(ctxt->userData,
1.72 daniel 4525: "xmlParsePI: PI %s never end ...\n", target);
1.123 daniel 4526: ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
1.72 daniel 4527: ctxt->wellFormed = 0;
1.180 daniel 4528: ctxt->disableSAX = 1;
1.22 daniel 4529: } else {
1.187 daniel 4530: if (input != ctxt->input) {
4531: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4532: ctxt->sax->error(ctxt->userData,
4533: "PI declaration doesn't start and stop in the same entity\n");
4534: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4535: ctxt->wellFormed = 0;
4536: ctxt->disableSAX = 1;
4537: }
1.72 daniel 4538: SKIP(2);
1.44 daniel 4539:
1.72 daniel 4540: /*
4541: * SAX: PI detected.
4542: */
1.171 daniel 4543: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.72 daniel 4544: (ctxt->sax->processingInstruction != NULL))
1.99 daniel 4545: ctxt->sax->processingInstruction(ctxt->userData,
1.135 daniel 4546: target, buf);
1.22 daniel 4547: }
1.135 daniel 4548: xmlFree(buf);
1.119 daniel 4549: xmlFree(target);
1.3 veillard 4550: } else {
1.55 daniel 4551: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.99 daniel 4552: ctxt->sax->error(ctxt->userData,
4553: "xmlParsePI : no target name\n");
1.123 daniel 4554: ctxt->errNo = XML_ERR_PI_NOT_STARTED;
1.59 daniel 4555: ctxt->wellFormed = 0;
1.180 daniel 4556: ctxt->disableSAX = 1;
1.22 daniel 4557: }
1.140 daniel 4558: ctxt->instate = state;
1.22 daniel 4559: }
4560: }
4561:
1.50 daniel 4562: /**
4563: * xmlParseNotationDecl:
4564: * @ctxt: an XML parser context
4565: *
4566: * parse a notation declaration
1.22 daniel 4567: *
4568: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4569: *
4570: * Hence there is actually 3 choices:
4571: * 'PUBLIC' S PubidLiteral
4572: * 'PUBLIC' S PubidLiteral S SystemLiteral
4573: * and 'SYSTEM' S SystemLiteral
1.50 daniel 4574: *
1.67 daniel 4575: * See the NOTE on xmlParseExternalID().
1.22 daniel 4576: */
4577:
1.55 daniel 4578: void
4579: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4580: xmlChar *name;
4581: xmlChar *Pubid;
4582: xmlChar *Systemid;
1.22 daniel 4583:
1.152 daniel 4584: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4585: (NXT(2) == 'N') && (NXT(3) == 'O') &&
4586: (NXT(4) == 'T') && (NXT(5) == 'A') &&
4587: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1.67 daniel 4588: (NXT(8) == 'O') && (NXT(9) == 'N')) {
1.187 daniel 4589: xmlParserInputPtr input = ctxt->input;
1.91 daniel 4590: SHRINK;
1.40 daniel 4591: SKIP(10);
1.67 daniel 4592: if (!IS_BLANK(CUR)) {
4593: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4594: ctxt->sax->error(ctxt->userData,
4595: "Space required after '<!NOTATION'\n");
1.123 daniel 4596: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 4597: ctxt->wellFormed = 0;
1.180 daniel 4598: ctxt->disableSAX = 1;
1.67 daniel 4599: return;
4600: }
4601: SKIP_BLANKS;
1.22 daniel 4602:
4603: name = xmlParseName(ctxt);
4604: if (name == NULL) {
1.55 daniel 4605: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4606: ctxt->sax->error(ctxt->userData,
4607: "NOTATION: Name expected here\n");
1.123 daniel 4608: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.67 daniel 4609: ctxt->wellFormed = 0;
1.180 daniel 4610: ctxt->disableSAX = 1;
1.67 daniel 4611: return;
4612: }
4613: if (!IS_BLANK(CUR)) {
4614: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4615: ctxt->sax->error(ctxt->userData,
1.67 daniel 4616: "Space required after the NOTATION name'\n");
1.123 daniel 4617: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4618: ctxt->wellFormed = 0;
1.180 daniel 4619: ctxt->disableSAX = 1;
1.22 daniel 4620: return;
4621: }
1.42 daniel 4622: SKIP_BLANKS;
1.67 daniel 4623:
1.22 daniel 4624: /*
1.67 daniel 4625: * Parse the IDs.
1.22 daniel 4626: */
1.160 daniel 4627: Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
1.67 daniel 4628: SKIP_BLANKS;
4629:
1.152 daniel 4630: if (RAW == '>') {
1.187 daniel 4631: if (input != ctxt->input) {
4632: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4633: ctxt->sax->error(ctxt->userData,
4634: "Notation declaration doesn't start and stop in the same entity\n");
4635: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4636: ctxt->wellFormed = 0;
4637: ctxt->disableSAX = 1;
4638: }
1.40 daniel 4639: NEXT;
1.171 daniel 4640: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4641: (ctxt->sax->notationDecl != NULL))
1.74 daniel 4642: ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
1.67 daniel 4643: } else {
4644: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4645: ctxt->sax->error(ctxt->userData,
1.67 daniel 4646: "'>' required to close NOTATION declaration\n");
1.123 daniel 4647: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.67 daniel 4648: ctxt->wellFormed = 0;
1.180 daniel 4649: ctxt->disableSAX = 1;
1.67 daniel 4650: }
1.119 daniel 4651: xmlFree(name);
4652: if (Systemid != NULL) xmlFree(Systemid);
4653: if (Pubid != NULL) xmlFree(Pubid);
1.22 daniel 4654: }
4655: }
4656:
1.50 daniel 4657: /**
4658: * xmlParseEntityDecl:
4659: * @ctxt: an XML parser context
4660: *
4661: * parse <!ENTITY declarations
1.22 daniel 4662: *
4663: * [70] EntityDecl ::= GEDecl | PEDecl
4664: *
4665: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4666: *
4667: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4668: *
4669: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4670: *
4671: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 4672: *
4673: * [76] NDataDecl ::= S 'NDATA' S Name
1.99 daniel 4674: *
4675: * [ VC: Notation Declared ]
1.116 daniel 4676: * The Name must match the declared name of a notation.
1.22 daniel 4677: */
4678:
1.55 daniel 4679: void
4680: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4681: xmlChar *name = NULL;
4682: xmlChar *value = NULL;
4683: xmlChar *URI = NULL, *literal = NULL;
4684: xmlChar *ndata = NULL;
1.39 daniel 4685: int isParameter = 0;
1.123 daniel 4686: xmlChar *orig = NULL;
1.22 daniel 4687:
1.94 daniel 4688: GROW;
1.152 daniel 4689: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4690: (NXT(2) == 'E') && (NXT(3) == 'N') &&
4691: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1.59 daniel 4692: (NXT(6) == 'T') && (NXT(7) == 'Y')) {
1.187 daniel 4693: xmlParserInputPtr input = ctxt->input;
1.96 daniel 4694: ctxt->instate = XML_PARSER_ENTITY_DECL;
1.91 daniel 4695: SHRINK;
1.40 daniel 4696: SKIP(8);
1.59 daniel 4697: if (!IS_BLANK(CUR)) {
4698: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4699: ctxt->sax->error(ctxt->userData,
4700: "Space required after '<!ENTITY'\n");
1.123 daniel 4701: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4702: ctxt->wellFormed = 0;
1.180 daniel 4703: ctxt->disableSAX = 1;
1.59 daniel 4704: }
4705: SKIP_BLANKS;
1.40 daniel 4706:
1.152 daniel 4707: if (RAW == '%') {
1.40 daniel 4708: NEXT;
1.59 daniel 4709: if (!IS_BLANK(CUR)) {
4710: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4711: ctxt->sax->error(ctxt->userData,
4712: "Space required after '%'\n");
1.123 daniel 4713: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4714: ctxt->wellFormed = 0;
1.180 daniel 4715: ctxt->disableSAX = 1;
1.59 daniel 4716: }
1.42 daniel 4717: SKIP_BLANKS;
1.39 daniel 4718: isParameter = 1;
1.22 daniel 4719: }
4720:
4721: name = xmlParseName(ctxt);
1.24 daniel 4722: if (name == NULL) {
1.55 daniel 4723: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4724: ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
1.123 daniel 4725: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 4726: ctxt->wellFormed = 0;
1.180 daniel 4727: ctxt->disableSAX = 1;
1.24 daniel 4728: return;
4729: }
1.59 daniel 4730: if (!IS_BLANK(CUR)) {
4731: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4732: ctxt->sax->error(ctxt->userData,
1.59 daniel 4733: "Space required after the entity name\n");
1.123 daniel 4734: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4735: ctxt->wellFormed = 0;
1.180 daniel 4736: ctxt->disableSAX = 1;
1.59 daniel 4737: }
1.42 daniel 4738: SKIP_BLANKS;
1.24 daniel 4739:
1.22 daniel 4740: /*
1.68 daniel 4741: * handle the various case of definitions...
1.22 daniel 4742: */
1.39 daniel 4743: if (isParameter) {
1.152 daniel 4744: if ((RAW == '"') || (RAW == '\''))
1.78 daniel 4745: value = xmlParseEntityValue(ctxt, &orig);
1.39 daniel 4746: if (value) {
1.171 daniel 4747: if ((ctxt->sax != NULL) &&
4748: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4749: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4750: XML_INTERNAL_PARAMETER_ENTITY,
4751: NULL, NULL, value);
4752: }
1.24 daniel 4753: else {
1.67 daniel 4754: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 4755: if ((URI == NULL) && (literal == NULL)) {
4756: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4757: ctxt->sax->error(ctxt->userData,
4758: "Entity value required\n");
4759: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
4760: ctxt->wellFormed = 0;
1.180 daniel 4761: ctxt->disableSAX = 1;
1.169 daniel 4762: }
1.39 daniel 4763: if (URI) {
1.193 daniel 4764: xmlURIPtr uri;
4765:
4766: uri = xmlParseURI((const char *) URI);
4767: if (uri == NULL) {
4768: if ((ctxt->sax != NULL) &&
4769: (!ctxt->disableSAX) &&
4770: (ctxt->sax->error != NULL))
4771: ctxt->sax->error(ctxt->userData,
4772: "Invalid URI: %s\n", URI);
4773: ctxt->wellFormed = 0;
4774: ctxt->errNo = XML_ERR_INVALID_URI;
4775: } else {
4776: if (uri->fragment != NULL) {
4777: if ((ctxt->sax != NULL) &&
4778: (!ctxt->disableSAX) &&
4779: (ctxt->sax->error != NULL))
4780: ctxt->sax->error(ctxt->userData,
4781: "Fragment not allowed: %s\n", URI);
4782: ctxt->wellFormed = 0;
4783: ctxt->errNo = XML_ERR_URI_FRAGMENT;
4784: } else {
4785: if ((ctxt->sax != NULL) &&
4786: (!ctxt->disableSAX) &&
4787: (ctxt->sax->entityDecl != NULL))
4788: ctxt->sax->entityDecl(ctxt->userData, name,
4789: XML_EXTERNAL_PARAMETER_ENTITY,
4790: literal, URI, NULL);
4791: }
4792: xmlFreeURI(uri);
4793: }
1.39 daniel 4794: }
1.24 daniel 4795: }
4796: } else {
1.152 daniel 4797: if ((RAW == '"') || (RAW == '\'')) {
1.78 daniel 4798: value = xmlParseEntityValue(ctxt, &orig);
1.171 daniel 4799: if ((ctxt->sax != NULL) &&
4800: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4801: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4802: XML_INTERNAL_GENERAL_ENTITY,
4803: NULL, NULL, value);
4804: } else {
1.67 daniel 4805: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 4806: if ((URI == NULL) && (literal == NULL)) {
4807: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4808: ctxt->sax->error(ctxt->userData,
4809: "Entity value required\n");
4810: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
4811: ctxt->wellFormed = 0;
1.180 daniel 4812: ctxt->disableSAX = 1;
1.169 daniel 4813: }
1.193 daniel 4814: if (URI) {
4815: xmlURIPtr uri;
4816:
4817: uri = xmlParseURI((const char *)URI);
4818: if (uri == NULL) {
4819: if ((ctxt->sax != NULL) &&
4820: (!ctxt->disableSAX) &&
4821: (ctxt->sax->error != NULL))
4822: ctxt->sax->error(ctxt->userData,
4823: "Invalid URI: %s\n", URI);
4824: ctxt->wellFormed = 0;
4825: ctxt->errNo = XML_ERR_INVALID_URI;
4826: } else {
4827: if (uri->fragment != NULL) {
4828: if ((ctxt->sax != NULL) &&
4829: (!ctxt->disableSAX) &&
4830: (ctxt->sax->error != NULL))
4831: ctxt->sax->error(ctxt->userData,
4832: "Fragment not allowed: %s\n", URI);
4833: ctxt->wellFormed = 0;
4834: ctxt->errNo = XML_ERR_URI_FRAGMENT;
4835: }
4836: xmlFreeURI(uri);
4837: }
4838: }
1.152 daniel 4839: if ((RAW != '>') && (!IS_BLANK(CUR))) {
1.59 daniel 4840: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4841: ctxt->sax->error(ctxt->userData,
1.59 daniel 4842: "Space required before 'NDATA'\n");
1.123 daniel 4843: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4844: ctxt->wellFormed = 0;
1.180 daniel 4845: ctxt->disableSAX = 1;
1.59 daniel 4846: }
1.42 daniel 4847: SKIP_BLANKS;
1.152 daniel 4848: if ((RAW == 'N') && (NXT(1) == 'D') &&
1.40 daniel 4849: (NXT(2) == 'A') && (NXT(3) == 'T') &&
4850: (NXT(4) == 'A')) {
4851: SKIP(5);
1.59 daniel 4852: if (!IS_BLANK(CUR)) {
4853: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4854: ctxt->sax->error(ctxt->userData,
1.59 daniel 4855: "Space required after 'NDATA'\n");
1.123 daniel 4856: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4857: ctxt->wellFormed = 0;
1.180 daniel 4858: ctxt->disableSAX = 1;
1.59 daniel 4859: }
1.42 daniel 4860: SKIP_BLANKS;
1.24 daniel 4861: ndata = xmlParseName(ctxt);
1.171 daniel 4862: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.116 daniel 4863: (ctxt->sax->unparsedEntityDecl != NULL))
4864: ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
1.39 daniel 4865: literal, URI, ndata);
4866: } else {
1.171 daniel 4867: if ((ctxt->sax != NULL) &&
4868: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4869: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4870: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4871: literal, URI, NULL);
1.24 daniel 4872: }
4873: }
4874: }
1.42 daniel 4875: SKIP_BLANKS;
1.152 daniel 4876: if (RAW != '>') {
1.55 daniel 4877: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4878: ctxt->sax->error(ctxt->userData,
1.31 daniel 4879: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.123 daniel 4880: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.59 daniel 4881: ctxt->wellFormed = 0;
1.180 daniel 4882: ctxt->disableSAX = 1;
1.187 daniel 4883: } else {
4884: if (input != ctxt->input) {
4885: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4886: ctxt->sax->error(ctxt->userData,
4887: "Entity declaration doesn't start and stop in the same entity\n");
4888: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4889: ctxt->wellFormed = 0;
4890: ctxt->disableSAX = 1;
4891: }
1.40 daniel 4892: NEXT;
1.187 daniel 4893: }
1.78 daniel 4894: if (orig != NULL) {
4895: /*
1.98 daniel 4896: * Ugly mechanism to save the raw entity value.
1.78 daniel 4897: */
4898: xmlEntityPtr cur = NULL;
4899:
1.98 daniel 4900: if (isParameter) {
4901: if ((ctxt->sax != NULL) &&
4902: (ctxt->sax->getParameterEntity != NULL))
1.120 daniel 4903: cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.98 daniel 4904: } else {
4905: if ((ctxt->sax != NULL) &&
4906: (ctxt->sax->getEntity != NULL))
1.120 daniel 4907: cur = ctxt->sax->getEntity(ctxt->userData, name);
1.98 daniel 4908: }
4909: if (cur != NULL) {
4910: if (cur->orig != NULL)
1.119 daniel 4911: xmlFree(orig);
1.98 daniel 4912: else
4913: cur->orig = orig;
4914: } else
1.119 daniel 4915: xmlFree(orig);
1.78 daniel 4916: }
1.119 daniel 4917: if (name != NULL) xmlFree(name);
4918: if (value != NULL) xmlFree(value);
4919: if (URI != NULL) xmlFree(URI);
4920: if (literal != NULL) xmlFree(literal);
4921: if (ndata != NULL) xmlFree(ndata);
1.22 daniel 4922: }
4923: }
4924:
1.50 daniel 4925: /**
1.59 daniel 4926: * xmlParseDefaultDecl:
4927: * @ctxt: an XML parser context
4928: * @value: Receive a possible fixed default value for the attribute
4929: *
4930: * Parse an attribute default declaration
4931: *
4932: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4933: *
1.99 daniel 4934: * [ VC: Required Attribute ]
1.117 daniel 4935: * if the default declaration is the keyword #REQUIRED, then the
4936: * attribute must be specified for all elements of the type in the
4937: * attribute-list declaration.
1.99 daniel 4938: *
4939: * [ VC: Attribute Default Legal ]
1.102 daniel 4940: * The declared default value must meet the lexical constraints of
4941: * the declared attribute type c.f. xmlValidateAttributeDecl()
1.99 daniel 4942: *
4943: * [ VC: Fixed Attribute Default ]
1.117 daniel 4944: * if an attribute has a default value declared with the #FIXED
4945: * keyword, instances of that attribute must match the default value.
1.99 daniel 4946: *
4947: * [ WFC: No < in Attribute Values ]
4948: * handled in xmlParseAttValue()
4949: *
1.59 daniel 4950: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4951: * or XML_ATTRIBUTE_FIXED.
4952: */
4953:
4954: int
1.123 daniel 4955: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
1.59 daniel 4956: int val;
1.123 daniel 4957: xmlChar *ret;
1.59 daniel 4958:
4959: *value = NULL;
1.152 daniel 4960: if ((RAW == '#') && (NXT(1) == 'R') &&
1.59 daniel 4961: (NXT(2) == 'E') && (NXT(3) == 'Q') &&
4962: (NXT(4) == 'U') && (NXT(5) == 'I') &&
4963: (NXT(6) == 'R') && (NXT(7) == 'E') &&
4964: (NXT(8) == 'D')) {
4965: SKIP(9);
4966: return(XML_ATTRIBUTE_REQUIRED);
4967: }
1.152 daniel 4968: if ((RAW == '#') && (NXT(1) == 'I') &&
1.59 daniel 4969: (NXT(2) == 'M') && (NXT(3) == 'P') &&
4970: (NXT(4) == 'L') && (NXT(5) == 'I') &&
4971: (NXT(6) == 'E') && (NXT(7) == 'D')) {
4972: SKIP(8);
4973: return(XML_ATTRIBUTE_IMPLIED);
4974: }
4975: val = XML_ATTRIBUTE_NONE;
1.152 daniel 4976: if ((RAW == '#') && (NXT(1) == 'F') &&
1.59 daniel 4977: (NXT(2) == 'I') && (NXT(3) == 'X') &&
4978: (NXT(4) == 'E') && (NXT(5) == 'D')) {
4979: SKIP(6);
4980: val = XML_ATTRIBUTE_FIXED;
4981: if (!IS_BLANK(CUR)) {
4982: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4983: ctxt->sax->error(ctxt->userData,
4984: "Space required after '#FIXED'\n");
1.123 daniel 4985: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4986: ctxt->wellFormed = 0;
1.180 daniel 4987: ctxt->disableSAX = 1;
1.59 daniel 4988: }
4989: SKIP_BLANKS;
4990: }
4991: ret = xmlParseAttValue(ctxt);
1.96 daniel 4992: ctxt->instate = XML_PARSER_DTD;
1.59 daniel 4993: if (ret == NULL) {
4994: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4995: ctxt->sax->error(ctxt->userData,
1.59 daniel 4996: "Attribute default value declaration error\n");
4997: ctxt->wellFormed = 0;
1.180 daniel 4998: ctxt->disableSAX = 1;
1.59 daniel 4999: } else
5000: *value = ret;
5001: return(val);
5002: }
5003:
5004: /**
1.66 daniel 5005: * xmlParseNotationType:
5006: * @ctxt: an XML parser context
5007: *
5008: * parse an Notation attribute type.
5009: *
1.99 daniel 5010: * Note: the leading 'NOTATION' S part has already being parsed...
5011: *
1.66 daniel 5012: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5013: *
1.99 daniel 5014: * [ VC: Notation Attributes ]
1.117 daniel 5015: * Values of this type must match one of the notation names included
1.99 daniel 5016: * in the declaration; all notation names in the declaration must be declared.
1.66 daniel 5017: *
5018: * Returns: the notation attribute tree built while parsing
5019: */
5020:
5021: xmlEnumerationPtr
5022: xmlParseNotationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 5023: xmlChar *name;
1.66 daniel 5024: xmlEnumerationPtr ret = NULL, last = NULL, cur;
5025:
1.152 daniel 5026: if (RAW != '(') {
1.66 daniel 5027: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5028: ctxt->sax->error(ctxt->userData,
5029: "'(' required to start 'NOTATION'\n");
1.123 daniel 5030: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.66 daniel 5031: ctxt->wellFormed = 0;
1.180 daniel 5032: ctxt->disableSAX = 1;
1.66 daniel 5033: return(NULL);
5034: }
1.91 daniel 5035: SHRINK;
1.66 daniel 5036: do {
5037: NEXT;
5038: SKIP_BLANKS;
5039: name = xmlParseName(ctxt);
5040: if (name == NULL) {
5041: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5042: ctxt->sax->error(ctxt->userData,
1.66 daniel 5043: "Name expected in NOTATION declaration\n");
1.123 daniel 5044: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.66 daniel 5045: ctxt->wellFormed = 0;
1.180 daniel 5046: ctxt->disableSAX = 1;
1.66 daniel 5047: return(ret);
5048: }
5049: cur = xmlCreateEnumeration(name);
1.119 daniel 5050: xmlFree(name);
1.66 daniel 5051: if (cur == NULL) return(ret);
5052: if (last == NULL) ret = last = cur;
5053: else {
5054: last->next = cur;
5055: last = cur;
5056: }
5057: SKIP_BLANKS;
1.152 daniel 5058: } while (RAW == '|');
5059: if (RAW != ')') {
1.66 daniel 5060: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5061: ctxt->sax->error(ctxt->userData,
1.66 daniel 5062: "')' required to finish NOTATION declaration\n");
1.123 daniel 5063: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.66 daniel 5064: ctxt->wellFormed = 0;
1.180 daniel 5065: ctxt->disableSAX = 1;
1.170 daniel 5066: if ((last != NULL) && (last != ret))
5067: xmlFreeEnumeration(last);
1.66 daniel 5068: return(ret);
5069: }
5070: NEXT;
5071: return(ret);
5072: }
5073:
5074: /**
5075: * xmlParseEnumerationType:
5076: * @ctxt: an XML parser context
5077: *
5078: * parse an Enumeration attribute type.
5079: *
5080: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5081: *
1.99 daniel 5082: * [ VC: Enumeration ]
1.117 daniel 5083: * Values of this type must match one of the Nmtoken tokens in
1.99 daniel 5084: * the declaration
5085: *
1.66 daniel 5086: * Returns: the enumeration attribute tree built while parsing
5087: */
5088:
5089: xmlEnumerationPtr
5090: xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 5091: xmlChar *name;
1.66 daniel 5092: xmlEnumerationPtr ret = NULL, last = NULL, cur;
5093:
1.152 daniel 5094: if (RAW != '(') {
1.66 daniel 5095: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5096: ctxt->sax->error(ctxt->userData,
1.66 daniel 5097: "'(' required to start ATTLIST enumeration\n");
1.123 daniel 5098: ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
1.66 daniel 5099: ctxt->wellFormed = 0;
1.180 daniel 5100: ctxt->disableSAX = 1;
1.66 daniel 5101: return(NULL);
5102: }
1.91 daniel 5103: SHRINK;
1.66 daniel 5104: do {
5105: NEXT;
5106: SKIP_BLANKS;
5107: name = xmlParseNmtoken(ctxt);
5108: if (name == NULL) {
5109: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5110: ctxt->sax->error(ctxt->userData,
1.66 daniel 5111: "NmToken expected in ATTLIST enumeration\n");
1.123 daniel 5112: ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
1.66 daniel 5113: ctxt->wellFormed = 0;
1.180 daniel 5114: ctxt->disableSAX = 1;
1.66 daniel 5115: return(ret);
5116: }
5117: cur = xmlCreateEnumeration(name);
1.119 daniel 5118: xmlFree(name);
1.66 daniel 5119: if (cur == NULL) return(ret);
5120: if (last == NULL) ret = last = cur;
5121: else {
5122: last->next = cur;
5123: last = cur;
5124: }
5125: SKIP_BLANKS;
1.152 daniel 5126: } while (RAW == '|');
5127: if (RAW != ')') {
1.66 daniel 5128: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5129: ctxt->sax->error(ctxt->userData,
1.66 daniel 5130: "')' required to finish ATTLIST enumeration\n");
1.123 daniel 5131: ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
1.66 daniel 5132: ctxt->wellFormed = 0;
1.180 daniel 5133: ctxt->disableSAX = 1;
1.66 daniel 5134: return(ret);
5135: }
5136: NEXT;
5137: return(ret);
5138: }
5139:
5140: /**
1.50 daniel 5141: * xmlParseEnumeratedType:
5142: * @ctxt: an XML parser context
1.66 daniel 5143: * @tree: the enumeration tree built while parsing
1.50 daniel 5144: *
1.66 daniel 5145: * parse an Enumerated attribute type.
1.22 daniel 5146: *
5147: * [57] EnumeratedType ::= NotationType | Enumeration
5148: *
5149: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5150: *
1.50 daniel 5151: *
1.66 daniel 5152: * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
1.22 daniel 5153: */
5154:
1.66 daniel 5155: int
5156: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.152 daniel 5157: if ((RAW == 'N') && (NXT(1) == 'O') &&
1.66 daniel 5158: (NXT(2) == 'T') && (NXT(3) == 'A') &&
5159: (NXT(4) == 'T') && (NXT(5) == 'I') &&
5160: (NXT(6) == 'O') && (NXT(7) == 'N')) {
5161: SKIP(8);
5162: if (!IS_BLANK(CUR)) {
5163: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5164: ctxt->sax->error(ctxt->userData,
5165: "Space required after 'NOTATION'\n");
1.123 daniel 5166: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.66 daniel 5167: ctxt->wellFormed = 0;
1.180 daniel 5168: ctxt->disableSAX = 1;
1.66 daniel 5169: return(0);
5170: }
5171: SKIP_BLANKS;
5172: *tree = xmlParseNotationType(ctxt);
5173: if (*tree == NULL) return(0);
5174: return(XML_ATTRIBUTE_NOTATION);
5175: }
5176: *tree = xmlParseEnumerationType(ctxt);
5177: if (*tree == NULL) return(0);
5178: return(XML_ATTRIBUTE_ENUMERATION);
1.22 daniel 5179: }
5180:
1.50 daniel 5181: /**
5182: * xmlParseAttributeType:
5183: * @ctxt: an XML parser context
1.66 daniel 5184: * @tree: the enumeration tree built while parsing
1.50 daniel 5185: *
1.59 daniel 5186: * parse the Attribute list def for an element
1.22 daniel 5187: *
5188: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5189: *
5190: * [55] StringType ::= 'CDATA'
5191: *
5192: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5193: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1.50 daniel 5194: *
1.102 daniel 5195: * Validity constraints for attribute values syntax are checked in
5196: * xmlValidateAttributeValue()
5197: *
1.99 daniel 5198: * [ VC: ID ]
1.117 daniel 5199: * Values of type ID must match the Name production. A name must not
1.99 daniel 5200: * appear more than once in an XML document as a value of this type;
5201: * i.e., ID values must uniquely identify the elements which bear them.
5202: *
5203: * [ VC: One ID per Element Type ]
1.117 daniel 5204: * No element type may have more than one ID attribute specified.
1.99 daniel 5205: *
5206: * [ VC: ID Attribute Default ]
1.117 daniel 5207: * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
1.99 daniel 5208: *
5209: * [ VC: IDREF ]
1.102 daniel 5210: * Values of type IDREF must match the Name production, and values
1.140 daniel 5211: * of type IDREFS must match Names; each IDREF Name must match the value
1.117 daniel 5212: * of an ID attribute on some element in the XML document; i.e. IDREF
1.99 daniel 5213: * values must match the value of some ID attribute.
5214: *
5215: * [ VC: Entity Name ]
1.102 daniel 5216: * Values of type ENTITY must match the Name production, values
1.140 daniel 5217: * of type ENTITIES must match Names; each Entity Name must match the
1.117 daniel 5218: * name of an unparsed entity declared in the DTD.
1.99 daniel 5219: *
5220: * [ VC: Name Token ]
1.102 daniel 5221: * Values of type NMTOKEN must match the Nmtoken production; values
1.99 daniel 5222: * of type NMTOKENS must match Nmtokens.
5223: *
1.69 daniel 5224: * Returns the attribute type
1.22 daniel 5225: */
1.59 daniel 5226: int
1.66 daniel 5227: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.91 daniel 5228: SHRINK;
1.152 daniel 5229: if ((RAW == 'C') && (NXT(1) == 'D') &&
1.40 daniel 5230: (NXT(2) == 'A') && (NXT(3) == 'T') &&
5231: (NXT(4) == 'A')) {
5232: SKIP(5);
1.66 daniel 5233: return(XML_ATTRIBUTE_CDATA);
1.152 daniel 5234: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.40 daniel 5235: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.97 daniel 5236: (NXT(4) == 'F') && (NXT(5) == 'S')) {
5237: SKIP(6);
5238: return(XML_ATTRIBUTE_IDREFS);
1.152 daniel 5239: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.97 daniel 5240: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.40 daniel 5241: (NXT(4) == 'F')) {
5242: SKIP(5);
1.59 daniel 5243: return(XML_ATTRIBUTE_IDREF);
1.152 daniel 5244: } else if ((RAW == 'I') && (NXT(1) == 'D')) {
1.66 daniel 5245: SKIP(2);
5246: return(XML_ATTRIBUTE_ID);
1.152 daniel 5247: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 5248: (NXT(2) == 'T') && (NXT(3) == 'I') &&
5249: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
5250: SKIP(6);
1.59 daniel 5251: return(XML_ATTRIBUTE_ENTITY);
1.152 daniel 5252: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 5253: (NXT(2) == 'T') && (NXT(3) == 'I') &&
5254: (NXT(4) == 'T') && (NXT(5) == 'I') &&
5255: (NXT(6) == 'E') && (NXT(7) == 'S')) {
5256: SKIP(8);
1.59 daniel 5257: return(XML_ATTRIBUTE_ENTITIES);
1.152 daniel 5258: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.40 daniel 5259: (NXT(2) == 'T') && (NXT(3) == 'O') &&
5260: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.66 daniel 5261: (NXT(6) == 'N') && (NXT(7) == 'S')) {
5262: SKIP(8);
5263: return(XML_ATTRIBUTE_NMTOKENS);
1.152 daniel 5264: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.66 daniel 5265: (NXT(2) == 'T') && (NXT(3) == 'O') &&
5266: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.40 daniel 5267: (NXT(6) == 'N')) {
5268: SKIP(7);
1.59 daniel 5269: return(XML_ATTRIBUTE_NMTOKEN);
1.22 daniel 5270: }
1.66 daniel 5271: return(xmlParseEnumeratedType(ctxt, tree));
1.22 daniel 5272: }
5273:
1.50 daniel 5274: /**
5275: * xmlParseAttributeListDecl:
5276: * @ctxt: an XML parser context
5277: *
5278: * : parse the Attribute list def for an element
1.22 daniel 5279: *
5280: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5281: *
5282: * [53] AttDef ::= S Name S AttType S DefaultDecl
1.50 daniel 5283: *
1.22 daniel 5284: */
1.55 daniel 5285: void
5286: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 5287: xmlChar *elemName;
5288: xmlChar *attrName;
1.103 daniel 5289: xmlEnumerationPtr tree;
1.22 daniel 5290:
1.152 daniel 5291: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 5292: (NXT(2) == 'A') && (NXT(3) == 'T') &&
5293: (NXT(4) == 'T') && (NXT(5) == 'L') &&
5294: (NXT(6) == 'I') && (NXT(7) == 'S') &&
1.59 daniel 5295: (NXT(8) == 'T')) {
1.187 daniel 5296: xmlParserInputPtr input = ctxt->input;
5297:
1.40 daniel 5298: SKIP(9);
1.59 daniel 5299: if (!IS_BLANK(CUR)) {
5300: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5301: ctxt->sax->error(ctxt->userData,
5302: "Space required after '<!ATTLIST'\n");
1.123 daniel 5303: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5304: ctxt->wellFormed = 0;
1.180 daniel 5305: ctxt->disableSAX = 1;
1.59 daniel 5306: }
1.42 daniel 5307: SKIP_BLANKS;
1.59 daniel 5308: elemName = xmlParseName(ctxt);
5309: if (elemName == NULL) {
1.55 daniel 5310: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5311: ctxt->sax->error(ctxt->userData,
5312: "ATTLIST: no name for Element\n");
1.123 daniel 5313: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5314: ctxt->wellFormed = 0;
1.180 daniel 5315: ctxt->disableSAX = 1;
1.22 daniel 5316: return;
5317: }
1.42 daniel 5318: SKIP_BLANKS;
1.152 daniel 5319: while (RAW != '>') {
1.123 daniel 5320: const xmlChar *check = CUR_PTR;
1.59 daniel 5321: int type;
5322: int def;
1.123 daniel 5323: xmlChar *defaultValue = NULL;
1.59 daniel 5324:
1.103 daniel 5325: tree = NULL;
1.59 daniel 5326: attrName = xmlParseName(ctxt);
5327: if (attrName == NULL) {
5328: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5329: ctxt->sax->error(ctxt->userData,
5330: "ATTLIST: no name for Attribute\n");
1.123 daniel 5331: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5332: ctxt->wellFormed = 0;
1.180 daniel 5333: ctxt->disableSAX = 1;
1.59 daniel 5334: break;
5335: }
1.97 daniel 5336: GROW;
1.59 daniel 5337: if (!IS_BLANK(CUR)) {
5338: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5339: ctxt->sax->error(ctxt->userData,
1.59 daniel 5340: "Space required after the attribute name\n");
1.123 daniel 5341: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5342: ctxt->wellFormed = 0;
1.180 daniel 5343: ctxt->disableSAX = 1;
1.170 daniel 5344: if (attrName != NULL)
5345: xmlFree(attrName);
5346: if (defaultValue != NULL)
5347: xmlFree(defaultValue);
1.59 daniel 5348: break;
5349: }
5350: SKIP_BLANKS;
5351:
1.66 daniel 5352: type = xmlParseAttributeType(ctxt, &tree);
1.170 daniel 5353: if (type <= 0) {
5354: if (attrName != NULL)
5355: xmlFree(attrName);
5356: if (defaultValue != NULL)
5357: xmlFree(defaultValue);
5358: break;
5359: }
1.22 daniel 5360:
1.97 daniel 5361: GROW;
1.59 daniel 5362: if (!IS_BLANK(CUR)) {
5363: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5364: ctxt->sax->error(ctxt->userData,
1.59 daniel 5365: "Space required after the attribute type\n");
1.123 daniel 5366: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5367: ctxt->wellFormed = 0;
1.180 daniel 5368: ctxt->disableSAX = 1;
1.170 daniel 5369: if (attrName != NULL)
5370: xmlFree(attrName);
5371: if (defaultValue != NULL)
5372: xmlFree(defaultValue);
5373: if (tree != NULL)
5374: xmlFreeEnumeration(tree);
1.59 daniel 5375: break;
5376: }
1.42 daniel 5377: SKIP_BLANKS;
1.59 daniel 5378:
5379: def = xmlParseDefaultDecl(ctxt, &defaultValue);
1.170 daniel 5380: if (def <= 0) {
5381: if (attrName != NULL)
5382: xmlFree(attrName);
5383: if (defaultValue != NULL)
5384: xmlFree(defaultValue);
5385: if (tree != NULL)
5386: xmlFreeEnumeration(tree);
5387: break;
5388: }
1.59 daniel 5389:
1.97 daniel 5390: GROW;
1.152 daniel 5391: if (RAW != '>') {
1.59 daniel 5392: if (!IS_BLANK(CUR)) {
5393: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5394: ctxt->sax->error(ctxt->userData,
1.59 daniel 5395: "Space required after the attribute default value\n");
1.123 daniel 5396: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5397: ctxt->wellFormed = 0;
1.180 daniel 5398: ctxt->disableSAX = 1;
1.170 daniel 5399: if (attrName != NULL)
5400: xmlFree(attrName);
5401: if (defaultValue != NULL)
5402: xmlFree(defaultValue);
5403: if (tree != NULL)
5404: xmlFreeEnumeration(tree);
1.59 daniel 5405: break;
5406: }
5407: SKIP_BLANKS;
5408: }
1.40 daniel 5409: if (check == CUR_PTR) {
1.55 daniel 5410: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5411: ctxt->sax->error(ctxt->userData,
1.59 daniel 5412: "xmlParseAttributeListDecl: detected internal error\n");
1.123 daniel 5413: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.170 daniel 5414: if (attrName != NULL)
5415: xmlFree(attrName);
5416: if (defaultValue != NULL)
5417: xmlFree(defaultValue);
5418: if (tree != NULL)
5419: xmlFreeEnumeration(tree);
1.22 daniel 5420: break;
5421: }
1.171 daniel 5422: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5423: (ctxt->sax->attributeDecl != NULL))
1.74 daniel 5424: ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
1.66 daniel 5425: type, def, defaultValue, tree);
1.59 daniel 5426: if (attrName != NULL)
1.119 daniel 5427: xmlFree(attrName);
1.59 daniel 5428: if (defaultValue != NULL)
1.119 daniel 5429: xmlFree(defaultValue);
1.97 daniel 5430: GROW;
1.22 daniel 5431: }
1.187 daniel 5432: if (RAW == '>') {
5433: if (input != ctxt->input) {
5434: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5435: ctxt->sax->error(ctxt->userData,
5436: "Attribute list declaration doesn't start and stop in the same entity\n");
5437: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5438: ctxt->wellFormed = 0;
5439: ctxt->disableSAX = 1;
5440: }
1.40 daniel 5441: NEXT;
1.187 daniel 5442: }
1.22 daniel 5443:
1.119 daniel 5444: xmlFree(elemName);
1.22 daniel 5445: }
5446: }
5447:
1.50 daniel 5448: /**
1.61 daniel 5449: * xmlParseElementMixedContentDecl:
5450: * @ctxt: an XML parser context
5451: *
5452: * parse the declaration for a Mixed Element content
5453: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5454: *
5455: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5456: * '(' S? '#PCDATA' S? ')'
5457: *
1.99 daniel 5458: * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5459: *
5460: * [ VC: No Duplicate Types ]
1.117 daniel 5461: * The same name must not appear more than once in a single
5462: * mixed-content declaration.
1.99 daniel 5463: *
1.61 daniel 5464: * returns: the list of the xmlElementContentPtr describing the element choices
5465: */
5466: xmlElementContentPtr
1.62 daniel 5467: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
1.64 daniel 5468: xmlElementContentPtr ret = NULL, cur = NULL, n;
1.123 daniel 5469: xmlChar *elem = NULL;
1.61 daniel 5470:
1.97 daniel 5471: GROW;
1.152 daniel 5472: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 5473: (NXT(2) == 'C') && (NXT(3) == 'D') &&
5474: (NXT(4) == 'A') && (NXT(5) == 'T') &&
5475: (NXT(6) == 'A')) {
5476: SKIP(7);
5477: SKIP_BLANKS;
1.91 daniel 5478: SHRINK;
1.152 daniel 5479: if (RAW == ')') {
1.187 daniel 5480: ctxt->entity = ctxt->input;
1.63 daniel 5481: NEXT;
5482: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
1.152 daniel 5483: if (RAW == '*') {
1.136 daniel 5484: ret->ocur = XML_ELEMENT_CONTENT_MULT;
5485: NEXT;
5486: }
1.63 daniel 5487: return(ret);
5488: }
1.152 daniel 5489: if ((RAW == '(') || (RAW == '|')) {
1.61 daniel 5490: ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
5491: if (ret == NULL) return(NULL);
1.99 daniel 5492: }
1.152 daniel 5493: while (RAW == '|') {
1.64 daniel 5494: NEXT;
1.61 daniel 5495: if (elem == NULL) {
5496: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5497: if (ret == NULL) return(NULL);
5498: ret->c1 = cur;
1.64 daniel 5499: cur = ret;
1.61 daniel 5500: } else {
1.64 daniel 5501: n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5502: if (n == NULL) return(NULL);
5503: n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
5504: cur->c2 = n;
5505: cur = n;
1.119 daniel 5506: xmlFree(elem);
1.61 daniel 5507: }
5508: SKIP_BLANKS;
5509: elem = xmlParseName(ctxt);
5510: if (elem == NULL) {
5511: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5512: ctxt->sax->error(ctxt->userData,
1.61 daniel 5513: "xmlParseElementMixedContentDecl : Name expected\n");
1.123 daniel 5514: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.61 daniel 5515: ctxt->wellFormed = 0;
1.180 daniel 5516: ctxt->disableSAX = 1;
1.61 daniel 5517: xmlFreeElementContent(cur);
5518: return(NULL);
5519: }
5520: SKIP_BLANKS;
1.97 daniel 5521: GROW;
1.61 daniel 5522: }
1.152 daniel 5523: if ((RAW == ')') && (NXT(1) == '*')) {
1.66 daniel 5524: if (elem != NULL) {
1.61 daniel 5525: cur->c2 = xmlNewElementContent(elem,
5526: XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 5527: xmlFree(elem);
1.66 daniel 5528: }
1.65 daniel 5529: ret->ocur = XML_ELEMENT_CONTENT_MULT;
1.187 daniel 5530: ctxt->entity = ctxt->input;
1.64 daniel 5531: SKIP(2);
1.61 daniel 5532: } else {
1.119 daniel 5533: if (elem != NULL) xmlFree(elem);
1.61 daniel 5534: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5535: ctxt->sax->error(ctxt->userData,
1.63 daniel 5536: "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
1.123 daniel 5537: ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
1.61 daniel 5538: ctxt->wellFormed = 0;
1.180 daniel 5539: ctxt->disableSAX = 1;
1.61 daniel 5540: xmlFreeElementContent(ret);
5541: return(NULL);
5542: }
5543:
5544: } else {
5545: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5546: ctxt->sax->error(ctxt->userData,
1.61 daniel 5547: "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
1.123 daniel 5548: ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
1.61 daniel 5549: ctxt->wellFormed = 0;
1.180 daniel 5550: ctxt->disableSAX = 1;
1.61 daniel 5551: }
5552: return(ret);
5553: }
5554:
5555: /**
5556: * xmlParseElementChildrenContentDecl:
1.50 daniel 5557: * @ctxt: an XML parser context
5558: *
1.61 daniel 5559: * parse the declaration for a Mixed Element content
5560: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
1.22 daniel 5561: *
1.61 daniel 5562: *
1.22 daniel 5563: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5564: *
5565: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5566: *
5567: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5568: *
5569: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5570: *
1.99 daniel 5571: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5572: * TODO Parameter-entity replacement text must be properly nested
5573: * with parenthetized groups. That is to say, if either of the
5574: * opening or closing parentheses in a choice, seq, or Mixed
5575: * construct is contained in the replacement text for a parameter
5576: * entity, both must be contained in the same replacement text. For
5577: * interoperability, if a parameter-entity reference appears in a
5578: * choice, seq, or Mixed construct, its replacement text should not
5579: * be empty, and neither the first nor last non-blank character of
5580: * the replacement text should be a connector (| or ,).
5581: *
1.62 daniel 5582: * returns: the tree of xmlElementContentPtr describing the element
1.61 daniel 5583: * hierarchy.
5584: */
5585: xmlElementContentPtr
1.62 daniel 5586: xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
1.63 daniel 5587: xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
1.123 daniel 5588: xmlChar *elem;
5589: xmlChar type = 0;
1.62 daniel 5590:
5591: SKIP_BLANKS;
1.94 daniel 5592: GROW;
1.152 daniel 5593: if (RAW == '(') {
1.63 daniel 5594: /* Recurse on first child */
1.62 daniel 5595: NEXT;
5596: SKIP_BLANKS;
5597: cur = ret = xmlParseElementChildrenContentDecl(ctxt);
5598: SKIP_BLANKS;
1.101 daniel 5599: GROW;
1.62 daniel 5600: } else {
5601: elem = xmlParseName(ctxt);
5602: if (elem == NULL) {
5603: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5604: ctxt->sax->error(ctxt->userData,
1.62 daniel 5605: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.123 daniel 5606: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 5607: ctxt->wellFormed = 0;
1.180 daniel 5608: ctxt->disableSAX = 1;
1.62 daniel 5609: return(NULL);
5610: }
5611: cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.101 daniel 5612: GROW;
1.152 daniel 5613: if (RAW == '?') {
1.104 daniel 5614: cur->ocur = XML_ELEMENT_CONTENT_OPT;
1.62 daniel 5615: NEXT;
1.152 daniel 5616: } else if (RAW == '*') {
1.104 daniel 5617: cur->ocur = XML_ELEMENT_CONTENT_MULT;
1.62 daniel 5618: NEXT;
1.152 daniel 5619: } else if (RAW == '+') {
1.104 daniel 5620: cur->ocur = XML_ELEMENT_CONTENT_PLUS;
1.62 daniel 5621: NEXT;
5622: } else {
1.104 daniel 5623: cur->ocur = XML_ELEMENT_CONTENT_ONCE;
1.62 daniel 5624: }
1.119 daniel 5625: xmlFree(elem);
1.101 daniel 5626: GROW;
1.62 daniel 5627: }
5628: SKIP_BLANKS;
1.91 daniel 5629: SHRINK;
1.152 daniel 5630: while (RAW != ')') {
1.63 daniel 5631: /*
5632: * Each loop we parse one separator and one element.
5633: */
1.152 daniel 5634: if (RAW == ',') {
1.62 daniel 5635: if (type == 0) type = CUR;
5636:
5637: /*
5638: * Detect "Name | Name , Name" error
5639: */
5640: else if (type != CUR) {
5641: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5642: ctxt->sax->error(ctxt->userData,
1.62 daniel 5643: "xmlParseElementChildrenContentDecl : '%c' expected\n",
5644: type);
1.123 daniel 5645: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 5646: ctxt->wellFormed = 0;
1.180 daniel 5647: ctxt->disableSAX = 1;
1.170 daniel 5648: if ((op != NULL) && (op != ret))
5649: xmlFreeElementContent(op);
5650: if ((last != NULL) && (last != ret))
5651: xmlFreeElementContent(last);
5652: if (ret != NULL)
5653: xmlFreeElementContent(ret);
1.62 daniel 5654: return(NULL);
5655: }
1.64 daniel 5656: NEXT;
1.62 daniel 5657:
1.63 daniel 5658: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
5659: if (op == NULL) {
5660: xmlFreeElementContent(ret);
5661: return(NULL);
5662: }
5663: if (last == NULL) {
5664: op->c1 = ret;
1.65 daniel 5665: ret = cur = op;
1.63 daniel 5666: } else {
5667: cur->c2 = op;
5668: op->c1 = last;
5669: cur =op;
1.65 daniel 5670: last = NULL;
1.63 daniel 5671: }
1.152 daniel 5672: } else if (RAW == '|') {
1.62 daniel 5673: if (type == 0) type = CUR;
5674:
5675: /*
1.63 daniel 5676: * Detect "Name , Name | Name" error
1.62 daniel 5677: */
5678: else if (type != CUR) {
5679: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5680: ctxt->sax->error(ctxt->userData,
1.62 daniel 5681: "xmlParseElementChildrenContentDecl : '%c' expected\n",
5682: type);
1.123 daniel 5683: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 5684: ctxt->wellFormed = 0;
1.180 daniel 5685: ctxt->disableSAX = 1;
1.170 daniel 5686: if ((op != NULL) && (op != ret))
5687: xmlFreeElementContent(op);
5688: if ((last != NULL) && (last != ret))
5689: xmlFreeElementContent(last);
5690: if (ret != NULL)
5691: xmlFreeElementContent(ret);
1.62 daniel 5692: return(NULL);
5693: }
1.64 daniel 5694: NEXT;
1.62 daniel 5695:
1.63 daniel 5696: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5697: if (op == NULL) {
1.170 daniel 5698: if ((op != NULL) && (op != ret))
5699: xmlFreeElementContent(op);
5700: if ((last != NULL) && (last != ret))
5701: xmlFreeElementContent(last);
5702: if (ret != NULL)
5703: xmlFreeElementContent(ret);
1.63 daniel 5704: return(NULL);
5705: }
5706: if (last == NULL) {
5707: op->c1 = ret;
1.65 daniel 5708: ret = cur = op;
1.63 daniel 5709: } else {
5710: cur->c2 = op;
5711: op->c1 = last;
5712: cur =op;
1.65 daniel 5713: last = NULL;
1.63 daniel 5714: }
1.62 daniel 5715: } else {
5716: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5717: ctxt->sax->error(ctxt->userData,
1.62 daniel 5718: "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
5719: ctxt->wellFormed = 0;
1.180 daniel 5720: ctxt->disableSAX = 1;
1.123 daniel 5721: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
1.170 daniel 5722: if ((op != NULL) && (op != ret))
5723: xmlFreeElementContent(op);
5724: if ((last != NULL) && (last != ret))
5725: xmlFreeElementContent(last);
5726: if (ret != NULL)
5727: xmlFreeElementContent(ret);
1.62 daniel 5728: return(NULL);
5729: }
1.101 daniel 5730: GROW;
1.62 daniel 5731: SKIP_BLANKS;
1.101 daniel 5732: GROW;
1.152 daniel 5733: if (RAW == '(') {
1.63 daniel 5734: /* Recurse on second child */
1.62 daniel 5735: NEXT;
5736: SKIP_BLANKS;
1.65 daniel 5737: last = xmlParseElementChildrenContentDecl(ctxt);
1.62 daniel 5738: SKIP_BLANKS;
5739: } else {
5740: elem = xmlParseName(ctxt);
5741: if (elem == NULL) {
5742: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5743: ctxt->sax->error(ctxt->userData,
1.122 daniel 5744: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.123 daniel 5745: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 5746: ctxt->wellFormed = 0;
1.180 daniel 5747: ctxt->disableSAX = 1;
1.170 daniel 5748: if ((op != NULL) && (op != ret))
5749: xmlFreeElementContent(op);
5750: if ((last != NULL) && (last != ret))
5751: xmlFreeElementContent(last);
5752: if (ret != NULL)
5753: xmlFreeElementContent(ret);
1.62 daniel 5754: return(NULL);
5755: }
1.65 daniel 5756: last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 5757: xmlFree(elem);
1.152 daniel 5758: if (RAW == '?') {
1.105 daniel 5759: last->ocur = XML_ELEMENT_CONTENT_OPT;
5760: NEXT;
1.152 daniel 5761: } else if (RAW == '*') {
1.105 daniel 5762: last->ocur = XML_ELEMENT_CONTENT_MULT;
5763: NEXT;
1.152 daniel 5764: } else if (RAW == '+') {
1.105 daniel 5765: last->ocur = XML_ELEMENT_CONTENT_PLUS;
5766: NEXT;
5767: } else {
5768: last->ocur = XML_ELEMENT_CONTENT_ONCE;
5769: }
1.63 daniel 5770: }
5771: SKIP_BLANKS;
1.97 daniel 5772: GROW;
1.64 daniel 5773: }
1.65 daniel 5774: if ((cur != NULL) && (last != NULL)) {
5775: cur->c2 = last;
1.62 daniel 5776: }
1.187 daniel 5777: ctxt->entity = ctxt->input;
1.62 daniel 5778: NEXT;
1.152 daniel 5779: if (RAW == '?') {
1.62 daniel 5780: ret->ocur = XML_ELEMENT_CONTENT_OPT;
5781: NEXT;
1.152 daniel 5782: } else if (RAW == '*') {
1.62 daniel 5783: ret->ocur = XML_ELEMENT_CONTENT_MULT;
5784: NEXT;
1.152 daniel 5785: } else if (RAW == '+') {
1.62 daniel 5786: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
5787: NEXT;
5788: }
5789: return(ret);
1.61 daniel 5790: }
5791:
5792: /**
5793: * xmlParseElementContentDecl:
5794: * @ctxt: an XML parser context
5795: * @name: the name of the element being defined.
5796: * @result: the Element Content pointer will be stored here if any
1.22 daniel 5797: *
1.61 daniel 5798: * parse the declaration for an Element content either Mixed or Children,
5799: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5800: *
5801: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1.50 daniel 5802: *
1.61 daniel 5803: * returns: the type of element content XML_ELEMENT_TYPE_xxx
1.22 daniel 5804: */
5805:
1.61 daniel 5806: int
1.123 daniel 5807: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
1.61 daniel 5808: xmlElementContentPtr *result) {
5809:
5810: xmlElementContentPtr tree = NULL;
1.187 daniel 5811: xmlParserInputPtr input = ctxt->input;
1.61 daniel 5812: int res;
5813:
5814: *result = NULL;
5815:
1.152 daniel 5816: if (RAW != '(') {
1.61 daniel 5817: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5818: ctxt->sax->error(ctxt->userData,
1.61 daniel 5819: "xmlParseElementContentDecl : '(' expected\n");
1.123 daniel 5820: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.61 daniel 5821: ctxt->wellFormed = 0;
1.180 daniel 5822: ctxt->disableSAX = 1;
1.61 daniel 5823: return(-1);
5824: }
5825: NEXT;
1.97 daniel 5826: GROW;
1.61 daniel 5827: SKIP_BLANKS;
1.152 daniel 5828: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 5829: (NXT(2) == 'C') && (NXT(3) == 'D') &&
5830: (NXT(4) == 'A') && (NXT(5) == 'T') &&
5831: (NXT(6) == 'A')) {
1.62 daniel 5832: tree = xmlParseElementMixedContentDecl(ctxt);
1.61 daniel 5833: res = XML_ELEMENT_TYPE_MIXED;
5834: } else {
1.62 daniel 5835: tree = xmlParseElementChildrenContentDecl(ctxt);
1.61 daniel 5836: res = XML_ELEMENT_TYPE_ELEMENT;
5837: }
1.187 daniel 5838: if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
5839: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5840: ctxt->sax->error(ctxt->userData,
5841: "Element content declaration doesn't start and stop in the same entity\n");
5842: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5843: ctxt->wellFormed = 0;
5844: ctxt->disableSAX = 1;
5845: }
1.61 daniel 5846: SKIP_BLANKS;
1.63 daniel 5847: /****************************
1.152 daniel 5848: if (RAW != ')') {
1.61 daniel 5849: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5850: ctxt->sax->error(ctxt->userData,
1.61 daniel 5851: "xmlParseElementContentDecl : ')' expected\n");
5852: ctxt->wellFormed = 0;
1.180 daniel 5853: ctxt->disableSAX = 1;
1.61 daniel 5854: return(-1);
5855: }
1.63 daniel 5856: ****************************/
5857: *result = tree;
1.61 daniel 5858: return(res);
1.22 daniel 5859: }
5860:
1.50 daniel 5861: /**
5862: * xmlParseElementDecl:
5863: * @ctxt: an XML parser context
5864: *
5865: * parse an Element declaration.
1.22 daniel 5866: *
5867: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5868: *
1.99 daniel 5869: * [ VC: Unique Element Type Declaration ]
1.117 daniel 5870: * No element type may be declared more than once
1.69 daniel 5871: *
5872: * Returns the type of the element, or -1 in case of error
1.22 daniel 5873: */
1.59 daniel 5874: int
1.55 daniel 5875: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 5876: xmlChar *name;
1.59 daniel 5877: int ret = -1;
1.61 daniel 5878: xmlElementContentPtr content = NULL;
1.22 daniel 5879:
1.97 daniel 5880: GROW;
1.152 daniel 5881: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 5882: (NXT(2) == 'E') && (NXT(3) == 'L') &&
5883: (NXT(4) == 'E') && (NXT(5) == 'M') &&
5884: (NXT(6) == 'E') && (NXT(7) == 'N') &&
1.59 daniel 5885: (NXT(8) == 'T')) {
1.187 daniel 5886: xmlParserInputPtr input = ctxt->input;
5887:
1.40 daniel 5888: SKIP(9);
1.59 daniel 5889: if (!IS_BLANK(CUR)) {
5890: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5891: ctxt->sax->error(ctxt->userData,
1.59 daniel 5892: "Space required after 'ELEMENT'\n");
1.123 daniel 5893: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5894: ctxt->wellFormed = 0;
1.180 daniel 5895: ctxt->disableSAX = 1;
1.59 daniel 5896: }
1.42 daniel 5897: SKIP_BLANKS;
1.22 daniel 5898: name = xmlParseName(ctxt);
5899: if (name == NULL) {
1.55 daniel 5900: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5901: ctxt->sax->error(ctxt->userData,
1.59 daniel 5902: "xmlParseElementDecl: no name for Element\n");
1.123 daniel 5903: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5904: ctxt->wellFormed = 0;
1.180 daniel 5905: ctxt->disableSAX = 1;
1.59 daniel 5906: return(-1);
5907: }
5908: if (!IS_BLANK(CUR)) {
5909: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5910: ctxt->sax->error(ctxt->userData,
1.59 daniel 5911: "Space required after the element name\n");
1.123 daniel 5912: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5913: ctxt->wellFormed = 0;
1.180 daniel 5914: ctxt->disableSAX = 1;
1.22 daniel 5915: }
1.42 daniel 5916: SKIP_BLANKS;
1.152 daniel 5917: if ((RAW == 'E') && (NXT(1) == 'M') &&
1.40 daniel 5918: (NXT(2) == 'P') && (NXT(3) == 'T') &&
5919: (NXT(4) == 'Y')) {
5920: SKIP(5);
1.22 daniel 5921: /*
5922: * Element must always be empty.
5923: */
1.59 daniel 5924: ret = XML_ELEMENT_TYPE_EMPTY;
1.152 daniel 5925: } else if ((RAW == 'A') && (NXT(1) == 'N') &&
1.40 daniel 5926: (NXT(2) == 'Y')) {
5927: SKIP(3);
1.22 daniel 5928: /*
5929: * Element is a generic container.
5930: */
1.59 daniel 5931: ret = XML_ELEMENT_TYPE_ANY;
1.152 daniel 5932: } else if (RAW == '(') {
1.61 daniel 5933: ret = xmlParseElementContentDecl(ctxt, name, &content);
1.22 daniel 5934: } else {
1.98 daniel 5935: /*
5936: * [ WFC: PEs in Internal Subset ] error handling.
5937: */
1.152 daniel 5938: if ((RAW == '%') && (ctxt->external == 0) &&
1.98 daniel 5939: (ctxt->inputNr == 1)) {
5940: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5941: ctxt->sax->error(ctxt->userData,
5942: "PEReference: forbidden within markup decl in internal subset\n");
1.123 daniel 5943: ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
1.98 daniel 5944: } else {
5945: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5946: ctxt->sax->error(ctxt->userData,
5947: "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
1.123 daniel 5948: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.98 daniel 5949: }
1.61 daniel 5950: ctxt->wellFormed = 0;
1.180 daniel 5951: ctxt->disableSAX = 1;
1.119 daniel 5952: if (name != NULL) xmlFree(name);
1.61 daniel 5953: return(-1);
1.22 daniel 5954: }
1.142 daniel 5955:
5956: SKIP_BLANKS;
5957: /*
5958: * Pop-up of finished entities.
5959: */
1.152 daniel 5960: while ((RAW == 0) && (ctxt->inputNr > 1))
1.142 daniel 5961: xmlPopInput(ctxt);
1.42 daniel 5962: SKIP_BLANKS;
1.142 daniel 5963:
1.152 daniel 5964: if (RAW != '>') {
1.55 daniel 5965: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5966: ctxt->sax->error(ctxt->userData,
1.31 daniel 5967: "xmlParseElementDecl: expected '>' at the end\n");
1.123 daniel 5968: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.59 daniel 5969: ctxt->wellFormed = 0;
1.180 daniel 5970: ctxt->disableSAX = 1;
1.61 daniel 5971: } else {
1.187 daniel 5972: if (input != ctxt->input) {
5973: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5974: ctxt->sax->error(ctxt->userData,
5975: "Element declaration doesn't start and stop in the same entity\n");
5976: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5977: ctxt->wellFormed = 0;
5978: ctxt->disableSAX = 1;
5979: }
5980:
1.40 daniel 5981: NEXT;
1.171 daniel 5982: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5983: (ctxt->sax->elementDecl != NULL))
1.76 daniel 5984: ctxt->sax->elementDecl(ctxt->userData, name, ret,
5985: content);
1.61 daniel 5986: }
1.84 daniel 5987: if (content != NULL) {
5988: xmlFreeElementContent(content);
5989: }
1.61 daniel 5990: if (name != NULL) {
1.119 daniel 5991: xmlFree(name);
1.61 daniel 5992: }
1.22 daniel 5993: }
1.59 daniel 5994: return(ret);
1.22 daniel 5995: }
5996:
1.50 daniel 5997: /**
5998: * xmlParseMarkupDecl:
5999: * @ctxt: an XML parser context
6000: *
6001: * parse Markup declarations
1.22 daniel 6002: *
6003: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6004: * NotationDecl | PI | Comment
6005: *
1.98 daniel 6006: * [ VC: Proper Declaration/PE Nesting ]
6007: * TODO Parameter-entity replacement text must be properly nested with
6008: * markup declarations. That is to say, if either the first character
6009: * or the last character of a markup declaration (markupdecl above) is
6010: * contained in the replacement text for a parameter-entity reference,
6011: * both must be contained in the same replacement text.
6012: *
6013: * [ WFC: PEs in Internal Subset ]
6014: * In the internal DTD subset, parameter-entity references can occur
6015: * only where markup declarations can occur, not within markup declarations.
6016: * (This does not apply to references that occur in external parameter
6017: * entities or to the external subset.)
1.22 daniel 6018: */
1.55 daniel 6019: void
6020: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1.94 daniel 6021: GROW;
1.22 daniel 6022: xmlParseElementDecl(ctxt);
6023: xmlParseAttributeListDecl(ctxt);
6024: xmlParseEntityDecl(ctxt);
6025: xmlParseNotationDecl(ctxt);
6026: xmlParsePI(ctxt);
1.114 daniel 6027: xmlParseComment(ctxt);
1.98 daniel 6028: /*
6029: * This is only for internal subset. On external entities,
6030: * the replacement is done before parsing stage
6031: */
6032: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6033: xmlParsePEReference(ctxt);
1.97 daniel 6034: ctxt->instate = XML_PARSER_DTD;
1.22 daniel 6035: }
6036:
1.50 daniel 6037: /**
1.76 daniel 6038: * xmlParseTextDecl:
6039: * @ctxt: an XML parser context
6040: *
6041: * parse an XML declaration header for external entities
6042: *
6043: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
1.176 daniel 6044: *
6045: * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
1.76 daniel 6046: */
6047:
1.172 daniel 6048: void
1.76 daniel 6049: xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 6050: xmlChar *version;
1.76 daniel 6051:
6052: /*
6053: * We know that '<?xml' is here.
6054: */
1.193 daniel 6055: if ((RAW == '<') && (NXT(1) == '?') &&
6056: (NXT(2) == 'x') && (NXT(3) == 'm') &&
6057: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6058: SKIP(5);
6059: } else {
6060: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6061: ctxt->sax->error(ctxt->userData,
6062: "Text declaration '<?xml' required\n");
6063: ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
6064: ctxt->wellFormed = 0;
6065: ctxt->disableSAX = 1;
6066:
6067: return;
6068: }
1.76 daniel 6069:
6070: if (!IS_BLANK(CUR)) {
6071: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6072: ctxt->sax->error(ctxt->userData,
6073: "Space needed after '<?xml'\n");
1.123 daniel 6074: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 6075: ctxt->wellFormed = 0;
1.180 daniel 6076: ctxt->disableSAX = 1;
1.76 daniel 6077: }
6078: SKIP_BLANKS;
6079:
6080: /*
6081: * We may have the VersionInfo here.
6082: */
6083: version = xmlParseVersionInfo(ctxt);
6084: if (version == NULL)
6085: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.172 daniel 6086: ctxt->input->version = version;
1.76 daniel 6087:
6088: /*
6089: * We must have the encoding declaration
6090: */
6091: if (!IS_BLANK(CUR)) {
6092: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6093: ctxt->sax->error(ctxt->userData, "Space needed here\n");
1.123 daniel 6094: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 6095: ctxt->wellFormed = 0;
1.180 daniel 6096: ctxt->disableSAX = 1;
1.76 daniel 6097: }
1.195 daniel 6098: xmlParseEncodingDecl(ctxt);
1.193 daniel 6099: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6100: /*
6101: * The XML REC instructs us to stop parsing right here
6102: */
6103: return;
6104: }
1.76 daniel 6105:
6106: SKIP_BLANKS;
1.152 daniel 6107: if ((RAW == '?') && (NXT(1) == '>')) {
1.76 daniel 6108: SKIP(2);
1.152 daniel 6109: } else if (RAW == '>') {
1.76 daniel 6110: /* Deprecated old WD ... */
6111: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6112: ctxt->sax->error(ctxt->userData,
6113: "XML declaration must end-up with '?>'\n");
1.123 daniel 6114: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 6115: ctxt->wellFormed = 0;
1.180 daniel 6116: ctxt->disableSAX = 1;
1.76 daniel 6117: NEXT;
6118: } else {
6119: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6120: ctxt->sax->error(ctxt->userData,
6121: "parsing XML declaration: '?>' expected\n");
1.123 daniel 6122: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 6123: ctxt->wellFormed = 0;
1.180 daniel 6124: ctxt->disableSAX = 1;
1.76 daniel 6125: MOVETO_ENDTAG(CUR_PTR);
6126: NEXT;
6127: }
6128: }
6129:
6130: /*
6131: * xmlParseConditionalSections
6132: * @ctxt: an XML parser context
6133: *
6134: * TODO : Conditionnal section are not yet supported !
6135: *
6136: * [61] conditionalSect ::= includeSect | ignoreSect
6137: * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6138: * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6139: * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6140: * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6141: */
6142:
6143: void
6144: xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
1.165 daniel 6145: SKIP(3);
6146: SKIP_BLANKS;
1.168 daniel 6147: if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
6148: (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
6149: (NXT(6) == 'E')) {
1.165 daniel 6150: SKIP(7);
1.168 daniel 6151: SKIP_BLANKS;
6152: if (RAW != '[') {
6153: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6154: ctxt->sax->error(ctxt->userData,
6155: "XML conditional section '[' expected\n");
6156: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
6157: ctxt->wellFormed = 0;
1.180 daniel 6158: ctxt->disableSAX = 1;
1.168 daniel 6159: } else {
6160: NEXT;
6161: }
1.165 daniel 6162: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6163: (NXT(2) != '>'))) {
6164: const xmlChar *check = CUR_PTR;
6165: int cons = ctxt->input->consumed;
6166: int tok = ctxt->token;
6167:
6168: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6169: xmlParseConditionalSections(ctxt);
6170: } else if (IS_BLANK(CUR)) {
6171: NEXT;
6172: } else if (RAW == '%') {
6173: xmlParsePEReference(ctxt);
6174: } else
6175: xmlParseMarkupDecl(ctxt);
6176:
6177: /*
6178: * Pop-up of finished entities.
6179: */
6180: while ((RAW == 0) && (ctxt->inputNr > 1))
6181: xmlPopInput(ctxt);
6182:
6183: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
6184: (tok == ctxt->token)) {
6185: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6186: ctxt->sax->error(ctxt->userData,
6187: "Content error in the external subset\n");
6188: ctxt->wellFormed = 0;
1.180 daniel 6189: ctxt->disableSAX = 1;
1.165 daniel 6190: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
6191: break;
6192: }
6193: }
1.168 daniel 6194: } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
6195: (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
1.171 daniel 6196: int state;
6197:
1.168 daniel 6198: SKIP(6);
6199: SKIP_BLANKS;
6200: if (RAW != '[') {
6201: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6202: ctxt->sax->error(ctxt->userData,
6203: "XML conditional section '[' expected\n");
6204: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
6205: ctxt->wellFormed = 0;
1.180 daniel 6206: ctxt->disableSAX = 1;
1.168 daniel 6207: } else {
6208: NEXT;
6209: }
1.171 daniel 6210:
1.143 daniel 6211: /*
1.171 daniel 6212: * Parse up to the end of the conditionnal section
6213: * But disable SAX event generating DTD building in the meantime
1.143 daniel 6214: */
1.171 daniel 6215: state = ctxt->disableSAX;
1.165 daniel 6216: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6217: (NXT(2) != '>'))) {
1.171 daniel 6218: const xmlChar *check = CUR_PTR;
6219: int cons = ctxt->input->consumed;
6220: int tok = ctxt->token;
6221:
6222: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6223: xmlParseConditionalSections(ctxt);
6224: } else if (IS_BLANK(CUR)) {
6225: NEXT;
6226: } else if (RAW == '%') {
6227: xmlParsePEReference(ctxt);
6228: } else
6229: xmlParseMarkupDecl(ctxt);
6230:
1.165 daniel 6231: /*
6232: * Pop-up of finished entities.
6233: */
6234: while ((RAW == 0) && (ctxt->inputNr > 1))
6235: xmlPopInput(ctxt);
1.143 daniel 6236:
1.171 daniel 6237: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
6238: (tok == ctxt->token)) {
6239: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6240: ctxt->sax->error(ctxt->userData,
6241: "Content error in the external subset\n");
6242: ctxt->wellFormed = 0;
1.180 daniel 6243: ctxt->disableSAX = 1;
1.171 daniel 6244: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
6245: break;
6246: }
1.165 daniel 6247: }
1.171 daniel 6248: ctxt->disableSAX = state;
1.168 daniel 6249: } else {
6250: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6251: ctxt->sax->error(ctxt->userData,
6252: "XML conditional section INCLUDE or IGNORE keyword expected\n");
6253: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
6254: ctxt->wellFormed = 0;
1.180 daniel 6255: ctxt->disableSAX = 1;
1.143 daniel 6256: }
6257:
1.152 daniel 6258: if (RAW == 0)
1.143 daniel 6259: SHRINK;
6260:
1.152 daniel 6261: if (RAW == 0) {
1.76 daniel 6262: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6263: ctxt->sax->error(ctxt->userData,
6264: "XML conditional section not closed\n");
1.123 daniel 6265: ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
1.76 daniel 6266: ctxt->wellFormed = 0;
1.180 daniel 6267: ctxt->disableSAX = 1;
1.143 daniel 6268: } else {
6269: SKIP(3);
1.76 daniel 6270: }
6271: }
6272:
6273: /**
1.124 daniel 6274: * xmlParseExternalSubset:
1.76 daniel 6275: * @ctxt: an XML parser context
1.124 daniel 6276: * @ExternalID: the external identifier
6277: * @SystemID: the system identifier (or URL)
1.76 daniel 6278: *
6279: * parse Markup declarations from an external subset
6280: *
6281: * [30] extSubset ::= textDecl? extSubsetDecl
6282: *
6283: * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6284: */
6285: void
1.123 daniel 6286: xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6287: const xmlChar *SystemID) {
1.132 daniel 6288: GROW;
1.152 daniel 6289: if ((RAW == '<') && (NXT(1) == '?') &&
1.76 daniel 6290: (NXT(2) == 'x') && (NXT(3) == 'm') &&
6291: (NXT(4) == 'l')) {
1.172 daniel 6292: xmlParseTextDecl(ctxt);
1.193 daniel 6293: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6294: /*
6295: * The XML REC instructs us to stop parsing right here
6296: */
6297: ctxt->instate = XML_PARSER_EOF;
6298: return;
6299: }
1.76 daniel 6300: }
1.79 daniel 6301: if (ctxt->myDoc == NULL) {
1.116 daniel 6302: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
1.79 daniel 6303: }
6304: if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6305: xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6306:
1.96 daniel 6307: ctxt->instate = XML_PARSER_DTD;
1.101 daniel 6308: ctxt->external = 1;
1.152 daniel 6309: while (((RAW == '<') && (NXT(1) == '?')) ||
6310: ((RAW == '<') && (NXT(1) == '!')) ||
1.164 daniel 6311: IS_BLANK(CUR)) {
1.123 daniel 6312: const xmlChar *check = CUR_PTR;
1.115 daniel 6313: int cons = ctxt->input->consumed;
1.164 daniel 6314: int tok = ctxt->token;
1.115 daniel 6315:
1.152 daniel 6316: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
1.76 daniel 6317: xmlParseConditionalSections(ctxt);
6318: } else if (IS_BLANK(CUR)) {
6319: NEXT;
1.152 daniel 6320: } else if (RAW == '%') {
1.76 daniel 6321: xmlParsePEReference(ctxt);
6322: } else
6323: xmlParseMarkupDecl(ctxt);
1.77 daniel 6324:
6325: /*
6326: * Pop-up of finished entities.
6327: */
1.166 daniel 6328: while ((RAW == 0) && (ctxt->inputNr > 1))
1.77 daniel 6329: xmlPopInput(ctxt);
6330:
1.164 daniel 6331: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
6332: (tok == ctxt->token)) {
1.115 daniel 6333: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6334: ctxt->sax->error(ctxt->userData,
6335: "Content error in the external subset\n");
6336: ctxt->wellFormed = 0;
1.180 daniel 6337: ctxt->disableSAX = 1;
1.123 daniel 6338: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.115 daniel 6339: break;
6340: }
1.76 daniel 6341: }
6342:
1.152 daniel 6343: if (RAW != 0) {
1.76 daniel 6344: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6345: ctxt->sax->error(ctxt->userData,
6346: "Extra content at the end of the document\n");
1.123 daniel 6347: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.76 daniel 6348: ctxt->wellFormed = 0;
1.180 daniel 6349: ctxt->disableSAX = 1;
1.76 daniel 6350: }
6351:
6352: }
6353:
6354: /**
1.77 daniel 6355: * xmlParseReference:
6356: * @ctxt: an XML parser context
6357: *
6358: * parse and handle entity references in content, depending on the SAX
6359: * interface, this may end-up in a call to character() if this is a
1.79 daniel 6360: * CharRef, a predefined entity, if there is no reference() callback.
6361: * or if the parser was asked to switch to that mode.
1.77 daniel 6362: *
6363: * [67] Reference ::= EntityRef | CharRef
6364: */
6365: void
6366: xmlParseReference(xmlParserCtxtPtr ctxt) {
6367: xmlEntityPtr ent;
1.123 daniel 6368: xmlChar *val;
1.152 daniel 6369: if (RAW != '&') return;
1.77 daniel 6370:
1.113 daniel 6371: if (ctxt->inputNr > 1) {
1.123 daniel 6372: xmlChar cur[2] = { '&' , 0 } ;
1.113 daniel 6373:
1.171 daniel 6374: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6375: (!ctxt->disableSAX))
1.113 daniel 6376: ctxt->sax->characters(ctxt->userData, cur, 1);
6377: if (ctxt->token == '&')
6378: ctxt->token = 0;
6379: else {
6380: SKIP(1);
6381: }
6382: return;
6383: }
1.77 daniel 6384: if (NXT(1) == '#') {
1.152 daniel 6385: int i = 0;
1.153 daniel 6386: xmlChar out[10];
6387: int hex = NXT(2);
1.77 daniel 6388: int val = xmlParseCharRef(ctxt);
1.152 daniel 6389:
1.198 daniel 6390: if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
1.153 daniel 6391: /*
6392: * So we are using non-UTF-8 buffers
6393: * Check that the char fit on 8bits, if not
6394: * generate a CharRef.
6395: */
6396: if (val <= 0xFF) {
6397: out[0] = val;
6398: out[1] = 0;
1.171 daniel 6399: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6400: (!ctxt->disableSAX))
1.153 daniel 6401: ctxt->sax->characters(ctxt->userData, out, 1);
6402: } else {
6403: if ((hex == 'x') || (hex == 'X'))
6404: sprintf((char *)out, "#x%X", val);
6405: else
6406: sprintf((char *)out, "#%d", val);
1.171 daniel 6407: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6408: (!ctxt->disableSAX))
1.153 daniel 6409: ctxt->sax->reference(ctxt->userData, out);
6410: }
6411: } else {
6412: /*
6413: * Just encode the value in UTF-8
6414: */
6415: COPY_BUF(0 ,out, i, val);
6416: out[i] = 0;
1.171 daniel 6417: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6418: (!ctxt->disableSAX))
1.153 daniel 6419: ctxt->sax->characters(ctxt->userData, out, i);
6420: }
1.77 daniel 6421: } else {
6422: ent = xmlParseEntityRef(ctxt);
6423: if (ent == NULL) return;
6424: if ((ent->name != NULL) &&
1.159 daniel 6425: (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
1.180 daniel 6426: xmlNodePtr list = NULL;
6427: int ret;
6428:
6429:
6430: /*
6431: * The first reference to the entity trigger a parsing phase
6432: * where the ent->children is filled with the result from
6433: * the parsing.
6434: */
6435: if (ent->children == NULL) {
6436: xmlChar *value;
6437: value = ent->content;
6438:
6439: /*
6440: * Check that this entity is well formed
6441: */
6442: if ((value != NULL) &&
6443: (value[1] == 0) && (value[0] == '<') &&
6444: (!xmlStrcmp(ent->name, BAD_CAST "lt"))) {
6445: /*
6446: * TODO: get definite answer on this !!!
6447: * Lots of entity decls are used to declare a single
6448: * char
6449: * <!ENTITY lt "<">
6450: * Which seems to be valid since
6451: * 2.4: The ampersand character (&) and the left angle
6452: * bracket (<) may appear in their literal form only
6453: * when used ... They are also legal within the literal
6454: * entity value of an internal entity declaration;i
6455: * see "4.3.2 Well-Formed Parsed Entities".
6456: * IMHO 2.4 and 4.3.2 are directly in contradiction.
6457: * Looking at the OASIS test suite and James Clark
6458: * tests, this is broken. However the XML REC uses
6459: * it. Is the XML REC not well-formed ????
6460: * This is a hack to avoid this problem
6461: */
6462: list = xmlNewDocText(ctxt->myDoc, value);
6463: if (list != NULL) {
6464: if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6465: (ent->children == NULL)) {
6466: ent->children = list;
6467: ent->last = list;
6468: list->parent = (xmlNodePtr) ent;
6469: } else {
6470: xmlFreeNodeList(list);
6471: }
6472: } else if (list != NULL) {
6473: xmlFreeNodeList(list);
6474: }
1.181 daniel 6475: } else {
1.180 daniel 6476: /*
6477: * 4.3.2: An internal general parsed entity is well-formed
6478: * if its replacement text matches the production labeled
6479: * content.
6480: */
1.185 daniel 6481: if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6482: ctxt->depth++;
1.180 daniel 6483: ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
1.185 daniel 6484: ctxt->sax, NULL, ctxt->depth,
6485: value, &list);
6486: ctxt->depth--;
6487: } else if (ent->etype ==
6488: XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6489: ctxt->depth++;
1.180 daniel 6490: ret = xmlParseExternalEntity(ctxt->myDoc,
1.185 daniel 6491: ctxt->sax, NULL, ctxt->depth,
6492: ent->SystemID, ent->ExternalID, &list);
6493: ctxt->depth--;
6494: } else {
1.180 daniel 6495: ret = -1;
6496: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6497: ctxt->sax->error(ctxt->userData,
6498: "Internal: invalid entity type\n");
6499: }
1.185 daniel 6500: if (ret == XML_ERR_ENTITY_LOOP) {
6501: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6502: ctxt->sax->error(ctxt->userData,
6503: "Detected entity reference loop\n");
6504: ctxt->wellFormed = 0;
6505: ctxt->disableSAX = 1;
6506: ctxt->errNo = XML_ERR_ENTITY_LOOP;
6507: } else if ((ret == 0) && (list != NULL)) {
1.180 daniel 6508: if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6509: (ent->children == NULL)) {
6510: ent->children = list;
6511: while (list != NULL) {
6512: list->parent = (xmlNodePtr) ent;
6513: if (list->next == NULL)
6514: ent->last = list;
6515: list = list->next;
6516: }
6517: } else {
6518: xmlFreeNodeList(list);
6519: }
6520: } else if (ret > 0) {
6521: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6522: ctxt->sax->error(ctxt->userData,
6523: "Entity value required\n");
6524: ctxt->errNo = ret;
6525: ctxt->wellFormed = 0;
6526: ctxt->disableSAX = 1;
6527: } else if (list != NULL) {
6528: xmlFreeNodeList(list);
6529: }
6530: }
6531: }
1.113 daniel 6532: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
1.171 daniel 6533: (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
1.113 daniel 6534: /*
6535: * Create a node.
6536: */
6537: ctxt->sax->reference(ctxt->userData, ent->name);
6538: return;
6539: } else if (ctxt->replaceEntities) {
6540: xmlParserInputPtr input;
1.79 daniel 6541:
1.113 daniel 6542: input = xmlNewEntityInputStream(ctxt, ent);
6543: xmlPushInput(ctxt, input);
1.167 daniel 6544: if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
6545: (RAW == '<') && (NXT(1) == '?') &&
6546: (NXT(2) == 'x') && (NXT(3) == 'm') &&
6547: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 6548: xmlParseTextDecl(ctxt);
1.193 daniel 6549: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6550: /*
6551: * The XML REC instructs us to stop parsing right here
6552: */
6553: ctxt->instate = XML_PARSER_EOF;
6554: return;
6555: }
1.199 daniel 6556: if (input->standalone == 1) {
1.167 daniel 6557: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6558: ctxt->sax->error(ctxt->userData,
6559: "external parsed entities cannot be standalone\n");
6560: ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
6561: ctxt->wellFormed = 0;
1.180 daniel 6562: ctxt->disableSAX = 1;
1.167 daniel 6563: }
6564: }
1.179 daniel 6565: /*
6566: * !!! TODO: build the tree under the entity first
6567: * 1234
6568: */
1.113 daniel 6569: return;
6570: }
1.77 daniel 6571: }
6572: val = ent->content;
6573: if (val == NULL) return;
6574: /*
6575: * inline the entity.
6576: */
1.171 daniel 6577: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6578: (!ctxt->disableSAX))
1.77 daniel 6579: ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6580: }
1.24 daniel 6581: }
6582:
1.50 daniel 6583: /**
6584: * xmlParseEntityRef:
6585: * @ctxt: an XML parser context
6586: *
6587: * parse ENTITY references declarations
1.24 daniel 6588: *
6589: * [68] EntityRef ::= '&' Name ';'
1.68 daniel 6590: *
1.98 daniel 6591: * [ WFC: Entity Declared ]
6592: * In a document without any DTD, a document with only an internal DTD
6593: * subset which contains no parameter entity references, or a document
6594: * with "standalone='yes'", the Name given in the entity reference
6595: * must match that in an entity declaration, except that well-formed
6596: * documents need not declare any of the following entities: amp, lt,
6597: * gt, apos, quot. The declaration of a parameter entity must precede
6598: * any reference to it. Similarly, the declaration of a general entity
6599: * must precede any reference to it which appears in a default value in an
6600: * attribute-list declaration. Note that if entities are declared in the
6601: * external subset or in external parameter entities, a non-validating
6602: * processor is not obligated to read and process their declarations;
6603: * for such documents, the rule that an entity must be declared is a
6604: * well-formedness constraint only if standalone='yes'.
6605: *
6606: * [ WFC: Parsed Entity ]
6607: * An entity reference must not contain the name of an unparsed entity
6608: *
1.77 daniel 6609: * Returns the xmlEntityPtr if found, or NULL otherwise.
1.24 daniel 6610: */
1.77 daniel 6611: xmlEntityPtr
1.55 daniel 6612: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1.123 daniel 6613: xmlChar *name;
1.72 daniel 6614: xmlEntityPtr ent = NULL;
1.24 daniel 6615:
1.91 daniel 6616: GROW;
1.111 daniel 6617:
1.152 daniel 6618: if (RAW == '&') {
1.40 daniel 6619: NEXT;
1.24 daniel 6620: name = xmlParseName(ctxt);
6621: if (name == NULL) {
1.55 daniel 6622: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 6623: ctxt->sax->error(ctxt->userData,
6624: "xmlParseEntityRef: no name\n");
1.123 daniel 6625: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 6626: ctxt->wellFormed = 0;
1.180 daniel 6627: ctxt->disableSAX = 1;
1.24 daniel 6628: } else {
1.152 daniel 6629: if (RAW == ';') {
1.40 daniel 6630: NEXT;
1.24 daniel 6631: /*
1.77 daniel 6632: * Ask first SAX for entity resolution, otherwise try the
6633: * predefined set.
6634: */
6635: if (ctxt->sax != NULL) {
6636: if (ctxt->sax->getEntity != NULL)
6637: ent = ctxt->sax->getEntity(ctxt->userData, name);
6638: if (ent == NULL)
6639: ent = xmlGetPredefinedEntity(name);
6640: }
6641: /*
1.98 daniel 6642: * [ WFC: Entity Declared ]
6643: * In a document without any DTD, a document with only an
6644: * internal DTD subset which contains no parameter entity
6645: * references, or a document with "standalone='yes'", the
6646: * Name given in the entity reference must match that in an
6647: * entity declaration, except that well-formed documents
6648: * need not declare any of the following entities: amp, lt,
6649: * gt, apos, quot.
6650: * The declaration of a parameter entity must precede any
6651: * reference to it.
6652: * Similarly, the declaration of a general entity must
6653: * precede any reference to it which appears in a default
6654: * value in an attribute-list declaration. Note that if
6655: * entities are declared in the external subset or in
6656: * external parameter entities, a non-validating processor
6657: * is not obligated to read and process their declarations;
6658: * for such documents, the rule that an entity must be
6659: * declared is a well-formedness constraint only if
6660: * standalone='yes'.
1.59 daniel 6661: */
1.77 daniel 6662: if (ent == NULL) {
1.98 daniel 6663: if ((ctxt->standalone == 1) ||
6664: ((ctxt->hasExternalSubset == 0) &&
6665: (ctxt->hasPErefs == 0))) {
6666: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 6667: ctxt->sax->error(ctxt->userData,
6668: "Entity '%s' not defined\n", name);
1.123 daniel 6669: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.77 daniel 6670: ctxt->wellFormed = 0;
1.180 daniel 6671: ctxt->disableSAX = 1;
1.77 daniel 6672: } else {
1.98 daniel 6673: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6674: ctxt->sax->warning(ctxt->userData,
6675: "Entity '%s' not defined\n", name);
1.123 daniel 6676: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
1.59 daniel 6677: }
1.77 daniel 6678: }
1.59 daniel 6679:
6680: /*
1.98 daniel 6681: * [ WFC: Parsed Entity ]
6682: * An entity reference must not contain the name of an
6683: * unparsed entity
6684: */
1.159 daniel 6685: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.98 daniel 6686: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6687: ctxt->sax->error(ctxt->userData,
6688: "Entity reference to unparsed entity %s\n", name);
1.123 daniel 6689: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 6690: ctxt->wellFormed = 0;
1.180 daniel 6691: ctxt->disableSAX = 1;
1.98 daniel 6692: }
6693:
6694: /*
6695: * [ WFC: No External Entity References ]
6696: * Attribute values cannot contain direct or indirect
6697: * entity references to external entities.
6698: */
6699: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 6700: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.98 daniel 6701: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6702: ctxt->sax->error(ctxt->userData,
6703: "Attribute references external entity '%s'\n", name);
1.123 daniel 6704: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
1.98 daniel 6705: ctxt->wellFormed = 0;
1.180 daniel 6706: ctxt->disableSAX = 1;
1.98 daniel 6707: }
6708: /*
6709: * [ WFC: No < in Attribute Values ]
6710: * The replacement text of any entity referred to directly or
6711: * indirectly in an attribute value (other than "<") must
6712: * not contain a <.
1.59 daniel 6713: */
1.98 daniel 6714: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.116 daniel 6715: (ent != NULL) &&
6716: (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
1.98 daniel 6717: (ent->content != NULL) &&
6718: (xmlStrchr(ent->content, '<'))) {
6719: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6720: ctxt->sax->error(ctxt->userData,
6721: "'<' in entity '%s' is not allowed in attributes values\n", name);
1.123 daniel 6722: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.98 daniel 6723: ctxt->wellFormed = 0;
1.180 daniel 6724: ctxt->disableSAX = 1;
1.98 daniel 6725: }
6726:
6727: /*
6728: * Internal check, no parameter entities here ...
6729: */
6730: else {
1.159 daniel 6731: switch (ent->etype) {
1.59 daniel 6732: case XML_INTERNAL_PARAMETER_ENTITY:
6733: case XML_EXTERNAL_PARAMETER_ENTITY:
6734: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6735: ctxt->sax->error(ctxt->userData,
1.59 daniel 6736: "Attempt to reference the parameter entity '%s'\n", name);
1.123 daniel 6737: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
1.59 daniel 6738: ctxt->wellFormed = 0;
1.180 daniel 6739: ctxt->disableSAX = 1;
6740: break;
6741: default:
1.59 daniel 6742: break;
6743: }
6744: }
6745:
6746: /*
1.98 daniel 6747: * [ WFC: No Recursion ]
1.117 daniel 6748: * TODO A parsed entity must not contain a recursive reference
6749: * to itself, either directly or indirectly.
1.59 daniel 6750: */
1.77 daniel 6751:
1.24 daniel 6752: } else {
1.55 daniel 6753: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6754: ctxt->sax->error(ctxt->userData,
1.59 daniel 6755: "xmlParseEntityRef: expecting ';'\n");
1.123 daniel 6756: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.59 daniel 6757: ctxt->wellFormed = 0;
1.180 daniel 6758: ctxt->disableSAX = 1;
1.24 daniel 6759: }
1.119 daniel 6760: xmlFree(name);
1.24 daniel 6761: }
6762: }
1.77 daniel 6763: return(ent);
1.24 daniel 6764: }
1.135 daniel 6765: /**
6766: * xmlParseStringEntityRef:
6767: * @ctxt: an XML parser context
6768: * @str: a pointer to an index in the string
6769: *
6770: * parse ENTITY references declarations, but this version parses it from
6771: * a string value.
6772: *
6773: * [68] EntityRef ::= '&' Name ';'
6774: *
6775: * [ WFC: Entity Declared ]
6776: * In a document without any DTD, a document with only an internal DTD
6777: * subset which contains no parameter entity references, or a document
6778: * with "standalone='yes'", the Name given in the entity reference
6779: * must match that in an entity declaration, except that well-formed
6780: * documents need not declare any of the following entities: amp, lt,
6781: * gt, apos, quot. The declaration of a parameter entity must precede
6782: * any reference to it. Similarly, the declaration of a general entity
6783: * must precede any reference to it which appears in a default value in an
6784: * attribute-list declaration. Note that if entities are declared in the
6785: * external subset or in external parameter entities, a non-validating
6786: * processor is not obligated to read and process their declarations;
6787: * for such documents, the rule that an entity must be declared is a
6788: * well-formedness constraint only if standalone='yes'.
6789: *
6790: * [ WFC: Parsed Entity ]
6791: * An entity reference must not contain the name of an unparsed entity
6792: *
6793: * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6794: * is updated to the current location in the string.
6795: */
6796: xmlEntityPtr
6797: xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6798: xmlChar *name;
6799: const xmlChar *ptr;
6800: xmlChar cur;
6801: xmlEntityPtr ent = NULL;
6802:
1.156 daniel 6803: if ((str == NULL) || (*str == NULL))
6804: return(NULL);
1.135 daniel 6805: ptr = *str;
6806: cur = *ptr;
6807: if (cur == '&') {
6808: ptr++;
6809: cur = *ptr;
6810: name = xmlParseStringName(ctxt, &ptr);
6811: if (name == NULL) {
6812: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6813: ctxt->sax->error(ctxt->userData,
6814: "xmlParseEntityRef: no name\n");
6815: ctxt->errNo = XML_ERR_NAME_REQUIRED;
6816: ctxt->wellFormed = 0;
1.180 daniel 6817: ctxt->disableSAX = 1;
1.135 daniel 6818: } else {
1.185 daniel 6819: if (*ptr == ';') {
6820: ptr++;
1.135 daniel 6821: /*
6822: * Ask first SAX for entity resolution, otherwise try the
6823: * predefined set.
6824: */
6825: if (ctxt->sax != NULL) {
6826: if (ctxt->sax->getEntity != NULL)
6827: ent = ctxt->sax->getEntity(ctxt->userData, name);
6828: if (ent == NULL)
6829: ent = xmlGetPredefinedEntity(name);
6830: }
6831: /*
6832: * [ WFC: Entity Declared ]
6833: * In a document without any DTD, a document with only an
6834: * internal DTD subset which contains no parameter entity
6835: * references, or a document with "standalone='yes'", the
6836: * Name given in the entity reference must match that in an
6837: * entity declaration, except that well-formed documents
6838: * need not declare any of the following entities: amp, lt,
6839: * gt, apos, quot.
6840: * The declaration of a parameter entity must precede any
6841: * reference to it.
6842: * Similarly, the declaration of a general entity must
6843: * precede any reference to it which appears in a default
6844: * value in an attribute-list declaration. Note that if
6845: * entities are declared in the external subset or in
6846: * external parameter entities, a non-validating processor
6847: * is not obligated to read and process their declarations;
6848: * for such documents, the rule that an entity must be
6849: * declared is a well-formedness constraint only if
6850: * standalone='yes'.
6851: */
6852: if (ent == NULL) {
6853: if ((ctxt->standalone == 1) ||
6854: ((ctxt->hasExternalSubset == 0) &&
6855: (ctxt->hasPErefs == 0))) {
6856: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6857: ctxt->sax->error(ctxt->userData,
6858: "Entity '%s' not defined\n", name);
6859: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6860: ctxt->wellFormed = 0;
1.180 daniel 6861: ctxt->disableSAX = 1;
1.135 daniel 6862: } else {
6863: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6864: ctxt->sax->warning(ctxt->userData,
6865: "Entity '%s' not defined\n", name);
6866: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
6867: }
6868: }
6869:
6870: /*
6871: * [ WFC: Parsed Entity ]
6872: * An entity reference must not contain the name of an
6873: * unparsed entity
6874: */
1.159 daniel 6875: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.135 daniel 6876: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6877: ctxt->sax->error(ctxt->userData,
6878: "Entity reference to unparsed entity %s\n", name);
6879: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
6880: ctxt->wellFormed = 0;
1.180 daniel 6881: ctxt->disableSAX = 1;
1.135 daniel 6882: }
6883:
6884: /*
6885: * [ WFC: No External Entity References ]
6886: * Attribute values cannot contain direct or indirect
6887: * entity references to external entities.
6888: */
6889: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 6890: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.135 daniel 6891: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6892: ctxt->sax->error(ctxt->userData,
6893: "Attribute references external entity '%s'\n", name);
6894: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
6895: ctxt->wellFormed = 0;
1.180 daniel 6896: ctxt->disableSAX = 1;
1.135 daniel 6897: }
6898: /*
6899: * [ WFC: No < in Attribute Values ]
6900: * The replacement text of any entity referred to directly or
6901: * indirectly in an attribute value (other than "<") must
6902: * not contain a <.
6903: */
6904: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6905: (ent != NULL) &&
6906: (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
6907: (ent->content != NULL) &&
6908: (xmlStrchr(ent->content, '<'))) {
6909: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6910: ctxt->sax->error(ctxt->userData,
6911: "'<' in entity '%s' is not allowed in attributes values\n", name);
6912: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
6913: ctxt->wellFormed = 0;
1.180 daniel 6914: ctxt->disableSAX = 1;
1.135 daniel 6915: }
6916:
6917: /*
6918: * Internal check, no parameter entities here ...
6919: */
6920: else {
1.159 daniel 6921: switch (ent->etype) {
1.135 daniel 6922: case XML_INTERNAL_PARAMETER_ENTITY:
6923: case XML_EXTERNAL_PARAMETER_ENTITY:
6924: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6925: ctxt->sax->error(ctxt->userData,
6926: "Attempt to reference the parameter entity '%s'\n", name);
6927: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
6928: ctxt->wellFormed = 0;
1.180 daniel 6929: ctxt->disableSAX = 1;
6930: break;
6931: default:
1.135 daniel 6932: break;
6933: }
6934: }
6935:
6936: /*
6937: * [ WFC: No Recursion ]
6938: * TODO A parsed entity must not contain a recursive reference
6939: * to itself, either directly or indirectly.
6940: */
6941:
6942: } else {
6943: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6944: ctxt->sax->error(ctxt->userData,
6945: "xmlParseEntityRef: expecting ';'\n");
6946: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6947: ctxt->wellFormed = 0;
1.180 daniel 6948: ctxt->disableSAX = 1;
1.135 daniel 6949: }
6950: xmlFree(name);
6951: }
6952: }
1.185 daniel 6953: *str = ptr;
1.135 daniel 6954: return(ent);
6955: }
1.24 daniel 6956:
1.50 daniel 6957: /**
6958: * xmlParsePEReference:
6959: * @ctxt: an XML parser context
6960: *
6961: * parse PEReference declarations
1.77 daniel 6962: * The entity content is handled directly by pushing it's content as
6963: * a new input stream.
1.22 daniel 6964: *
6965: * [69] PEReference ::= '%' Name ';'
1.68 daniel 6966: *
1.98 daniel 6967: * [ WFC: No Recursion ]
6968: * TODO A parsed entity must not contain a recursive
6969: * reference to itself, either directly or indirectly.
6970: *
6971: * [ WFC: Entity Declared ]
6972: * In a document without any DTD, a document with only an internal DTD
6973: * subset which contains no parameter entity references, or a document
6974: * with "standalone='yes'", ... ... The declaration of a parameter
6975: * entity must precede any reference to it...
6976: *
6977: * [ VC: Entity Declared ]
6978: * In a document with an external subset or external parameter entities
6979: * with "standalone='no'", ... ... The declaration of a parameter entity
6980: * must precede any reference to it...
6981: *
6982: * [ WFC: In DTD ]
6983: * Parameter-entity references may only appear in the DTD.
6984: * NOTE: misleading but this is handled.
1.22 daniel 6985: */
1.77 daniel 6986: void
1.55 daniel 6987: xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 6988: xmlChar *name;
1.72 daniel 6989: xmlEntityPtr entity = NULL;
1.50 daniel 6990: xmlParserInputPtr input;
1.22 daniel 6991:
1.152 daniel 6992: if (RAW == '%') {
1.40 daniel 6993: NEXT;
1.22 daniel 6994: name = xmlParseName(ctxt);
6995: if (name == NULL) {
1.55 daniel 6996: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6997: ctxt->sax->error(ctxt->userData,
6998: "xmlParsePEReference: no name\n");
1.123 daniel 6999: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 7000: ctxt->wellFormed = 0;
1.180 daniel 7001: ctxt->disableSAX = 1;
1.22 daniel 7002: } else {
1.152 daniel 7003: if (RAW == ';') {
1.40 daniel 7004: NEXT;
1.98 daniel 7005: if ((ctxt->sax != NULL) &&
7006: (ctxt->sax->getParameterEntity != NULL))
7007: entity = ctxt->sax->getParameterEntity(ctxt->userData,
7008: name);
1.45 daniel 7009: if (entity == NULL) {
1.98 daniel 7010: /*
7011: * [ WFC: Entity Declared ]
7012: * In a document without any DTD, a document with only an
7013: * internal DTD subset which contains no parameter entity
7014: * references, or a document with "standalone='yes'", ...
7015: * ... The declaration of a parameter entity must precede
7016: * any reference to it...
7017: */
7018: if ((ctxt->standalone == 1) ||
7019: ((ctxt->hasExternalSubset == 0) &&
7020: (ctxt->hasPErefs == 0))) {
7021: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7022: ctxt->sax->error(ctxt->userData,
7023: "PEReference: %%%s; not found\n", name);
1.123 daniel 7024: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.98 daniel 7025: ctxt->wellFormed = 0;
1.180 daniel 7026: ctxt->disableSAX = 1;
1.98 daniel 7027: } else {
7028: /*
7029: * [ VC: Entity Declared ]
7030: * In a document with an external subset or external
7031: * parameter entities with "standalone='no'", ...
7032: * ... The declaration of a parameter entity must precede
7033: * any reference to it...
7034: */
7035: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7036: ctxt->sax->warning(ctxt->userData,
7037: "PEReference: %%%s; not found\n", name);
7038: ctxt->valid = 0;
7039: }
1.50 daniel 7040: } else {
1.98 daniel 7041: /*
7042: * Internal checking in case the entity quest barfed
7043: */
1.159 daniel 7044: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7045: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.98 daniel 7046: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7047: ctxt->sax->warning(ctxt->userData,
7048: "Internal: %%%s; is not a parameter entity\n", name);
7049: } else {
1.164 daniel 7050: /*
7051: * TODO !!!
7052: * handle the extra spaces added before and after
7053: * c.f. http://www.w3.org/TR/REC-xml#as-PE
7054: */
1.98 daniel 7055: input = xmlNewEntityInputStream(ctxt, entity);
7056: xmlPushInput(ctxt, input);
1.164 daniel 7057: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7058: (RAW == '<') && (NXT(1) == '?') &&
7059: (NXT(2) == 'x') && (NXT(3) == 'm') &&
7060: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 7061: xmlParseTextDecl(ctxt);
1.193 daniel 7062: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7063: /*
7064: * The XML REC instructs us to stop parsing
7065: * right here
7066: */
7067: ctxt->instate = XML_PARSER_EOF;
7068: xmlFree(name);
7069: return;
7070: }
1.164 daniel 7071: }
7072: if (ctxt->token == 0)
7073: ctxt->token = ' ';
1.98 daniel 7074: }
1.45 daniel 7075: }
1.98 daniel 7076: ctxt->hasPErefs = 1;
1.22 daniel 7077: } else {
1.55 daniel 7078: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7079: ctxt->sax->error(ctxt->userData,
1.59 daniel 7080: "xmlParsePEReference: expecting ';'\n");
1.123 daniel 7081: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.59 daniel 7082: ctxt->wellFormed = 0;
1.180 daniel 7083: ctxt->disableSAX = 1;
1.22 daniel 7084: }
1.119 daniel 7085: xmlFree(name);
1.3 veillard 7086: }
7087: }
7088: }
7089:
1.50 daniel 7090: /**
1.135 daniel 7091: * xmlParseStringPEReference:
7092: * @ctxt: an XML parser context
7093: * @str: a pointer to an index in the string
7094: *
7095: * parse PEReference declarations
7096: *
7097: * [69] PEReference ::= '%' Name ';'
7098: *
7099: * [ WFC: No Recursion ]
7100: * TODO A parsed entity must not contain a recursive
7101: * reference to itself, either directly or indirectly.
7102: *
7103: * [ WFC: Entity Declared ]
7104: * In a document without any DTD, a document with only an internal DTD
7105: * subset which contains no parameter entity references, or a document
7106: * with "standalone='yes'", ... ... The declaration of a parameter
7107: * entity must precede any reference to it...
7108: *
7109: * [ VC: Entity Declared ]
7110: * In a document with an external subset or external parameter entities
7111: * with "standalone='no'", ... ... The declaration of a parameter entity
7112: * must precede any reference to it...
7113: *
7114: * [ WFC: In DTD ]
7115: * Parameter-entity references may only appear in the DTD.
7116: * NOTE: misleading but this is handled.
7117: *
7118: * Returns the string of the entity content.
7119: * str is updated to the current value of the index
7120: */
7121: xmlEntityPtr
7122: xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7123: const xmlChar *ptr;
7124: xmlChar cur;
7125: xmlChar *name;
7126: xmlEntityPtr entity = NULL;
7127:
7128: if ((str == NULL) || (*str == NULL)) return(NULL);
7129: ptr = *str;
7130: cur = *ptr;
7131: if (cur == '%') {
7132: ptr++;
7133: cur = *ptr;
7134: name = xmlParseStringName(ctxt, &ptr);
7135: if (name == NULL) {
7136: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7137: ctxt->sax->error(ctxt->userData,
7138: "xmlParseStringPEReference: no name\n");
7139: ctxt->errNo = XML_ERR_NAME_REQUIRED;
7140: ctxt->wellFormed = 0;
1.180 daniel 7141: ctxt->disableSAX = 1;
1.135 daniel 7142: } else {
7143: cur = *ptr;
7144: if (cur == ';') {
7145: ptr++;
7146: cur = *ptr;
7147: if ((ctxt->sax != NULL) &&
7148: (ctxt->sax->getParameterEntity != NULL))
7149: entity = ctxt->sax->getParameterEntity(ctxt->userData,
7150: name);
7151: if (entity == NULL) {
7152: /*
7153: * [ WFC: Entity Declared ]
7154: * In a document without any DTD, a document with only an
7155: * internal DTD subset which contains no parameter entity
7156: * references, or a document with "standalone='yes'", ...
7157: * ... The declaration of a parameter entity must precede
7158: * any reference to it...
7159: */
7160: if ((ctxt->standalone == 1) ||
7161: ((ctxt->hasExternalSubset == 0) &&
7162: (ctxt->hasPErefs == 0))) {
7163: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7164: ctxt->sax->error(ctxt->userData,
7165: "PEReference: %%%s; not found\n", name);
7166: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
7167: ctxt->wellFormed = 0;
1.180 daniel 7168: ctxt->disableSAX = 1;
1.135 daniel 7169: } else {
7170: /*
7171: * [ VC: Entity Declared ]
7172: * In a document with an external subset or external
7173: * parameter entities with "standalone='no'", ...
7174: * ... The declaration of a parameter entity must
7175: * precede any reference to it...
7176: */
7177: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7178: ctxt->sax->warning(ctxt->userData,
7179: "PEReference: %%%s; not found\n", name);
7180: ctxt->valid = 0;
7181: }
7182: } else {
7183: /*
7184: * Internal checking in case the entity quest barfed
7185: */
1.159 daniel 7186: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7187: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.135 daniel 7188: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7189: ctxt->sax->warning(ctxt->userData,
7190: "Internal: %%%s; is not a parameter entity\n", name);
7191: }
7192: }
7193: ctxt->hasPErefs = 1;
7194: } else {
7195: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7196: ctxt->sax->error(ctxt->userData,
7197: "xmlParseStringPEReference: expecting ';'\n");
7198: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
7199: ctxt->wellFormed = 0;
1.180 daniel 7200: ctxt->disableSAX = 1;
1.135 daniel 7201: }
7202: xmlFree(name);
7203: }
7204: }
7205: *str = ptr;
7206: return(entity);
7207: }
7208:
7209: /**
1.181 daniel 7210: * xmlParseDocTypeDecl:
1.50 daniel 7211: * @ctxt: an XML parser context
7212: *
7213: * parse a DOCTYPE declaration
1.21 daniel 7214: *
1.22 daniel 7215: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7216: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.98 daniel 7217: *
7218: * [ VC: Root Element Type ]
1.99 daniel 7219: * The Name in the document type declaration must match the element
1.98 daniel 7220: * type of the root element.
1.21 daniel 7221: */
7222:
1.55 daniel 7223: void
7224: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.165 daniel 7225: xmlChar *name = NULL;
1.123 daniel 7226: xmlChar *ExternalID = NULL;
7227: xmlChar *URI = NULL;
1.21 daniel 7228:
7229: /*
7230: * We know that '<!DOCTYPE' has been detected.
7231: */
1.40 daniel 7232: SKIP(9);
1.21 daniel 7233:
1.42 daniel 7234: SKIP_BLANKS;
1.21 daniel 7235:
7236: /*
7237: * Parse the DOCTYPE name.
7238: */
7239: name = xmlParseName(ctxt);
7240: if (name == NULL) {
1.55 daniel 7241: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7242: ctxt->sax->error(ctxt->userData,
7243: "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.59 daniel 7244: ctxt->wellFormed = 0;
1.180 daniel 7245: ctxt->disableSAX = 1;
1.123 daniel 7246: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.21 daniel 7247: }
1.165 daniel 7248: ctxt->intSubName = name;
1.21 daniel 7249:
1.42 daniel 7250: SKIP_BLANKS;
1.21 daniel 7251:
7252: /*
1.22 daniel 7253: * Check for SystemID and ExternalID
7254: */
1.67 daniel 7255: URI = xmlParseExternalID(ctxt, &ExternalID, 1);
1.98 daniel 7256:
7257: if ((URI != NULL) || (ExternalID != NULL)) {
7258: ctxt->hasExternalSubset = 1;
7259: }
1.165 daniel 7260: ctxt->extSubURI = URI;
7261: ctxt->extSubSystem = ExternalID;
1.98 daniel 7262:
1.42 daniel 7263: SKIP_BLANKS;
1.36 daniel 7264:
1.76 daniel 7265: /*
1.165 daniel 7266: * Create and update the internal subset.
1.76 daniel 7267: */
1.171 daniel 7268: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7269: (!ctxt->disableSAX))
1.74 daniel 7270: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
1.22 daniel 7271:
7272: /*
1.140 daniel 7273: * Is there any internal subset declarations ?
7274: * they are handled separately in xmlParseInternalSubset()
7275: */
1.152 daniel 7276: if (RAW == '[')
1.140 daniel 7277: return;
7278:
7279: /*
7280: * We should be at the end of the DOCTYPE declaration.
7281: */
1.152 daniel 7282: if (RAW != '>') {
1.140 daniel 7283: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7284: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
7285: ctxt->wellFormed = 0;
1.180 daniel 7286: ctxt->disableSAX = 1;
1.140 daniel 7287: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
7288: }
7289: NEXT;
7290: }
7291:
7292: /**
1.181 daniel 7293: * xmlParseInternalsubset:
1.140 daniel 7294: * @ctxt: an XML parser context
7295: *
7296: * parse the internal subset declaration
7297: *
7298: * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7299: */
7300:
7301: void
7302: xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7303: /*
1.22 daniel 7304: * Is there any DTD definition ?
7305: */
1.152 daniel 7306: if (RAW == '[') {
1.96 daniel 7307: ctxt->instate = XML_PARSER_DTD;
1.40 daniel 7308: NEXT;
1.22 daniel 7309: /*
7310: * Parse the succession of Markup declarations and
7311: * PEReferences.
7312: * Subsequence (markupdecl | PEReference | S)*
7313: */
1.152 daniel 7314: while (RAW != ']') {
1.123 daniel 7315: const xmlChar *check = CUR_PTR;
1.115 daniel 7316: int cons = ctxt->input->consumed;
1.22 daniel 7317:
1.42 daniel 7318: SKIP_BLANKS;
1.22 daniel 7319: xmlParseMarkupDecl(ctxt);
1.50 daniel 7320: xmlParsePEReference(ctxt);
1.22 daniel 7321:
1.115 daniel 7322: /*
7323: * Pop-up of finished entities.
7324: */
1.152 daniel 7325: while ((RAW == 0) && (ctxt->inputNr > 1))
1.115 daniel 7326: xmlPopInput(ctxt);
7327:
1.118 daniel 7328: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
1.55 daniel 7329: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7330: ctxt->sax->error(ctxt->userData,
1.140 daniel 7331: "xmlParseInternalSubset: error detected in Markup declaration\n");
1.59 daniel 7332: ctxt->wellFormed = 0;
1.180 daniel 7333: ctxt->disableSAX = 1;
1.123 daniel 7334: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.22 daniel 7335: break;
7336: }
7337: }
1.152 daniel 7338: if (RAW == ']') NEXT;
1.22 daniel 7339: }
7340:
7341: /*
7342: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 7343: */
1.152 daniel 7344: if (RAW != '>') {
1.55 daniel 7345: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7346: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
1.59 daniel 7347: ctxt->wellFormed = 0;
1.180 daniel 7348: ctxt->disableSAX = 1;
1.123 daniel 7349: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
1.21 daniel 7350: }
1.40 daniel 7351: NEXT;
1.21 daniel 7352: }
7353:
1.50 daniel 7354: /**
7355: * xmlParseAttribute:
7356: * @ctxt: an XML parser context
1.123 daniel 7357: * @value: a xmlChar ** used to store the value of the attribute
1.50 daniel 7358: *
7359: * parse an attribute
1.3 veillard 7360: *
1.22 daniel 7361: * [41] Attribute ::= Name Eq AttValue
7362: *
1.98 daniel 7363: * [ WFC: No External Entity References ]
7364: * Attribute values cannot contain direct or indirect entity references
7365: * to external entities.
7366: *
7367: * [ WFC: No < in Attribute Values ]
7368: * The replacement text of any entity referred to directly or indirectly in
7369: * an attribute value (other than "<") must not contain a <.
7370: *
7371: * [ VC: Attribute Value Type ]
1.117 daniel 7372: * The attribute must have been declared; the value must be of the type
1.99 daniel 7373: * declared for it.
1.98 daniel 7374: *
1.22 daniel 7375: * [25] Eq ::= S? '=' S?
7376: *
1.29 daniel 7377: * With namespace:
7378: *
7379: * [NS 11] Attribute ::= QName Eq AttValue
1.43 daniel 7380: *
7381: * Also the case QName == xmlns:??? is handled independently as a namespace
7382: * definition.
1.69 daniel 7383: *
1.72 daniel 7384: * Returns the attribute name, and the value in *value.
1.3 veillard 7385: */
7386:
1.123 daniel 7387: xmlChar *
7388: xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
7389: xmlChar *name, *val;
1.3 veillard 7390:
1.72 daniel 7391: *value = NULL;
7392: name = xmlParseName(ctxt);
1.22 daniel 7393: if (name == NULL) {
1.55 daniel 7394: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7395: ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
1.59 daniel 7396: ctxt->wellFormed = 0;
1.180 daniel 7397: ctxt->disableSAX = 1;
1.123 daniel 7398: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.52 daniel 7399: return(NULL);
1.3 veillard 7400: }
7401:
7402: /*
1.29 daniel 7403: * read the value
1.3 veillard 7404: */
1.42 daniel 7405: SKIP_BLANKS;
1.152 daniel 7406: if (RAW == '=') {
1.40 daniel 7407: NEXT;
1.42 daniel 7408: SKIP_BLANKS;
1.72 daniel 7409: val = xmlParseAttValue(ctxt);
1.96 daniel 7410: ctxt->instate = XML_PARSER_CONTENT;
1.29 daniel 7411: } else {
1.55 daniel 7412: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7413: ctxt->sax->error(ctxt->userData,
1.59 daniel 7414: "Specification mandate value for attribute %s\n", name);
1.123 daniel 7415: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
1.59 daniel 7416: ctxt->wellFormed = 0;
1.180 daniel 7417: ctxt->disableSAX = 1;
1.170 daniel 7418: xmlFree(name);
1.52 daniel 7419: return(NULL);
1.43 daniel 7420: }
7421:
1.172 daniel 7422: /*
7423: * Check that xml:lang conforms to the specification
7424: */
7425: if (!xmlStrcmp(name, BAD_CAST "xml:lang")) {
7426: if (!xmlCheckLanguageID(val)) {
7427: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7428: ctxt->sax->error(ctxt->userData,
7429: "Invalid value for xml:lang : %s\n", val);
7430: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
7431: ctxt->wellFormed = 0;
1.180 daniel 7432: ctxt->disableSAX = 1;
1.172 daniel 7433: }
7434: }
7435:
1.176 daniel 7436: /*
7437: * Check that xml:space conforms to the specification
7438: */
7439: if (!xmlStrcmp(name, BAD_CAST "xml:space")) {
7440: if (!xmlStrcmp(val, BAD_CAST "default"))
7441: *(ctxt->space) = 0;
7442: else if (!xmlStrcmp(val, BAD_CAST "preserve"))
7443: *(ctxt->space) = 1;
7444: else {
7445: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7446: ctxt->sax->error(ctxt->userData,
7447: "Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
7448: val);
7449: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
7450: ctxt->wellFormed = 0;
1.180 daniel 7451: ctxt->disableSAX = 1;
1.176 daniel 7452: }
7453: }
7454:
1.72 daniel 7455: *value = val;
7456: return(name);
1.3 veillard 7457: }
7458:
1.50 daniel 7459: /**
7460: * xmlParseStartTag:
7461: * @ctxt: an XML parser context
7462: *
7463: * parse a start of tag either for rule element or
7464: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 7465: *
7466: * [40] STag ::= '<' Name (S Attribute)* S? '>'
7467: *
1.98 daniel 7468: * [ WFC: Unique Att Spec ]
7469: * No attribute name may appear more than once in the same start-tag or
7470: * empty-element tag.
7471: *
1.29 daniel 7472: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7473: *
1.98 daniel 7474: * [ WFC: Unique Att Spec ]
7475: * No attribute name may appear more than once in the same start-tag or
7476: * empty-element tag.
7477: *
1.29 daniel 7478: * With namespace:
7479: *
7480: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7481: *
7482: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.83 daniel 7483: *
1.192 daniel 7484: * Returns the element name parsed
1.2 veillard 7485: */
7486:
1.123 daniel 7487: xmlChar *
1.69 daniel 7488: xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 7489: xmlChar *name;
7490: xmlChar *attname;
7491: xmlChar *attvalue;
7492: const xmlChar **atts = NULL;
1.72 daniel 7493: int nbatts = 0;
7494: int maxatts = 0;
7495: int i;
1.2 veillard 7496:
1.152 daniel 7497: if (RAW != '<') return(NULL);
1.40 daniel 7498: NEXT;
1.3 veillard 7499:
1.72 daniel 7500: name = xmlParseName(ctxt);
1.59 daniel 7501: if (name == NULL) {
7502: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7503: ctxt->sax->error(ctxt->userData,
1.59 daniel 7504: "xmlParseStartTag: invalid element name\n");
1.123 daniel 7505: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 7506: ctxt->wellFormed = 0;
1.180 daniel 7507: ctxt->disableSAX = 1;
1.83 daniel 7508: return(NULL);
1.50 daniel 7509: }
7510:
7511: /*
1.3 veillard 7512: * Now parse the attributes, it ends up with the ending
7513: *
7514: * (S Attribute)* S?
7515: */
1.42 daniel 7516: SKIP_BLANKS;
1.91 daniel 7517: GROW;
1.168 daniel 7518:
1.153 daniel 7519: while ((IS_CHAR(RAW)) &&
1.152 daniel 7520: (RAW != '>') &&
7521: ((RAW != '/') || (NXT(1) != '>'))) {
1.123 daniel 7522: const xmlChar *q = CUR_PTR;
1.91 daniel 7523: int cons = ctxt->input->consumed;
1.29 daniel 7524:
1.72 daniel 7525: attname = xmlParseAttribute(ctxt, &attvalue);
7526: if ((attname != NULL) && (attvalue != NULL)) {
7527: /*
1.98 daniel 7528: * [ WFC: Unique Att Spec ]
7529: * No attribute name may appear more than once in the same
7530: * start-tag or empty-element tag.
1.72 daniel 7531: */
7532: for (i = 0; i < nbatts;i += 2) {
7533: if (!xmlStrcmp(atts[i], attname)) {
7534: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 7535: ctxt->sax->error(ctxt->userData,
7536: "Attribute %s redefined\n",
7537: attname);
1.72 daniel 7538: ctxt->wellFormed = 0;
1.180 daniel 7539: ctxt->disableSAX = 1;
1.123 daniel 7540: ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
1.119 daniel 7541: xmlFree(attname);
7542: xmlFree(attvalue);
1.98 daniel 7543: goto failed;
1.72 daniel 7544: }
7545: }
7546:
7547: /*
7548: * Add the pair to atts
7549: */
7550: if (atts == NULL) {
7551: maxatts = 10;
1.123 daniel 7552: atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
1.72 daniel 7553: if (atts == NULL) {
1.86 daniel 7554: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 7555: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 7556: return(NULL);
1.72 daniel 7557: }
1.127 daniel 7558: } else if (nbatts + 4 > maxatts) {
1.72 daniel 7559: maxatts *= 2;
1.123 daniel 7560: atts = (const xmlChar **) xmlRealloc(atts,
7561: maxatts * sizeof(xmlChar *));
1.72 daniel 7562: if (atts == NULL) {
1.86 daniel 7563: fprintf(stderr, "realloc of %ld byte failed\n",
1.123 daniel 7564: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 7565: return(NULL);
1.72 daniel 7566: }
7567: }
7568: atts[nbatts++] = attname;
7569: atts[nbatts++] = attvalue;
7570: atts[nbatts] = NULL;
7571: atts[nbatts + 1] = NULL;
1.176 daniel 7572: } else {
7573: if (attname != NULL)
7574: xmlFree(attname);
7575: if (attvalue != NULL)
7576: xmlFree(attvalue);
1.72 daniel 7577: }
7578:
1.116 daniel 7579: failed:
1.168 daniel 7580:
7581: if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7582: break;
7583: if (!IS_BLANK(RAW)) {
7584: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7585: ctxt->sax->error(ctxt->userData,
7586: "attributes construct error\n");
7587: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7588: ctxt->wellFormed = 0;
1.180 daniel 7589: ctxt->disableSAX = 1;
1.168 daniel 7590: }
1.42 daniel 7591: SKIP_BLANKS;
1.91 daniel 7592: if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
1.55 daniel 7593: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7594: ctxt->sax->error(ctxt->userData,
1.31 daniel 7595: "xmlParseStartTag: problem parsing attributes\n");
1.123 daniel 7596: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 7597: ctxt->wellFormed = 0;
1.180 daniel 7598: ctxt->disableSAX = 1;
1.29 daniel 7599: break;
1.3 veillard 7600: }
1.91 daniel 7601: GROW;
1.3 veillard 7602: }
7603:
1.43 daniel 7604: /*
1.72 daniel 7605: * SAX: Start of Element !
1.43 daniel 7606: */
1.171 daniel 7607: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
7608: (!ctxt->disableSAX))
1.74 daniel 7609: ctxt->sax->startElement(ctxt->userData, name, atts);
1.43 daniel 7610:
1.72 daniel 7611: if (atts != NULL) {
1.123 daniel 7612: for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
1.119 daniel 7613: xmlFree(atts);
1.72 daniel 7614: }
1.83 daniel 7615: return(name);
1.3 veillard 7616: }
7617:
1.50 daniel 7618: /**
7619: * xmlParseEndTag:
7620: * @ctxt: an XML parser context
7621: *
7622: * parse an end of tag
1.27 daniel 7623: *
7624: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 7625: *
7626: * With namespace
7627: *
1.72 daniel 7628: * [NS 9] ETag ::= '</' QName S? '>'
1.7 veillard 7629: */
7630:
1.55 daniel 7631: void
1.140 daniel 7632: xmlParseEndTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 7633: xmlChar *name;
1.140 daniel 7634: xmlChar *oldname;
1.7 veillard 7635:
1.91 daniel 7636: GROW;
1.152 daniel 7637: if ((RAW != '<') || (NXT(1) != '/')) {
1.55 daniel 7638: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7639: ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
1.59 daniel 7640: ctxt->wellFormed = 0;
1.180 daniel 7641: ctxt->disableSAX = 1;
1.123 daniel 7642: ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
1.27 daniel 7643: return;
7644: }
1.40 daniel 7645: SKIP(2);
1.7 veillard 7646:
1.72 daniel 7647: name = xmlParseName(ctxt);
1.7 veillard 7648:
7649: /*
7650: * We should definitely be at the ending "S? '>'" part
7651: */
1.91 daniel 7652: GROW;
1.42 daniel 7653: SKIP_BLANKS;
1.153 daniel 7654: if ((!IS_CHAR(RAW)) || (RAW != '>')) {
1.55 daniel 7655: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7656: ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
1.123 daniel 7657: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.59 daniel 7658: ctxt->wellFormed = 0;
1.180 daniel 7659: ctxt->disableSAX = 1;
1.7 veillard 7660: } else
1.40 daniel 7661: NEXT;
1.7 veillard 7662:
1.72 daniel 7663: /*
1.98 daniel 7664: * [ WFC: Element Type Match ]
7665: * The Name in an element's end-tag must match the element type in the
7666: * start-tag.
7667: *
1.83 daniel 7668: */
1.147 daniel 7669: if ((name == NULL) || (ctxt->name == NULL) ||
7670: (xmlStrcmp(name, ctxt->name))) {
7671: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
7672: if ((name != NULL) && (ctxt->name != NULL)) {
7673: ctxt->sax->error(ctxt->userData,
7674: "Opening and ending tag mismatch: %s and %s\n",
7675: ctxt->name, name);
7676: } else if (ctxt->name != NULL) {
7677: ctxt->sax->error(ctxt->userData,
7678: "Ending tag eror for: %s\n", ctxt->name);
7679: } else {
7680: ctxt->sax->error(ctxt->userData,
7681: "Ending tag error: internal error ???\n");
7682: }
1.122 daniel 7683:
1.147 daniel 7684: }
1.123 daniel 7685: ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
1.83 daniel 7686: ctxt->wellFormed = 0;
1.180 daniel 7687: ctxt->disableSAX = 1;
1.83 daniel 7688: }
7689:
7690: /*
1.72 daniel 7691: * SAX: End of Tag
7692: */
1.171 daniel 7693: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7694: (!ctxt->disableSAX))
1.74 daniel 7695: ctxt->sax->endElement(ctxt->userData, name);
1.72 daniel 7696:
7697: if (name != NULL)
1.119 daniel 7698: xmlFree(name);
1.140 daniel 7699: oldname = namePop(ctxt);
1.176 daniel 7700: spacePop(ctxt);
1.140 daniel 7701: if (oldname != NULL) {
7702: #ifdef DEBUG_STACK
7703: fprintf(stderr,"Close: popped %s\n", oldname);
7704: #endif
7705: xmlFree(oldname);
7706: }
1.7 veillard 7707: return;
7708: }
7709:
1.50 daniel 7710: /**
7711: * xmlParseCDSect:
7712: * @ctxt: an XML parser context
7713: *
7714: * Parse escaped pure raw content.
1.29 daniel 7715: *
7716: * [18] CDSect ::= CDStart CData CDEnd
7717: *
7718: * [19] CDStart ::= '<![CDATA['
7719: *
7720: * [20] Data ::= (Char* - (Char* ']]>' Char*))
7721: *
7722: * [21] CDEnd ::= ']]>'
1.3 veillard 7723: */
1.55 daniel 7724: void
7725: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.135 daniel 7726: xmlChar *buf = NULL;
7727: int len = 0;
1.140 daniel 7728: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 7729: int r, rl;
7730: int s, sl;
7731: int cur, l;
1.3 veillard 7732:
1.106 daniel 7733: if ((NXT(0) == '<') && (NXT(1) == '!') &&
1.40 daniel 7734: (NXT(2) == '[') && (NXT(3) == 'C') &&
7735: (NXT(4) == 'D') && (NXT(5) == 'A') &&
7736: (NXT(6) == 'T') && (NXT(7) == 'A') &&
7737: (NXT(8) == '[')) {
7738: SKIP(9);
1.29 daniel 7739: } else
1.45 daniel 7740: return;
1.109 daniel 7741:
7742: ctxt->instate = XML_PARSER_CDATA_SECTION;
1.152 daniel 7743: r = CUR_CHAR(rl);
7744: if (!IS_CHAR(r)) {
1.55 daniel 7745: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7746: ctxt->sax->error(ctxt->userData,
1.135 daniel 7747: "CData section not finished\n");
1.59 daniel 7748: ctxt->wellFormed = 0;
1.180 daniel 7749: ctxt->disableSAX = 1;
1.123 daniel 7750: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.109 daniel 7751: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 7752: return;
1.3 veillard 7753: }
1.152 daniel 7754: NEXTL(rl);
7755: s = CUR_CHAR(sl);
7756: if (!IS_CHAR(s)) {
1.55 daniel 7757: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7758: ctxt->sax->error(ctxt->userData,
1.135 daniel 7759: "CData section not finished\n");
1.123 daniel 7760: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.59 daniel 7761: ctxt->wellFormed = 0;
1.180 daniel 7762: ctxt->disableSAX = 1;
1.109 daniel 7763: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 7764: return;
1.3 veillard 7765: }
1.152 daniel 7766: NEXTL(sl);
7767: cur = CUR_CHAR(l);
1.135 daniel 7768: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7769: if (buf == NULL) {
7770: fprintf(stderr, "malloc of %d byte failed\n", size);
7771: return;
7772: }
1.108 veillard 7773: while (IS_CHAR(cur) &&
1.110 daniel 7774: ((r != ']') || (s != ']') || (cur != '>'))) {
1.152 daniel 7775: if (len + 5 >= size) {
1.135 daniel 7776: size *= 2;
7777: buf = xmlRealloc(buf, size * sizeof(xmlChar));
7778: if (buf == NULL) {
7779: fprintf(stderr, "realloc of %d byte failed\n", size);
7780: return;
7781: }
7782: }
1.152 daniel 7783: COPY_BUF(rl,buf,len,r);
1.110 daniel 7784: r = s;
1.152 daniel 7785: rl = sl;
1.110 daniel 7786: s = cur;
1.152 daniel 7787: sl = l;
7788: NEXTL(l);
7789: cur = CUR_CHAR(l);
1.3 veillard 7790: }
1.135 daniel 7791: buf[len] = 0;
1.109 daniel 7792: ctxt->instate = XML_PARSER_CONTENT;
1.152 daniel 7793: if (cur != '>') {
1.55 daniel 7794: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7795: ctxt->sax->error(ctxt->userData,
1.135 daniel 7796: "CData section not finished\n%.50s\n", buf);
1.123 daniel 7797: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.59 daniel 7798: ctxt->wellFormed = 0;
1.180 daniel 7799: ctxt->disableSAX = 1;
1.135 daniel 7800: xmlFree(buf);
1.45 daniel 7801: return;
1.3 veillard 7802: }
1.152 daniel 7803: NEXTL(l);
1.16 daniel 7804:
1.45 daniel 7805: /*
1.135 daniel 7806: * Ok the buffer is to be consumed as cdata.
1.45 daniel 7807: */
1.171 daniel 7808: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.107 daniel 7809: if (ctxt->sax->cdataBlock != NULL)
1.135 daniel 7810: ctxt->sax->cdataBlock(ctxt->userData, buf, len);
1.45 daniel 7811: }
1.135 daniel 7812: xmlFree(buf);
1.2 veillard 7813: }
7814:
1.50 daniel 7815: /**
7816: * xmlParseContent:
7817: * @ctxt: an XML parser context
7818: *
7819: * Parse a content:
1.2 veillard 7820: *
1.27 daniel 7821: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 7822: */
7823:
1.55 daniel 7824: void
7825: xmlParseContent(xmlParserCtxtPtr ctxt) {
1.97 daniel 7826: GROW;
1.176 daniel 7827: while (((RAW != 0) || (ctxt->token != 0)) &&
7828: ((RAW != '<') || (NXT(1) != '/'))) {
1.123 daniel 7829: const xmlChar *test = CUR_PTR;
1.91 daniel 7830: int cons = ctxt->input->consumed;
1.123 daniel 7831: xmlChar tok = ctxt->token;
1.27 daniel 7832:
7833: /*
1.152 daniel 7834: * Handle possible processed charrefs.
7835: */
7836: if (ctxt->token != 0) {
7837: xmlParseCharData(ctxt, 0);
7838: }
7839: /*
1.27 daniel 7840: * First case : a Processing Instruction.
7841: */
1.152 daniel 7842: else if ((RAW == '<') && (NXT(1) == '?')) {
1.27 daniel 7843: xmlParsePI(ctxt);
7844: }
1.72 daniel 7845:
1.27 daniel 7846: /*
7847: * Second case : a CDSection
7848: */
1.152 daniel 7849: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7850: (NXT(2) == '[') && (NXT(3) == 'C') &&
7851: (NXT(4) == 'D') && (NXT(5) == 'A') &&
7852: (NXT(6) == 'T') && (NXT(7) == 'A') &&
7853: (NXT(8) == '[')) {
1.45 daniel 7854: xmlParseCDSect(ctxt);
1.27 daniel 7855: }
1.72 daniel 7856:
1.27 daniel 7857: /*
7858: * Third case : a comment
7859: */
1.152 daniel 7860: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7861: (NXT(2) == '-') && (NXT(3) == '-')) {
1.114 daniel 7862: xmlParseComment(ctxt);
1.97 daniel 7863: ctxt->instate = XML_PARSER_CONTENT;
1.27 daniel 7864: }
1.72 daniel 7865:
1.27 daniel 7866: /*
7867: * Fourth case : a sub-element.
7868: */
1.152 daniel 7869: else if (RAW == '<') {
1.72 daniel 7870: xmlParseElement(ctxt);
1.45 daniel 7871: }
1.72 daniel 7872:
1.45 daniel 7873: /*
1.50 daniel 7874: * Fifth case : a reference. If if has not been resolved,
7875: * parsing returns it's Name, create the node
1.45 daniel 7876: */
1.97 daniel 7877:
1.152 daniel 7878: else if (RAW == '&') {
1.77 daniel 7879: xmlParseReference(ctxt);
1.27 daniel 7880: }
1.72 daniel 7881:
1.27 daniel 7882: /*
7883: * Last case, text. Note that References are handled directly.
7884: */
7885: else {
1.45 daniel 7886: xmlParseCharData(ctxt, 0);
1.3 veillard 7887: }
1.14 veillard 7888:
1.91 daniel 7889: GROW;
1.14 veillard 7890: /*
1.45 daniel 7891: * Pop-up of finished entities.
1.14 veillard 7892: */
1.152 daniel 7893: while ((RAW == 0) && (ctxt->inputNr > 1))
1.69 daniel 7894: xmlPopInput(ctxt);
1.135 daniel 7895: SHRINK;
1.45 daniel 7896:
1.113 daniel 7897: if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
7898: (tok == ctxt->token)) {
1.55 daniel 7899: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7900: ctxt->sax->error(ctxt->userData,
1.59 daniel 7901: "detected an error in element content\n");
1.123 daniel 7902: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 7903: ctxt->wellFormed = 0;
1.180 daniel 7904: ctxt->disableSAX = 1;
1.29 daniel 7905: break;
7906: }
1.3 veillard 7907: }
1.2 veillard 7908: }
7909:
1.50 daniel 7910: /**
7911: * xmlParseElement:
7912: * @ctxt: an XML parser context
7913: *
7914: * parse an XML element, this is highly recursive
1.26 daniel 7915: *
7916: * [39] element ::= EmptyElemTag | STag content ETag
7917: *
1.98 daniel 7918: * [ WFC: Element Type Match ]
7919: * The Name in an element's end-tag must match the element type in the
7920: * start-tag.
7921: *
7922: * [ VC: Element Valid ]
1.117 daniel 7923: * An element is valid if there is a declaration matching elementdecl
1.99 daniel 7924: * where the Name matches the element type and one of the following holds:
7925: * - The declaration matches EMPTY and the element has no content.
7926: * - The declaration matches children and the sequence of child elements
7927: * belongs to the language generated by the regular expression in the
7928: * content model, with optional white space (characters matching the
7929: * nonterminal S) between each pair of child elements.
7930: * - The declaration matches Mixed and the content consists of character
7931: * data and child elements whose types match names in the content model.
7932: * - The declaration matches ANY, and the types of any child elements have
7933: * been declared.
1.2 veillard 7934: */
1.26 daniel 7935:
1.72 daniel 7936: void
1.69 daniel 7937: xmlParseElement(xmlParserCtxtPtr ctxt) {
1.123 daniel 7938: const xmlChar *openTag = CUR_PTR;
7939: xmlChar *name;
1.140 daniel 7940: xmlChar *oldname;
1.32 daniel 7941: xmlParserNodeInfo node_info;
1.118 daniel 7942: xmlNodePtr ret;
1.2 veillard 7943:
1.32 daniel 7944: /* Capture start position */
1.118 daniel 7945: if (ctxt->record_info) {
7946: node_info.begin_pos = ctxt->input->consumed +
7947: (CUR_PTR - ctxt->input->base);
7948: node_info.begin_line = ctxt->input->line;
7949: }
1.32 daniel 7950:
1.176 daniel 7951: if (ctxt->spaceNr == 0)
7952: spacePush(ctxt, -1);
7953: else
7954: spacePush(ctxt, *ctxt->space);
7955:
1.83 daniel 7956: name = xmlParseStartTag(ctxt);
7957: if (name == NULL) {
1.176 daniel 7958: spacePop(ctxt);
1.83 daniel 7959: return;
7960: }
1.140 daniel 7961: namePush(ctxt, name);
1.118 daniel 7962: ret = ctxt->node;
1.2 veillard 7963:
7964: /*
1.99 daniel 7965: * [ VC: Root Element Type ]
7966: * The Name in the document type declaration must match the element
7967: * type of the root element.
7968: */
1.105 daniel 7969: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 7970: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.102 daniel 7971: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
1.99 daniel 7972:
7973: /*
1.2 veillard 7974: * Check for an Empty Element.
7975: */
1.152 daniel 7976: if ((RAW == '/') && (NXT(1) == '>')) {
1.40 daniel 7977: SKIP(2);
1.171 daniel 7978: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7979: (!ctxt->disableSAX))
1.83 daniel 7980: ctxt->sax->endElement(ctxt->userData, name);
1.140 daniel 7981: oldname = namePop(ctxt);
1.176 daniel 7982: spacePop(ctxt);
1.140 daniel 7983: if (oldname != NULL) {
7984: #ifdef DEBUG_STACK
7985: fprintf(stderr,"Close: popped %s\n", oldname);
7986: #endif
7987: xmlFree(oldname);
7988: }
1.72 daniel 7989: return;
1.2 veillard 7990: }
1.152 daniel 7991: if (RAW == '>') {
1.91 daniel 7992: NEXT;
7993: } else {
1.55 daniel 7994: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7995: ctxt->sax->error(ctxt->userData,
7996: "Couldn't find end of Start Tag\n%.30s\n",
1.57 daniel 7997: openTag);
1.59 daniel 7998: ctxt->wellFormed = 0;
1.180 daniel 7999: ctxt->disableSAX = 1;
1.123 daniel 8000: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.45 daniel 8001:
8002: /*
8003: * end of parsing of this node.
8004: */
8005: nodePop(ctxt);
1.140 daniel 8006: oldname = namePop(ctxt);
1.176 daniel 8007: spacePop(ctxt);
1.140 daniel 8008: if (oldname != NULL) {
8009: #ifdef DEBUG_STACK
8010: fprintf(stderr,"Close: popped %s\n", oldname);
8011: #endif
8012: xmlFree(oldname);
8013: }
1.118 daniel 8014:
8015: /*
8016: * Capture end position and add node
8017: */
8018: if ( ret != NULL && ctxt->record_info ) {
8019: node_info.end_pos = ctxt->input->consumed +
8020: (CUR_PTR - ctxt->input->base);
8021: node_info.end_line = ctxt->input->line;
8022: node_info.node = ret;
8023: xmlParserAddNodeInfo(ctxt, &node_info);
8024: }
1.72 daniel 8025: return;
1.2 veillard 8026: }
8027:
8028: /*
8029: * Parse the content of the element:
8030: */
1.45 daniel 8031: xmlParseContent(ctxt);
1.153 daniel 8032: if (!IS_CHAR(RAW)) {
1.55 daniel 8033: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8034: ctxt->sax->error(ctxt->userData,
1.57 daniel 8035: "Premature end of data in tag %.30s\n", openTag);
1.59 daniel 8036: ctxt->wellFormed = 0;
1.180 daniel 8037: ctxt->disableSAX = 1;
1.123 daniel 8038: ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
1.45 daniel 8039:
8040: /*
8041: * end of parsing of this node.
8042: */
8043: nodePop(ctxt);
1.140 daniel 8044: oldname = namePop(ctxt);
1.176 daniel 8045: spacePop(ctxt);
1.140 daniel 8046: if (oldname != NULL) {
8047: #ifdef DEBUG_STACK
8048: fprintf(stderr,"Close: popped %s\n", oldname);
8049: #endif
8050: xmlFree(oldname);
8051: }
1.72 daniel 8052: return;
1.2 veillard 8053: }
8054:
8055: /*
1.27 daniel 8056: * parse the end of tag: '</' should be here.
1.2 veillard 8057: */
1.140 daniel 8058: xmlParseEndTag(ctxt);
1.118 daniel 8059:
8060: /*
8061: * Capture end position and add node
8062: */
8063: if ( ret != NULL && ctxt->record_info ) {
8064: node_info.end_pos = ctxt->input->consumed +
8065: (CUR_PTR - ctxt->input->base);
8066: node_info.end_line = ctxt->input->line;
8067: node_info.node = ret;
8068: xmlParserAddNodeInfo(ctxt, &node_info);
8069: }
1.2 veillard 8070: }
8071:
1.50 daniel 8072: /**
8073: * xmlParseVersionNum:
8074: * @ctxt: an XML parser context
8075: *
8076: * parse the XML version value.
1.29 daniel 8077: *
8078: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
1.68 daniel 8079: *
8080: * Returns the string giving the XML version number, or NULL
1.29 daniel 8081: */
1.123 daniel 8082: xmlChar *
1.55 daniel 8083: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.135 daniel 8084: xmlChar *buf = NULL;
8085: int len = 0;
8086: int size = 10;
8087: xmlChar cur;
1.29 daniel 8088:
1.135 daniel 8089: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
8090: if (buf == NULL) {
8091: fprintf(stderr, "malloc of %d byte failed\n", size);
8092: return(NULL);
8093: }
8094: cur = CUR;
1.152 daniel 8095: while (((cur >= 'a') && (cur <= 'z')) ||
8096: ((cur >= 'A') && (cur <= 'Z')) ||
8097: ((cur >= '0') && (cur <= '9')) ||
8098: (cur == '_') || (cur == '.') ||
8099: (cur == ':') || (cur == '-')) {
1.135 daniel 8100: if (len + 1 >= size) {
8101: size *= 2;
8102: buf = xmlRealloc(buf, size * sizeof(xmlChar));
8103: if (buf == NULL) {
8104: fprintf(stderr, "realloc of %d byte failed\n", size);
8105: return(NULL);
8106: }
8107: }
8108: buf[len++] = cur;
8109: NEXT;
8110: cur=CUR;
8111: }
8112: buf[len] = 0;
8113: return(buf);
1.29 daniel 8114: }
8115:
1.50 daniel 8116: /**
8117: * xmlParseVersionInfo:
8118: * @ctxt: an XML parser context
8119: *
8120: * parse the XML version.
1.29 daniel 8121: *
8122: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8123: *
8124: * [25] Eq ::= S? '=' S?
1.50 daniel 8125: *
1.68 daniel 8126: * Returns the version string, e.g. "1.0"
1.29 daniel 8127: */
8128:
1.123 daniel 8129: xmlChar *
1.55 daniel 8130: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
1.123 daniel 8131: xmlChar *version = NULL;
8132: const xmlChar *q;
1.29 daniel 8133:
1.152 daniel 8134: if ((RAW == 'v') && (NXT(1) == 'e') &&
1.40 daniel 8135: (NXT(2) == 'r') && (NXT(3) == 's') &&
8136: (NXT(4) == 'i') && (NXT(5) == 'o') &&
8137: (NXT(6) == 'n')) {
8138: SKIP(7);
1.42 daniel 8139: SKIP_BLANKS;
1.152 daniel 8140: if (RAW != '=') {
1.55 daniel 8141: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8142: ctxt->sax->error(ctxt->userData,
8143: "xmlParseVersionInfo : expected '='\n");
1.59 daniel 8144: ctxt->wellFormed = 0;
1.180 daniel 8145: ctxt->disableSAX = 1;
1.123 daniel 8146: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.31 daniel 8147: return(NULL);
8148: }
1.40 daniel 8149: NEXT;
1.42 daniel 8150: SKIP_BLANKS;
1.152 daniel 8151: if (RAW == '"') {
1.40 daniel 8152: NEXT;
8153: q = CUR_PTR;
1.29 daniel 8154: version = xmlParseVersionNum(ctxt);
1.152 daniel 8155: if (RAW != '"') {
1.55 daniel 8156: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8157: ctxt->sax->error(ctxt->userData,
8158: "String not closed\n%.50s\n", q);
1.59 daniel 8159: ctxt->wellFormed = 0;
1.180 daniel 8160: ctxt->disableSAX = 1;
1.123 daniel 8161: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8162: } else
1.40 daniel 8163: NEXT;
1.152 daniel 8164: } else if (RAW == '\''){
1.40 daniel 8165: NEXT;
8166: q = CUR_PTR;
1.29 daniel 8167: version = xmlParseVersionNum(ctxt);
1.152 daniel 8168: if (RAW != '\'') {
1.55 daniel 8169: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8170: ctxt->sax->error(ctxt->userData,
8171: "String not closed\n%.50s\n", q);
1.123 daniel 8172: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.59 daniel 8173: ctxt->wellFormed = 0;
1.180 daniel 8174: ctxt->disableSAX = 1;
1.55 daniel 8175: } else
1.40 daniel 8176: NEXT;
1.31 daniel 8177: } else {
1.55 daniel 8178: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8179: ctxt->sax->error(ctxt->userData,
1.59 daniel 8180: "xmlParseVersionInfo : expected ' or \"\n");
1.122 daniel 8181: ctxt->wellFormed = 0;
1.180 daniel 8182: ctxt->disableSAX = 1;
1.123 daniel 8183: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.29 daniel 8184: }
8185: }
8186: return(version);
8187: }
8188:
1.50 daniel 8189: /**
8190: * xmlParseEncName:
8191: * @ctxt: an XML parser context
8192: *
8193: * parse the XML encoding name
1.29 daniel 8194: *
8195: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1.50 daniel 8196: *
1.68 daniel 8197: * Returns the encoding name value or NULL
1.29 daniel 8198: */
1.123 daniel 8199: xmlChar *
1.55 daniel 8200: xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.135 daniel 8201: xmlChar *buf = NULL;
8202: int len = 0;
8203: int size = 10;
8204: xmlChar cur;
1.29 daniel 8205:
1.135 daniel 8206: cur = CUR;
8207: if (((cur >= 'a') && (cur <= 'z')) ||
8208: ((cur >= 'A') && (cur <= 'Z'))) {
8209: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
8210: if (buf == NULL) {
8211: fprintf(stderr, "malloc of %d byte failed\n", size);
8212: return(NULL);
8213: }
8214:
8215: buf[len++] = cur;
1.40 daniel 8216: NEXT;
1.135 daniel 8217: cur = CUR;
1.152 daniel 8218: while (((cur >= 'a') && (cur <= 'z')) ||
8219: ((cur >= 'A') && (cur <= 'Z')) ||
8220: ((cur >= '0') && (cur <= '9')) ||
8221: (cur == '.') || (cur == '_') ||
8222: (cur == '-')) {
1.135 daniel 8223: if (len + 1 >= size) {
8224: size *= 2;
8225: buf = xmlRealloc(buf, size * sizeof(xmlChar));
8226: if (buf == NULL) {
8227: fprintf(stderr, "realloc of %d byte failed\n", size);
8228: return(NULL);
8229: }
8230: }
8231: buf[len++] = cur;
8232: NEXT;
8233: cur = CUR;
8234: if (cur == 0) {
8235: SHRINK;
8236: GROW;
8237: cur = CUR;
8238: }
8239: }
8240: buf[len] = 0;
1.29 daniel 8241: } else {
1.55 daniel 8242: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8243: ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
1.59 daniel 8244: ctxt->wellFormed = 0;
1.180 daniel 8245: ctxt->disableSAX = 1;
1.123 daniel 8246: ctxt->errNo = XML_ERR_ENCODING_NAME;
1.29 daniel 8247: }
1.135 daniel 8248: return(buf);
1.29 daniel 8249: }
8250:
1.50 daniel 8251: /**
8252: * xmlParseEncodingDecl:
8253: * @ctxt: an XML parser context
8254: *
8255: * parse the XML encoding declaration
1.29 daniel 8256: *
8257: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
1.50 daniel 8258: *
8259: * TODO: this should setup the conversion filters.
8260: *
1.68 daniel 8261: * Returns the encoding value or NULL
1.29 daniel 8262: */
8263:
1.123 daniel 8264: xmlChar *
1.55 daniel 8265: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 8266: xmlChar *encoding = NULL;
8267: const xmlChar *q;
1.29 daniel 8268:
1.42 daniel 8269: SKIP_BLANKS;
1.152 daniel 8270: if ((RAW == 'e') && (NXT(1) == 'n') &&
1.40 daniel 8271: (NXT(2) == 'c') && (NXT(3) == 'o') &&
8272: (NXT(4) == 'd') && (NXT(5) == 'i') &&
8273: (NXT(6) == 'n') && (NXT(7) == 'g')) {
8274: SKIP(8);
1.42 daniel 8275: SKIP_BLANKS;
1.152 daniel 8276: if (RAW != '=') {
1.55 daniel 8277: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8278: ctxt->sax->error(ctxt->userData,
8279: "xmlParseEncodingDecl : expected '='\n");
1.59 daniel 8280: ctxt->wellFormed = 0;
1.180 daniel 8281: ctxt->disableSAX = 1;
1.123 daniel 8282: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.31 daniel 8283: return(NULL);
8284: }
1.40 daniel 8285: NEXT;
1.42 daniel 8286: SKIP_BLANKS;
1.152 daniel 8287: if (RAW == '"') {
1.40 daniel 8288: NEXT;
8289: q = CUR_PTR;
1.29 daniel 8290: encoding = xmlParseEncName(ctxt);
1.152 daniel 8291: if (RAW != '"') {
1.55 daniel 8292: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8293: ctxt->sax->error(ctxt->userData,
8294: "String not closed\n%.50s\n", q);
1.59 daniel 8295: ctxt->wellFormed = 0;
1.180 daniel 8296: ctxt->disableSAX = 1;
1.123 daniel 8297: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8298: } else
1.40 daniel 8299: NEXT;
1.152 daniel 8300: } else if (RAW == '\''){
1.40 daniel 8301: NEXT;
8302: q = CUR_PTR;
1.29 daniel 8303: encoding = xmlParseEncName(ctxt);
1.152 daniel 8304: if (RAW != '\'') {
1.55 daniel 8305: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8306: ctxt->sax->error(ctxt->userData,
8307: "String not closed\n%.50s\n", q);
1.59 daniel 8308: ctxt->wellFormed = 0;
1.180 daniel 8309: ctxt->disableSAX = 1;
1.123 daniel 8310: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8311: } else
1.40 daniel 8312: NEXT;
1.152 daniel 8313: } else if (RAW == '"'){
1.55 daniel 8314: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8315: ctxt->sax->error(ctxt->userData,
1.59 daniel 8316: "xmlParseEncodingDecl : expected ' or \"\n");
8317: ctxt->wellFormed = 0;
1.180 daniel 8318: ctxt->disableSAX = 1;
1.123 daniel 8319: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.29 daniel 8320: }
1.193 daniel 8321: if (encoding != NULL) {
8322: xmlCharEncoding enc;
8323: xmlCharEncodingHandlerPtr handler;
8324:
1.195 daniel 8325: if (ctxt->input->encoding != NULL)
8326: xmlFree((xmlChar *) ctxt->input->encoding);
8327: ctxt->input->encoding = encoding;
8328:
1.193 daniel 8329: enc = xmlParseCharEncoding((const char *) encoding);
8330: /*
8331: * registered set of known encodings
8332: */
8333: if (enc != XML_CHAR_ENCODING_ERROR) {
8334: xmlSwitchEncoding(ctxt, enc);
8335: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8336: xmlFree(encoding);
8337: return(NULL);
8338: }
8339: } else {
8340: /*
8341: * fallback for unknown encodings
8342: */
8343: handler = xmlFindCharEncodingHandler((const char *) encoding);
8344: if (handler != NULL) {
8345: xmlSwitchToEncoding(ctxt, handler);
8346: } else {
8347: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
8348: xmlFree(encoding);
8349: return(NULL);
8350: }
8351: }
8352: }
1.29 daniel 8353: }
8354: return(encoding);
8355: }
8356:
1.50 daniel 8357: /**
8358: * xmlParseSDDecl:
8359: * @ctxt: an XML parser context
8360: *
8361: * parse the XML standalone declaration
1.29 daniel 8362: *
8363: * [32] SDDecl ::= S 'standalone' Eq
8364: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
1.99 daniel 8365: *
8366: * [ VC: Standalone Document Declaration ]
8367: * TODO The standalone document declaration must have the value "no"
8368: * if any external markup declarations contain declarations of:
8369: * - attributes with default values, if elements to which these
8370: * attributes apply appear in the document without specifications
8371: * of values for these attributes, or
8372: * - entities (other than amp, lt, gt, apos, quot), if references
8373: * to those entities appear in the document, or
8374: * - attributes with values subject to normalization, where the
8375: * attribute appears in the document with a value which will change
8376: * as a result of normalization, or
8377: * - element types with element content, if white space occurs directly
8378: * within any instance of those types.
1.68 daniel 8379: *
8380: * Returns 1 if standalone, 0 otherwise
1.29 daniel 8381: */
8382:
1.55 daniel 8383: int
8384: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 8385: int standalone = -1;
8386:
1.42 daniel 8387: SKIP_BLANKS;
1.152 daniel 8388: if ((RAW == 's') && (NXT(1) == 't') &&
1.40 daniel 8389: (NXT(2) == 'a') && (NXT(3) == 'n') &&
8390: (NXT(4) == 'd') && (NXT(5) == 'a') &&
8391: (NXT(6) == 'l') && (NXT(7) == 'o') &&
8392: (NXT(8) == 'n') && (NXT(9) == 'e')) {
8393: SKIP(10);
1.81 daniel 8394: SKIP_BLANKS;
1.152 daniel 8395: if (RAW != '=') {
1.55 daniel 8396: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8397: ctxt->sax->error(ctxt->userData,
1.59 daniel 8398: "XML standalone declaration : expected '='\n");
1.123 daniel 8399: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.59 daniel 8400: ctxt->wellFormed = 0;
1.180 daniel 8401: ctxt->disableSAX = 1;
1.32 daniel 8402: return(standalone);
8403: }
1.40 daniel 8404: NEXT;
1.42 daniel 8405: SKIP_BLANKS;
1.152 daniel 8406: if (RAW == '\''){
1.40 daniel 8407: NEXT;
1.152 daniel 8408: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 8409: standalone = 0;
1.40 daniel 8410: SKIP(2);
1.152 daniel 8411: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 8412: (NXT(2) == 's')) {
1.29 daniel 8413: standalone = 1;
1.40 daniel 8414: SKIP(3);
1.29 daniel 8415: } else {
1.55 daniel 8416: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8417: ctxt->sax->error(ctxt->userData,
8418: "standalone accepts only 'yes' or 'no'\n");
1.123 daniel 8419: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.59 daniel 8420: ctxt->wellFormed = 0;
1.180 daniel 8421: ctxt->disableSAX = 1;
1.29 daniel 8422: }
1.152 daniel 8423: if (RAW != '\'') {
1.55 daniel 8424: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8425: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.123 daniel 8426: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.59 daniel 8427: ctxt->wellFormed = 0;
1.180 daniel 8428: ctxt->disableSAX = 1;
1.55 daniel 8429: } else
1.40 daniel 8430: NEXT;
1.152 daniel 8431: } else if (RAW == '"'){
1.40 daniel 8432: NEXT;
1.152 daniel 8433: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 8434: standalone = 0;
1.40 daniel 8435: SKIP(2);
1.152 daniel 8436: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 8437: (NXT(2) == 's')) {
1.29 daniel 8438: standalone = 1;
1.40 daniel 8439: SKIP(3);
1.29 daniel 8440: } else {
1.55 daniel 8441: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8442: ctxt->sax->error(ctxt->userData,
1.59 daniel 8443: "standalone accepts only 'yes' or 'no'\n");
1.123 daniel 8444: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.59 daniel 8445: ctxt->wellFormed = 0;
1.180 daniel 8446: ctxt->disableSAX = 1;
1.29 daniel 8447: }
1.152 daniel 8448: if (RAW != '"') {
1.55 daniel 8449: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8450: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 8451: ctxt->wellFormed = 0;
1.180 daniel 8452: ctxt->disableSAX = 1;
1.123 daniel 8453: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8454: } else
1.40 daniel 8455: NEXT;
1.37 daniel 8456: } else {
1.55 daniel 8457: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8458: ctxt->sax->error(ctxt->userData,
8459: "Standalone value not found\n");
1.59 daniel 8460: ctxt->wellFormed = 0;
1.180 daniel 8461: ctxt->disableSAX = 1;
1.123 daniel 8462: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.37 daniel 8463: }
1.29 daniel 8464: }
8465: return(standalone);
8466: }
8467:
1.50 daniel 8468: /**
8469: * xmlParseXMLDecl:
8470: * @ctxt: an XML parser context
8471: *
8472: * parse an XML declaration header
1.29 daniel 8473: *
8474: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 8475: */
8476:
1.55 daniel 8477: void
8478: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 8479: xmlChar *version;
1.1 veillard 8480:
8481: /*
1.19 daniel 8482: * We know that '<?xml' is here.
1.1 veillard 8483: */
1.40 daniel 8484: SKIP(5);
1.1 veillard 8485:
1.153 daniel 8486: if (!IS_BLANK(RAW)) {
1.59 daniel 8487: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8488: ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
1.123 daniel 8489: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 8490: ctxt->wellFormed = 0;
1.180 daniel 8491: ctxt->disableSAX = 1;
1.59 daniel 8492: }
1.42 daniel 8493: SKIP_BLANKS;
1.1 veillard 8494:
8495: /*
1.29 daniel 8496: * We should have the VersionInfo here.
1.1 veillard 8497: */
1.29 daniel 8498: version = xmlParseVersionInfo(ctxt);
8499: if (version == NULL)
1.45 daniel 8500: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.72 daniel 8501: ctxt->version = xmlStrdup(version);
1.119 daniel 8502: xmlFree(version);
1.29 daniel 8503:
8504: /*
8505: * We may have the encoding declaration
8506: */
1.153 daniel 8507: if (!IS_BLANK(RAW)) {
1.152 daniel 8508: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 8509: SKIP(2);
8510: return;
8511: }
8512: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8513: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.123 daniel 8514: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 8515: ctxt->wellFormed = 0;
1.180 daniel 8516: ctxt->disableSAX = 1;
1.59 daniel 8517: }
1.195 daniel 8518: xmlParseEncodingDecl(ctxt);
1.193 daniel 8519: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8520: /*
8521: * The XML REC instructs us to stop parsing right here
8522: */
8523: return;
8524: }
1.1 veillard 8525:
8526: /*
1.29 daniel 8527: * We may have the standalone status.
1.1 veillard 8528: */
1.164 daniel 8529: if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
1.152 daniel 8530: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 8531: SKIP(2);
8532: return;
8533: }
8534: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8535: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 8536: ctxt->wellFormed = 0;
1.180 daniel 8537: ctxt->disableSAX = 1;
1.123 daniel 8538: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 8539: }
8540: SKIP_BLANKS;
1.167 daniel 8541: ctxt->input->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 8542:
1.42 daniel 8543: SKIP_BLANKS;
1.152 daniel 8544: if ((RAW == '?') && (NXT(1) == '>')) {
1.40 daniel 8545: SKIP(2);
1.152 daniel 8546: } else if (RAW == '>') {
1.31 daniel 8547: /* Deprecated old WD ... */
1.55 daniel 8548: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8549: ctxt->sax->error(ctxt->userData,
8550: "XML declaration must end-up with '?>'\n");
1.59 daniel 8551: ctxt->wellFormed = 0;
1.180 daniel 8552: ctxt->disableSAX = 1;
1.123 daniel 8553: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.40 daniel 8554: NEXT;
1.29 daniel 8555: } else {
1.55 daniel 8556: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8557: ctxt->sax->error(ctxt->userData,
8558: "parsing XML declaration: '?>' expected\n");
1.59 daniel 8559: ctxt->wellFormed = 0;
1.180 daniel 8560: ctxt->disableSAX = 1;
1.123 daniel 8561: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.40 daniel 8562: MOVETO_ENDTAG(CUR_PTR);
8563: NEXT;
1.29 daniel 8564: }
1.1 veillard 8565: }
8566:
1.50 daniel 8567: /**
8568: * xmlParseMisc:
8569: * @ctxt: an XML parser context
8570: *
8571: * parse an XML Misc* optionnal field.
1.21 daniel 8572: *
1.22 daniel 8573: * [27] Misc ::= Comment | PI | S
1.1 veillard 8574: */
8575:
1.55 daniel 8576: void
8577: xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.152 daniel 8578: while (((RAW == '<') && (NXT(1) == '?')) ||
8579: ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 8580: (NXT(2) == '-') && (NXT(3) == '-')) ||
8581: IS_BLANK(CUR)) {
1.152 daniel 8582: if ((RAW == '<') && (NXT(1) == '?')) {
1.16 daniel 8583: xmlParsePI(ctxt);
1.40 daniel 8584: } else if (IS_BLANK(CUR)) {
8585: NEXT;
1.1 veillard 8586: } else
1.114 daniel 8587: xmlParseComment(ctxt);
1.1 veillard 8588: }
8589: }
8590:
1.50 daniel 8591: /**
1.181 daniel 8592: * xmlParseDocument:
1.50 daniel 8593: * @ctxt: an XML parser context
8594: *
8595: * parse an XML document (and build a tree if using the standard SAX
8596: * interface).
1.21 daniel 8597: *
1.22 daniel 8598: * [1] document ::= prolog element Misc*
1.29 daniel 8599: *
8600: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.50 daniel 8601: *
1.68 daniel 8602: * Returns 0, -1 in case of error. the parser context is augmented
1.50 daniel 8603: * as a result of the parsing.
1.1 veillard 8604: */
8605:
1.55 daniel 8606: int
8607: xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.156 daniel 8608: xmlChar start[4];
8609: xmlCharEncoding enc;
8610:
1.45 daniel 8611: xmlDefaultSAXHandlerInit();
8612:
1.91 daniel 8613: GROW;
8614:
1.14 veillard 8615: /*
1.44 daniel 8616: * SAX: beginning of the document processing.
8617: */
1.72 daniel 8618: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
1.74 daniel 8619: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
1.44 daniel 8620:
1.156 daniel 8621: /*
8622: * Get the 4 first bytes and decode the charset
8623: * if enc != XML_CHAR_ENCODING_NONE
8624: * plug some encoding conversion routines.
8625: */
8626: start[0] = RAW;
8627: start[1] = NXT(1);
8628: start[2] = NXT(2);
8629: start[3] = NXT(3);
8630: enc = xmlDetectCharEncoding(start, 4);
8631: if (enc != XML_CHAR_ENCODING_NONE) {
8632: xmlSwitchEncoding(ctxt, enc);
8633: }
8634:
1.1 veillard 8635:
1.59 daniel 8636: if (CUR == 0) {
8637: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8638: ctxt->sax->error(ctxt->userData, "Document is empty\n");
1.123 daniel 8639: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 8640: ctxt->wellFormed = 0;
1.180 daniel 8641: ctxt->disableSAX = 1;
1.59 daniel 8642: }
1.1 veillard 8643:
8644: /*
8645: * Check for the XMLDecl in the Prolog.
8646: */
1.91 daniel 8647: GROW;
1.152 daniel 8648: if ((RAW == '<') && (NXT(1) == '?') &&
1.40 daniel 8649: (NXT(2) == 'x') && (NXT(3) == 'm') &&
1.142 daniel 8650: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.196 daniel 8651:
8652: /*
8653: * Note that we will switch encoding on the fly.
8654: */
1.19 daniel 8655: xmlParseXMLDecl(ctxt);
1.193 daniel 8656: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8657: /*
8658: * The XML REC instructs us to stop parsing right here
8659: */
8660: return(-1);
8661: }
1.167 daniel 8662: ctxt->standalone = ctxt->input->standalone;
1.42 daniel 8663: SKIP_BLANKS;
1.1 veillard 8664: } else {
1.72 daniel 8665: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.1 veillard 8666: }
1.171 daniel 8667: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
1.74 daniel 8668: ctxt->sax->startDocument(ctxt->userData);
1.1 veillard 8669:
8670: /*
8671: * The Misc part of the Prolog
8672: */
1.91 daniel 8673: GROW;
1.16 daniel 8674: xmlParseMisc(ctxt);
1.1 veillard 8675:
8676: /*
1.29 daniel 8677: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 8678: * (doctypedecl Misc*)?
8679: */
1.91 daniel 8680: GROW;
1.152 daniel 8681: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 8682: (NXT(2) == 'D') && (NXT(3) == 'O') &&
8683: (NXT(4) == 'C') && (NXT(5) == 'T') &&
8684: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
8685: (NXT(8) == 'E')) {
1.165 daniel 8686:
1.166 daniel 8687: ctxt->inSubset = 1;
1.22 daniel 8688: xmlParseDocTypeDecl(ctxt);
1.152 daniel 8689: if (RAW == '[') {
1.140 daniel 8690: ctxt->instate = XML_PARSER_DTD;
8691: xmlParseInternalSubset(ctxt);
8692: }
1.165 daniel 8693:
8694: /*
8695: * Create and update the external subset.
8696: */
1.166 daniel 8697: ctxt->inSubset = 2;
1.171 daniel 8698: if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8699: (!ctxt->disableSAX))
1.165 daniel 8700: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8701: ctxt->extSubSystem, ctxt->extSubURI);
1.166 daniel 8702: ctxt->inSubset = 0;
1.165 daniel 8703:
8704:
1.96 daniel 8705: ctxt->instate = XML_PARSER_PROLOG;
1.22 daniel 8706: xmlParseMisc(ctxt);
1.21 daniel 8707: }
8708:
8709: /*
8710: * Time to start parsing the tree itself
1.1 veillard 8711: */
1.91 daniel 8712: GROW;
1.152 daniel 8713: if (RAW != '<') {
1.59 daniel 8714: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8715: ctxt->sax->error(ctxt->userData,
1.151 daniel 8716: "Start tag expected, '<' not found\n");
1.140 daniel 8717: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 8718: ctxt->wellFormed = 0;
1.180 daniel 8719: ctxt->disableSAX = 1;
1.140 daniel 8720: ctxt->instate = XML_PARSER_EOF;
8721: } else {
8722: ctxt->instate = XML_PARSER_CONTENT;
8723: xmlParseElement(ctxt);
8724: ctxt->instate = XML_PARSER_EPILOG;
8725:
8726:
8727: /*
8728: * The Misc part at the end
8729: */
8730: xmlParseMisc(ctxt);
8731:
1.152 daniel 8732: if (RAW != 0) {
1.140 daniel 8733: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8734: ctxt->sax->error(ctxt->userData,
8735: "Extra content at the end of the document\n");
8736: ctxt->wellFormed = 0;
1.180 daniel 8737: ctxt->disableSAX = 1;
1.140 daniel 8738: ctxt->errNo = XML_ERR_DOCUMENT_END;
8739: }
8740: ctxt->instate = XML_PARSER_EOF;
1.59 daniel 8741: }
8742:
1.44 daniel 8743: /*
8744: * SAX: end of the document processing.
8745: */
1.171 daniel 8746: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8747: (!ctxt->disableSAX))
1.74 daniel 8748: ctxt->sax->endDocument(ctxt->userData);
1.151 daniel 8749:
1.59 daniel 8750: if (! ctxt->wellFormed) return(-1);
1.16 daniel 8751: return(0);
8752: }
8753:
1.98 daniel 8754: /************************************************************************
8755: * *
1.128 daniel 8756: * Progressive parsing interfaces *
8757: * *
8758: ************************************************************************/
8759:
8760: /**
8761: * xmlParseLookupSequence:
8762: * @ctxt: an XML parser context
8763: * @first: the first char to lookup
1.140 daniel 8764: * @next: the next char to lookup or zero
8765: * @third: the next char to lookup or zero
1.128 daniel 8766: *
1.140 daniel 8767: * Try to find if a sequence (first, next, third) or just (first next) or
8768: * (first) is available in the input stream.
8769: * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8770: * to avoid rescanning sequences of bytes, it DOES change the state of the
8771: * parser, do not use liberally.
1.128 daniel 8772: *
1.140 daniel 8773: * Returns the index to the current parsing point if the full sequence
8774: * is available, -1 otherwise.
1.128 daniel 8775: */
8776: int
1.140 daniel 8777: xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8778: xmlChar next, xmlChar third) {
8779: int base, len;
8780: xmlParserInputPtr in;
8781: const xmlChar *buf;
8782:
8783: in = ctxt->input;
8784: if (in == NULL) return(-1);
8785: base = in->cur - in->base;
8786: if (base < 0) return(-1);
8787: if (ctxt->checkIndex > base)
8788: base = ctxt->checkIndex;
8789: if (in->buf == NULL) {
8790: buf = in->base;
8791: len = in->length;
8792: } else {
8793: buf = in->buf->buffer->content;
8794: len = in->buf->buffer->use;
8795: }
8796: /* take into account the sequence length */
8797: if (third) len -= 2;
8798: else if (next) len --;
8799: for (;base < len;base++) {
8800: if (buf[base] == first) {
8801: if (third != 0) {
8802: if ((buf[base + 1] != next) ||
8803: (buf[base + 2] != third)) continue;
8804: } else if (next != 0) {
8805: if (buf[base + 1] != next) continue;
8806: }
8807: ctxt->checkIndex = 0;
8808: #ifdef DEBUG_PUSH
8809: if (next == 0)
8810: fprintf(stderr, "PP: lookup '%c' found at %d\n",
8811: first, base);
8812: else if (third == 0)
8813: fprintf(stderr, "PP: lookup '%c%c' found at %d\n",
8814: first, next, base);
8815: else
8816: fprintf(stderr, "PP: lookup '%c%c%c' found at %d\n",
8817: first, next, third, base);
8818: #endif
8819: return(base - (in->cur - in->base));
8820: }
8821: }
8822: ctxt->checkIndex = base;
8823: #ifdef DEBUG_PUSH
8824: if (next == 0)
8825: fprintf(stderr, "PP: lookup '%c' failed\n", first);
8826: else if (third == 0)
8827: fprintf(stderr, "PP: lookup '%c%c' failed\n", first, next);
8828: else
8829: fprintf(stderr, "PP: lookup '%c%c%c' failed\n", first, next, third);
8830: #endif
8831: return(-1);
1.128 daniel 8832: }
8833:
8834: /**
1.143 daniel 8835: * xmlParseTryOrFinish:
1.128 daniel 8836: * @ctxt: an XML parser context
1.143 daniel 8837: * @terminate: last chunk indicator
1.128 daniel 8838: *
8839: * Try to progress on parsing
8840: *
8841: * Returns zero if no parsing was possible
8842: */
8843: int
1.143 daniel 8844: xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
1.128 daniel 8845: int ret = 0;
1.140 daniel 8846: int avail;
8847: xmlChar cur, next;
8848:
8849: #ifdef DEBUG_PUSH
8850: switch (ctxt->instate) {
8851: case XML_PARSER_EOF:
8852: fprintf(stderr, "PP: try EOF\n"); break;
8853: case XML_PARSER_START:
8854: fprintf(stderr, "PP: try START\n"); break;
8855: case XML_PARSER_MISC:
8856: fprintf(stderr, "PP: try MISC\n");break;
8857: case XML_PARSER_COMMENT:
8858: fprintf(stderr, "PP: try COMMENT\n");break;
8859: case XML_PARSER_PROLOG:
8860: fprintf(stderr, "PP: try PROLOG\n");break;
8861: case XML_PARSER_START_TAG:
8862: fprintf(stderr, "PP: try START_TAG\n");break;
8863: case XML_PARSER_CONTENT:
8864: fprintf(stderr, "PP: try CONTENT\n");break;
8865: case XML_PARSER_CDATA_SECTION:
8866: fprintf(stderr, "PP: try CDATA_SECTION\n");break;
8867: case XML_PARSER_END_TAG:
8868: fprintf(stderr, "PP: try END_TAG\n");break;
8869: case XML_PARSER_ENTITY_DECL:
8870: fprintf(stderr, "PP: try ENTITY_DECL\n");break;
8871: case XML_PARSER_ENTITY_VALUE:
8872: fprintf(stderr, "PP: try ENTITY_VALUE\n");break;
8873: case XML_PARSER_ATTRIBUTE_VALUE:
8874: fprintf(stderr, "PP: try ATTRIBUTE_VALUE\n");break;
8875: case XML_PARSER_DTD:
8876: fprintf(stderr, "PP: try DTD\n");break;
8877: case XML_PARSER_EPILOG:
8878: fprintf(stderr, "PP: try EPILOG\n");break;
8879: case XML_PARSER_PI:
8880: fprintf(stderr, "PP: try PI\n");break;
8881: }
8882: #endif
1.128 daniel 8883:
8884: while (1) {
1.140 daniel 8885: /*
8886: * Pop-up of finished entities.
8887: */
1.152 daniel 8888: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 8889: xmlPopInput(ctxt);
8890:
1.184 daniel 8891: if (ctxt->input ==NULL) break;
8892: if (ctxt->input->buf == NULL)
8893: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8894: else
1.184 daniel 8895: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8896: if (avail < 1)
8897: goto done;
1.128 daniel 8898: switch (ctxt->instate) {
8899: case XML_PARSER_EOF:
1.140 daniel 8900: /*
8901: * Document parsing is done !
8902: */
8903: goto done;
8904: case XML_PARSER_START:
8905: /*
8906: * Very first chars read from the document flow.
8907: */
1.184 daniel 8908: cur = ctxt->input->cur[0];
1.140 daniel 8909: if (IS_BLANK(cur)) {
8910: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8911: ctxt->sax->setDocumentLocator(ctxt->userData,
8912: &xmlDefaultSAXLocator);
8913: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8914: ctxt->sax->error(ctxt->userData,
8915: "Extra spaces at the beginning of the document are not allowed\n");
8916: ctxt->errNo = XML_ERR_DOCUMENT_START;
8917: ctxt->wellFormed = 0;
1.180 daniel 8918: ctxt->disableSAX = 1;
1.140 daniel 8919: SKIP_BLANKS;
8920: ret++;
1.184 daniel 8921: if (ctxt->input->buf == NULL)
8922: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8923: else
1.184 daniel 8924: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8925: }
8926: if (avail < 2)
8927: goto done;
8928:
1.184 daniel 8929: cur = ctxt->input->cur[0];
8930: next = ctxt->input->cur[1];
1.140 daniel 8931: if (cur == 0) {
8932: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8933: ctxt->sax->setDocumentLocator(ctxt->userData,
8934: &xmlDefaultSAXLocator);
8935: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8936: ctxt->sax->error(ctxt->userData, "Document is empty\n");
8937: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8938: ctxt->wellFormed = 0;
1.180 daniel 8939: ctxt->disableSAX = 1;
1.140 daniel 8940: ctxt->instate = XML_PARSER_EOF;
8941: #ifdef DEBUG_PUSH
8942: fprintf(stderr, "PP: entering EOF\n");
8943: #endif
8944: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8945: ctxt->sax->endDocument(ctxt->userData);
8946: goto done;
8947: }
8948: if ((cur == '<') && (next == '?')) {
8949: /* PI or XML decl */
8950: if (avail < 5) return(ret);
1.143 daniel 8951: if ((!terminate) &&
8952: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8953: return(ret);
8954: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8955: ctxt->sax->setDocumentLocator(ctxt->userData,
8956: &xmlDefaultSAXLocator);
1.184 daniel 8957: if ((ctxt->input->cur[2] == 'x') &&
8958: (ctxt->input->cur[3] == 'm') &&
8959: (ctxt->input->cur[4] == 'l') &&
8960: (IS_BLANK(ctxt->input->cur[5]))) {
1.140 daniel 8961: ret += 5;
8962: #ifdef DEBUG_PUSH
8963: fprintf(stderr, "PP: Parsing XML Decl\n");
8964: #endif
8965: xmlParseXMLDecl(ctxt);
1.193 daniel 8966: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8967: /*
8968: * The XML REC instructs us to stop parsing right
8969: * here
8970: */
8971: ctxt->instate = XML_PARSER_EOF;
8972: return(0);
8973: }
1.167 daniel 8974: ctxt->standalone = ctxt->input->standalone;
1.164 daniel 8975: if ((ctxt->encoding == NULL) &&
8976: (ctxt->input->encoding != NULL))
8977: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1.171 daniel 8978: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8979: (!ctxt->disableSAX))
1.140 daniel 8980: ctxt->sax->startDocument(ctxt->userData);
8981: ctxt->instate = XML_PARSER_MISC;
8982: #ifdef DEBUG_PUSH
8983: fprintf(stderr, "PP: entering MISC\n");
8984: #endif
8985: } else {
8986: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 daniel 8987: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8988: (!ctxt->disableSAX))
1.140 daniel 8989: ctxt->sax->startDocument(ctxt->userData);
8990: ctxt->instate = XML_PARSER_MISC;
8991: #ifdef DEBUG_PUSH
8992: fprintf(stderr, "PP: entering MISC\n");
8993: #endif
8994: }
8995: } else {
8996: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8997: ctxt->sax->setDocumentLocator(ctxt->userData,
8998: &xmlDefaultSAXLocator);
8999: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 daniel 9000: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9001: (!ctxt->disableSAX))
1.140 daniel 9002: ctxt->sax->startDocument(ctxt->userData);
9003: ctxt->instate = XML_PARSER_MISC;
9004: #ifdef DEBUG_PUSH
9005: fprintf(stderr, "PP: entering MISC\n");
9006: #endif
9007: }
9008: break;
9009: case XML_PARSER_MISC:
9010: SKIP_BLANKS;
1.184 daniel 9011: if (ctxt->input->buf == NULL)
9012: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9013: else
1.184 daniel 9014: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9015: if (avail < 2)
9016: goto done;
1.184 daniel 9017: cur = ctxt->input->cur[0];
9018: next = ctxt->input->cur[1];
1.140 daniel 9019: if ((cur == '<') && (next == '?')) {
1.143 daniel 9020: if ((!terminate) &&
9021: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 9022: goto done;
9023: #ifdef DEBUG_PUSH
9024: fprintf(stderr, "PP: Parsing PI\n");
9025: #endif
9026: xmlParsePI(ctxt);
9027: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 9028: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 9029: if ((!terminate) &&
9030: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 9031: goto done;
9032: #ifdef DEBUG_PUSH
9033: fprintf(stderr, "PP: Parsing Comment\n");
9034: #endif
9035: xmlParseComment(ctxt);
9036: ctxt->instate = XML_PARSER_MISC;
9037: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 9038: (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
9039: (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
9040: (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
9041: (ctxt->input->cur[8] == 'E')) {
1.143 daniel 9042: if ((!terminate) &&
9043: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 9044: goto done;
9045: #ifdef DEBUG_PUSH
9046: fprintf(stderr, "PP: Parsing internal subset\n");
9047: #endif
1.166 daniel 9048: ctxt->inSubset = 1;
1.140 daniel 9049: xmlParseDocTypeDecl(ctxt);
1.152 daniel 9050: if (RAW == '[') {
1.140 daniel 9051: ctxt->instate = XML_PARSER_DTD;
9052: #ifdef DEBUG_PUSH
9053: fprintf(stderr, "PP: entering DTD\n");
9054: #endif
9055: } else {
1.166 daniel 9056: /*
9057: * Create and update the external subset.
9058: */
9059: ctxt->inSubset = 2;
1.171 daniel 9060: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 9061: (ctxt->sax->externalSubset != NULL))
9062: ctxt->sax->externalSubset(ctxt->userData,
9063: ctxt->intSubName, ctxt->extSubSystem,
9064: ctxt->extSubURI);
9065: ctxt->inSubset = 0;
1.140 daniel 9066: ctxt->instate = XML_PARSER_PROLOG;
9067: #ifdef DEBUG_PUSH
9068: fprintf(stderr, "PP: entering PROLOG\n");
9069: #endif
9070: }
9071: } else if ((cur == '<') && (next == '!') &&
9072: (avail < 9)) {
9073: goto done;
9074: } else {
9075: ctxt->instate = XML_PARSER_START_TAG;
9076: #ifdef DEBUG_PUSH
9077: fprintf(stderr, "PP: entering START_TAG\n");
9078: #endif
9079: }
9080: break;
1.128 daniel 9081: case XML_PARSER_PROLOG:
1.140 daniel 9082: SKIP_BLANKS;
1.184 daniel 9083: if (ctxt->input->buf == NULL)
9084: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9085: else
1.184 daniel 9086: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9087: if (avail < 2)
9088: goto done;
1.184 daniel 9089: cur = ctxt->input->cur[0];
9090: next = ctxt->input->cur[1];
1.140 daniel 9091: if ((cur == '<') && (next == '?')) {
1.143 daniel 9092: if ((!terminate) &&
9093: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 9094: goto done;
9095: #ifdef DEBUG_PUSH
9096: fprintf(stderr, "PP: Parsing PI\n");
9097: #endif
9098: xmlParsePI(ctxt);
9099: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 9100: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 9101: if ((!terminate) &&
9102: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 9103: goto done;
9104: #ifdef DEBUG_PUSH
9105: fprintf(stderr, "PP: Parsing Comment\n");
9106: #endif
9107: xmlParseComment(ctxt);
9108: ctxt->instate = XML_PARSER_PROLOG;
9109: } else if ((cur == '<') && (next == '!') &&
9110: (avail < 4)) {
9111: goto done;
9112: } else {
9113: ctxt->instate = XML_PARSER_START_TAG;
9114: #ifdef DEBUG_PUSH
9115: fprintf(stderr, "PP: entering START_TAG\n");
9116: #endif
9117: }
9118: break;
9119: case XML_PARSER_EPILOG:
9120: SKIP_BLANKS;
1.184 daniel 9121: if (ctxt->input->buf == NULL)
9122: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9123: else
1.184 daniel 9124: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9125: if (avail < 2)
9126: goto done;
1.184 daniel 9127: cur = ctxt->input->cur[0];
9128: next = ctxt->input->cur[1];
1.140 daniel 9129: if ((cur == '<') && (next == '?')) {
1.143 daniel 9130: if ((!terminate) &&
9131: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 9132: goto done;
9133: #ifdef DEBUG_PUSH
9134: fprintf(stderr, "PP: Parsing PI\n");
9135: #endif
9136: xmlParsePI(ctxt);
9137: ctxt->instate = XML_PARSER_EPILOG;
9138: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 9139: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 9140: if ((!terminate) &&
9141: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 9142: goto done;
9143: #ifdef DEBUG_PUSH
9144: fprintf(stderr, "PP: Parsing Comment\n");
9145: #endif
9146: xmlParseComment(ctxt);
9147: ctxt->instate = XML_PARSER_EPILOG;
9148: } else if ((cur == '<') && (next == '!') &&
9149: (avail < 4)) {
9150: goto done;
9151: } else {
9152: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9153: ctxt->sax->error(ctxt->userData,
9154: "Extra content at the end of the document\n");
9155: ctxt->wellFormed = 0;
1.180 daniel 9156: ctxt->disableSAX = 1;
1.140 daniel 9157: ctxt->errNo = XML_ERR_DOCUMENT_END;
9158: ctxt->instate = XML_PARSER_EOF;
9159: #ifdef DEBUG_PUSH
9160: fprintf(stderr, "PP: entering EOF\n");
9161: #endif
1.171 daniel 9162: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9163: (!ctxt->disableSAX))
1.140 daniel 9164: ctxt->sax->endDocument(ctxt->userData);
9165: goto done;
9166: }
9167: break;
9168: case XML_PARSER_START_TAG: {
9169: xmlChar *name, *oldname;
9170:
1.184 daniel 9171: if ((avail < 2) && (ctxt->inputNr == 1))
1.140 daniel 9172: goto done;
1.184 daniel 9173: cur = ctxt->input->cur[0];
1.140 daniel 9174: if (cur != '<') {
9175: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9176: ctxt->sax->error(ctxt->userData,
9177: "Start tag expect, '<' not found\n");
9178: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
9179: ctxt->wellFormed = 0;
1.180 daniel 9180: ctxt->disableSAX = 1;
1.140 daniel 9181: ctxt->instate = XML_PARSER_EOF;
9182: #ifdef DEBUG_PUSH
9183: fprintf(stderr, "PP: entering EOF\n");
9184: #endif
1.171 daniel 9185: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9186: (!ctxt->disableSAX))
1.140 daniel 9187: ctxt->sax->endDocument(ctxt->userData);
9188: goto done;
9189: }
1.143 daniel 9190: if ((!terminate) &&
9191: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 9192: goto done;
1.176 daniel 9193: if (ctxt->spaceNr == 0)
9194: spacePush(ctxt, -1);
9195: else
9196: spacePush(ctxt, *ctxt->space);
1.140 daniel 9197: name = xmlParseStartTag(ctxt);
9198: if (name == NULL) {
1.176 daniel 9199: spacePop(ctxt);
1.140 daniel 9200: ctxt->instate = XML_PARSER_EOF;
9201: #ifdef DEBUG_PUSH
9202: fprintf(stderr, "PP: entering EOF\n");
9203: #endif
1.171 daniel 9204: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9205: (!ctxt->disableSAX))
1.140 daniel 9206: ctxt->sax->endDocument(ctxt->userData);
9207: goto done;
9208: }
9209: namePush(ctxt, xmlStrdup(name));
9210:
9211: /*
9212: * [ VC: Root Element Type ]
9213: * The Name in the document type declaration must match
9214: * the element type of the root element.
9215: */
9216: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 9217: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.140 daniel 9218: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9219:
9220: /*
9221: * Check for an Empty Element.
9222: */
1.152 daniel 9223: if ((RAW == '/') && (NXT(1) == '>')) {
1.140 daniel 9224: SKIP(2);
1.171 daniel 9225: if ((ctxt->sax != NULL) &&
9226: (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
1.140 daniel 9227: ctxt->sax->endElement(ctxt->userData, name);
9228: xmlFree(name);
9229: oldname = namePop(ctxt);
1.176 daniel 9230: spacePop(ctxt);
1.140 daniel 9231: if (oldname != NULL) {
9232: #ifdef DEBUG_STACK
9233: fprintf(stderr,"Close: popped %s\n", oldname);
9234: #endif
9235: xmlFree(oldname);
9236: }
9237: if (ctxt->name == NULL) {
9238: ctxt->instate = XML_PARSER_EPILOG;
9239: #ifdef DEBUG_PUSH
9240: fprintf(stderr, "PP: entering EPILOG\n");
9241: #endif
9242: } else {
9243: ctxt->instate = XML_PARSER_CONTENT;
9244: #ifdef DEBUG_PUSH
9245: fprintf(stderr, "PP: entering CONTENT\n");
9246: #endif
9247: }
9248: break;
9249: }
1.152 daniel 9250: if (RAW == '>') {
1.140 daniel 9251: NEXT;
9252: } else {
9253: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9254: ctxt->sax->error(ctxt->userData,
9255: "Couldn't find end of Start Tag %s\n",
9256: name);
9257: ctxt->wellFormed = 0;
1.180 daniel 9258: ctxt->disableSAX = 1;
1.140 daniel 9259: ctxt->errNo = XML_ERR_GT_REQUIRED;
9260:
9261: /*
9262: * end of parsing of this node.
9263: */
9264: nodePop(ctxt);
9265: oldname = namePop(ctxt);
1.176 daniel 9266: spacePop(ctxt);
1.140 daniel 9267: if (oldname != NULL) {
9268: #ifdef DEBUG_STACK
9269: fprintf(stderr,"Close: popped %s\n", oldname);
9270: #endif
9271: xmlFree(oldname);
9272: }
9273: }
9274: xmlFree(name);
9275: ctxt->instate = XML_PARSER_CONTENT;
9276: #ifdef DEBUG_PUSH
9277: fprintf(stderr, "PP: entering CONTENT\n");
9278: #endif
9279: break;
9280: }
1.128 daniel 9281: case XML_PARSER_CONTENT:
1.140 daniel 9282: /*
9283: * Handle preparsed entities and charRef
9284: */
9285: if (ctxt->token != 0) {
9286: xmlChar cur[2] = { 0 , 0 } ;
9287:
9288: cur[0] = (xmlChar) ctxt->token;
1.171 daniel 9289: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9290: (ctxt->sax->characters != NULL))
1.140 daniel 9291: ctxt->sax->characters(ctxt->userData, cur, 1);
9292: ctxt->token = 0;
9293: }
1.184 daniel 9294: if ((avail < 2) && (ctxt->inputNr == 1))
1.140 daniel 9295: goto done;
1.184 daniel 9296: cur = ctxt->input->cur[0];
9297: next = ctxt->input->cur[1];
1.140 daniel 9298: if ((cur == '<') && (next == '?')) {
1.143 daniel 9299: if ((!terminate) &&
9300: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 9301: goto done;
9302: #ifdef DEBUG_PUSH
9303: fprintf(stderr, "PP: Parsing PI\n");
9304: #endif
9305: xmlParsePI(ctxt);
9306: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 9307: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 9308: if ((!terminate) &&
9309: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 9310: goto done;
9311: #ifdef DEBUG_PUSH
9312: fprintf(stderr, "PP: Parsing Comment\n");
9313: #endif
9314: xmlParseComment(ctxt);
9315: ctxt->instate = XML_PARSER_CONTENT;
1.184 daniel 9316: } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9317: (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
9318: (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
9319: (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
9320: (ctxt->input->cur[8] == '[')) {
1.140 daniel 9321: SKIP(9);
9322: ctxt->instate = XML_PARSER_CDATA_SECTION;
9323: #ifdef DEBUG_PUSH
9324: fprintf(stderr, "PP: entering CDATA_SECTION\n");
9325: #endif
9326: break;
9327: } else if ((cur == '<') && (next == '!') &&
9328: (avail < 9)) {
9329: goto done;
9330: } else if ((cur == '<') && (next == '/')) {
9331: ctxt->instate = XML_PARSER_END_TAG;
9332: #ifdef DEBUG_PUSH
9333: fprintf(stderr, "PP: entering END_TAG\n");
9334: #endif
9335: break;
9336: } else if (cur == '<') {
9337: ctxt->instate = XML_PARSER_START_TAG;
9338: #ifdef DEBUG_PUSH
9339: fprintf(stderr, "PP: entering START_TAG\n");
9340: #endif
9341: break;
9342: } else if (cur == '&') {
1.143 daniel 9343: if ((!terminate) &&
9344: (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
1.140 daniel 9345: goto done;
9346: #ifdef DEBUG_PUSH
9347: fprintf(stderr, "PP: Parsing Reference\n");
9348: #endif
9349: /* TODO: check generation of subtrees if noent !!! */
9350: xmlParseReference(ctxt);
9351: } else {
1.156 daniel 9352: /* TODO Avoid the extra copy, handle directly !!! */
1.140 daniel 9353: /*
1.181 daniel 9354: * Goal of the following test is:
1.140 daniel 9355: * - minimize calls to the SAX 'character' callback
9356: * when they are mergeable
9357: * - handle an problem for isBlank when we only parse
9358: * a sequence of blank chars and the next one is
9359: * not available to check against '<' presence.
9360: * - tries to homogenize the differences in SAX
9361: * callbacks beween the push and pull versions
9362: * of the parser.
9363: */
9364: if ((ctxt->inputNr == 1) &&
9365: (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
1.143 daniel 9366: if ((!terminate) &&
9367: (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
1.140 daniel 9368: goto done;
9369: }
9370: ctxt->checkIndex = 0;
9371: #ifdef DEBUG_PUSH
9372: fprintf(stderr, "PP: Parsing char data\n");
9373: #endif
9374: xmlParseCharData(ctxt, 0);
9375: }
9376: /*
9377: * Pop-up of finished entities.
9378: */
1.152 daniel 9379: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 9380: xmlPopInput(ctxt);
9381: break;
9382: case XML_PARSER_CDATA_SECTION: {
9383: /*
9384: * The Push mode need to have the SAX callback for
9385: * cdataBlock merge back contiguous callbacks.
9386: */
9387: int base;
9388:
9389: base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9390: if (base < 0) {
9391: if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
1.171 daniel 9392: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.140 daniel 9393: if (ctxt->sax->cdataBlock != NULL)
1.184 daniel 9394: ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
1.140 daniel 9395: XML_PARSER_BIG_BUFFER_SIZE);
9396: }
9397: SKIP(XML_PARSER_BIG_BUFFER_SIZE);
9398: ctxt->checkIndex = 0;
9399: }
9400: goto done;
9401: } else {
1.171 daniel 9402: if ((ctxt->sax != NULL) && (base > 0) &&
9403: (!ctxt->disableSAX)) {
1.140 daniel 9404: if (ctxt->sax->cdataBlock != NULL)
9405: ctxt->sax->cdataBlock(ctxt->userData,
1.184 daniel 9406: ctxt->input->cur, base);
1.140 daniel 9407: }
9408: SKIP(base + 3);
9409: ctxt->checkIndex = 0;
9410: ctxt->instate = XML_PARSER_CONTENT;
9411: #ifdef DEBUG_PUSH
9412: fprintf(stderr, "PP: entering CONTENT\n");
9413: #endif
9414: }
9415: break;
9416: }
1.141 daniel 9417: case XML_PARSER_END_TAG:
1.140 daniel 9418: if (avail < 2)
9419: goto done;
1.143 daniel 9420: if ((!terminate) &&
9421: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 9422: goto done;
9423: xmlParseEndTag(ctxt);
9424: if (ctxt->name == NULL) {
9425: ctxt->instate = XML_PARSER_EPILOG;
9426: #ifdef DEBUG_PUSH
9427: fprintf(stderr, "PP: entering EPILOG\n");
9428: #endif
9429: } else {
9430: ctxt->instate = XML_PARSER_CONTENT;
9431: #ifdef DEBUG_PUSH
9432: fprintf(stderr, "PP: entering CONTENT\n");
9433: #endif
9434: }
9435: break;
9436: case XML_PARSER_DTD: {
9437: /*
9438: * Sorry but progressive parsing of the internal subset
9439: * is not expected to be supported. We first check that
9440: * the full content of the internal subset is available and
9441: * the parsing is launched only at that point.
9442: * Internal subset ends up with "']' S? '>'" in an unescaped
9443: * section and not in a ']]>' sequence which are conditional
9444: * sections (whoever argued to keep that crap in XML deserve
9445: * a place in hell !).
9446: */
9447: int base, i;
9448: xmlChar *buf;
9449: xmlChar quote = 0;
9450:
1.184 daniel 9451: base = ctxt->input->cur - ctxt->input->base;
1.140 daniel 9452: if (base < 0) return(0);
9453: if (ctxt->checkIndex > base)
9454: base = ctxt->checkIndex;
1.184 daniel 9455: buf = ctxt->input->buf->buffer->content;
9456: for (;base < ctxt->input->buf->buffer->use;base++) {
1.140 daniel 9457: if (quote != 0) {
9458: if (buf[base] == quote)
9459: quote = 0;
9460: continue;
9461: }
9462: if (buf[base] == '"') {
9463: quote = '"';
9464: continue;
9465: }
9466: if (buf[base] == '\'') {
9467: quote = '\'';
9468: continue;
9469: }
9470: if (buf[base] == ']') {
1.184 daniel 9471: if (base +1 >= ctxt->input->buf->buffer->use)
1.140 daniel 9472: break;
9473: if (buf[base + 1] == ']') {
9474: /* conditional crap, skip both ']' ! */
9475: base++;
9476: continue;
9477: }
1.184 daniel 9478: for (i = 0;base + i < ctxt->input->buf->buffer->use;i++) {
1.140 daniel 9479: if (buf[base + i] == '>')
9480: goto found_end_int_subset;
9481: }
9482: break;
9483: }
9484: }
9485: /*
9486: * We didn't found the end of the Internal subset
9487: */
9488: if (quote == 0)
9489: ctxt->checkIndex = base;
9490: #ifdef DEBUG_PUSH
9491: if (next == 0)
9492: fprintf(stderr, "PP: lookup of int subset end filed\n");
9493: #endif
9494: goto done;
9495:
9496: found_end_int_subset:
9497: xmlParseInternalSubset(ctxt);
1.166 daniel 9498: ctxt->inSubset = 2;
1.171 daniel 9499: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 9500: (ctxt->sax->externalSubset != NULL))
9501: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9502: ctxt->extSubSystem, ctxt->extSubURI);
9503: ctxt->inSubset = 0;
1.140 daniel 9504: ctxt->instate = XML_PARSER_PROLOG;
9505: ctxt->checkIndex = 0;
9506: #ifdef DEBUG_PUSH
9507: fprintf(stderr, "PP: entering PROLOG\n");
9508: #endif
9509: break;
9510: }
9511: case XML_PARSER_COMMENT:
9512: fprintf(stderr, "PP: internal error, state == COMMENT\n");
9513: ctxt->instate = XML_PARSER_CONTENT;
9514: #ifdef DEBUG_PUSH
9515: fprintf(stderr, "PP: entering CONTENT\n");
9516: #endif
9517: break;
9518: case XML_PARSER_PI:
9519: fprintf(stderr, "PP: internal error, state == PI\n");
9520: ctxt->instate = XML_PARSER_CONTENT;
9521: #ifdef DEBUG_PUSH
9522: fprintf(stderr, "PP: entering CONTENT\n");
9523: #endif
9524: break;
1.128 daniel 9525: case XML_PARSER_ENTITY_DECL:
1.140 daniel 9526: fprintf(stderr, "PP: internal error, state == ENTITY_DECL\n");
9527: ctxt->instate = XML_PARSER_DTD;
9528: #ifdef DEBUG_PUSH
9529: fprintf(stderr, "PP: entering DTD\n");
9530: #endif
9531: break;
1.128 daniel 9532: case XML_PARSER_ENTITY_VALUE:
1.140 daniel 9533: fprintf(stderr, "PP: internal error, state == ENTITY_VALUE\n");
9534: ctxt->instate = XML_PARSER_CONTENT;
9535: #ifdef DEBUG_PUSH
9536: fprintf(stderr, "PP: entering DTD\n");
9537: #endif
9538: break;
1.128 daniel 9539: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 9540: fprintf(stderr, "PP: internal error, state == ATTRIBUTE_VALUE\n");
1.168 daniel 9541: ctxt->instate = XML_PARSER_START_TAG;
9542: #ifdef DEBUG_PUSH
9543: fprintf(stderr, "PP: entering START_TAG\n");
9544: #endif
9545: break;
9546: case XML_PARSER_SYSTEM_LITERAL:
9547: fprintf(stderr, "PP: internal error, state == SYSTEM_LITERAL\n");
1.140 daniel 9548: ctxt->instate = XML_PARSER_START_TAG;
9549: #ifdef DEBUG_PUSH
9550: fprintf(stderr, "PP: entering START_TAG\n");
9551: #endif
9552: break;
1.128 daniel 9553: }
9554: }
1.140 daniel 9555: done:
9556: #ifdef DEBUG_PUSH
9557: fprintf(stderr, "PP: done %d\n", ret);
9558: #endif
1.128 daniel 9559: return(ret);
9560: }
9561:
9562: /**
1.143 daniel 9563: * xmlParseTry:
9564: * @ctxt: an XML parser context
9565: *
9566: * Try to progress on parsing
9567: *
9568: * Returns zero if no parsing was possible
9569: */
9570: int
9571: xmlParseTry(xmlParserCtxtPtr ctxt) {
9572: return(xmlParseTryOrFinish(ctxt, 0));
9573: }
9574:
9575: /**
1.128 daniel 9576: * xmlParseChunk:
9577: * @ctxt: an XML parser context
9578: * @chunk: an char array
9579: * @size: the size in byte of the chunk
9580: * @terminate: last chunk indicator
9581: *
9582: * Parse a Chunk of memory
9583: *
9584: * Returns zero if no error, the xmlParserErrors otherwise.
9585: */
1.140 daniel 9586: int
1.128 daniel 9587: xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
9588: int terminate) {
1.132 daniel 9589: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
1.140 daniel 9590: (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
9591: int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9592: int cur = ctxt->input->cur - ctxt->input->base;
9593:
1.132 daniel 9594: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
1.140 daniel 9595: ctxt->input->base = ctxt->input->buf->buffer->content + base;
9596: ctxt->input->cur = ctxt->input->base + cur;
9597: #ifdef DEBUG_PUSH
9598: fprintf(stderr, "PP: pushed %d\n", size);
9599: #endif
9600:
1.150 daniel 9601: if ((terminate) || (ctxt->input->buf->buffer->use > 80))
9602: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 9603: } else if (ctxt->instate != XML_PARSER_EOF)
1.143 daniel 9604: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 9605: if (terminate) {
1.151 daniel 9606: /*
9607: * Check for termination
9608: */
1.140 daniel 9609: if ((ctxt->instate != XML_PARSER_EOF) &&
9610: (ctxt->instate != XML_PARSER_EPILOG)) {
9611: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9612: ctxt->sax->error(ctxt->userData,
9613: "Extra content at the end of the document\n");
9614: ctxt->wellFormed = 0;
1.180 daniel 9615: ctxt->disableSAX = 1;
1.140 daniel 9616: ctxt->errNo = XML_ERR_DOCUMENT_END;
9617: }
9618: if (ctxt->instate != XML_PARSER_EOF) {
1.171 daniel 9619: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9620: (!ctxt->disableSAX))
1.140 daniel 9621: ctxt->sax->endDocument(ctxt->userData);
9622: }
9623: ctxt->instate = XML_PARSER_EOF;
1.128 daniel 9624: }
9625: return((xmlParserErrors) ctxt->errNo);
9626: }
9627:
9628: /************************************************************************
9629: * *
1.98 daniel 9630: * I/O front end functions to the parser *
9631: * *
9632: ************************************************************************/
1.201 ! daniel 9633:
! 9634: /**
! 9635: * xmlCreatePushParserCtxt:
! 9636: * @ctxt: an XML parser context
! 9637: *
! 9638: * Blocks further parser processing
! 9639: */
! 9640: void
! 9641: xmlStopParser(xmlParserCtxtPtr ctxt) {
! 9642: ctxt->instate = XML_PARSER_EOF;
! 9643: if (ctxt->input != NULL)
! 9644: ctxt->input->cur = BAD_CAST"";
! 9645: }
1.98 daniel 9646:
1.50 daniel 9647: /**
1.181 daniel 9648: * xmlCreatePushParserCtxt:
1.140 daniel 9649: * @sax: a SAX handler
9650: * @user_data: The user data returned on SAX callbacks
9651: * @chunk: a pointer to an array of chars
9652: * @size: number of chars in the array
9653: * @filename: an optional file name or URI
9654: *
9655: * Create a parser context for using the XML parser in push mode
9656: * To allow content encoding detection, @size should be >= 4
9657: * The value of @filename is used for fetching external entities
9658: * and error/warning reports.
9659: *
9660: * Returns the new parser context or NULL
9661: */
9662: xmlParserCtxtPtr
9663: xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9664: const char *chunk, int size, const char *filename) {
9665: xmlParserCtxtPtr ctxt;
9666: xmlParserInputPtr inputStream;
9667: xmlParserInputBufferPtr buf;
9668: xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9669:
9670: /*
1.156 daniel 9671: * plug some encoding conversion routines
1.140 daniel 9672: */
9673: if ((chunk != NULL) && (size >= 4))
1.156 daniel 9674: enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
1.140 daniel 9675:
9676: buf = xmlAllocParserInputBuffer(enc);
9677: if (buf == NULL) return(NULL);
9678:
9679: ctxt = xmlNewParserCtxt();
9680: if (ctxt == NULL) {
9681: xmlFree(buf);
9682: return(NULL);
9683: }
9684: if (sax != NULL) {
9685: if (ctxt->sax != &xmlDefaultSAXHandler)
9686: xmlFree(ctxt->sax);
9687: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9688: if (ctxt->sax == NULL) {
9689: xmlFree(buf);
9690: xmlFree(ctxt);
9691: return(NULL);
9692: }
9693: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9694: if (user_data != NULL)
9695: ctxt->userData = user_data;
9696: }
9697: if (filename == NULL) {
9698: ctxt->directory = NULL;
9699: } else {
9700: ctxt->directory = xmlParserGetDirectory(filename);
9701: }
9702:
9703: inputStream = xmlNewInputStream(ctxt);
9704: if (inputStream == NULL) {
9705: xmlFreeParserCtxt(ctxt);
9706: return(NULL);
9707: }
9708:
9709: if (filename == NULL)
9710: inputStream->filename = NULL;
9711: else
9712: inputStream->filename = xmlMemStrdup(filename);
9713: inputStream->buf = buf;
9714: inputStream->base = inputStream->buf->buffer->content;
9715: inputStream->cur = inputStream->buf->buffer->content;
1.156 daniel 9716: if (enc != XML_CHAR_ENCODING_NONE) {
9717: xmlSwitchEncoding(ctxt, enc);
9718: }
1.140 daniel 9719:
9720: inputPush(ctxt, inputStream);
9721:
9722: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9723: (ctxt->input->buf != NULL)) {
9724: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9725: #ifdef DEBUG_PUSH
9726: fprintf(stderr, "PP: pushed %d\n", size);
9727: #endif
9728: }
1.190 daniel 9729:
9730: return(ctxt);
9731: }
9732:
9733: /**
9734: * xmlCreateIOParserCtxt:
9735: * @sax: a SAX handler
9736: * @user_data: The user data returned on SAX callbacks
9737: * @ioread: an I/O read function
9738: * @ioclose: an I/O close function
9739: * @ioctx: an I/O handler
9740: * @enc: the charset encoding if known
9741: *
9742: * Create a parser context for using the XML parser with an existing
9743: * I/O stream
9744: *
9745: * Returns the new parser context or NULL
9746: */
9747: xmlParserCtxtPtr
9748: xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9749: xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9750: void *ioctx, xmlCharEncoding enc) {
9751: xmlParserCtxtPtr ctxt;
9752: xmlParserInputPtr inputStream;
9753: xmlParserInputBufferPtr buf;
9754:
9755: buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9756: if (buf == NULL) return(NULL);
9757:
9758: ctxt = xmlNewParserCtxt();
9759: if (ctxt == NULL) {
9760: xmlFree(buf);
9761: return(NULL);
9762: }
9763: if (sax != NULL) {
9764: if (ctxt->sax != &xmlDefaultSAXHandler)
9765: xmlFree(ctxt->sax);
9766: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9767: if (ctxt->sax == NULL) {
9768: xmlFree(buf);
9769: xmlFree(ctxt);
9770: return(NULL);
9771: }
9772: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9773: if (user_data != NULL)
9774: ctxt->userData = user_data;
9775: }
9776:
9777: inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9778: if (inputStream == NULL) {
9779: xmlFreeParserCtxt(ctxt);
9780: return(NULL);
9781: }
9782: inputPush(ctxt, inputStream);
1.140 daniel 9783:
9784: return(ctxt);
9785: }
9786:
9787: /**
1.181 daniel 9788: * xmlCreateDocParserCtxt:
1.123 daniel 9789: * @cur: a pointer to an array of xmlChar
1.50 daniel 9790: *
1.192 daniel 9791: * Creates a parser context for an XML in-memory document.
1.69 daniel 9792: *
9793: * Returns the new parser context or NULL
1.16 daniel 9794: */
1.69 daniel 9795: xmlParserCtxtPtr
1.123 daniel 9796: xmlCreateDocParserCtxt(xmlChar *cur) {
1.16 daniel 9797: xmlParserCtxtPtr ctxt;
1.40 daniel 9798: xmlParserInputPtr input;
1.16 daniel 9799:
1.97 daniel 9800: ctxt = xmlNewParserCtxt();
1.16 daniel 9801: if (ctxt == NULL) {
9802: return(NULL);
9803: }
1.96 daniel 9804: input = xmlNewInputStream(ctxt);
1.40 daniel 9805: if (input == NULL) {
1.97 daniel 9806: xmlFreeParserCtxt(ctxt);
1.40 daniel 9807: return(NULL);
9808: }
9809:
9810: input->base = cur;
9811: input->cur = cur;
9812:
9813: inputPush(ctxt, input);
1.69 daniel 9814: return(ctxt);
9815: }
9816:
9817: /**
1.181 daniel 9818: * xmlSAXParseDoc:
1.69 daniel 9819: * @sax: the SAX handler block
1.123 daniel 9820: * @cur: a pointer to an array of xmlChar
1.69 daniel 9821: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9822: * documents
9823: *
9824: * parse an XML in-memory document and build a tree.
9825: * It use the given SAX function block to handle the parsing callback.
9826: * If sax is NULL, fallback to the default DOM tree building routines.
9827: *
9828: * Returns the resulting document tree
9829: */
9830:
9831: xmlDocPtr
1.123 daniel 9832: xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
1.69 daniel 9833: xmlDocPtr ret;
9834: xmlParserCtxtPtr ctxt;
9835:
9836: if (cur == NULL) return(NULL);
1.16 daniel 9837:
9838:
1.69 daniel 9839: ctxt = xmlCreateDocParserCtxt(cur);
9840: if (ctxt == NULL) return(NULL);
1.74 daniel 9841: if (sax != NULL) {
9842: ctxt->sax = sax;
9843: ctxt->userData = NULL;
9844: }
1.69 daniel 9845:
1.16 daniel 9846: xmlParseDocument(ctxt);
1.72 daniel 9847: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 9848: else {
9849: ret = NULL;
1.72 daniel 9850: xmlFreeDoc(ctxt->myDoc);
9851: ctxt->myDoc = NULL;
1.59 daniel 9852: }
1.86 daniel 9853: if (sax != NULL)
9854: ctxt->sax = NULL;
1.69 daniel 9855: xmlFreeParserCtxt(ctxt);
1.16 daniel 9856:
1.1 veillard 9857: return(ret);
9858: }
9859:
1.50 daniel 9860: /**
1.181 daniel 9861: * xmlParseDoc:
1.123 daniel 9862: * @cur: a pointer to an array of xmlChar
1.55 daniel 9863: *
9864: * parse an XML in-memory document and build a tree.
9865: *
1.68 daniel 9866: * Returns the resulting document tree
1.55 daniel 9867: */
9868:
1.69 daniel 9869: xmlDocPtr
1.123 daniel 9870: xmlParseDoc(xmlChar *cur) {
1.59 daniel 9871: return(xmlSAXParseDoc(NULL, cur, 0));
1.76 daniel 9872: }
9873:
9874: /**
1.181 daniel 9875: * xmlSAXParseDTD:
1.76 daniel 9876: * @sax: the SAX handler block
9877: * @ExternalID: a NAME* containing the External ID of the DTD
9878: * @SystemID: a NAME* containing the URL to the DTD
9879: *
9880: * Load and parse an external subset.
9881: *
9882: * Returns the resulting xmlDtdPtr or NULL in case of error.
9883: */
9884:
9885: xmlDtdPtr
1.123 daniel 9886: xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9887: const xmlChar *SystemID) {
1.76 daniel 9888: xmlDtdPtr ret = NULL;
9889: xmlParserCtxtPtr ctxt;
1.83 daniel 9890: xmlParserInputPtr input = NULL;
1.76 daniel 9891: xmlCharEncoding enc;
9892:
9893: if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9894:
1.97 daniel 9895: ctxt = xmlNewParserCtxt();
1.76 daniel 9896: if (ctxt == NULL) {
9897: return(NULL);
9898: }
9899:
9900: /*
9901: * Set-up the SAX context
9902: */
9903: if (ctxt == NULL) return(NULL);
9904: if (sax != NULL) {
1.93 veillard 9905: if (ctxt->sax != NULL)
1.119 daniel 9906: xmlFree(ctxt->sax);
1.76 daniel 9907: ctxt->sax = sax;
9908: ctxt->userData = NULL;
9909: }
9910:
9911: /*
9912: * Ask the Entity resolver to load the damn thing
9913: */
9914:
9915: if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9916: input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9917: if (input == NULL) {
1.86 daniel 9918: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 9919: xmlFreeParserCtxt(ctxt);
9920: return(NULL);
9921: }
9922:
9923: /*
1.156 daniel 9924: * plug some encoding conversion routines here.
1.76 daniel 9925: */
9926: xmlPushInput(ctxt, input);
1.156 daniel 9927: enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
1.76 daniel 9928: xmlSwitchEncoding(ctxt, enc);
9929:
1.95 veillard 9930: if (input->filename == NULL)
1.156 daniel 9931: input->filename = (char *) xmlStrdup(SystemID);
1.76 daniel 9932: input->line = 1;
9933: input->col = 1;
9934: input->base = ctxt->input->cur;
9935: input->cur = ctxt->input->cur;
9936: input->free = NULL;
9937:
9938: /*
9939: * let's parse that entity knowing it's an external subset.
9940: */
1.191 daniel 9941: ctxt->inSubset = 2;
9942: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9943: ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9944: ExternalID, SystemID);
1.79 daniel 9945: xmlParseExternalSubset(ctxt, ExternalID, SystemID);
1.76 daniel 9946:
9947: if (ctxt->myDoc != NULL) {
9948: if (ctxt->wellFormed) {
1.191 daniel 9949: ret = ctxt->myDoc->extSubset;
9950: ctxt->myDoc->extSubset = NULL;
1.76 daniel 9951: } else {
9952: ret = NULL;
9953: }
9954: xmlFreeDoc(ctxt->myDoc);
9955: ctxt->myDoc = NULL;
9956: }
1.86 daniel 9957: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 9958: xmlFreeParserCtxt(ctxt);
9959:
9960: return(ret);
9961: }
9962:
9963: /**
1.181 daniel 9964: * xmlParseDTD:
1.76 daniel 9965: * @ExternalID: a NAME* containing the External ID of the DTD
9966: * @SystemID: a NAME* containing the URL to the DTD
9967: *
9968: * Load and parse an external subset.
9969: *
9970: * Returns the resulting xmlDtdPtr or NULL in case of error.
9971: */
9972:
9973: xmlDtdPtr
1.123 daniel 9974: xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
1.76 daniel 9975: return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
1.59 daniel 9976: }
9977:
9978: /**
1.181 daniel 9979: * xmlSAXParseBalancedChunk:
1.144 daniel 9980: * @ctx: an XML parser context (possibly NULL)
9981: * @sax: the SAX handler bloc (possibly NULL)
9982: * @user_data: The user data returned on SAX callbacks (possibly NULL)
9983: * @input: a parser input stream
9984: * @enc: the encoding
9985: *
9986: * Parse a well-balanced chunk of an XML document
9987: * The user has to provide SAX callback block whose routines will be
9988: * called by the parser
9989: * The allowed sequence for the Well Balanced Chunk is the one defined by
9990: * the content production in the XML grammar:
9991: *
9992: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9993: *
1.176 daniel 9994: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
1.144 daniel 9995: * the error code otherwise
9996: */
9997:
9998: int
9999: xmlSAXParseBalancedChunk(xmlParserCtxtPtr ctx, xmlSAXHandlerPtr sax,
10000: void *user_data, xmlParserInputPtr input,
10001: xmlCharEncoding enc) {
10002: xmlParserCtxtPtr ctxt;
10003: int ret;
10004:
10005: if (input == NULL) return(-1);
10006:
10007: if (ctx != NULL)
10008: ctxt = ctx;
10009: else {
10010: ctxt = xmlNewParserCtxt();
10011: if (ctxt == NULL)
10012: return(-1);
10013: if (sax == NULL)
10014: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10015: }
10016:
10017: /*
10018: * Set-up the SAX context
10019: */
10020: if (sax != NULL) {
10021: if (ctxt->sax != NULL)
10022: xmlFree(ctxt->sax);
10023: ctxt->sax = sax;
10024: ctxt->userData = user_data;
10025: }
10026:
10027: /*
10028: * plug some encoding conversion routines here.
10029: */
10030: xmlPushInput(ctxt, input);
10031: if (enc != XML_CHAR_ENCODING_NONE)
10032: xmlSwitchEncoding(ctxt, enc);
10033:
10034: /*
10035: * let's parse that entity knowing it's an external subset.
10036: */
10037: xmlParseContent(ctxt);
10038: ret = ctxt->errNo;
10039:
10040: if (ctx == NULL) {
10041: if (sax != NULL)
10042: ctxt->sax = NULL;
10043: else
10044: xmlFreeDoc(ctxt->myDoc);
10045: xmlFreeParserCtxt(ctxt);
10046: }
10047: return(ret);
10048: }
10049:
10050: /**
1.181 daniel 10051: * xmlParseExternalEntity:
10052: * @doc: the document the chunk pertains to
10053: * @sax: the SAX handler bloc (possibly NULL)
10054: * @user_data: The user data returned on SAX callbacks (possibly NULL)
1.185 daniel 10055: * @depth: Used for loop detection, use 0
1.181 daniel 10056: * @URL: the URL for the entity to load
10057: * @ID: the System ID for the entity to load
10058: * @list: the return value for the set of parsed nodes
10059: *
10060: * Parse an external general entity
10061: * An external general parsed entity is well-formed if it matches the
10062: * production labeled extParsedEnt.
10063: *
10064: * [78] extParsedEnt ::= TextDecl? content
10065: *
10066: * Returns 0 if the entity is well formed, -1 in case of args problem and
10067: * the parser error code otherwise
10068: */
10069:
10070: int
10071: xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
1.185 daniel 10072: int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
1.181 daniel 10073: xmlParserCtxtPtr ctxt;
10074: xmlDocPtr newDoc;
10075: xmlSAXHandlerPtr oldsax = NULL;
10076: int ret = 0;
10077:
1.185 daniel 10078: if (depth > 40) {
10079: return(XML_ERR_ENTITY_LOOP);
10080: }
10081:
10082:
1.181 daniel 10083:
10084: if (list != NULL)
10085: *list = NULL;
10086: if ((URL == NULL) && (ID == NULL))
10087: return(-1);
10088:
10089:
10090: ctxt = xmlCreateEntityParserCtxt(URL, ID, doc->URL);
10091: if (ctxt == NULL) return(-1);
10092: ctxt->userData = ctxt;
10093: if (sax != NULL) {
10094: oldsax = ctxt->sax;
10095: ctxt->sax = sax;
10096: if (user_data != NULL)
10097: ctxt->userData = user_data;
10098: }
10099: newDoc = xmlNewDoc(BAD_CAST "1.0");
10100: if (newDoc == NULL) {
10101: xmlFreeParserCtxt(ctxt);
10102: return(-1);
10103: }
10104: if (doc != NULL) {
10105: newDoc->intSubset = doc->intSubset;
10106: newDoc->extSubset = doc->extSubset;
10107: }
10108: if (doc->URL != NULL) {
10109: newDoc->URL = xmlStrdup(doc->URL);
10110: }
10111: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10112: if (newDoc->children == NULL) {
10113: if (sax != NULL)
10114: ctxt->sax = oldsax;
10115: xmlFreeParserCtxt(ctxt);
10116: newDoc->intSubset = NULL;
10117: newDoc->extSubset = NULL;
10118: xmlFreeDoc(newDoc);
10119: return(-1);
10120: }
10121: nodePush(ctxt, newDoc->children);
10122: if (doc == NULL) {
10123: ctxt->myDoc = newDoc;
10124: } else {
10125: ctxt->myDoc = doc;
10126: newDoc->children->doc = doc;
10127: }
10128:
10129: /*
10130: * Parse a possible text declaration first
10131: */
10132: GROW;
10133: if ((RAW == '<') && (NXT(1) == '?') &&
10134: (NXT(2) == 'x') && (NXT(3) == 'm') &&
10135: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
10136: xmlParseTextDecl(ctxt);
10137: }
10138:
10139: /*
10140: * Doing validity checking on chunk doesn't make sense
10141: */
10142: ctxt->instate = XML_PARSER_CONTENT;
10143: ctxt->validate = 0;
1.185 daniel 10144: ctxt->depth = depth;
1.181 daniel 10145:
10146: xmlParseContent(ctxt);
10147:
10148: if ((RAW == '<') && (NXT(1) == '/')) {
10149: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10150: ctxt->sax->error(ctxt->userData,
10151: "chunk is not well balanced\n");
10152: ctxt->wellFormed = 0;
10153: ctxt->disableSAX = 1;
10154: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10155: } else if (RAW != 0) {
10156: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10157: ctxt->sax->error(ctxt->userData,
10158: "extra content at the end of well balanced chunk\n");
10159: ctxt->wellFormed = 0;
10160: ctxt->disableSAX = 1;
10161: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10162: }
10163: if (ctxt->node != newDoc->children) {
10164: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10165: ctxt->sax->error(ctxt->userData,
10166: "chunk is not well balanced\n");
10167: ctxt->wellFormed = 0;
10168: ctxt->disableSAX = 1;
10169: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10170: }
10171:
10172: if (!ctxt->wellFormed) {
10173: if (ctxt->errNo == 0)
10174: ret = 1;
10175: else
10176: ret = ctxt->errNo;
10177: } else {
10178: if (list != NULL) {
10179: xmlNodePtr cur;
10180:
10181: /*
10182: * Return the newly created nodeset after unlinking it from
10183: * they pseudo parent.
10184: */
10185: cur = newDoc->children->children;
10186: *list = cur;
10187: while (cur != NULL) {
10188: cur->parent = NULL;
10189: cur = cur->next;
10190: }
10191: newDoc->children->children = NULL;
10192: }
10193: ret = 0;
10194: }
10195: if (sax != NULL)
10196: ctxt->sax = oldsax;
10197: xmlFreeParserCtxt(ctxt);
10198: newDoc->intSubset = NULL;
10199: newDoc->extSubset = NULL;
10200: xmlFreeDoc(newDoc);
10201:
10202: return(ret);
10203: }
10204:
10205: /**
10206: * xmlParseBalancedChunk:
1.176 daniel 10207: * @doc: the document the chunk pertains to
10208: * @sax: the SAX handler bloc (possibly NULL)
10209: * @user_data: The user data returned on SAX callbacks (possibly NULL)
1.185 daniel 10210: * @depth: Used for loop detection, use 0
1.176 daniel 10211: * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10212: * @list: the return value for the set of parsed nodes
10213: *
10214: * Parse a well-balanced chunk of an XML document
10215: * called by the parser
10216: * The allowed sequence for the Well Balanced Chunk is the one defined by
10217: * the content production in the XML grammar:
1.144 daniel 10218: *
1.175 daniel 10219: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10220: *
1.176 daniel 10221: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10222: * the parser error code otherwise
1.144 daniel 10223: */
10224:
1.175 daniel 10225: int
10226: xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
1.185 daniel 10227: void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
1.176 daniel 10228: xmlParserCtxtPtr ctxt;
1.175 daniel 10229: xmlDocPtr newDoc;
1.181 daniel 10230: xmlSAXHandlerPtr oldsax = NULL;
1.175 daniel 10231: int size;
1.176 daniel 10232: int ret = 0;
1.175 daniel 10233:
1.185 daniel 10234: if (depth > 40) {
10235: return(XML_ERR_ENTITY_LOOP);
10236: }
10237:
1.175 daniel 10238:
1.176 daniel 10239: if (list != NULL)
10240: *list = NULL;
10241: if (string == NULL)
10242: return(-1);
10243:
10244: size = xmlStrlen(string);
10245:
1.183 daniel 10246: ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
1.176 daniel 10247: if (ctxt == NULL) return(-1);
10248: ctxt->userData = ctxt;
1.175 daniel 10249: if (sax != NULL) {
1.176 daniel 10250: oldsax = ctxt->sax;
10251: ctxt->sax = sax;
10252: if (user_data != NULL)
10253: ctxt->userData = user_data;
1.175 daniel 10254: }
10255: newDoc = xmlNewDoc(BAD_CAST "1.0");
1.176 daniel 10256: if (newDoc == NULL) {
10257: xmlFreeParserCtxt(ctxt);
10258: return(-1);
10259: }
1.175 daniel 10260: if (doc != NULL) {
10261: newDoc->intSubset = doc->intSubset;
10262: newDoc->extSubset = doc->extSubset;
10263: }
1.176 daniel 10264: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10265: if (newDoc->children == NULL) {
10266: if (sax != NULL)
10267: ctxt->sax = oldsax;
10268: xmlFreeParserCtxt(ctxt);
10269: newDoc->intSubset = NULL;
10270: newDoc->extSubset = NULL;
10271: xmlFreeDoc(newDoc);
10272: return(-1);
10273: }
10274: nodePush(ctxt, newDoc->children);
10275: if (doc == NULL) {
10276: ctxt->myDoc = newDoc;
10277: } else {
10278: ctxt->myDoc = doc;
10279: newDoc->children->doc = doc;
10280: }
10281: ctxt->instate = XML_PARSER_CONTENT;
1.185 daniel 10282: ctxt->depth = depth;
1.176 daniel 10283:
10284: /*
10285: * Doing validity checking on chunk doesn't make sense
10286: */
10287: ctxt->validate = 0;
10288:
1.175 daniel 10289: xmlParseContent(ctxt);
1.176 daniel 10290:
10291: if ((RAW == '<') && (NXT(1) == '/')) {
10292: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10293: ctxt->sax->error(ctxt->userData,
10294: "chunk is not well balanced\n");
10295: ctxt->wellFormed = 0;
1.180 daniel 10296: ctxt->disableSAX = 1;
1.176 daniel 10297: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10298: } else if (RAW != 0) {
10299: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10300: ctxt->sax->error(ctxt->userData,
10301: "extra content at the end of well balanced chunk\n");
10302: ctxt->wellFormed = 0;
1.180 daniel 10303: ctxt->disableSAX = 1;
1.176 daniel 10304: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10305: }
10306: if (ctxt->node != newDoc->children) {
10307: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10308: ctxt->sax->error(ctxt->userData,
10309: "chunk is not well balanced\n");
10310: ctxt->wellFormed = 0;
1.180 daniel 10311: ctxt->disableSAX = 1;
1.176 daniel 10312: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10313: }
1.175 daniel 10314:
1.176 daniel 10315: if (!ctxt->wellFormed) {
10316: if (ctxt->errNo == 0)
10317: ret = 1;
10318: else
10319: ret = ctxt->errNo;
10320: } else {
10321: if (list != NULL) {
10322: xmlNodePtr cur;
1.175 daniel 10323:
1.176 daniel 10324: /*
10325: * Return the newly created nodeset after unlinking it from
10326: * they pseudo parent.
10327: */
10328: cur = newDoc->children->children;
10329: *list = cur;
10330: while (cur != NULL) {
10331: cur->parent = NULL;
10332: cur = cur->next;
10333: }
10334: newDoc->children->children = NULL;
10335: }
10336: ret = 0;
1.175 daniel 10337: }
1.176 daniel 10338: if (sax != NULL)
10339: ctxt->sax = oldsax;
1.175 daniel 10340: xmlFreeParserCtxt(ctxt);
10341: newDoc->intSubset = NULL;
10342: newDoc->extSubset = NULL;
1.176 daniel 10343: xmlFreeDoc(newDoc);
1.175 daniel 10344:
1.176 daniel 10345: return(ret);
1.144 daniel 10346: }
10347:
10348: /**
1.181 daniel 10349: * xmlParseBalancedChunkFile:
1.144 daniel 10350: * @doc: the document the chunk pertains to
10351: *
10352: * Parse a well-balanced chunk of an XML document contained in a file
10353: *
10354: * Returns the resulting list of nodes resulting from the parsing,
10355: * they are not added to @node
10356: */
10357:
10358: xmlNodePtr
10359: xmlParseBalancedChunkFile(xmlDocPtr doc, xmlNodePtr node) {
1.156 daniel 10360: /* TODO !!! */
10361: return(NULL);
1.144 daniel 10362: }
10363:
10364: /**
1.181 daniel 10365: * xmlRecoverDoc:
1.123 daniel 10366: * @cur: a pointer to an array of xmlChar
1.59 daniel 10367: *
10368: * parse an XML in-memory document and build a tree.
10369: * In the case the document is not Well Formed, a tree is built anyway
10370: *
1.68 daniel 10371: * Returns the resulting document tree
1.59 daniel 10372: */
10373:
1.69 daniel 10374: xmlDocPtr
1.123 daniel 10375: xmlRecoverDoc(xmlChar *cur) {
1.59 daniel 10376: return(xmlSAXParseDoc(NULL, cur, 1));
1.55 daniel 10377: }
10378:
10379: /**
1.181 daniel 10380: * xmlCreateEntityParserCtxt:
10381: * @URL: the entity URL
10382: * @ID: the entity PUBLIC ID
10383: * @base: a posible base for the target URI
10384: *
10385: * Create a parser context for an external entity
10386: * Automatic support for ZLIB/Compress compressed document is provided
10387: * by default if found at compile-time.
10388: *
10389: * Returns the new parser context or NULL
10390: */
10391: xmlParserCtxtPtr
10392: xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
10393: const xmlChar *base) {
10394: xmlParserCtxtPtr ctxt;
10395: xmlParserInputPtr inputStream;
10396: char *directory = NULL;
10397:
10398: ctxt = xmlNewParserCtxt();
10399: if (ctxt == NULL) {
10400: return(NULL);
10401: }
10402:
1.182 daniel 10403: inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
1.181 daniel 10404: if (inputStream == NULL) {
10405: xmlFreeParserCtxt(ctxt);
10406: return(NULL);
10407: }
10408:
10409: inputPush(ctxt, inputStream);
10410:
10411: if ((ctxt->directory == NULL) && (directory == NULL))
1.182 daniel 10412: directory = xmlParserGetDirectory((char *)URL);
1.181 daniel 10413: if ((ctxt->directory == NULL) && (directory != NULL))
10414: ctxt->directory = directory;
10415:
10416: return(ctxt);
10417: }
10418:
10419: /**
10420: * xmlCreateFileParserCtxt:
1.50 daniel 10421: * @filename: the filename
10422: *
1.69 daniel 10423: * Create a parser context for a file content.
10424: * Automatic support for ZLIB/Compress compressed document is provided
10425: * by default if found at compile-time.
1.50 daniel 10426: *
1.69 daniel 10427: * Returns the new parser context or NULL
1.9 httpng 10428: */
1.69 daniel 10429: xmlParserCtxtPtr
10430: xmlCreateFileParserCtxt(const char *filename)
10431: {
10432: xmlParserCtxtPtr ctxt;
1.40 daniel 10433: xmlParserInputPtr inputStream;
1.91 daniel 10434: xmlParserInputBufferPtr buf;
1.111 daniel 10435: char *directory = NULL;
1.9 httpng 10436:
1.91 daniel 10437: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
10438: if (buf == NULL) return(NULL);
1.9 httpng 10439:
1.97 daniel 10440: ctxt = xmlNewParserCtxt();
1.16 daniel 10441: if (ctxt == NULL) {
10442: return(NULL);
10443: }
1.97 daniel 10444:
1.96 daniel 10445: inputStream = xmlNewInputStream(ctxt);
1.40 daniel 10446: if (inputStream == NULL) {
1.97 daniel 10447: xmlFreeParserCtxt(ctxt);
1.40 daniel 10448: return(NULL);
10449: }
10450:
1.119 daniel 10451: inputStream->filename = xmlMemStrdup(filename);
1.91 daniel 10452: inputStream->buf = buf;
10453: inputStream->base = inputStream->buf->buffer->content;
10454: inputStream->cur = inputStream->buf->buffer->content;
1.16 daniel 10455:
1.40 daniel 10456: inputPush(ctxt, inputStream);
1.110 daniel 10457: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 10458: directory = xmlParserGetDirectory(filename);
10459: if ((ctxt->directory == NULL) && (directory != NULL))
1.110 daniel 10460: ctxt->directory = directory;
1.106 daniel 10461:
1.69 daniel 10462: return(ctxt);
10463: }
10464:
10465: /**
1.181 daniel 10466: * xmlSAXParseFile:
1.69 daniel 10467: * @sax: the SAX handler block
10468: * @filename: the filename
10469: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10470: * documents
10471: *
10472: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10473: * compressed document is provided by default if found at compile-time.
10474: * It use the given SAX function block to handle the parsing callback.
10475: * If sax is NULL, fallback to the default DOM tree building routines.
10476: *
10477: * Returns the resulting document tree
10478: */
10479:
1.79 daniel 10480: xmlDocPtr
10481: xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
1.69 daniel 10482: int recovery) {
10483: xmlDocPtr ret;
10484: xmlParserCtxtPtr ctxt;
1.111 daniel 10485: char *directory = NULL;
1.69 daniel 10486:
10487: ctxt = xmlCreateFileParserCtxt(filename);
10488: if (ctxt == NULL) return(NULL);
1.74 daniel 10489: if (sax != NULL) {
1.93 veillard 10490: if (ctxt->sax != NULL)
1.119 daniel 10491: xmlFree(ctxt->sax);
1.74 daniel 10492: ctxt->sax = sax;
10493: ctxt->userData = NULL;
10494: }
1.106 daniel 10495:
1.110 daniel 10496: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 10497: directory = xmlParserGetDirectory(filename);
10498: if ((ctxt->directory == NULL) && (directory != NULL))
1.156 daniel 10499: ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
1.16 daniel 10500:
10501: xmlParseDocument(ctxt);
1.40 daniel 10502:
1.72 daniel 10503: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 10504: else {
10505: ret = NULL;
1.72 daniel 10506: xmlFreeDoc(ctxt->myDoc);
10507: ctxt->myDoc = NULL;
1.59 daniel 10508: }
1.86 daniel 10509: if (sax != NULL)
10510: ctxt->sax = NULL;
1.69 daniel 10511: xmlFreeParserCtxt(ctxt);
1.20 daniel 10512:
10513: return(ret);
10514: }
10515:
1.55 daniel 10516: /**
1.181 daniel 10517: * xmlParseFile:
1.55 daniel 10518: * @filename: the filename
10519: *
10520: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10521: * compressed document is provided by default if found at compile-time.
10522: *
1.68 daniel 10523: * Returns the resulting document tree
1.55 daniel 10524: */
10525:
1.79 daniel 10526: xmlDocPtr
10527: xmlParseFile(const char *filename) {
1.59 daniel 10528: return(xmlSAXParseFile(NULL, filename, 0));
10529: }
10530:
10531: /**
1.181 daniel 10532: * xmlRecoverFile:
1.59 daniel 10533: * @filename: the filename
10534: *
10535: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10536: * compressed document is provided by default if found at compile-time.
10537: * In the case the document is not Well Formed, a tree is built anyway
10538: *
1.68 daniel 10539: * Returns the resulting document tree
1.59 daniel 10540: */
10541:
1.79 daniel 10542: xmlDocPtr
10543: xmlRecoverFile(const char *filename) {
1.59 daniel 10544: return(xmlSAXParseFile(NULL, filename, 1));
1.55 daniel 10545: }
1.32 daniel 10546:
1.50 daniel 10547: /**
1.181 daniel 10548: * xmlCreateMemoryParserCtxt:
10549: * @buffer: a pointer to a zero terminated char array
10550: * @size: the size of the array (without the trailing 0)
1.50 daniel 10551: *
1.69 daniel 10552: * Create a parser context for an XML in-memory document.
1.50 daniel 10553: *
1.69 daniel 10554: * Returns the new parser context or NULL
1.20 daniel 10555: */
1.69 daniel 10556: xmlParserCtxtPtr
10557: xmlCreateMemoryParserCtxt(char *buffer, int size) {
1.20 daniel 10558: xmlParserCtxtPtr ctxt;
1.40 daniel 10559: xmlParserInputPtr input;
10560:
1.179 daniel 10561: if (buffer[size] != 0)
1.181 daniel 10562: return(NULL);
1.40 daniel 10563:
1.97 daniel 10564: ctxt = xmlNewParserCtxt();
1.181 daniel 10565: if (ctxt == NULL)
1.20 daniel 10566: return(NULL);
1.97 daniel 10567:
1.96 daniel 10568: input = xmlNewInputStream(ctxt);
1.40 daniel 10569: if (input == NULL) {
1.97 daniel 10570: xmlFreeParserCtxt(ctxt);
1.40 daniel 10571: return(NULL);
10572: }
1.20 daniel 10573:
1.40 daniel 10574: input->filename = NULL;
10575: input->line = 1;
10576: input->col = 1;
1.96 daniel 10577: input->buf = NULL;
1.91 daniel 10578: input->consumed = 0;
1.75 daniel 10579:
1.116 daniel 10580: input->base = BAD_CAST buffer;
10581: input->cur = BAD_CAST buffer;
1.69 daniel 10582: input->free = NULL;
1.20 daniel 10583:
1.40 daniel 10584: inputPush(ctxt, input);
1.69 daniel 10585: return(ctxt);
10586: }
10587:
10588: /**
1.181 daniel 10589: * xmlSAXParseMemory:
1.69 daniel 10590: * @sax: the SAX handler block
10591: * @buffer: an pointer to a char array
1.127 daniel 10592: * @size: the size of the array
10593: * @recovery: work in recovery mode, i.e. tries to read not Well Formed
1.69 daniel 10594: * documents
10595: *
10596: * parse an XML in-memory block and use the given SAX function block
10597: * to handle the parsing callback. If sax is NULL, fallback to the default
10598: * DOM tree building routines.
10599: *
10600: * Returns the resulting document tree
10601: */
10602: xmlDocPtr
10603: xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
10604: xmlDocPtr ret;
10605: xmlParserCtxtPtr ctxt;
10606:
10607: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10608: if (ctxt == NULL) return(NULL);
1.74 daniel 10609: if (sax != NULL) {
10610: ctxt->sax = sax;
10611: ctxt->userData = NULL;
10612: }
1.20 daniel 10613:
10614: xmlParseDocument(ctxt);
1.40 daniel 10615:
1.72 daniel 10616: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 10617: else {
10618: ret = NULL;
1.72 daniel 10619: xmlFreeDoc(ctxt->myDoc);
10620: ctxt->myDoc = NULL;
1.59 daniel 10621: }
1.86 daniel 10622: if (sax != NULL)
10623: ctxt->sax = NULL;
1.69 daniel 10624: xmlFreeParserCtxt(ctxt);
1.16 daniel 10625:
1.9 httpng 10626: return(ret);
1.17 daniel 10627: }
10628:
1.55 daniel 10629: /**
1.181 daniel 10630: * xmlParseMemory:
1.68 daniel 10631: * @buffer: an pointer to a char array
1.55 daniel 10632: * @size: the size of the array
10633: *
10634: * parse an XML in-memory block and build a tree.
10635: *
1.68 daniel 10636: * Returns the resulting document tree
1.55 daniel 10637: */
10638:
10639: xmlDocPtr xmlParseMemory(char *buffer, int size) {
1.59 daniel 10640: return(xmlSAXParseMemory(NULL, buffer, size, 0));
10641: }
10642:
10643: /**
1.181 daniel 10644: * xmlRecoverMemory:
1.68 daniel 10645: * @buffer: an pointer to a char array
1.59 daniel 10646: * @size: the size of the array
10647: *
10648: * parse an XML in-memory block and build a tree.
10649: * In the case the document is not Well Formed, a tree is built anyway
10650: *
1.68 daniel 10651: * Returns the resulting document tree
1.59 daniel 10652: */
10653:
10654: xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
10655: return(xmlSAXParseMemory(NULL, buffer, size, 1));
1.17 daniel 10656: }
10657:
10658:
1.50 daniel 10659: /**
10660: * xmlSetupParserForBuffer:
10661: * @ctxt: an XML parser context
1.123 daniel 10662: * @buffer: a xmlChar * buffer
1.50 daniel 10663: * @filename: a file name
10664: *
1.19 daniel 10665: * Setup the parser context to parse a new buffer; Clears any prior
10666: * contents from the parser context. The buffer parameter must not be
10667: * NULL, but the filename parameter can be
10668: */
1.55 daniel 10669: void
1.123 daniel 10670: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
1.17 daniel 10671: const char* filename)
10672: {
1.96 daniel 10673: xmlParserInputPtr input;
1.40 daniel 10674:
1.96 daniel 10675: input = xmlNewInputStream(ctxt);
10676: if (input == NULL) {
10677: perror("malloc");
1.119 daniel 10678: xmlFree(ctxt);
1.145 daniel 10679: return;
1.96 daniel 10680: }
10681:
10682: xmlClearParserCtxt(ctxt);
10683: if (filename != NULL)
1.119 daniel 10684: input->filename = xmlMemStrdup(filename);
1.96 daniel 10685: input->base = buffer;
10686: input->cur = buffer;
10687: inputPush(ctxt, input);
1.17 daniel 10688: }
10689:
1.123 daniel 10690: /**
10691: * xmlSAXUserParseFile:
10692: * @sax: a SAX handler
10693: * @user_data: The user data returned on SAX callbacks
10694: * @filename: a file name
10695: *
10696: * parse an XML file and call the given SAX handler routines.
10697: * Automatic support for ZLIB/Compress compressed document is provided
10698: *
10699: * Returns 0 in case of success or a error number otherwise
10700: */
1.131 daniel 10701: int
10702: xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10703: const char *filename) {
1.123 daniel 10704: int ret = 0;
10705: xmlParserCtxtPtr ctxt;
10706:
10707: ctxt = xmlCreateFileParserCtxt(filename);
10708: if (ctxt == NULL) return -1;
1.134 daniel 10709: if (ctxt->sax != &xmlDefaultSAXHandler)
10710: xmlFree(ctxt->sax);
1.123 daniel 10711: ctxt->sax = sax;
1.140 daniel 10712: if (user_data != NULL)
10713: ctxt->userData = user_data;
1.123 daniel 10714:
10715: xmlParseDocument(ctxt);
10716:
10717: if (ctxt->wellFormed)
10718: ret = 0;
10719: else {
10720: if (ctxt->errNo != 0)
10721: ret = ctxt->errNo;
10722: else
10723: ret = -1;
10724: }
10725: if (sax != NULL)
10726: ctxt->sax = NULL;
10727: xmlFreeParserCtxt(ctxt);
10728:
10729: return ret;
10730: }
10731:
10732: /**
10733: * xmlSAXUserParseMemory:
10734: * @sax: a SAX handler
10735: * @user_data: The user data returned on SAX callbacks
10736: * @buffer: an in-memory XML document input
1.127 daniel 10737: * @size: the length of the XML document in bytes
1.123 daniel 10738: *
10739: * A better SAX parsing routine.
10740: * parse an XML in-memory buffer and call the given SAX handler routines.
10741: *
10742: * Returns 0 in case of success or a error number otherwise
10743: */
10744: int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
10745: char *buffer, int size) {
10746: int ret = 0;
10747: xmlParserCtxtPtr ctxt;
10748:
10749: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10750: if (ctxt == NULL) return -1;
10751: ctxt->sax = sax;
10752: ctxt->userData = user_data;
10753:
10754: xmlParseDocument(ctxt);
10755:
10756: if (ctxt->wellFormed)
10757: ret = 0;
10758: else {
10759: if (ctxt->errNo != 0)
10760: ret = ctxt->errNo;
10761: else
10762: ret = -1;
10763: }
10764: if (sax != NULL)
10765: ctxt->sax = NULL;
10766: xmlFreeParserCtxt(ctxt);
10767:
10768: return ret;
10769: }
10770:
1.32 daniel 10771:
1.98 daniel 10772: /************************************************************************
10773: * *
1.127 daniel 10774: * Miscellaneous *
1.98 daniel 10775: * *
10776: ************************************************************************/
10777:
/**
 * xmlCleanupParser:
 *
 * Cleanup function for the XML parser. It tries to reclaim all
 * parsing related global memory allocated for the parser processing:
 * the character encoding handlers first, then the predefined entities.
 * It doesn't deallocate any document related memory. Calling this
 * function should not prevent reusing the parser.
 */

void
xmlCleanupParser(void)
{
    /* Global tables, released in this order. */
    xmlCleanupCharEncodingHandlers();
    xmlCleanupPredefinedEntities();
}
1.98 daniel 10792:
1.50 daniel 10793: /**
10794: * xmlParserFindNodeInfo:
10795: * @ctxt: an XML parser context
10796: * @node: an XML node within the tree
10797: *
10798: * Find the parser node info struct for a given node
10799: *
1.68 daniel 10800: * Returns an xmlParserNodeInfo block pointer or NULL
1.32 daniel 10801: */
10802: const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
10803: const xmlNode* node)
10804: {
10805: unsigned long pos;
10806:
10807: /* Find position where node should be at */
10808: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
10809: if ( ctx->node_seq.buffer[pos].node == node )
10810: return &ctx->node_seq.buffer[pos];
10811: else
10812: return NULL;
10813: }
10814:
10815:
1.50 daniel 10816: /**
1.181 daniel 10817: * xmlInitNodeInfoSeq:
1.50 daniel 10818: * @seq: a node info sequence pointer
10819: *
10820: * -- Initialize (set to initial state) node info sequence
1.32 daniel 10821: */
1.55 daniel 10822: void
10823: xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 10824: {
10825: seq->length = 0;
10826: seq->maximum = 0;
10827: seq->buffer = NULL;
10828: }
10829:
1.50 daniel 10830: /**
1.181 daniel 10831: * xmlClearNodeInfoSeq:
1.50 daniel 10832: * @seq: a node info sequence pointer
10833: *
10834: * -- Clear (release memory and reinitialize) node
1.32 daniel 10835: * info sequence
10836: */
1.55 daniel 10837: void
10838: xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 10839: {
10840: if ( seq->buffer != NULL )
1.119 daniel 10841: xmlFree(seq->buffer);
1.32 daniel 10842: xmlInitNodeInfoSeq(seq);
10843: }
10844:
10845:
1.50 daniel 10846: /**
10847: * xmlParserFindNodeInfoIndex:
10848: * @seq: a node info sequence pointer
10849: * @node: an XML node pointer
10850: *
10851: *
1.32 daniel 10852: * xmlParserFindNodeInfoIndex : Find the index that the info record for
10853: * the given node is or should be at in a sorted sequence
1.68 daniel 10854: *
10855: * Returns a long indicating the position of the record
1.32 daniel 10856: */
10857: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
10858: const xmlNode* node)
10859: {
10860: unsigned long upper, lower, middle;
10861: int found = 0;
10862:
10863: /* Do a binary search for the key */
10864: lower = 1;
10865: upper = seq->length;
10866: middle = 0;
10867: while ( lower <= upper && !found) {
10868: middle = lower + (upper - lower) / 2;
10869: if ( node == seq->buffer[middle - 1].node )
10870: found = 1;
10871: else if ( node < seq->buffer[middle - 1].node )
10872: upper = middle - 1;
10873: else
10874: lower = middle + 1;
10875: }
10876:
10877: /* Return position */
10878: if ( middle == 0 || seq->buffer[middle - 1].node < node )
10879: return middle;
10880: else
10881: return middle - 1;
10882: }
10883:
10884:
1.50 daniel 10885: /**
10886: * xmlParserAddNodeInfo:
10887: * @ctxt: an XML parser context
1.68 daniel 10888: * @info: a node info sequence pointer
1.50 daniel 10889: *
10890: * Insert node info record into the sorted sequence
1.32 daniel 10891: */
1.55 daniel 10892: void
10893: xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
1.68 daniel 10894: const xmlParserNodeInfo* info)
1.32 daniel 10895: {
10896: unsigned long pos;
10897: static unsigned int block_size = 5;
10898:
10899: /* Find pos and check to see if node is already in the sequence */
1.55 daniel 10900: pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
10901: if ( pos < ctxt->node_seq.length
10902: && ctxt->node_seq.buffer[pos].node == info->node ) {
10903: ctxt->node_seq.buffer[pos] = *info;
1.32 daniel 10904: }
10905:
10906: /* Otherwise, we need to add new node to buffer */
10907: else {
10908: /* Expand buffer by 5 if needed */
1.55 daniel 10909: if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
1.32 daniel 10910: xmlParserNodeInfo* tmp_buffer;
1.55 daniel 10911: unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
10912: *(ctxt->node_seq.maximum + block_size));
1.32 daniel 10913:
1.55 daniel 10914: if ( ctxt->node_seq.buffer == NULL )
1.119 daniel 10915: tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
1.32 daniel 10916: else
1.119 daniel 10917: tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
1.32 daniel 10918:
10919: if ( tmp_buffer == NULL ) {
1.55 daniel 10920: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 10921: ctxt->sax->error(ctxt->userData, "Out of memory\n");
1.123 daniel 10922: ctxt->errNo = XML_ERR_NO_MEMORY;
1.32 daniel 10923: return;
10924: }
1.55 daniel 10925: ctxt->node_seq.buffer = tmp_buffer;
10926: ctxt->node_seq.maximum += block_size;
1.32 daniel 10927: }
10928:
10929: /* If position is not at end, move elements out of the way */
1.55 daniel 10930: if ( pos != ctxt->node_seq.length ) {
1.32 daniel 10931: unsigned long i;
10932:
1.55 daniel 10933: for ( i = ctxt->node_seq.length; i > pos; i-- )
10934: ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
1.32 daniel 10935: }
10936:
10937: /* Copy element and increase length */
1.55 daniel 10938: ctxt->node_seq.buffer[pos] = *info;
10939: ctxt->node_seq.length++;
1.32 daniel 10940: }
10941: }
1.77 daniel 10942:
1.98 daniel 10943:
10944: /**
1.181 daniel 10945: * xmlSubstituteEntitiesDefault:
1.98 daniel 10946: * @val: int 0 or 1
10947: *
10948: * Set and return the previous value for default entity support.
10949: * Initially the parser always keep entity references instead of substituting
10950: * entity values in the output. This function has to be used to change the
10951: * default parser behaviour
10952: * SAX::subtituteEntities() has to be used for changing that on a file by
10953: * file basis.
10954: *
10955: * Returns the last value for 0 for no substitution, 1 for substitution.
10956: */
10957:
10958: int
10959: xmlSubstituteEntitiesDefault(int val) {
10960: int old = xmlSubstituteEntitiesDefaultValue;
10961:
10962: xmlSubstituteEntitiesDefaultValue = val;
1.180 daniel 10963: return(old);
10964: }
10965:
10966: /**
10967: * xmlKeepBlanksDefault:
10968: * @val: int 0 or 1
10969: *
10970: * Set and return the previous value for default blanks text nodes support.
10971: * The 1.x version of the parser used an heuristic to try to detect
10972: * ignorable white spaces. As a result the SAX callback was generating
10973: * ignorableWhitespace() callbacks instead of characters() one, and when
10974: * using the DOM output text nodes containing those blanks were not generated.
10975: * The 2.x and later version will switch to the XML standard way and
10976: * ignorableWhitespace() are only generated when running the parser in
10977: * validating mode and when the current element doesn't allow CDATA or
10978: * mixed content.
10979: * This function is provided as a way to force the standard behaviour
10980: * on 1.X libs and to switch back to the old mode for compatibility when
10981: * running 1.X client code on 2.X . Upgrade of 1.X code should be done
10982: * by using xmlIsBlankNode() commodity function to detect the "empty"
10983: * nodes generated.
10984: * This value also affect autogeneration of indentation when saving code
10985: * if blanks sections are kept, indentation is not generated.
10986: *
10987: * Returns the last value for 0 for no substitution, 1 for substitution.
10988: */
10989:
10990: int
10991: xmlKeepBlanksDefault(int val) {
10992: int old = xmlKeepBlanksDefaultValue;
10993:
10994: xmlKeepBlanksDefaultValue = val;
10995: xmlIndentTreeOutput = !val;
1.98 daniel 10996: return(old);
10997: }
1.77 daniel 10998:
Webmaster