Annotation of XML/parser.c, revision 1.204
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.15 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.60 daniel 6: * Daniel.Veillard@w3.org
1.1 veillard 7: */
8:
1.26 daniel 9: #ifdef WIN32
1.138 daniel 10: #include "win32config.h"
1.26 daniel 11: #else
1.121 daniel 12: #include "config.h"
1.26 daniel 13: #endif
1.121 daniel 14:
1.1 veillard 15: #include <stdio.h>
1.204 ! veillard 16: #include <string.h>
1.121 daniel 17: #ifdef HAVE_CTYPE_H
1.1 veillard 18: #include <ctype.h>
1.121 daniel 19: #endif
20: #ifdef HAVE_STDLIB_H
1.50 daniel 21: #include <stdlib.h>
1.121 daniel 22: #endif
23: #ifdef HAVE_SYS_STAT_H
1.9 httpng 24: #include <sys/stat.h>
1.121 daniel 25: #endif
1.9 httpng 26: #ifdef HAVE_FCNTL_H
27: #include <fcntl.h>
28: #endif
1.10 httpng 29: #ifdef HAVE_UNISTD_H
30: #include <unistd.h>
31: #endif
1.20 daniel 32: #ifdef HAVE_ZLIB_H
33: #include <zlib.h>
34: #endif
1.1 veillard 35:
1.188 daniel 36: #include <libxml/xmlmemory.h>
37: #include <libxml/tree.h>
38: #include <libxml/parser.h>
39: #include <libxml/entities.h>
40: #include <libxml/encoding.h>
41: #include <libxml/valid.h>
42: #include <libxml/parserInternals.h>
43: #include <libxml/xmlIO.h>
1.193 daniel 44: #include <libxml/uri.h>
1.122 daniel 45: #include "xml-error.h"
1.1 veillard 46:
/*
 * Buffer sizes. They are not referenced in the part of the file reviewed
 * here; presumably they size the temporary buffers of the parsing routines
 * further down -- TODO confirm.
 */
1.140 daniel 47: #define XML_PARSER_BIG_BUFFER_SIZE 1000
48: #define XML_PARSER_BUFFER_SIZE 100
49:
/* Global default, initialised to 1 (name suggests "emit warnings" -- TODO confirm against its users). */
1.160 daniel 50: int xmlGetWarningsDefaultValue = 1;
1.86 daniel 51:
1.139 daniel 52: /*
53: * List of XML prefixed PI allowed by W3C specs
54: */
55:
/* NULL-terminated array of PI target names. */
56: const char *xmlW3CPIs[] = {
57: "xml-stylesheet",
58: NULL
59: };
1.91 daniel 60:
/*
 * Forward declarations: the two handlers are called from the SKIP/NEXTL
 * macros and from xmlNextChar()/xmlSkipBlankChars() when the input pointer
 * lands on '%' or '&'.
 */
1.151 daniel 61: void xmlParserHandleReference(xmlParserCtxtPtr ctxt);
62: void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
63: xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
64: const xmlChar **str);
1.200 daniel 65:
66: /*
67: * Version handling
68: */
69: const char *xmlParserVersion = LIBXML_VERSION_STRING;
70:
71: /*
72: * xmlCheckVersion:
73: * @version: the include version number
74: *
75: * check the compiled lib version against the include one.
76: * This can warn or immediately kill the application
77: */
78: void
79: xmlCheckVersion(int version) {
1.202 daniel 80: int myversion = (int) LIBXML_VERSION;
1.200 daniel 81:
82: if ((myversion / 10000) != (version / 10000)) {
83: fprintf(stderr,
84: "Fatal: program compiled against libxml %d using libxml %d\n",
85: (version / 10000), (myversion / 10000));
86: exit(1);
87: }
88: if ((myversion / 100) < (version / 100)) {
89: fprintf(stderr,
90: "Warning: program compiled against libxml %d using older %d\n",
91: (version / 100), (myversion / 100));
92: }
93: }
94:
95:
1.91 daniel 96: /************************************************************************
97: * *
98: * Input handling functions for progressive parsing *
99: * *
100: ************************************************************************/
101:
102: /* #define DEBUG_INPUT */
1.140 daniel 103: /* #define DEBUG_STACK */
104: /* #define DEBUG_PUSH */
105:
1.91 daniel 106:
/*
 * INPUT_CHUNK is the lookahead size passed to xmlParserInputGrow() by the
 * parser macros, and the threshold used by xmlParserInputGrow() and
 * xmlParserInputShrink() to decide whether the buffer needs work.
 */
1.110 daniel 107: #define INPUT_CHUNK 250
108: /* we need to keep enough input to show errors in context */
/* LINE_LEN is the amount of already consumed input xmlParserInputShrink() leaves in the buffer. */
109: #define LINE_LEN 80
1.91 daniel 110:
111: #ifdef DEBUG_INPUT
112: #define CHECK_BUFFER(in) check_buffer(in)
113:
114: void check_buffer(xmlParserInputPtr in) {
115: if (in->base != in->buf->buffer->content) {
116: fprintf(stderr, "xmlParserInput: base mismatch problem\n");
117: }
118: if (in->cur < in->base) {
119: fprintf(stderr, "xmlParserInput: cur < base problem\n");
120: }
121: if (in->cur > in->base + in->buf->buffer->use) {
122: fprintf(stderr, "xmlParserInput: cur > base + use problem\n");
123: }
124: fprintf(stderr,"buffer %x : content %x, cur %d, use %d, size %d\n",
125: (int) in, (int) in->buf->buffer->content, in->cur - in->base,
126: in->buf->buffer->use, in->buf->buffer->size);
127: }
128:
1.110 daniel 129: #else
130: #define CHECK_BUFFER(in)
131: #endif
132:
1.91 daniel 133:
134: /**
135: * xmlParserInputRead:
136: * @in: an XML parser input
137: * @len: an indicative size for the lookahead
138: *
139: * This function refresh the input for the parser. It doesn't try to
140: * preserve pointers to the input buffer, and discard already read data
141: *
1.123 daniel 142: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
1.91 daniel 143: * end of this entity
144: */
145: int
146: xmlParserInputRead(xmlParserInputPtr in, int len) {
147: int ret;
148: int used;
149: int index;
150:
151: #ifdef DEBUG_INPUT
152: fprintf(stderr, "Read\n");
153: #endif
154: if (in->buf == NULL) return(-1);
155: if (in->base == NULL) return(-1);
156: if (in->cur == NULL) return(-1);
157: if (in->buf->buffer == NULL) return(-1);
158:
159: CHECK_BUFFER(in);
160:
161: used = in->cur - in->buf->buffer->content;
162: ret = xmlBufferShrink(in->buf->buffer, used);
163: if (ret > 0) {
164: in->cur -= ret;
165: in->consumed += ret;
166: }
167: ret = xmlParserInputBufferRead(in->buf, len);
168: if (in->base != in->buf->buffer->content) {
169: /*
170: * the buffer has been realloced
171: */
172: index = in->cur - in->base;
173: in->base = in->buf->buffer->content;
174: in->cur = &in->buf->buffer->content[index];
175: }
176:
177: CHECK_BUFFER(in);
178:
179: return(ret);
180: }
181:
182: /**
183: * xmlParserInputGrow:
184: * @in: an XML parser input
185: * @len: an indicative size for the lookahead
186: *
187: * This function increase the input for the parser. It tries to
188: * preserve pointers to the input buffer, and keep already read data
189: *
1.123 daniel 190: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
1.91 daniel 191: * end of this entity
192: */
193: int
194: xmlParserInputGrow(xmlParserInputPtr in, int len) {
195: int ret;
196: int index;
197:
198: #ifdef DEBUG_INPUT
199: fprintf(stderr, "Grow\n");
200: #endif
201: if (in->buf == NULL) return(-1);
202: if (in->base == NULL) return(-1);
203: if (in->cur == NULL) return(-1);
204: if (in->buf->buffer == NULL) return(-1);
205:
206: CHECK_BUFFER(in);
207:
208: index = in->cur - in->base;
1.202 daniel 209: if (in->buf->buffer->use > (unsigned int) index + INPUT_CHUNK) {
1.91 daniel 210:
211: CHECK_BUFFER(in);
212:
213: return(0);
214: }
1.189 daniel 215: if (in->buf->readcallback != NULL)
1.140 daniel 216: ret = xmlParserInputBufferGrow(in->buf, len);
217: else
218: return(0);
1.135 daniel 219:
220: /*
221: * NOTE : in->base may be a "dandling" i.e. freed pointer in this
222: * block, but we use it really as an integer to do some
223: * pointer arithmetic. Insure will raise it as a bug but in
224: * that specific case, that's not !
225: */
1.91 daniel 226: if (in->base != in->buf->buffer->content) {
227: /*
228: * the buffer has been realloced
229: */
230: index = in->cur - in->base;
231: in->base = in->buf->buffer->content;
232: in->cur = &in->buf->buffer->content[index];
233: }
234:
235: CHECK_BUFFER(in);
236:
237: return(ret);
238: }
239:
240: /**
241: * xmlParserInputShrink:
242: * @in: an XML parser input
243: *
244: * This function removes used input for the parser.
245: */
246: void
247: xmlParserInputShrink(xmlParserInputPtr in) {
248: int used;
249: int ret;
250: int index;
251:
252: #ifdef DEBUG_INPUT
253: fprintf(stderr, "Shrink\n");
254: #endif
255: if (in->buf == NULL) return;
256: if (in->base == NULL) return;
257: if (in->cur == NULL) return;
258: if (in->buf->buffer == NULL) return;
259:
260: CHECK_BUFFER(in);
261:
262: used = in->cur - in->buf->buffer->content;
263: if (used > INPUT_CHUNK) {
1.110 daniel 264: ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1.91 daniel 265: if (ret > 0) {
266: in->cur -= ret;
267: in->consumed += ret;
268: }
269: }
270:
271: CHECK_BUFFER(in);
272:
273: if (in->buf->buffer->use > INPUT_CHUNK) {
274: return;
275: }
276: xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
277: if (in->base != in->buf->buffer->content) {
278: /*
279: * the buffer has been realloced
280: */
281: index = in->cur - in->base;
282: in->base = in->buf->buffer->content;
283: in->cur = &in->buf->buffer->content[index];
284: }
285:
286: CHECK_BUFFER(in);
287: }
288:
1.45 daniel 289: /************************************************************************
290: * *
291: * Parser stacks related functions and macros *
292: * *
293: ************************************************************************/
1.79 daniel 294:
/*
 * Global defaults. Only the initial values are visible here (0, 0 and 1);
 * presumably they seed the corresponding options of newly created parser
 * contexts -- TODO confirm against the context initialisation code.
 */
295: int xmlSubstituteEntitiesDefaultValue = 0;
1.100 daniel 296: int xmlDoValidityCheckingDefaultValue = 0;
1.180 daniel 297: int xmlKeepBlanksDefaultValue = 1;
/* Forward declaration; the definition is not in this part of the file. */
1.135 daniel 298: xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
299: const xmlChar ** str);
1.79 daniel 300:
1.1 veillard 301: /*
1.40 daniel 302: * Generic function for accessing stacks in the Parser Context
1.1 veillard 303: */
304:
1.140 daniel 305: #define PUSH_AND_POP(scope, type, name) \
306: scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
1.31 daniel 307: if (ctxt->name##Nr >= ctxt->name##Max) { \
308: ctxt->name##Max *= 2; \
1.204 ! veillard 309: ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
1.40 daniel 310: ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
311: if (ctxt->name##Tab == NULL) { \
1.31 daniel 312: fprintf(stderr, "realloc failed !\n"); \
1.145 daniel 313: return(0); \
1.31 daniel 314: } \
315: } \
1.40 daniel 316: ctxt->name##Tab[ctxt->name##Nr] = value; \
317: ctxt->name = value; \
318: return(ctxt->name##Nr++); \
1.31 daniel 319: } \
1.140 daniel 320: scope type name##Pop(xmlParserCtxtPtr ctxt) { \
1.69 daniel 321: type ret; \
1.40 daniel 322: if (ctxt->name##Nr <= 0) return(0); \
323: ctxt->name##Nr--; \
1.50 daniel 324: if (ctxt->name##Nr > 0) \
325: ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
326: else \
327: ctxt->name = NULL; \
1.69 daniel 328: ret = ctxt->name##Tab[ctxt->name##Nr]; \
329: ctxt->name##Tab[ctxt->name##Nr] = 0; \
330: return(ret); \
1.31 daniel 331: } \
332:
1.140 daniel 333: PUSH_AND_POP(extern, xmlParserInputPtr, input)
334: PUSH_AND_POP(extern, xmlNodePtr, node)
335: PUSH_AND_POP(extern, xmlChar*, name)
1.40 daniel 336:
1.176 daniel 337: int spacePush(xmlParserCtxtPtr ctxt, int val) {
338: if (ctxt->spaceNr >= ctxt->spaceMax) {
339: ctxt->spaceMax *= 2;
1.204 ! veillard 340: ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1.176 daniel 341: ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
342: if (ctxt->spaceTab == NULL) {
343: fprintf(stderr, "realloc failed !\n");
344: return(0);
345: }
346: }
347: ctxt->spaceTab[ctxt->spaceNr] = val;
348: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
349: return(ctxt->spaceNr++);
350: }
351:
352: int spacePop(xmlParserCtxtPtr ctxt) {
353: int ret;
354: if (ctxt->spaceNr <= 0) return(0);
355: ctxt->spaceNr--;
356: if (ctxt->spaceNr > 0)
357: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
358: else
359: ctxt->space = NULL;
360: ret = ctxt->spaceTab[ctxt->spaceNr];
361: ctxt->spaceTab[ctxt->spaceNr] = -1;
362: return(ret);
363: }
364:
1.55 daniel 365: /*
366: * Macros for accessing the content. Those should be used only by the parser,
367: * and not exported.
368: *
369: * Dirty macros, i.e. one need to make assumption on the context to use them
370: *
1.123 daniel 371: * CUR_PTR return the current pointer to the xmlChar to be parsed.
1.151 daniel 372: * To be used with extreme caution since operations consuming
373: * characters may move the input buffer to a different location !
1.123 daniel 374: * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1.152 daniel 375: * in ISO-Latin or UTF-8.
1.151 daniel 376: * This should be used internally by the parser
1.55 daniel 377: * only to compare to ASCII values otherwise it would break when
378: * running with UTF-8 encoding.
1.123 daniel 379: * NXT(n) returns the n'th next xmlChar. Same as CUR it should be used only
1.55 daniel 380: * to compare on ASCII based substring.
1.123 daniel 381: * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1.55 daniel 382: * strings within the parser.
383: *
1.77 daniel 384: * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1.55 daniel 385: *
386: * NEXT Skip to the next character, this does the proper decoding
387: * in UTF-8 mode. It also pop-up unfinished entities on the fly.
1.77 daniel 388: * COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
1.155 daniel 389: * CUR_CHAR Return the current char as an int as well as its length.
1.55 daniel 390: */
1.45 daniel 391:
/*
 * All the macros below expect a variable named `ctxt` (the parser context)
 * to be in scope at the point of use.
 *
 * RAW yields -1 while ctxt->token is set, else the xmlChar under the cursor.
 * CUR yields ctxt->token while it is set, else the xmlChar under the cursor.
 */
1.152 daniel 392: #define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
1.97 daniel 393: #define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
1.55 daniel 394: #define NXT(val) ctxt->input->cur[(val)]
395: #define CUR_PTR ctxt->input->cur
1.154 daniel 396:
/*
 * SKIP(val): advance the cursor and ctxt->nbChars by val, then trigger the
 * PE reference / reference handlers if the cursor lands on '%' / '&', and
 * pop the input if it is exhausted and cannot be grown.
 * NOTE(review): expands to several statements and ends without ';' --
 * unsafe as the unbraced body of an if/else; `val` is evaluated twice.
 */
1.164 daniel 397: #define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val); \
398: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
1.168 daniel 399: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); \
400: if ((*ctxt->input->cur == 0) && \
401: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
402: xmlPopInput(ctxt)
1.164 daniel 403:
/*
 * SHRINK / GROW: call xmlParserInputShrink() / xmlParserInputGrow() on the
 * current input, then pop the input if it is exhausted and cannot be grown.
 * Same multi-statement caveat as SKIP.
 */
1.97 daniel 404: #define SHRINK xmlParserInputShrink(ctxt->input); \
405: if ((*ctxt->input->cur == 0) && \
406: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
407: xmlPopInput(ctxt)
408:
409: #define GROW xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
410: if ((*ctxt->input->cur == 0) && \
411: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
412: xmlPopInput(ctxt)
1.55 daniel 413:
/* NOTE(review): the next two definitions already end with ';'. */
1.155 daniel 414: #define SKIP_BLANKS xmlSkipBlankChars(ctxt);
1.154 daniel 415:
1.151 daniel 416: #define NEXT xmlNextChar(ctxt);
1.154 daniel 417:
/*
 * NEXTL(l): update line/column for the char under the cursor, clear
 * ctxt->token, advance the cursor by l bytes and trigger the '%' / '&'
 * handlers at the new position. Unlike SKIP it does not touch
 * ctxt->nbChars and does not pop an exhausted input.
 */
1.153 daniel 418: #define NEXTL(l) \
419: if (*(ctxt->input->cur) == '\n') { \
420: ctxt->input->line++; ctxt->input->col = 1; \
421: } else ctxt->input->col++; \
1.154 daniel 422: ctxt->token = 0; ctxt->input->cur += l; \
423: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
424: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
425:
/*
 * CUR_CHAR(l) / CUR_SCHAR(s, l): wrappers around xmlCurrentChar() and
 * xmlStringCurrentChar(); `l` must be an int lvalue, it receives the
 * length. The definitions already end with ';'.
 */
1.152 daniel 426: #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l);
1.162 daniel 427: #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l);
1.154 daniel 428:
/*
 * COPY_BUF(l,b,i,v): append char value v to buffer b at index i and
 * advance i; a length of 1 is stored directly, anything else goes through
 * xmlCopyChar(). No bound check is done here.
 * NOTE(review): bare if/else, unsafe inside an unbraced if/else.
 */
1.152 daniel 429: #define COPY_BUF(l,b,i,v) \
430: if (l == 1) b[i++] = (xmlChar) v; \
431: else i += xmlCopyChar(l,&b[i],v);
1.151 daniel 432:
433: /**
434: * xmlNextChar:
435: * @ctxt: the XML parser context
436: *
437: * Skip to the next char input char.
438: */
1.55 daniel 439:
1.151 daniel 440: void
441: xmlNextChar(xmlParserCtxtPtr ctxt) {
1.201 daniel 442: if (ctxt->instate == XML_PARSER_EOF)
443: return;
444:
1.176 daniel 445: /*
446: * TODO: 2.11 End-of-Line Handling
447: * the literal two-character sequence "#xD#xA" or a standalone
448: * literal #xD, an XML processor must pass to the application
449: * the single character #xA.
450: */
1.151 daniel 451: if (ctxt->token != 0) ctxt->token = 0;
452: else {
453: if ((*ctxt->input->cur == 0) &&
454: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
455: (ctxt->instate != XML_PARSER_COMMENT)) {
456: /*
457: * If we are at the end of the current entity and
458: * the context allows it, we pop consumed entities
459: * automatically.
460: * TODO: the auto closing should be blocked in other cases
461: */
462: xmlPopInput(ctxt);
463: } else {
464: if (*(ctxt->input->cur) == '\n') {
465: ctxt->input->line++; ctxt->input->col = 1;
466: } else ctxt->input->col++;
1.198 daniel 467: if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1.151 daniel 468: /*
469: * We are supposed to handle UTF8, check it's valid
470: * From rfc2044: encoding of the Unicode values on UTF-8:
471: *
472: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
473: * 0000 0000-0000 007F 0xxxxxxx
474: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
475: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
476: *
1.160 daniel 477: * Check for the 0x110000 limit too
1.151 daniel 478: */
479: const unsigned char *cur = ctxt->input->cur;
480: unsigned char c;
1.91 daniel 481:
1.151 daniel 482: c = *cur;
483: if (c & 0x80) {
484: if (cur[1] == 0)
485: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
486: if ((cur[1] & 0xc0) != 0x80)
487: goto encoding_error;
488: if ((c & 0xe0) == 0xe0) {
489: unsigned int val;
490:
491: if (cur[2] == 0)
492: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
493: if ((cur[2] & 0xc0) != 0x80)
494: goto encoding_error;
495: if ((c & 0xf0) == 0xf0) {
496: if (cur[3] == 0)
497: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
498: if (((c & 0xf8) != 0xf0) ||
499: ((cur[3] & 0xc0) != 0x80))
500: goto encoding_error;
501: /* 4-byte code */
502: ctxt->input->cur += 4;
503: val = (cur[0] & 0x7) << 18;
504: val |= (cur[1] & 0x3f) << 12;
505: val |= (cur[2] & 0x3f) << 6;
506: val |= cur[3] & 0x3f;
507: } else {
508: /* 3-byte code */
509: ctxt->input->cur += 3;
510: val = (cur[0] & 0xf) << 12;
511: val |= (cur[1] & 0x3f) << 6;
512: val |= cur[2] & 0x3f;
513: }
514: if (((val > 0xd7ff) && (val < 0xe000)) ||
515: ((val > 0xfffd) && (val < 0x10000)) ||
1.160 daniel 516: (val >= 0x110000)) {
1.151 daniel 517: if ((ctxt->sax != NULL) &&
518: (ctxt->sax->error != NULL))
519: ctxt->sax->error(ctxt->userData,
1.196 daniel 520: "Char 0x%X out of allowed range\n", val);
1.151 daniel 521: ctxt->errNo = XML_ERR_INVALID_ENCODING;
522: ctxt->wellFormed = 0;
1.180 daniel 523: ctxt->disableSAX = 1;
1.151 daniel 524: }
525: } else
526: /* 2-byte code */
527: ctxt->input->cur += 2;
528: } else
529: /* 1-byte code */
530: ctxt->input->cur++;
531: } else {
532: /*
533: * Assume it's a fixed lenght encoding (1) with
534: * a compatibke encoding for the ASCII set, since
535: * XML constructs only use < 128 chars
536: */
537: ctxt->input->cur++;
538: }
539: ctxt->nbChars++;
540: if (*ctxt->input->cur == 0)
541: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
542: }
543: }
1.154 daniel 544: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
545: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
1.168 daniel 546: if ((*ctxt->input->cur == 0) &&
547: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
548: xmlPopInput(ctxt);
1.151 daniel 549: return;
550: encoding_error:
551: /*
552: * If we detect an UTF8 error that probably mean that the
553: * input encoding didn't get properly advertized in the
554: * declaration header. Report the error and switch the encoding
555: * to ISO-Latin-1 (if you don't like this policy, just declare the
556: * encoding !)
557: */
1.198 daniel 558: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1.151 daniel 559: ctxt->sax->error(ctxt->userData,
560: "Input is not proper UTF-8, indicate encoding !\n");
1.198 daniel 561: ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
562: ctxt->input->cur[0], ctxt->input->cur[1],
563: ctxt->input->cur[2], ctxt->input->cur[3]);
564: }
1.151 daniel 565: ctxt->errNo = XML_ERR_INVALID_ENCODING;
566:
1.198 daniel 567: ctxt->charset = XML_CHAR_ENCODING_8859_1;
1.151 daniel 568: ctxt->input->cur++;
569: return;
570: }
1.42 daniel 571:
1.152 daniel 572: /**
573: * xmlCurrentChar:
574: * @ctxt: the XML parser context
575: * @len: pointer to the length of the char read
576: *
577: * The current char value, if using UTF-8 this may actaully span multiple
1.180 daniel 578: * bytes in the input buffer. Implement the end of line normalization:
579: * 2.11 End-of-Line Handling
580: * Wherever an external parsed entity or the literal entity value
581: * of an internal parsed entity contains either the literal two-character
582: * sequence "#xD#xA" or a standalone literal #xD, an XML processor
583: * must pass to the application the single character #xA.
584: * This behavior can conveniently be produced by normalizing all
585: * line breaks to #xA on input, before parsing.)
1.152 daniel 586: *
587: * Returns the current char value and its lenght
588: */
589:
590: int
591: xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1.201 daniel 592: if (ctxt->instate == XML_PARSER_EOF)
593: return(0);
594:
1.152 daniel 595: if (ctxt->token != 0) {
596: *len = 0;
597: return(ctxt->token);
598: }
1.198 daniel 599: if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1.152 daniel 600: /*
601: * We are supposed to handle UTF8, check it's valid
602: * From rfc2044: encoding of the Unicode values on UTF-8:
603: *
604: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
605: * 0000 0000-0000 007F 0xxxxxxx
606: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
607: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
608: *
1.160 daniel 609: * Check for the 0x110000 limit too
1.152 daniel 610: */
611: const unsigned char *cur = ctxt->input->cur;
612: unsigned char c;
613: unsigned int val;
614:
615: c = *cur;
616: if (c & 0x80) {
617: if (cur[1] == 0)
618: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
619: if ((cur[1] & 0xc0) != 0x80)
620: goto encoding_error;
621: if ((c & 0xe0) == 0xe0) {
622:
623: if (cur[2] == 0)
624: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
625: if ((cur[2] & 0xc0) != 0x80)
626: goto encoding_error;
627: if ((c & 0xf0) == 0xf0) {
628: if (cur[3] == 0)
629: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
630: if (((c & 0xf8) != 0xf0) ||
631: ((cur[3] & 0xc0) != 0x80))
632: goto encoding_error;
633: /* 4-byte code */
634: *len = 4;
635: val = (cur[0] & 0x7) << 18;
636: val |= (cur[1] & 0x3f) << 12;
637: val |= (cur[2] & 0x3f) << 6;
638: val |= cur[3] & 0x3f;
639: } else {
640: /* 3-byte code */
641: *len = 3;
642: val = (cur[0] & 0xf) << 12;
643: val |= (cur[1] & 0x3f) << 6;
644: val |= cur[2] & 0x3f;
645: }
646: } else {
647: /* 2-byte code */
648: *len = 2;
649: val = (cur[0] & 0x1f) << 6;
1.168 daniel 650: val |= cur[1] & 0x3f;
1.152 daniel 651: }
652: if (!IS_CHAR(val)) {
653: if ((ctxt->sax != NULL) &&
654: (ctxt->sax->error != NULL))
655: ctxt->sax->error(ctxt->userData,
1.196 daniel 656: "Char 0x%X out of allowed range\n", val);
1.152 daniel 657: ctxt->errNo = XML_ERR_INVALID_ENCODING;
658: ctxt->wellFormed = 0;
1.180 daniel 659: ctxt->disableSAX = 1;
1.152 daniel 660: }
661: return(val);
662: } else {
663: /* 1-byte code */
664: *len = 1;
1.180 daniel 665: if (*ctxt->input->cur == 0xD) {
666: if (ctxt->input->cur[1] == 0xA) {
667: ctxt->nbChars++;
668: ctxt->input->cur++;
669: }
670: return(0xA);
671: }
1.152 daniel 672: return((int) *ctxt->input->cur);
673: }
674: }
675: /*
676: * Assume it's a fixed lenght encoding (1) with
677: * a compatibke encoding for the ASCII set, since
678: * XML constructs only use < 128 chars
679: */
680: *len = 1;
1.180 daniel 681: if (*ctxt->input->cur == 0xD) {
682: if (ctxt->input->cur[1] == 0xA) {
683: ctxt->nbChars++;
684: ctxt->input->cur++;
685: }
686: return(0xA);
687: }
1.152 daniel 688: return((int) *ctxt->input->cur);
689: encoding_error:
690: /*
691: * If we detect an UTF8 error that probably mean that the
692: * input encoding didn't get properly advertized in the
693: * declaration header. Report the error and switch the encoding
694: * to ISO-Latin-1 (if you don't like this policy, just declare the
695: * encoding !)
696: */
1.198 daniel 697: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1.152 daniel 698: ctxt->sax->error(ctxt->userData,
699: "Input is not proper UTF-8, indicate encoding !\n");
1.198 daniel 700: ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
701: ctxt->input->cur[0], ctxt->input->cur[1],
702: ctxt->input->cur[2], ctxt->input->cur[3]);
703: }
1.152 daniel 704: ctxt->errNo = XML_ERR_INVALID_ENCODING;
705:
1.198 daniel 706: ctxt->charset = XML_CHAR_ENCODING_8859_1;
1.152 daniel 707: *len = 1;
708: return((int) *ctxt->input->cur);
709: }
710:
711: /**
1.162 daniel 712: * xmlStringCurrentChar:
713: * @ctxt: the XML parser context
714: * @cur: pointer to the beginning of the char
715: * @len: pointer to the length of the char read
716: *
717: * The current char value, if using UTF-8 this may actaully span multiple
718: * bytes in the input buffer.
719: *
720: * Returns the current char value and its lenght
721: */
722:
723: int
724: xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
1.198 daniel 725: if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1.162 daniel 726: /*
727: * We are supposed to handle UTF8, check it's valid
728: * From rfc2044: encoding of the Unicode values on UTF-8:
729: *
730: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
731: * 0000 0000-0000 007F 0xxxxxxx
732: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
733: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
734: *
735: * Check for the 0x110000 limit too
736: */
737: unsigned char c;
738: unsigned int val;
739:
740: c = *cur;
741: if (c & 0x80) {
742: if ((cur[1] & 0xc0) != 0x80)
743: goto encoding_error;
744: if ((c & 0xe0) == 0xe0) {
745:
746: if ((cur[2] & 0xc0) != 0x80)
747: goto encoding_error;
748: if ((c & 0xf0) == 0xf0) {
749: if (((c & 0xf8) != 0xf0) ||
750: ((cur[3] & 0xc0) != 0x80))
751: goto encoding_error;
752: /* 4-byte code */
753: *len = 4;
754: val = (cur[0] & 0x7) << 18;
755: val |= (cur[1] & 0x3f) << 12;
756: val |= (cur[2] & 0x3f) << 6;
757: val |= cur[3] & 0x3f;
758: } else {
759: /* 3-byte code */
760: *len = 3;
761: val = (cur[0] & 0xf) << 12;
762: val |= (cur[1] & 0x3f) << 6;
763: val |= cur[2] & 0x3f;
764: }
765: } else {
766: /* 2-byte code */
767: *len = 2;
768: val = (cur[0] & 0x1f) << 6;
769: val |= cur[2] & 0x3f;
770: }
771: if (!IS_CHAR(val)) {
772: if ((ctxt->sax != NULL) &&
773: (ctxt->sax->error != NULL))
774: ctxt->sax->error(ctxt->userData,
1.196 daniel 775: "Char 0x%X out of allowed range\n", val);
1.162 daniel 776: ctxt->errNo = XML_ERR_INVALID_ENCODING;
777: ctxt->wellFormed = 0;
1.180 daniel 778: ctxt->disableSAX = 1;
1.162 daniel 779: }
780: return(val);
781: } else {
782: /* 1-byte code */
783: *len = 1;
784: return((int) *cur);
785: }
786: }
787: /*
788: * Assume it's a fixed lenght encoding (1) with
789: * a compatibke encoding for the ASCII set, since
790: * XML constructs only use < 128 chars
791: */
792: *len = 1;
793: return((int) *cur);
794: encoding_error:
795: /*
796: * If we detect an UTF8 error that probably mean that the
797: * input encoding didn't get properly advertized in the
798: * declaration header. Report the error and switch the encoding
799: * to ISO-Latin-1 (if you don't like this policy, just declare the
800: * encoding !)
801: */
1.198 daniel 802: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1.162 daniel 803: ctxt->sax->error(ctxt->userData,
804: "Input is not proper UTF-8, indicate encoding !\n");
1.198 daniel 805: ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
806: ctxt->input->cur[0], ctxt->input->cur[1],
807: ctxt->input->cur[2], ctxt->input->cur[3]);
808: }
1.162 daniel 809: ctxt->errNo = XML_ERR_INVALID_ENCODING;
810:
811: *len = 1;
812: return((int) *cur);
813: }
814:
/**
 * xmlCopyChar:
 * @len:  the length of the char as read from the input, or zero to have it
 *        computed from @val
 * @out:  pointer to an array of xmlChar, with room for at least 4 xmlChars
 * @val:  the char value
 *
 * Append the char value in the array.
 * A @len of 1 stores @val as a single xmlChar, whatever its value. Any
 * other length stores the UTF-8 encoding of @val:
 *
 * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
 * 0000 0000-0000 007F   0xxxxxxx
 * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
 * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
 *
 * When @len is zero and @val is negative or not below 0x110000, an error
 * is printed on stderr and nothing is written.
 *
 * Returns the number of xmlChar actually written, which may differ from a
 *         non-zero @len that does not match the encoding of @val.
 */

int
xmlCopyChar(int len, xmlChar *out, int val) {
    if (len == 0) {
        if (val < 0) len = 0;
        else if (val < 0x80) len = 1;
        else if (val < 0x800) len = 2;
        else if (val < 0x10000) len = 3;
        else if (val < 0x110000) len = 4;
        if (len == 0) {
            fprintf(stderr, "Internal error, xmlCopyChar 0x%X out of bound\n",
                    val);
            return(0);
        }
    }
    if (len > 1) {
        xmlChar *start = out;
        int bits;

        if      (val <    0x80) {  *out++=  val;                bits= -6; }
        else if (val <   0x800) {  *out++= (val >>  6) | 0xC0;  bits=  0; }
        else if (val < 0x10000) {  *out++= (val >> 12) | 0xE0;  bits=  6; }
        else                    {  *out++= (val >> 18) | 0xF0;  bits= 12; }

        for ( ; bits >= 0; bits-= 6)
            *out++= ((val >> bits) & 0x3F) | 0x80 ;

        /* report what was written, the caller advances its index by it */
        return((int) (out - start));
    }
    *out = (xmlChar) val;
    return(1);
}
865:
866: /**
867: * xmlSkipBlankChars:
868: * @ctxt: the XML parser context
869: *
870: * skip all blanks character found at that point in the input streams.
871: * It pops up finished entities in the process if allowable at that point.
872: *
873: * Returns the number of space chars skipped
874: */
875:
/*
 * Implementation notes: `cur` caches the value of the CUR macro (the pending
 * token if any, else the xmlChar under the cursor); `res` counts the calls
 * to NEXT, i.e. the blanks skipped.
 */
876: int
877: xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
878: int cur, res = 0;
879:
880: do {
881: cur = CUR;
/* consume the run of blanks at the cursor */
882: while (IS_BLANK(cur)) {
883: NEXT;
884: cur = CUR;
885: res++;
886: }
/* at the end of a nested input (not the main one, and not in a comment): pop it and look again */
887: while ((cur == 0) && (ctxt->inputNr > 1) &&
888: (ctxt->instate != XML_PARSER_COMMENT)) {
889: xmlPopInput(ctxt);
890: cur = CUR;
891: }
892: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
893: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
/* NOTE(review): `cur` is not re-read after the two handlers above, so the loop only restarts when the pop loop stopped on a blank. */
894: } while (IS_BLANK(cur));
895: return(res);
1.152 daniel 896: }
897:
1.97 daniel 898: /************************************************************************
899: * *
900: * Commodity functions to handle entities processing *
901: * *
902: ************************************************************************/
1.40 daniel 903:
1.50 daniel 904: /**
905: * xmlPopInput:
906: * @ctxt: an XML parser context
907: *
1.40 daniel 908: * xmlPopInput: the current input pointed by ctxt->input came to an end
909: * pop it and return the next char.
1.45 daniel 910: *
1.123 daniel 911: * Returns the current xmlChar in the parser context
1.40 daniel 912: */
1.123 daniel 913: xmlChar
1.55 daniel 914: xmlPopInput(xmlParserCtxtPtr ctxt) {
1.40 daniel 915: if (ctxt->inputNr == 1) return(0); /* End of main Input */
1.69 daniel 916: xmlFreeInputStream(inputPop(ctxt));
1.97 daniel 917: if ((*ctxt->input->cur == 0) &&
918: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
919: return(xmlPopInput(ctxt));
1.40 daniel 920: return(CUR);
921: }
922:
1.50 daniel 923: /**
924: * xmlPushInput:
925: * @ctxt: an XML parser context
926: * @input: an XML parser input fragment (entity, XML fragment ...).
927: *
1.40 daniel 928: * xmlPushInput: switch to a new input stream which is stacked on top
929: * of the previous one(s).
930: */
1.55 daniel 931: void
932: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1.40 daniel 933: if (input == NULL) return;
934: inputPush(ctxt, input);
1.164 daniel 935: GROW;
1.40 daniel 936: }
937:
1.50 daniel 938: /**
1.69 daniel 939: * xmlFreeInputStream:
1.127 daniel 940: * @input: an xmlParserInputPtr
1.69 daniel 941: *
942: * Free up an input stream.
943: */
944: void
945: xmlFreeInputStream(xmlParserInputPtr input) {
946: if (input == NULL) return;
947:
1.119 daniel 948: if (input->filename != NULL) xmlFree((char *) input->filename);
949: if (input->directory != NULL) xmlFree((char *) input->directory);
1.164 daniel 950: if (input->encoding != NULL) xmlFree((char *) input->encoding);
1.165 daniel 951: if (input->version != NULL) xmlFree((char *) input->version);
1.69 daniel 952: if ((input->free != NULL) && (input->base != NULL))
1.123 daniel 953: input->free((xmlChar *) input->base);
1.93 veillard 954: if (input->buf != NULL)
955: xmlFreeParserInputBuffer(input->buf);
1.69 daniel 956: memset(input, -1, sizeof(xmlParserInput));
1.119 daniel 957: xmlFree(input);
1.69 daniel 958: }
959:
960: /**
1.96 daniel 961: * xmlNewInputStream:
962: * @ctxt: an XML parser context
963: *
964: * Create a new input stream structure
965: * Returns the new input stream or NULL
966: */
967: xmlParserInputPtr
968: xmlNewInputStream(xmlParserCtxtPtr ctxt) {
969: xmlParserInputPtr input;
970:
1.119 daniel 971: input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1.96 daniel 972: if (input == NULL) {
1.190 daniel 973: if (ctxt != NULL) {
974: ctxt->errNo = XML_ERR_NO_MEMORY;
975: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
976: ctxt->sax->error(ctxt->userData,
977: "malloc: couldn't allocate a new input stream\n");
978: ctxt->errNo = XML_ERR_NO_MEMORY;
979: }
1.96 daniel 980: return(NULL);
981: }
1.165 daniel 982: memset(input, 0, sizeof(xmlParserInput));
1.96 daniel 983: input->line = 1;
984: input->col = 1;
1.167 daniel 985: input->standalone = -1;
1.96 daniel 986: return(input);
987: }
988:
989: /**
1.190 daniel 990: * xmlNewIOInputStream:
991: * @ctxt: an XML parser context
992: * @input: an I/O Input
993: * @enc: the charset encoding if known
994: *
995: * Create a new input stream structure encapsulating the @input into
996: * a stream suitable for the parser.
997: *
998: * Returns the new input stream or NULL
999: */
1000: xmlParserInputPtr
1001: xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1002: xmlCharEncoding enc) {
1003: xmlParserInputPtr inputStream;
1004:
1005: inputStream = xmlNewInputStream(ctxt);
1006: if (inputStream == NULL) {
1007: return(NULL);
1008: }
1009: inputStream->filename = NULL;
1010: inputStream->buf = input;
1011: inputStream->base = inputStream->buf->buffer->content;
1012: inputStream->cur = inputStream->buf->buffer->content;
1013: if (enc != XML_CHAR_ENCODING_NONE) {
1014: xmlSwitchEncoding(ctxt, enc);
1015: }
1016:
1017: return(inputStream);
1018: }
1019:
1020: /**
1.50 daniel 1021: * xmlNewEntityInputStream:
1022: * @ctxt: an XML parser context
1023: * @entity: an Entity pointer
1024: *
1.82 daniel 1025: * Create a new input stream based on an xmlEntityPtr
1.113 daniel 1026: *
1027: * Returns the new input stream or NULL
1.45 daniel 1028: */
1.50 daniel 1029: xmlParserInputPtr
1030: xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 1031: xmlParserInputPtr input;
1032:
1033: if (entity == NULL) {
1.123 daniel 1034: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 1035: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1036: ctxt->sax->error(ctxt->userData,
1.45 daniel 1037: "internal: xmlNewEntityInputStream entity = NULL\n");
1.123 daniel 1038: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.50 daniel 1039: return(NULL);
1.45 daniel 1040: }
1041: if (entity->content == NULL) {
1.159 daniel 1042: switch (entity->etype) {
1.113 daniel 1043: case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1.123 daniel 1044: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.113 daniel 1045: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1046: ctxt->sax->error(ctxt->userData,
1047: "xmlNewEntityInputStream unparsed entity !\n");
1048: break;
1049: case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1050: case XML_EXTERNAL_PARAMETER_ENTITY:
1.116 daniel 1051: return(xmlLoadExternalEntity((char *) entity->SystemID,
1.142 daniel 1052: (char *) entity->ExternalID, ctxt));
1.113 daniel 1053: case XML_INTERNAL_GENERAL_ENTITY:
1054: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1055: ctxt->sax->error(ctxt->userData,
1056: "Internal entity %s without content !\n", entity->name);
1057: break;
1058: case XML_INTERNAL_PARAMETER_ENTITY:
1.123 daniel 1059: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.113 daniel 1060: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1061: ctxt->sax->error(ctxt->userData,
1062: "Internal parameter entity %s without content !\n", entity->name);
1063: break;
1064: case XML_INTERNAL_PREDEFINED_ENTITY:
1.123 daniel 1065: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.113 daniel 1066: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1067: ctxt->sax->error(ctxt->userData,
1068: "Predefined entity %s without content !\n", entity->name);
1069: break;
1070: }
1.50 daniel 1071: return(NULL);
1.45 daniel 1072: }
1.96 daniel 1073: input = xmlNewInputStream(ctxt);
1.45 daniel 1074: if (input == NULL) {
1.50 daniel 1075: return(NULL);
1.45 daniel 1076: }
1.156 daniel 1077: input->filename = (char *) entity->SystemID;
1.45 daniel 1078: input->base = entity->content;
1079: input->cur = entity->content;
1.140 daniel 1080: input->length = entity->length;
1.50 daniel 1081: return(input);
1.45 daniel 1082: }
1083:
1.59 daniel 1084: /**
1085: * xmlNewStringInputStream:
1086: * @ctxt: an XML parser context
1.96 daniel 1087: * @buffer: an memory buffer
1.59 daniel 1088: *
1089: * Create a new input stream based on a memory buffer.
1.68 daniel 1090: * Returns the new input stream
1.59 daniel 1091: */
1092: xmlParserInputPtr
1.123 daniel 1093: xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1.59 daniel 1094: xmlParserInputPtr input;
1095:
1.96 daniel 1096: if (buffer == NULL) {
1.123 daniel 1097: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 1098: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1099: ctxt->sax->error(ctxt->userData,
1.59 daniel 1100: "internal: xmlNewStringInputStream string = NULL\n");
1101: return(NULL);
1102: }
1.96 daniel 1103: input = xmlNewInputStream(ctxt);
1.59 daniel 1104: if (input == NULL) {
1105: return(NULL);
1106: }
1.96 daniel 1107: input->base = buffer;
1108: input->cur = buffer;
1.140 daniel 1109: input->length = xmlStrlen(buffer);
1.59 daniel 1110: return(input);
1111: }
1112:
1.76 daniel 1113: /**
1114: * xmlNewInputFromFile:
1115: * @ctxt: an XML parser context
1116: * @filename: the filename to use as entity
1117: *
1118: * Create a new input stream based on a file.
1119: *
1120: * Returns the new input stream or NULL in case of error
1121: */
1122: xmlParserInputPtr
1.79 daniel 1123: xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1.91 daniel 1124: xmlParserInputBufferPtr buf;
1.76 daniel 1125: xmlParserInputPtr inputStream;
1.111 daniel 1126: char *directory = NULL;
1.76 daniel 1127:
1.96 daniel 1128: if (ctxt == NULL) return(NULL);
1.91 daniel 1129: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1.94 daniel 1130: if (buf == NULL) {
1.140 daniel 1131: char name[XML_PARSER_BIG_BUFFER_SIZE];
1.106 daniel 1132:
1.94 daniel 1133: if ((ctxt->input != NULL) && (ctxt->input->directory != NULL)) {
1134: #ifdef WIN32
1135: sprintf(name, "%s\\%s", ctxt->input->directory, filename);
1136: #else
1137: sprintf(name, "%s/%s", ctxt->input->directory, filename);
1138: #endif
1139: buf = xmlParserInputBufferCreateFilename(name,
1140: XML_CHAR_ENCODING_NONE);
1.106 daniel 1141: if (buf != NULL)
1.142 daniel 1142: directory = xmlParserGetDirectory(name);
1.106 daniel 1143: }
1144: if ((buf == NULL) && (ctxt->directory != NULL)) {
1145: #ifdef WIN32
1146: sprintf(name, "%s\\%s", ctxt->directory, filename);
1147: #else
1148: sprintf(name, "%s/%s", ctxt->directory, filename);
1149: #endif
1150: buf = xmlParserInputBufferCreateFilename(name,
1151: XML_CHAR_ENCODING_NONE);
1152: if (buf != NULL)
1.142 daniel 1153: directory = xmlParserGetDirectory(name);
1.106 daniel 1154: }
1155: if (buf == NULL)
1.94 daniel 1156: return(NULL);
1157: }
1158: if (directory == NULL)
1159: directory = xmlParserGetDirectory(filename);
1.76 daniel 1160:
1.96 daniel 1161: inputStream = xmlNewInputStream(ctxt);
1.76 daniel 1162: if (inputStream == NULL) {
1.119 daniel 1163: if (directory != NULL) xmlFree((char *) directory);
1.76 daniel 1164: return(NULL);
1165: }
1166:
1.119 daniel 1167: inputStream->filename = xmlMemStrdup(filename);
1.94 daniel 1168: inputStream->directory = directory;
1.91 daniel 1169: inputStream->buf = buf;
1.76 daniel 1170:
1.91 daniel 1171: inputStream->base = inputStream->buf->buffer->content;
1172: inputStream->cur = inputStream->buf->buffer->content;
1.106 daniel 1173: if ((ctxt->directory == NULL) && (directory != NULL))
1.134 daniel 1174: ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
1.76 daniel 1175: return(inputStream);
1176: }
1177:
1.77 daniel 1178: /************************************************************************
1179: * *
1.97 daniel 1180: * Commodity functions to handle parser contexts *
1181: * *
1182: ************************************************************************/
1183:
1184: /**
1185: * xmlInitParserCtxt:
1186: * @ctxt: an XML parser context
1187: *
1188: * Initialize a parser context
1189: */
1190:
1191: void
1192: xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1193: {
1194: xmlSAXHandler *sax;
1195:
1.168 daniel 1196: xmlDefaultSAXHandlerInit();
1197:
1.119 daniel 1198: sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
1.97 daniel 1199: if (sax == NULL) {
1200: fprintf(stderr, "xmlInitParserCtxt: out of memory\n");
1201: }
1.180 daniel 1202: memset(sax, 0, sizeof(xmlSAXHandler));
1.97 daniel 1203:
1204: /* Allocate the Input stack */
1.119 daniel 1205: ctxt->inputTab = (xmlParserInputPtr *) xmlMalloc(5 * sizeof(xmlParserInputPtr));
1.97 daniel 1206: ctxt->inputNr = 0;
1207: ctxt->inputMax = 5;
1208: ctxt->input = NULL;
1.165 daniel 1209:
1.97 daniel 1210: ctxt->version = NULL;
1211: ctxt->encoding = NULL;
1212: ctxt->standalone = -1;
1.98 daniel 1213: ctxt->hasExternalSubset = 0;
1214: ctxt->hasPErefs = 0;
1.97 daniel 1215: ctxt->html = 0;
1.98 daniel 1216: ctxt->external = 0;
1.140 daniel 1217: ctxt->instate = XML_PARSER_START;
1.97 daniel 1218: ctxt->token = 0;
1.106 daniel 1219: ctxt->directory = NULL;
1.97 daniel 1220:
1221: /* Allocate the Node stack */
1.119 daniel 1222: ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
1.97 daniel 1223: ctxt->nodeNr = 0;
1224: ctxt->nodeMax = 10;
1225: ctxt->node = NULL;
1226:
1.140 daniel 1227: /* Allocate the Name stack */
1228: ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
1229: ctxt->nameNr = 0;
1230: ctxt->nameMax = 10;
1231: ctxt->name = NULL;
1232:
1.176 daniel 1233: /* Allocate the space stack */
1234: ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
1235: ctxt->spaceNr = 1;
1236: ctxt->spaceMax = 10;
1237: ctxt->spaceTab[0] = -1;
1238: ctxt->space = &ctxt->spaceTab[0];
1239:
1.160 daniel 1240: if (sax == NULL) {
1241: ctxt->sax = &xmlDefaultSAXHandler;
1242: } else {
1.97 daniel 1243: ctxt->sax = sax;
1244: memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
1245: }
1246: ctxt->userData = ctxt;
1247: ctxt->myDoc = NULL;
1248: ctxt->wellFormed = 1;
1.99 daniel 1249: ctxt->valid = 1;
1.100 daniel 1250: ctxt->validate = xmlDoValidityCheckingDefaultValue;
1.179 daniel 1251: ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
1.100 daniel 1252: ctxt->vctxt.userData = ctxt;
1.149 daniel 1253: if (ctxt->validate) {
1254: ctxt->vctxt.error = xmlParserValidityError;
1.160 daniel 1255: if (xmlGetWarningsDefaultValue == 0)
1256: ctxt->vctxt.warning = NULL;
1257: else
1258: ctxt->vctxt.warning = xmlParserValidityWarning;
1.180 daniel 1259: /* Allocate the Node stack */
1260: ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
1261: ctxt->vctxt.nodeNr = 0;
1262: ctxt->vctxt.nodeMax = 4;
1263: ctxt->vctxt.node = NULL;
1.149 daniel 1264: } else {
1265: ctxt->vctxt.error = NULL;
1266: ctxt->vctxt.warning = NULL;
1267: }
1.97 daniel 1268: ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1269: ctxt->record_info = 0;
1.135 daniel 1270: ctxt->nbChars = 0;
1.140 daniel 1271: ctxt->checkIndex = 0;
1.180 daniel 1272: ctxt->inSubset = 0;
1.140 daniel 1273: ctxt->errNo = XML_ERR_OK;
1.185 daniel 1274: ctxt->depth = 0;
1.198 daniel 1275: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.97 daniel 1276: xmlInitNodeInfoSeq(&ctxt->node_seq);
1277: }
1278:
1279: /**
1280: * xmlFreeParserCtxt:
1281: * @ctxt: an XML parser context
1282: *
1283: * Free all the memory used by a parser context. However the parsed
1284: * document in ctxt->myDoc is not freed.
1285: */
1286:
1287: void
1288: xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
1289: {
1290: xmlParserInputPtr input;
1.140 daniel 1291: xmlChar *oldname;
1.97 daniel 1292:
1293: if (ctxt == NULL) return;
1294:
1295: while ((input = inputPop(ctxt)) != NULL) {
1296: xmlFreeInputStream(input);
1297: }
1.140 daniel 1298: while ((oldname = namePop(ctxt)) != NULL) {
1299: xmlFree(oldname);
1300: }
1.176 daniel 1301: if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
1.140 daniel 1302: if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
1.119 daniel 1303: if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
1304: if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
1305: if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
1306: if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
1.165 daniel 1307: if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
1308: if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
1309: if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
1.180 daniel 1310: if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
1.97 daniel 1311: if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
1.119 daniel 1312: xmlFree(ctxt->sax);
1313: if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
1314: xmlFree(ctxt);
1.97 daniel 1315: }
1316:
1317: /**
1318: * xmlNewParserCtxt:
1319: *
1320: * Allocate and initialize a new parser context.
1321: *
1322: * Returns the xmlParserCtxtPtr or NULL
1323: */
1324:
1325: xmlParserCtxtPtr
1326: xmlNewParserCtxt()
1327: {
1328: xmlParserCtxtPtr ctxt;
1329:
1.119 daniel 1330: ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
1.97 daniel 1331: if (ctxt == NULL) {
1332: fprintf(stderr, "xmlNewParserCtxt : cannot allocate context\n");
1333: perror("malloc");
1334: return(NULL);
1335: }
1.165 daniel 1336: memset(ctxt, 0, sizeof(xmlParserCtxt));
1.97 daniel 1337: xmlInitParserCtxt(ctxt);
1338: return(ctxt);
1339: }
1340:
1341: /**
1342: * xmlClearParserCtxt:
1343: * @ctxt: an XML parser context
1344: *
1345: * Clear (release owned resources) and reinitialize a parser context
1346: */
1347:
1348: void
1349: xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1350: {
1351: xmlClearNodeInfoSeq(&ctxt->node_seq);
1352: xmlInitParserCtxt(ctxt);
1353: }
1354:
1355: /************************************************************************
1356: * *
1.77 daniel 1357: * Commodity functions to handle entities *
1358: * *
1359: ************************************************************************/
1360:
1.174 daniel 1361: /**
1362: * xmlCheckEntity:
1363: * @ctxt: an XML parser context
1364: * @content: the entity content string
1365: *
1366: * Parse an entity content and checks the WF constraints
1367: *
1368: */
1369:
1370: void
1371: xmlCheckEntity(xmlParserCtxtPtr ctxt, const xmlChar *content) {
1372: }
1.97 daniel 1373:
1374: /**
1375: * xmlParseCharRef:
1376: * @ctxt: an XML parser context
1377: *
1378: * parse Reference declarations
1379: *
1380: * [66] CharRef ::= '&#' [0-9]+ ';' |
1381: * '&#x' [0-9a-fA-F]+ ';'
1382: *
1.98 daniel 1383: * [ WFC: Legal Character ]
1384: * Characters referred to using character references must match the
1385: * production for Char.
1386: *
1.135 daniel 1387: * Returns the value parsed (as an int), 0 in case of error
1.77 daniel 1388: */
1.97 daniel 1389: int
1390: xmlParseCharRef(xmlParserCtxtPtr ctxt) {
1391: int val = 0;
1392:
1.111 daniel 1393: if (ctxt->token != 0) {
1394: val = ctxt->token;
1395: ctxt->token = 0;
1396: return(val);
1397: }
1.152 daniel 1398: if ((RAW == '&') && (NXT(1) == '#') &&
1.97 daniel 1399: (NXT(2) == 'x')) {
1400: SKIP(3);
1.152 daniel 1401: while (RAW != ';') {
1402: if ((RAW >= '0') && (RAW <= '9'))
1.97 daniel 1403: val = val * 16 + (CUR - '0');
1.152 daniel 1404: else if ((RAW >= 'a') && (RAW <= 'f'))
1.97 daniel 1405: val = val * 16 + (CUR - 'a') + 10;
1.152 daniel 1406: else if ((RAW >= 'A') && (RAW <= 'F'))
1.97 daniel 1407: val = val * 16 + (CUR - 'A') + 10;
1408: else {
1.123 daniel 1409: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1.97 daniel 1410: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1411: ctxt->sax->error(ctxt->userData,
1412: "xmlParseCharRef: invalid hexadecimal value\n");
1413: ctxt->wellFormed = 0;
1.180 daniel 1414: ctxt->disableSAX = 1;
1.97 daniel 1415: val = 0;
1416: break;
1417: }
1418: NEXT;
1419: }
1.164 daniel 1420: if (RAW == ';') {
1421: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1422: ctxt->nbChars ++;
1423: ctxt->input->cur++;
1424: }
1.152 daniel 1425: } else if ((RAW == '&') && (NXT(1) == '#')) {
1.97 daniel 1426: SKIP(2);
1.152 daniel 1427: while (RAW != ';') {
1428: if ((RAW >= '0') && (RAW <= '9'))
1.97 daniel 1429: val = val * 10 + (CUR - '0');
1430: else {
1.123 daniel 1431: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1.97 daniel 1432: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1433: ctxt->sax->error(ctxt->userData,
1434: "xmlParseCharRef: invalid decimal value\n");
1435: ctxt->wellFormed = 0;
1.180 daniel 1436: ctxt->disableSAX = 1;
1.97 daniel 1437: val = 0;
1438: break;
1439: }
1440: NEXT;
1441: }
1.164 daniel 1442: if (RAW == ';') {
1443: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1444: ctxt->nbChars ++;
1445: ctxt->input->cur++;
1446: }
1.97 daniel 1447: } else {
1.123 daniel 1448: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1.97 daniel 1449: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 1450: ctxt->sax->error(ctxt->userData,
1451: "xmlParseCharRef: invalid value\n");
1.97 daniel 1452: ctxt->wellFormed = 0;
1.180 daniel 1453: ctxt->disableSAX = 1;
1.97 daniel 1454: }
1.98 daniel 1455:
1.97 daniel 1456: /*
1.98 daniel 1457: * [ WFC: Legal Character ]
1458: * Characters referred to using character references must match the
1459: * production for Char.
1.97 daniel 1460: */
1461: if (IS_CHAR(val)) {
1462: return(val);
1463: } else {
1.123 daniel 1464: ctxt->errNo = XML_ERR_INVALID_CHAR;
1.97 daniel 1465: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.123 daniel 1466: ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
1.97 daniel 1467: val);
1468: ctxt->wellFormed = 0;
1.180 daniel 1469: ctxt->disableSAX = 1;
1.97 daniel 1470: }
1471: return(0);
1.77 daniel 1472: }
1473:
1.96 daniel 1474: /**
1.135 daniel 1475: * xmlParseStringCharRef:
1476: * @ctxt: an XML parser context
1477: * @str: a pointer to an index in the string
1478: *
1479: * parse Reference declarations, variant parsing from a string rather
1480: * than an an input flow.
1481: *
1482: * [66] CharRef ::= '&#' [0-9]+ ';' |
1483: * '&#x' [0-9a-fA-F]+ ';'
1484: *
1485: * [ WFC: Legal Character ]
1486: * Characters referred to using character references must match the
1487: * production for Char.
1488: *
1489: * Returns the value parsed (as an int), 0 in case of error, str will be
1490: * updated to the current value of the index
1491: */
1492: int
1493: xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1494: const xmlChar *ptr;
1495: xmlChar cur;
1496: int val = 0;
1497:
1498: if ((str == NULL) || (*str == NULL)) return(0);
1499: ptr = *str;
1500: cur = *ptr;
1.137 daniel 1501: if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1.135 daniel 1502: ptr += 3;
1503: cur = *ptr;
1504: while (cur != ';') {
1505: if ((cur >= '0') && (cur <= '9'))
1506: val = val * 16 + (cur - '0');
1507: else if ((cur >= 'a') && (cur <= 'f'))
1508: val = val * 16 + (cur - 'a') + 10;
1509: else if ((cur >= 'A') && (cur <= 'F'))
1510: val = val * 16 + (cur - 'A') + 10;
1511: else {
1512: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1513: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1514: ctxt->sax->error(ctxt->userData,
1.198 daniel 1515: "xmlParseStringCharRef: invalid hexadecimal value\n");
1.135 daniel 1516: ctxt->wellFormed = 0;
1.180 daniel 1517: ctxt->disableSAX = 1;
1.135 daniel 1518: val = 0;
1519: break;
1520: }
1521: ptr++;
1522: cur = *ptr;
1523: }
1524: if (cur == ';')
1525: ptr++;
1.145 daniel 1526: } else if ((cur == '&') && (ptr[1] == '#')){
1.135 daniel 1527: ptr += 2;
1528: cur = *ptr;
1529: while (cur != ';') {
1530: if ((cur >= '0') && (cur <= '9'))
1531: val = val * 10 + (cur - '0');
1532: else {
1533: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1534: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1535: ctxt->sax->error(ctxt->userData,
1.198 daniel 1536: "xmlParseStringCharRef: invalid decimal value\n");
1.135 daniel 1537: ctxt->wellFormed = 0;
1.180 daniel 1538: ctxt->disableSAX = 1;
1.135 daniel 1539: val = 0;
1540: break;
1541: }
1542: ptr++;
1543: cur = *ptr;
1544: }
1545: if (cur == ';')
1546: ptr++;
1547: } else {
1548: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1549: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1550: ctxt->sax->error(ctxt->userData,
1551: "xmlParseCharRef: invalid value\n");
1552: ctxt->wellFormed = 0;
1.180 daniel 1553: ctxt->disableSAX = 1;
1.135 daniel 1554: return(0);
1555: }
1556: *str = ptr;
1557:
1558: /*
1559: * [ WFC: Legal Character ]
1560: * Characters referred to using character references must match the
1561: * production for Char.
1562: */
1563: if (IS_CHAR(val)) {
1564: return(val);
1565: } else {
1566: ctxt->errNo = XML_ERR_INVALID_CHAR;
1567: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1568: ctxt->sax->error(ctxt->userData,
1569: "CharRef: invalid xmlChar value %d\n", val);
1570: ctxt->wellFormed = 0;
1.180 daniel 1571: ctxt->disableSAX = 1;
1.135 daniel 1572: }
1573: return(0);
1574: }
1575:
1576: /**
1.96 daniel 1577: * xmlParserHandleReference:
1578: * @ctxt: the parser context
1579: *
1.97 daniel 1580: * [67] Reference ::= EntityRef | CharRef
1581: *
1.96 daniel 1582: * [68] EntityRef ::= '&' Name ';'
1583: *
1.98 daniel 1584: * [ WFC: Entity Declared ]
1585: * the Name given in the entity reference must match that in an entity
1586: * declaration, except that well-formed documents need not declare any
1587: * of the following entities: amp, lt, gt, apos, quot.
1588: *
1589: * [ WFC: Parsed Entity ]
1590: * An entity reference must not contain the name of an unparsed entity
1591: *
1.97 daniel 1592: * [66] CharRef ::= '&#' [0-9]+ ';' |
1593: * '&#x' [0-9a-fA-F]+ ';'
1594: *
1.96 daniel 1595: * A PEReference may have been detectect in the current input stream
1596: * the handling is done accordingly to
1597: * http://www.w3.org/TR/REC-xml#entproc
1598: */
1599: void
1600: xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
1.97 daniel 1601: xmlParserInputPtr input;
1.123 daniel 1602: xmlChar *name;
1.97 daniel 1603: xmlEntityPtr ent = NULL;
1604:
1.126 daniel 1605: if (ctxt->token != 0) {
1606: return;
1607: }
1.152 daniel 1608: if (RAW != '&') return;
1.97 daniel 1609: GROW;
1.152 daniel 1610: if ((RAW == '&') && (NXT(1) == '#')) {
1.97 daniel 1611: switch(ctxt->instate) {
1.140 daniel 1612: case XML_PARSER_ENTITY_DECL:
1613: case XML_PARSER_PI:
1.109 daniel 1614: case XML_PARSER_CDATA_SECTION:
1.140 daniel 1615: case XML_PARSER_COMMENT:
1.168 daniel 1616: case XML_PARSER_SYSTEM_LITERAL:
1.140 daniel 1617: /* we just ignore it there */
1618: return;
1619: case XML_PARSER_START_TAG:
1.109 daniel 1620: return;
1.140 daniel 1621: case XML_PARSER_END_TAG:
1.97 daniel 1622: return;
1623: case XML_PARSER_EOF:
1.123 daniel 1624: ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
1.97 daniel 1625: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1626: ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
1627: ctxt->wellFormed = 0;
1.180 daniel 1628: ctxt->disableSAX = 1;
1.97 daniel 1629: return;
1630: case XML_PARSER_PROLOG:
1.140 daniel 1631: case XML_PARSER_START:
1632: case XML_PARSER_MISC:
1.123 daniel 1633: ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
1.97 daniel 1634: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1635: ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
1636: ctxt->wellFormed = 0;
1.180 daniel 1637: ctxt->disableSAX = 1;
1.97 daniel 1638: return;
1639: case XML_PARSER_EPILOG:
1.123 daniel 1640: ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
1.97 daniel 1641: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1642: ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
1643: ctxt->wellFormed = 0;
1.180 daniel 1644: ctxt->disableSAX = 1;
1.97 daniel 1645: return;
1646: case XML_PARSER_DTD:
1.123 daniel 1647: ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
1.97 daniel 1648: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1649: ctxt->sax->error(ctxt->userData,
1650: "CharRef are forbiden in DTDs!\n");
1651: ctxt->wellFormed = 0;
1.180 daniel 1652: ctxt->disableSAX = 1;
1.97 daniel 1653: return;
1654: case XML_PARSER_ENTITY_VALUE:
1655: /*
1656: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1657: * substitution here since we need the literal
1.97 daniel 1658: * entity value to be able to save the internal
1659: * subset of the document.
1660: * This will be handled by xmlDecodeEntities
1661: */
1662: return;
1663: case XML_PARSER_CONTENT:
1664: case XML_PARSER_ATTRIBUTE_VALUE:
1665: ctxt->token = xmlParseCharRef(ctxt);
1666: return;
1667: }
1668: return;
1669: }
1670:
1671: switch(ctxt->instate) {
1.109 daniel 1672: case XML_PARSER_CDATA_SECTION:
1673: return;
1.140 daniel 1674: case XML_PARSER_PI:
1.97 daniel 1675: case XML_PARSER_COMMENT:
1.168 daniel 1676: case XML_PARSER_SYSTEM_LITERAL:
1677: case XML_PARSER_CONTENT:
1.97 daniel 1678: return;
1.140 daniel 1679: case XML_PARSER_START_TAG:
1680: return;
1681: case XML_PARSER_END_TAG:
1682: return;
1.97 daniel 1683: case XML_PARSER_EOF:
1.123 daniel 1684: ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
1.97 daniel 1685: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1686: ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
1687: ctxt->wellFormed = 0;
1.180 daniel 1688: ctxt->disableSAX = 1;
1.97 daniel 1689: return;
1690: case XML_PARSER_PROLOG:
1.140 daniel 1691: case XML_PARSER_START:
1692: case XML_PARSER_MISC:
1.123 daniel 1693: ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
1.97 daniel 1694: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1695: ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
1696: ctxt->wellFormed = 0;
1.180 daniel 1697: ctxt->disableSAX = 1;
1.97 daniel 1698: return;
1699: case XML_PARSER_EPILOG:
1.123 daniel 1700: ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
1.97 daniel 1701: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1702: ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
1703: ctxt->wellFormed = 0;
1.180 daniel 1704: ctxt->disableSAX = 1;
1.97 daniel 1705: return;
1706: case XML_PARSER_ENTITY_VALUE:
1707: /*
1708: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1709: * substitution here since we need the literal
1.97 daniel 1710: * entity value to be able to save the internal
1711: * subset of the document.
1712: * This will be handled by xmlDecodeEntities
1713: */
1714: return;
1715: case XML_PARSER_ATTRIBUTE_VALUE:
1716: /*
1717: * NOTE: in the case of attributes values, we don't do the
1718: * substitution here unless we are in a mode where
1719: * the parser is explicitely asked to substitute
1720: * entities. The SAX callback is called with values
1721: * without entity substitution.
1722: * This will then be handled by xmlDecodeEntities
1723: */
1.113 daniel 1724: return;
1.97 daniel 1725: case XML_PARSER_ENTITY_DECL:
1726: /*
1727: * we just ignore it there
1728: * the substitution will be done once the entity is referenced
1729: */
1730: return;
1731: case XML_PARSER_DTD:
1.123 daniel 1732: ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
1.97 daniel 1733: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1734: ctxt->sax->error(ctxt->userData,
1735: "Entity references are forbiden in DTDs!\n");
1736: ctxt->wellFormed = 0;
1.180 daniel 1737: ctxt->disableSAX = 1;
1.97 daniel 1738: return;
1739: }
1740:
1741: NEXT;
1742: name = xmlScanName(ctxt);
1743: if (name == NULL) {
1.123 daniel 1744: ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
1.97 daniel 1745: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1746: ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
1747: ctxt->wellFormed = 0;
1.180 daniel 1748: ctxt->disableSAX = 1;
1.97 daniel 1749: ctxt->token = '&';
1750: return;
1751: }
1752: if (NXT(xmlStrlen(name)) != ';') {
1.123 daniel 1753: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.97 daniel 1754: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1755: ctxt->sax->error(ctxt->userData,
1756: "Entity reference: ';' expected\n");
1757: ctxt->wellFormed = 0;
1.180 daniel 1758: ctxt->disableSAX = 1;
1.97 daniel 1759: ctxt->token = '&';
1.119 daniel 1760: xmlFree(name);
1.97 daniel 1761: return;
1762: }
1763: SKIP(xmlStrlen(name) + 1);
1764: if (ctxt->sax != NULL) {
1765: if (ctxt->sax->getEntity != NULL)
1766: ent = ctxt->sax->getEntity(ctxt->userData, name);
1767: }
1.98 daniel 1768:
1769: /*
1770: * [ WFC: Entity Declared ]
1771: * the Name given in the entity reference must match that in an entity
1772: * declaration, except that well-formed documents need not declare any
1773: * of the following entities: amp, lt, gt, apos, quot.
1774: */
1.97 daniel 1775: if (ent == NULL)
1776: ent = xmlGetPredefinedEntity(name);
1777: if (ent == NULL) {
1.123 daniel 1778: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.97 daniel 1779: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1780: ctxt->sax->error(ctxt->userData,
1.98 daniel 1781: "Entity reference: entity %s not declared\n",
1782: name);
1.97 daniel 1783: ctxt->wellFormed = 0;
1.180 daniel 1784: ctxt->disableSAX = 1;
1.119 daniel 1785: xmlFree(name);
1.97 daniel 1786: return;
1787: }
1.98 daniel 1788:
1789: /*
1790: * [ WFC: Parsed Entity ]
1791: * An entity reference must not contain the name of an unparsed entity
1792: */
1.159 daniel 1793: if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.123 daniel 1794: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 1795: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1796: ctxt->sax->error(ctxt->userData,
1797: "Entity reference to unparsed entity %s\n", name);
1798: ctxt->wellFormed = 0;
1.180 daniel 1799: ctxt->disableSAX = 1;
1.98 daniel 1800: }
1801:
1.159 daniel 1802: if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
1.97 daniel 1803: ctxt->token = ent->content[0];
1.119 daniel 1804: xmlFree(name);
1.97 daniel 1805: return;
1806: }
1807: input = xmlNewEntityInputStream(ctxt, ent);
1808: xmlPushInput(ctxt, input);
1.119 daniel 1809: xmlFree(name);
1.96 daniel 1810: return;
1811: }
1812:
1813: /**
1814: * xmlParserHandlePEReference:
1815: * @ctxt: the parser context
1816: *
1817: * [69] PEReference ::= '%' Name ';'
1818: *
1.98 daniel 1819: * [ WFC: No Recursion ]
1820: * TODO A parsed entity must not contain a recursive
1821: * reference to itself, either directly or indirectly.
1822: *
1823: * [ WFC: Entity Declared ]
1824: * In a document without any DTD, a document with only an internal DTD
1825: * subset which contains no parameter entity references, or a document
1826: * with "standalone='yes'", ... ... The declaration of a parameter
1827: * entity must precede any reference to it...
1828: *
1829: * [ VC: Entity Declared ]
1830: * In a document with an external subset or external parameter entities
1831: * with "standalone='no'", ... ... The declaration of a parameter entity
1832: * must precede any reference to it...
1833: *
1834: * [ WFC: In DTD ]
1835: * Parameter-entity references may only appear in the DTD.
1836: * NOTE: misleading but this is handled.
1837: *
1838: * A PEReference may have been detected in the current input stream
1.96 daniel 1839: * the handling is done accordingly to
1840: * http://www.w3.org/TR/REC-xml#entproc
1841: * i.e.
1842: * - Included in literal in entity values
1843: * - Included as Parameter Entity reference within DTDs
1844: */
1845: void
1846: xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 1847: xmlChar *name;
1.96 daniel 1848: xmlEntityPtr entity = NULL;
1849: xmlParserInputPtr input;
1850:
1.126 daniel 1851: if (ctxt->token != 0) {
1852: return;
1853: }
1.152 daniel 1854: if (RAW != '%') return;
1.96 daniel 1855: switch(ctxt->instate) {
1.109 daniel 1856: case XML_PARSER_CDATA_SECTION:
1857: return;
1.97 daniel 1858: case XML_PARSER_COMMENT:
1859: return;
1.140 daniel 1860: case XML_PARSER_START_TAG:
1861: return;
1862: case XML_PARSER_END_TAG:
1863: return;
1.96 daniel 1864: case XML_PARSER_EOF:
1.123 daniel 1865: ctxt->errNo = XML_ERR_PEREF_AT_EOF;
1.96 daniel 1866: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1867: ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
1868: ctxt->wellFormed = 0;
1.180 daniel 1869: ctxt->disableSAX = 1;
1.96 daniel 1870: return;
1871: case XML_PARSER_PROLOG:
1.140 daniel 1872: case XML_PARSER_START:
1873: case XML_PARSER_MISC:
1.123 daniel 1874: ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
1.96 daniel 1875: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1876: ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
1877: ctxt->wellFormed = 0;
1.180 daniel 1878: ctxt->disableSAX = 1;
1.96 daniel 1879: return;
1.97 daniel 1880: case XML_PARSER_ENTITY_DECL:
1.96 daniel 1881: case XML_PARSER_CONTENT:
1882: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 1883: case XML_PARSER_PI:
1.168 daniel 1884: case XML_PARSER_SYSTEM_LITERAL:
1.96 daniel 1885: /* we just ignore it there */
1886: return;
1887: case XML_PARSER_EPILOG:
1.123 daniel 1888: ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
1.96 daniel 1889: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.97 daniel 1890: ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
1.96 daniel 1891: ctxt->wellFormed = 0;
1.180 daniel 1892: ctxt->disableSAX = 1;
1.96 daniel 1893: return;
1.97 daniel 1894: case XML_PARSER_ENTITY_VALUE:
1895: /*
1896: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1897: * substitution here since we need the literal
1.97 daniel 1898: * entity value to be able to save the internal
1899: * subset of the document.
1900: * This will be handled by xmlDecodeEntities
1901: */
1902: return;
1.96 daniel 1903: case XML_PARSER_DTD:
1.98 daniel 1904: /*
1905: * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1906: * In the internal DTD subset, parameter-entity references
1907: * can occur only where markup declarations can occur, not
1908: * within markup declarations.
1909: * In that case this is handled in xmlParseMarkupDecl
1910: */
1911: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1912: return;
1.96 daniel 1913: }
1914:
1915: NEXT;
1916: name = xmlParseName(ctxt);
1917: if (name == NULL) {
1.123 daniel 1918: ctxt->errNo = XML_ERR_PEREF_NO_NAME;
1.96 daniel 1919: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1920: ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
1921: ctxt->wellFormed = 0;
1.180 daniel 1922: ctxt->disableSAX = 1;
1.96 daniel 1923: } else {
1.152 daniel 1924: if (RAW == ';') {
1.96 daniel 1925: NEXT;
1.98 daniel 1926: if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1927: entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.96 daniel 1928: if (entity == NULL) {
1.98 daniel 1929:
1930: /*
1931: * [ WFC: Entity Declared ]
1932: * In a document without any DTD, a document with only an
1933: * internal DTD subset which contains no parameter entity
1934: * references, or a document with "standalone='yes'", ...
1935: * ... The declaration of a parameter entity must precede
1936: * any reference to it...
1937: */
1938: if ((ctxt->standalone == 1) ||
1939: ((ctxt->hasExternalSubset == 0) &&
1940: (ctxt->hasPErefs == 0))) {
1941: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1942: ctxt->sax->error(ctxt->userData,
1943: "PEReference: %%%s; not found\n", name);
1944: ctxt->wellFormed = 0;
1.180 daniel 1945: ctxt->disableSAX = 1;
1.98 daniel 1946: } else {
1947: /*
1948: * [ VC: Entity Declared ]
1949: * In a document with an external subset or external
1950: * parameter entities with "standalone='no'", ...
1951: * ... The declaration of a parameter entity must precede
1952: * any reference to it...
1953: */
1954: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1955: ctxt->sax->warning(ctxt->userData,
1956: "PEReference: %%%s; not found\n", name);
1957: ctxt->valid = 0;
1958: }
1.96 daniel 1959: } else {
1.159 daniel 1960: if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1961: (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
1.96 daniel 1962: /*
1.156 daniel 1963: * TODO !!! handle the extra spaces added before and after
1.96 daniel 1964: * c.f. http://www.w3.org/TR/REC-xml#as-PE
1965: */
1966: input = xmlNewEntityInputStream(ctxt, entity);
1967: xmlPushInput(ctxt, input);
1.164 daniel 1968: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
1969: (RAW == '<') && (NXT(1) == '?') &&
1970: (NXT(2) == 'x') && (NXT(3) == 'm') &&
1971: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 1972: xmlParseTextDecl(ctxt);
1.164 daniel 1973: }
1974: if (ctxt->token == 0)
1975: ctxt->token = ' ';
1.96 daniel 1976: } else {
1977: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1978: ctxt->sax->error(ctxt->userData,
1979: "xmlHandlePEReference: %s is not a parameter entity\n",
1980: name);
1981: ctxt->wellFormed = 0;
1.180 daniel 1982: ctxt->disableSAX = 1;
1.96 daniel 1983: }
1984: }
1985: } else {
1.123 daniel 1986: ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
1.96 daniel 1987: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1988: ctxt->sax->error(ctxt->userData,
1989: "xmlHandlePEReference: expecting ';'\n");
1990: ctxt->wellFormed = 0;
1.180 daniel 1991: ctxt->disableSAX = 1;
1.96 daniel 1992: }
1.119 daniel 1993: xmlFree(name);
1.97 daniel 1994: }
1995: }
1996:
/*
 * Macro used to grow the current buffer.
 *
 * Doubles buffer##_size and reallocates @buffer accordingly. The result
 * of xmlRealloc() goes through a temporary so that, on failure, the
 * original block is released instead of being leaked; the enclosing
 * function then returns NULL.
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_tmp;						\
    buffer##_size *= 2;							\
    buffer##_tmp = (xmlChar *)						\
		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (buffer##_tmp == NULL) {						\
	perror("realloc failed");					\
	xmlFree(buffer);						\
	return(NULL);							\
    }									\
    buffer = buffer##_tmp;						\
}
1.77 daniel 2009:
2010: /**
2011: * xmlDecodeEntities:
2012: * @ctxt: the parser context
2013: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2014: * @len: the len to decode (in bytes !), -1 for no size limit
1.123 daniel 2015: * @end: an end marker xmlChar, 0 if none
2016: * @end2: an end marker xmlChar, 0 if none
2017: * @end3: an end marker xmlChar, 0 if none
1.77 daniel 2018: *
2019: * [67] Reference ::= EntityRef | CharRef
2020: *
2021: * [69] PEReference ::= '%' Name ';'
2022: *
2023: * Returns A newly allocated string with the substitution done. The caller
2024: * must deallocate it !
2025: */
1.123 daniel 2026: xmlChar *
1.77 daniel 2027: xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
1.123 daniel 2028: xmlChar end, xmlChar end2, xmlChar end3) {
2029: xmlChar *buffer = NULL;
1.202 daniel 2030: unsigned int buffer_size = 0;
2031: unsigned int nbchars = 0;
1.78 daniel 2032:
1.123 daniel 2033: xmlChar *current = NULL;
1.77 daniel 2034: xmlEntityPtr ent;
2035: unsigned int max = (unsigned int) len;
1.161 daniel 2036: int c,l;
1.77 daniel 2037:
1.185 daniel 2038: if (ctxt->depth > 40) {
2039: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2040: ctxt->sax->error(ctxt->userData,
2041: "Detected entity reference loop\n");
2042: ctxt->wellFormed = 0;
2043: ctxt->disableSAX = 1;
2044: ctxt->errNo = XML_ERR_ENTITY_LOOP;
2045: return(NULL);
2046: }
2047:
1.77 daniel 2048: /*
2049: * allocate a translation buffer.
2050: */
1.140 daniel 2051: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.123 daniel 2052: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
1.77 daniel 2053: if (buffer == NULL) {
2054: perror("xmlDecodeEntities: malloc failed");
2055: return(NULL);
2056: }
2057:
1.78 daniel 2058: /*
2059: * Ok loop until we reach one of the ending char or a size limit.
2060: */
1.161 daniel 2061: c = CUR_CHAR(l);
2062: while ((nbchars < max) && (c != end) &&
2063: (c != end2) && (c != end3)) {
1.77 daniel 2064:
1.161 daniel 2065: if (c == 0) break;
2066: if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
1.98 daniel 2067: int val = xmlParseCharRef(ctxt);
1.161 daniel 2068: COPY_BUF(0,buffer,nbchars,val);
2069: NEXTL(l);
2070: } else if ((c == '&') && (ctxt->token != '&') &&
2071: (what & XML_SUBSTITUTE_REF)) {
1.98 daniel 2072: ent = xmlParseEntityRef(ctxt);
2073: if ((ent != NULL) &&
2074: (ctxt->replaceEntities != 0)) {
2075: current = ent->content;
2076: while (*current != 0) {
1.161 daniel 2077: buffer[nbchars++] = *current++;
2078: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.98 daniel 2079: growBuffer(buffer);
1.77 daniel 2080: }
2081: }
1.98 daniel 2082: } else if (ent != NULL) {
1.123 daniel 2083: const xmlChar *cur = ent->name;
1.98 daniel 2084:
1.161 daniel 2085: buffer[nbchars++] = '&';
2086: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.98 daniel 2087: growBuffer(buffer);
2088: }
1.161 daniel 2089: while (*cur != 0) {
2090: buffer[nbchars++] = *cur++;
2091: }
2092: buffer[nbchars++] = ';';
1.77 daniel 2093: }
1.161 daniel 2094: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1.97 daniel 2095: /*
1.77 daniel 2096: * a PEReference induce to switch the entity flow,
2097: * we break here to flush the current set of chars
2098: * parsed if any. We will be called back later.
1.97 daniel 2099: */
1.91 daniel 2100: if (nbchars != 0) break;
1.77 daniel 2101:
2102: xmlParsePEReference(ctxt);
1.79 daniel 2103:
1.97 daniel 2104: /*
1.79 daniel 2105: * Pop-up of finished entities.
1.97 daniel 2106: */
1.152 daniel 2107: while ((RAW == 0) && (ctxt->inputNr > 1))
1.79 daniel 2108: xmlPopInput(ctxt);
2109:
1.98 daniel 2110: break;
1.77 daniel 2111: } else {
1.161 daniel 2112: COPY_BUF(l,buffer,nbchars,c);
2113: NEXTL(l);
2114: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.86 daniel 2115: growBuffer(buffer);
2116: }
1.77 daniel 2117: }
1.161 daniel 2118: c = CUR_CHAR(l);
1.77 daniel 2119: }
1.161 daniel 2120: buffer[nbchars++] = 0;
1.77 daniel 2121: return(buffer);
2122: }
2123:
1.135 daniel 2124: /**
2125: * xmlStringDecodeEntities:
2126: * @ctxt: the parser context
2127: * @str: the input string
2128: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2129: * @end: an end marker xmlChar, 0 if none
2130: * @end2: an end marker xmlChar, 0 if none
2131: * @end3: an end marker xmlChar, 0 if none
2132: *
2133: * [67] Reference ::= EntityRef | CharRef
2134: *
2135: * [69] PEReference ::= '%' Name ';'
2136: *
2137: * Returns A newly allocated string with the substitution done. The caller
2138: * must deallocate it !
2139: */
2140: xmlChar *
2141: xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2142: xmlChar end, xmlChar end2, xmlChar end3) {
2143: xmlChar *buffer = NULL;
2144: int buffer_size = 0;
2145:
2146: xmlChar *current = NULL;
2147: xmlEntityPtr ent;
1.176 daniel 2148: int c,l;
2149: int nbchars = 0;
1.135 daniel 2150:
1.185 daniel 2151: if (ctxt->depth > 40) {
2152: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2153: ctxt->sax->error(ctxt->userData,
2154: "Detected entity reference loop\n");
2155: ctxt->wellFormed = 0;
2156: ctxt->disableSAX = 1;
2157: ctxt->errNo = XML_ERR_ENTITY_LOOP;
2158: return(NULL);
2159: }
2160:
1.135 daniel 2161: /*
2162: * allocate a translation buffer.
2163: */
1.140 daniel 2164: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.135 daniel 2165: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2166: if (buffer == NULL) {
2167: perror("xmlDecodeEntities: malloc failed");
2168: return(NULL);
2169: }
2170:
2171: /*
2172: * Ok loop until we reach one of the ending char or a size limit.
2173: */
1.176 daniel 2174: c = CUR_SCHAR(str, l);
2175: while ((c != 0) && (c != end) && (c != end2) && (c != end3)) {
1.135 daniel 2176:
1.176 daniel 2177: if (c == 0) break;
2178: if ((c == '&') && (str[1] == '#')) {
1.135 daniel 2179: int val = xmlParseStringCharRef(ctxt, &str);
1.176 daniel 2180: if (val != 0) {
2181: COPY_BUF(0,buffer,nbchars,val);
2182: }
2183: } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1.135 daniel 2184: ent = xmlParseStringEntityRef(ctxt, &str);
1.185 daniel 2185: if ((ent != NULL) && (ent->content != NULL)) {
2186: xmlChar *rep;
2187:
2188: ctxt->depth++;
2189: rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2190: 0, 0, 0);
2191: ctxt->depth--;
2192: if (rep != NULL) {
2193: current = rep;
2194: while (*current != 0) {
2195: buffer[nbchars++] = *current++;
2196: if (nbchars >
2197: buffer_size - XML_PARSER_BUFFER_SIZE) {
2198: growBuffer(buffer);
2199: }
1.135 daniel 2200: }
1.185 daniel 2201: xmlFree(rep);
1.135 daniel 2202: }
2203: } else if (ent != NULL) {
2204: int i = xmlStrlen(ent->name);
2205: const xmlChar *cur = ent->name;
2206:
1.176 daniel 2207: buffer[nbchars++] = '&';
2208: if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 2209: growBuffer(buffer);
2210: }
2211: for (;i > 0;i--)
1.176 daniel 2212: buffer[nbchars++] = *cur++;
2213: buffer[nbchars++] = ';';
1.135 daniel 2214: }
1.176 daniel 2215: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1.135 daniel 2216: ent = xmlParseStringPEReference(ctxt, &str);
2217: if (ent != NULL) {
1.185 daniel 2218: xmlChar *rep;
2219:
2220: ctxt->depth++;
2221: rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2222: 0, 0, 0);
2223: ctxt->depth--;
2224: if (rep != NULL) {
2225: current = rep;
2226: while (*current != 0) {
2227: buffer[nbchars++] = *current++;
2228: if (nbchars >
2229: buffer_size - XML_PARSER_BUFFER_SIZE) {
2230: growBuffer(buffer);
2231: }
1.135 daniel 2232: }
1.185 daniel 2233: xmlFree(rep);
1.135 daniel 2234: }
2235: }
2236: } else {
1.176 daniel 2237: COPY_BUF(l,buffer,nbchars,c);
2238: str += l;
2239: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 2240: growBuffer(buffer);
2241: }
2242: }
1.176 daniel 2243: c = CUR_SCHAR(str, l);
1.135 daniel 2244: }
1.176 daniel 2245: buffer[nbchars++] = 0;
1.135 daniel 2246: return(buffer);
2247: }
2248:
1.1 veillard 2249:
1.28 daniel 2250: /************************************************************************
2251: * *
1.75 daniel 2252: * Commodity functions to handle encodings *
2253: * *
2254: ************************************************************************/
2255:
1.172 daniel 2256: /*
2257: * xmlCheckLanguageID
2258: * @lang: pointer to the string value
2259: *
2260: * Checks that the value conforms to the LanguageID production:
2261: *
2262: * [33] LanguageID ::= Langcode ('-' Subcode)*
2263: * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2264: * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2265: * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2266: * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2267: * [38] Subcode ::= ([a-z] | [A-Z])+
2268: *
2269: * Returns 1 if correct 0 otherwise
2270: **/
2271: int
2272: xmlCheckLanguageID(const xmlChar *lang) {
2273: const xmlChar *cur = lang;
2274:
2275: if (cur == NULL)
2276: return(0);
2277: if (((cur[0] == 'i') && (cur[1] == '-')) ||
2278: ((cur[0] == 'I') && (cur[1] == '-'))) {
2279: /*
2280: * IANA code
2281: */
2282: cur += 2;
2283: while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2284: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2285: cur++;
2286: } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2287: ((cur[0] == 'X') && (cur[1] == '-'))) {
2288: /*
2289: * User code
2290: */
2291: cur += 2;
2292: while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2293: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2294: cur++;
2295: } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2296: ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2297: /*
2298: * ISO639
2299: */
2300: cur++;
2301: if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2302: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2303: cur++;
2304: else
2305: return(0);
2306: } else
2307: return(0);
2308: while (cur[0] != 0) {
2309: if (cur[0] != '-')
2310: return(0);
2311: cur++;
2312: if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2313: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2314: cur++;
2315: else
2316: return(0);
2317: while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2318: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2319: cur++;
2320: }
2321: return(1);
2322: }
2323:
1.75 daniel 2324: /**
2325: * xmlSwitchEncoding:
2326: * @ctxt: the parser context
1.124 daniel 2327: * @enc: the encoding value (number)
1.75 daniel 2328: *
2329: * change the input functions when discovering the character encoding
2330: * of a given entity.
1.193 daniel 2331: *
2332: * Returns 0 in case of success, -1 otherwise
1.75 daniel 2333: */
1.193 daniel 2334: int
1.75 daniel 2335: xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
2336: {
1.156 daniel 2337: xmlCharEncodingHandlerPtr handler;
2338:
1.193 daniel 2339: switch (enc) {
2340: case XML_CHAR_ENCODING_ERROR:
2341: ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
2342: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2343: ctxt->sax->error(ctxt->userData, "encoding unknown\n");
2344: ctxt->wellFormed = 0;
2345: ctxt->disableSAX = 1;
2346: break;
2347: case XML_CHAR_ENCODING_NONE:
2348: /* let's assume it's UTF-8 without the XML decl */
1.198 daniel 2349: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2350: return(0);
2351: case XML_CHAR_ENCODING_UTF8:
2352: /* default encoding, no conversion should be needed */
1.198 daniel 2353: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2354: return(0);
2355: default:
2356: break;
2357: }
1.156 daniel 2358: handler = xmlGetCharEncodingHandler(enc);
1.193 daniel 2359: if (handler == NULL) {
2360: /*
2361: * Default handlers.
2362: */
2363: switch (enc) {
2364: case XML_CHAR_ENCODING_ERROR:
2365: ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
2366: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2367: ctxt->sax->error(ctxt->userData, "encoding unknown\n");
2368: ctxt->wellFormed = 0;
2369: ctxt->disableSAX = 1;
1.198 daniel 2370: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2371: break;
2372: case XML_CHAR_ENCODING_NONE:
2373: /* let's assume it's UTF-8 without the XML decl */
1.198 daniel 2374: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2375: return(0);
2376: case XML_CHAR_ENCODING_UTF8:
2377: /* default encoding, no conversion should be needed */
1.198 daniel 2378: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2379: return(0);
2380: case XML_CHAR_ENCODING_UTF16LE:
2381: break;
2382: case XML_CHAR_ENCODING_UTF16BE:
2383: break;
2384: case XML_CHAR_ENCODING_UCS4LE:
2385: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2386: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2387: ctxt->sax->error(ctxt->userData,
2388: "char encoding USC4 little endian not supported\n");
2389: break;
2390: case XML_CHAR_ENCODING_UCS4BE:
2391: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2392: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2393: ctxt->sax->error(ctxt->userData,
2394: "char encoding USC4 big endian not supported\n");
2395: break;
2396: case XML_CHAR_ENCODING_EBCDIC:
2397: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2398: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2399: ctxt->sax->error(ctxt->userData,
2400: "char encoding EBCDIC not supported\n");
2401: break;
2402: case XML_CHAR_ENCODING_UCS4_2143:
2403: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2404: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2405: ctxt->sax->error(ctxt->userData,
2406: "char encoding UCS4 2143 not supported\n");
2407: break;
2408: case XML_CHAR_ENCODING_UCS4_3412:
2409: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2410: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2411: ctxt->sax->error(ctxt->userData,
2412: "char encoding UCS4 3412 not supported\n");
2413: break;
2414: case XML_CHAR_ENCODING_UCS2:
2415: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2416: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2417: ctxt->sax->error(ctxt->userData,
2418: "char encoding UCS2 not supported\n");
2419: break;
2420: case XML_CHAR_ENCODING_8859_1:
2421: case XML_CHAR_ENCODING_8859_2:
2422: case XML_CHAR_ENCODING_8859_3:
2423: case XML_CHAR_ENCODING_8859_4:
2424: case XML_CHAR_ENCODING_8859_5:
2425: case XML_CHAR_ENCODING_8859_6:
2426: case XML_CHAR_ENCODING_8859_7:
2427: case XML_CHAR_ENCODING_8859_8:
2428: case XML_CHAR_ENCODING_8859_9:
1.195 daniel 2429: /*
1.203 veillard 2430: * We used to keep the internal content in the
2431: * document encoding however this turns being unmaintainable
2432: * So xmlGetCharEncodingHandler() will return non-null
2433: * values for this now.
1.195 daniel 2434: */
2435: if ((ctxt->inputNr == 1) &&
2436: (ctxt->encoding == NULL) &&
2437: (ctxt->input->encoding != NULL)) {
2438: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
2439: }
1.198 daniel 2440: ctxt->charset = enc;
1.195 daniel 2441: return(0);
1.193 daniel 2442: case XML_CHAR_ENCODING_2022_JP:
2443: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2444: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2445: ctxt->sax->error(ctxt->userData,
2446: "char encoding ISO-2022-JPnot supported\n");
2447: break;
2448: case XML_CHAR_ENCODING_SHIFT_JIS:
2449: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2450: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2451: ctxt->sax->error(ctxt->userData,
2452: "char encoding Shift_JIS not supported\n");
2453: break;
2454: case XML_CHAR_ENCODING_EUC_JP:
2455: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2456: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2457: ctxt->sax->error(ctxt->userData,
2458: "char encoding EUC-JPnot supported\n");
2459: break;
2460: }
2461: }
2462: if (handler == NULL)
2463: return(-1);
1.198 daniel 2464: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2465: return(xmlSwitchToEncoding(ctxt, handler));
2466: }
2467:
2468: /**
2469: * xmlSwitchToEncoding:
2470: * @ctxt: the parser context
2471: * @handler: the encoding handler
2472: *
2473: * change the input functions when discovering the character encoding
2474: * of a given entity.
2475: *
2476: * Returns 0 in case of success, -1 otherwise
2477: */
2478: int
2479: xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
2480: {
1.194 daniel 2481: int nbchars;
2482:
1.156 daniel 2483: if (handler != NULL) {
2484: if (ctxt->input != NULL) {
2485: if (ctxt->input->buf != NULL) {
2486: if (ctxt->input->buf->encoder != NULL) {
1.193 daniel 2487: if (ctxt->input->buf->encoder == handler)
2488: return(0);
1.197 daniel 2489: /*
2490: * Note: this is a bit dangerous, but that's what it
2491: * takes to use nearly compatible signature for different
2492: * encodings.
2493: */
2494: xmlCharEncCloseFunc(ctxt->input->buf->encoder);
2495: ctxt->input->buf->encoder = handler;
2496: return(0);
1.156 daniel 2497: }
2498: ctxt->input->buf->encoder = handler;
2499:
2500: /*
1.194 daniel 2501: * Is there already some content down the pipe to convert ?
1.156 daniel 2502: */
2503: if ((ctxt->input->buf->buffer != NULL) &&
2504: (ctxt->input->buf->buffer->use > 0)) {
2505: int processed;
2506:
2507: /*
2508: * Specific handling of the Byte Order Mark for
2509: * UTF-16
2510: */
1.195 daniel 2511: if ((handler->name != NULL) &&
2512: (!strcmp(handler->name, "UTF-16LE")) &&
1.156 daniel 2513: (ctxt->input->cur[0] == 0xFF) &&
2514: (ctxt->input->cur[1] == 0xFE)) {
1.194 daniel 2515: ctxt->input->cur += 2;
1.156 daniel 2516: }
1.195 daniel 2517: if ((handler->name != NULL) &&
2518: (!strcmp(handler->name, "UTF-16BE")) &&
1.156 daniel 2519: (ctxt->input->cur[0] == 0xFE) &&
2520: (ctxt->input->cur[1] == 0xFF)) {
1.194 daniel 2521: ctxt->input->cur += 2;
1.156 daniel 2522: }
2523:
2524: /*
1.194 daniel 2525: * Shring the current input buffer.
2526: * Move it as the raw buffer and create a new input buffer
1.156 daniel 2527: */
2528: processed = ctxt->input->cur - ctxt->input->base;
1.194 daniel 2529: xmlBufferShrink(ctxt->input->buf->buffer, processed);
2530: ctxt->input->buf->raw = ctxt->input->buf->buffer;
2531: ctxt->input->buf->buffer = xmlBufferCreate();
2532:
2533: /*
1.197 daniel 2534: * convert just enough to get
2535: * '<?xml version="1.0" encoding="xxx"?>'
2536: * parsed with the autodetected encoding
2537: * into the parser reading buffer.
1.194 daniel 2538: */
1.197 daniel 2539: nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
2540: ctxt->input->buf->buffer,
2541: ctxt->input->buf->raw);
1.194 daniel 2542: if (nbchars < 0) {
2543: fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
2544: return(-1);
1.156 daniel 2545: }
1.194 daniel 2546: ctxt->input->base =
2547: ctxt->input->cur = ctxt->input->buf->buffer->content;
1.156 daniel 2548: }
1.193 daniel 2549: return(0);
1.156 daniel 2550: } else {
2551: if (ctxt->input->length == 0) {
2552: /*
2553: * When parsing a static memory array one must know the
2554: * size to be able to convert the buffer.
2555: */
2556: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2557: ctxt->sax->error(ctxt->userData,
2558: "xmlSwitchEncoding : no input\n");
1.193 daniel 2559: return(-1);
1.156 daniel 2560: } else {
1.194 daniel 2561: int processed;
2562:
2563: /*
2564: * Shring the current input buffer.
2565: * Move it as the raw buffer and create a new input buffer
2566: */
2567: processed = ctxt->input->cur - ctxt->input->base;
2568: ctxt->input->buf->raw = xmlBufferCreate();
2569: xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
2570: ctxt->input->length - processed);
2571: ctxt->input->buf->buffer = xmlBufferCreate();
1.156 daniel 2572:
2573: /*
1.194 daniel 2574: * convert as much as possible of the raw input
2575: * to the parser reading buffer.
2576: */
2577: nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
2578: ctxt->input->buf->buffer,
2579: ctxt->input->buf->raw);
2580: if (nbchars < 0) {
2581: fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
2582: return(-1);
1.156 daniel 2583: }
1.194 daniel 2584:
1.156 daniel 2585: /*
2586: * Conversion succeeded, get rid of the old buffer
2587: */
2588: if ((ctxt->input->free != NULL) &&
2589: (ctxt->input->base != NULL))
2590: ctxt->input->free((xmlChar *) ctxt->input->base);
1.194 daniel 2591: ctxt->input->base =
2592: ctxt->input->cur = ctxt->input->buf->buffer->content;
1.156 daniel 2593: }
2594: }
2595: } else {
2596: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2597: ctxt->sax->error(ctxt->userData,
2598: "xmlSwitchEncoding : no input\n");
1.193 daniel 2599: return(-1);
1.156 daniel 2600: }
1.195 daniel 2601: /*
2602: * The parsing is now done in UTF8 natively
2603: */
1.198 daniel 2604: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2605: } else
2606: return(-1);
2607: return(0);
1.156 daniel 2608:
1.75 daniel 2609: }
2610:
2611: /************************************************************************
2612: * *
1.123 daniel 2613: * Commodity functions to handle xmlChars *
1.28 daniel 2614: * *
2615: ************************************************************************/
2616:
1.50 daniel 2617: /**
2618: * xmlStrndup:
1.123 daniel 2619: * @cur: the input xmlChar *
1.50 daniel 2620: * @len: the len of @cur
2621: *
1.123 daniel 2622: * a strndup for array of xmlChar's
1.68 daniel 2623: *
1.123 daniel 2624: * Returns a new xmlChar * or NULL
1.1 veillard 2625: */
1.123 daniel 2626: xmlChar *
2627: xmlStrndup(const xmlChar *cur, int len) {
1.135 daniel 2628: xmlChar *ret;
2629:
2630: if ((cur == NULL) || (len < 0)) return(NULL);
1.204 ! veillard 2631: ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1.1 veillard 2632: if (ret == NULL) {
1.86 daniel 2633: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 2634: (len + 1) * (long)sizeof(xmlChar));
1.1 veillard 2635: return(NULL);
2636: }
1.123 daniel 2637: memcpy(ret, cur, len * sizeof(xmlChar));
1.1 veillard 2638: ret[len] = 0;
2639: return(ret);
2640: }
2641:
1.50 daniel 2642: /**
2643: * xmlStrdup:
1.123 daniel 2644: * @cur: the input xmlChar *
1.50 daniel 2645: *
1.152 daniel 2646: * a strdup for array of xmlChar's. Since they are supposed to be
2647: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2648: * a termination mark of '0'.
1.68 daniel 2649: *
1.123 daniel 2650: * Returns a new xmlChar * or NULL
1.1 veillard 2651: */
1.123 daniel 2652: xmlChar *
2653: xmlStrdup(const xmlChar *cur) {
2654: const xmlChar *p = cur;
1.1 veillard 2655:
1.135 daniel 2656: if (cur == NULL) return(NULL);
1.152 daniel 2657: while (*p != 0) p++;
1.1 veillard 2658: return(xmlStrndup(cur, p - cur));
2659: }
2660:
1.50 daniel 2661: /**
2662: * xmlCharStrndup:
2663: * @cur: the input char *
2664: * @len: the len of @cur
2665: *
1.123 daniel 2666: * a strndup for char's to xmlChar's
1.68 daniel 2667: *
1.123 daniel 2668: * Returns a new xmlChar * or NULL
1.45 daniel 2669: */
2670:
1.123 daniel 2671: xmlChar *
1.55 daniel 2672: xmlCharStrndup(const char *cur, int len) {
1.45 daniel 2673: int i;
1.135 daniel 2674: xmlChar *ret;
2675:
2676: if ((cur == NULL) || (len < 0)) return(NULL);
1.204 ! veillard 2677: ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1.45 daniel 2678: if (ret == NULL) {
1.86 daniel 2679: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 2680: (len + 1) * (long)sizeof(xmlChar));
1.45 daniel 2681: return(NULL);
2682: }
2683: for (i = 0;i < len;i++)
1.123 daniel 2684: ret[i] = (xmlChar) cur[i];
1.45 daniel 2685: ret[len] = 0;
2686: return(ret);
2687: }
2688:
1.50 daniel 2689: /**
2690: * xmlCharStrdup:
2691: * @cur: the input char *
2692: * @len: the len of @cur
2693: *
1.123 daniel 2694: * a strdup for char's to xmlChar's
1.68 daniel 2695: *
1.123 daniel 2696: * Returns a new xmlChar * or NULL
1.45 daniel 2697: */
2698:
1.123 daniel 2699: xmlChar *
1.55 daniel 2700: xmlCharStrdup(const char *cur) {
1.45 daniel 2701: const char *p = cur;
2702:
1.135 daniel 2703: if (cur == NULL) return(NULL);
1.45 daniel 2704: while (*p != '\0') p++;
2705: return(xmlCharStrndup(cur, p - cur));
2706: }
2707:
1.50 daniel 2708: /**
2709: * xmlStrcmp:
1.123 daniel 2710: * @str1: the first xmlChar *
2711: * @str2: the second xmlChar *
1.50 daniel 2712: *
1.123 daniel 2713: * a strcmp for xmlChar's
1.68 daniel 2714: *
2715: * Returns the integer result of the comparison
1.14 veillard 2716: */
2717:
1.55 daniel 2718: int
1.123 daniel 2719: xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1.14 veillard 2720: register int tmp;
2721:
1.135 daniel 2722: if ((str1 == NULL) && (str2 == NULL)) return(0);
2723: if (str1 == NULL) return(-1);
2724: if (str2 == NULL) return(1);
1.14 veillard 2725: do {
2726: tmp = *str1++ - *str2++;
2727: if (tmp != 0) return(tmp);
2728: } while ((*str1 != 0) && (*str2 != 0));
2729: return (*str1 - *str2);
2730: }
2731:
1.50 daniel 2732: /**
2733: * xmlStrncmp:
1.123 daniel 2734: * @str1: the first xmlChar *
2735: * @str2: the second xmlChar *
1.50 daniel 2736: * @len: the max comparison length
2737: *
1.123 daniel 2738: * a strncmp for xmlChar's
1.68 daniel 2739: *
2740: * Returns the integer result of the comparison
1.14 veillard 2741: */
2742:
1.55 daniel 2743: int
1.123 daniel 2744: xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1.14 veillard 2745: register int tmp;
2746:
2747: if (len <= 0) return(0);
1.135 daniel 2748: if ((str1 == NULL) && (str2 == NULL)) return(0);
2749: if (str1 == NULL) return(-1);
2750: if (str2 == NULL) return(1);
1.14 veillard 2751: do {
2752: tmp = *str1++ - *str2++;
2753: if (tmp != 0) return(tmp);
2754: len--;
2755: if (len <= 0) return(0);
2756: } while ((*str1 != 0) && (*str2 != 0));
2757: return (*str1 - *str2);
2758: }
2759:
1.50 daniel 2760: /**
2761: * xmlStrchr:
1.123 daniel 2762: * @str: the xmlChar * array
2763: * @val: the xmlChar to search
1.50 daniel 2764: *
1.123 daniel 2765: * a strchr for xmlChar's
1.68 daniel 2766: *
1.123 daniel 2767: * Returns the xmlChar * for the first occurence or NULL.
1.14 veillard 2768: */
2769:
1.123 daniel 2770: const xmlChar *
2771: xmlStrchr(const xmlChar *str, xmlChar val) {
1.135 daniel 2772: if (str == NULL) return(NULL);
1.14 veillard 2773: while (*str != 0) {
1.123 daniel 2774: if (*str == val) return((xmlChar *) str);
1.14 veillard 2775: str++;
2776: }
2777: return(NULL);
1.89 daniel 2778: }
2779:
2780: /**
2781: * xmlStrstr:
1.123 daniel 2782: * @str: the xmlChar * array (haystack)
2783: * @val: the xmlChar to search (needle)
1.89 daniel 2784: *
1.123 daniel 2785: * a strstr for xmlChar's
1.89 daniel 2786: *
1.123 daniel 2787: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 2788: */
2789:
1.123 daniel 2790: const xmlChar *
2791: xmlStrstr(const xmlChar *str, xmlChar *val) {
1.89 daniel 2792: int n;
2793:
2794: if (str == NULL) return(NULL);
2795: if (val == NULL) return(NULL);
2796: n = xmlStrlen(val);
2797:
2798: if (n == 0) return(str);
2799: while (*str != 0) {
2800: if (*str == *val) {
1.123 daniel 2801: if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1.89 daniel 2802: }
2803: str++;
2804: }
2805: return(NULL);
2806: }
2807:
2808: /**
2809: * xmlStrsub:
1.123 daniel 2810: * @str: the xmlChar * array (haystack)
1.89 daniel 2811: * @start: the index of the first char (zero based)
2812: * @len: the length of the substring
2813: *
2814: * Extract a substring of a given string
2815: *
1.123 daniel 2816: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 2817: */
2818:
1.123 daniel 2819: xmlChar *
2820: xmlStrsub(const xmlChar *str, int start, int len) {
1.89 daniel 2821: int i;
2822:
2823: if (str == NULL) return(NULL);
2824: if (start < 0) return(NULL);
1.90 daniel 2825: if (len < 0) return(NULL);
1.89 daniel 2826:
2827: for (i = 0;i < start;i++) {
2828: if (*str == 0) return(NULL);
2829: str++;
2830: }
2831: if (*str == 0) return(NULL);
2832: return(xmlStrndup(str, len));
1.14 veillard 2833: }
1.28 daniel 2834:
1.50 daniel 2835: /**
2836: * xmlStrlen:
1.123 daniel 2837: * @str: the xmlChar * array
1.50 daniel 2838: *
1.127 daniel 2839: * length of a xmlChar's string
1.68 daniel 2840: *
1.123 daniel 2841: * Returns the number of xmlChar contained in the ARRAY.
1.45 daniel 2842: */
2843:
1.55 daniel 2844: int
1.123 daniel 2845: xmlStrlen(const xmlChar *str) {
1.45 daniel 2846: int len = 0;
2847:
2848: if (str == NULL) return(0);
2849: while (*str != 0) {
2850: str++;
2851: len++;
2852: }
2853: return(len);
2854: }
2855:
1.50 daniel 2856: /**
2857: * xmlStrncat:
1.123 daniel 2858: * @cur: the original xmlChar * array
2859: * @add: the xmlChar * array added
1.50 daniel 2860: * @len: the length of @add
2861: *
1.123 daniel 2862: * a strncat for array of xmlChar's
1.68 daniel 2863: *
1.123 daniel 2864: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 2865: */
2866:
1.123 daniel 2867: xmlChar *
2868: xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1.45 daniel 2869: int size;
1.123 daniel 2870: xmlChar *ret;
1.45 daniel 2871:
2872: if ((add == NULL) || (len == 0))
2873: return(cur);
2874: if (cur == NULL)
2875: return(xmlStrndup(add, len));
2876:
2877: size = xmlStrlen(cur);
1.204 ! veillard 2878: ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1.45 daniel 2879: if (ret == NULL) {
1.86 daniel 2880: fprintf(stderr, "xmlStrncat: realloc of %ld byte failed\n",
1.123 daniel 2881: (size + len + 1) * (long)sizeof(xmlChar));
1.45 daniel 2882: return(cur);
2883: }
1.123 daniel 2884: memcpy(&ret[size], add, len * sizeof(xmlChar));
1.45 daniel 2885: ret[size + len] = 0;
2886: return(ret);
2887: }
2888:
1.50 daniel 2889: /**
2890: * xmlStrcat:
1.123 daniel 2891: * @cur: the original xmlChar * array
2892: * @add: the xmlChar * array added
1.50 daniel 2893: *
1.152 daniel 2894: * a strcat for array of xmlChar's. Since they are supposed to be
2895: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2896: * a termination mark of '0'.
1.68 daniel 2897: *
1.123 daniel 2898: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 2899: */
1.123 daniel 2900: xmlChar *
2901: xmlStrcat(xmlChar *cur, const xmlChar *add) {
2902: const xmlChar *p = add;
1.45 daniel 2903:
2904: if (add == NULL) return(cur);
2905: if (cur == NULL)
2906: return(xmlStrdup(add));
2907:
1.152 daniel 2908: while (*p != 0) p++;
1.45 daniel 2909: return(xmlStrncat(cur, add, p - add));
2910: }
2911:
2912: /************************************************************************
2913: * *
2914: * Commodity functions, cleanup needed ? *
2915: * *
2916: ************************************************************************/
2917:
1.50 daniel 2918: /**
2919: * areBlanks:
2920: * @ctxt: an XML parser context
1.123 daniel 2921: * @str: a xmlChar *
1.50 daniel 2922: * @len: the size of @str
2923: *
1.45 daniel 2924: * Is this a sequence of blank chars that one can ignore ?
1.50 daniel 2925: *
1.68 daniel 2926: * Returns 1 if ignorable 0 otherwise.
1.45 daniel 2927: */
2928:
1.123 daniel 2929: static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1.104 daniel 2930: int i, ret;
1.45 daniel 2931: xmlNodePtr lastChild;
2932:
1.176 daniel 2933: /*
2934: * Check for xml:space value.
2935: */
2936: if (*(ctxt->space) == 1)
2937: return(0);
2938:
2939: /*
2940: * Check that the string is made of blanks
2941: */
1.45 daniel 2942: for (i = 0;i < len;i++)
2943: if (!(IS_BLANK(str[i]))) return(0);
2944:
1.176 daniel 2945: /*
2946: * Look if the element is mixed content in the Dtd if available
2947: */
1.104 daniel 2948: if (ctxt->myDoc != NULL) {
2949: ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2950: if (ret == 0) return(1);
2951: if (ret == 1) return(0);
2952: }
1.176 daniel 2953:
1.104 daniel 2954: /*
1.176 daniel 2955: * Otherwise, heuristic :-\
1.104 daniel 2956: */
1.179 daniel 2957: if (ctxt->keepBlanks)
2958: return(0);
2959: if (RAW != '<') return(0);
2960: if (ctxt->node == NULL) return(0);
2961: if ((ctxt->node->children == NULL) &&
2962: (RAW == '<') && (NXT(1) == '/')) return(0);
2963:
1.45 daniel 2964: lastChild = xmlGetLastChild(ctxt->node);
2965: if (lastChild == NULL) {
2966: if (ctxt->node->content != NULL) return(0);
2967: } else if (xmlNodeIsText(lastChild))
2968: return(0);
1.157 daniel 2969: else if ((ctxt->node->children != NULL) &&
2970: (xmlNodeIsText(ctxt->node->children)))
1.104 daniel 2971: return(0);
1.45 daniel 2972: return(1);
2973: }
2974:
1.50 daniel 2975: /**
2976: * xmlHandleEntity:
2977: * @ctxt: an XML parser context
2978: * @entity: an XML entity pointer.
2979: *
2980: * Default handling of defined entities, when should we define a new input
1.45 daniel 2981: * stream ? When do we just handle that as a set of chars ?
1.99 daniel 2982: *
2983: * OBSOLETE: to be removed at some point.
1.45 daniel 2984: */
2985:
1.55 daniel 2986: void
2987: xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 2988: int len;
1.50 daniel 2989: xmlParserInputPtr input;
1.45 daniel 2990:
2991: if (entity->content == NULL) {
1.123 daniel 2992: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 2993: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2994: ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
1.45 daniel 2995: entity->name);
1.59 daniel 2996: ctxt->wellFormed = 0;
1.180 daniel 2997: ctxt->disableSAX = 1;
1.45 daniel 2998: return;
2999: }
3000: len = xmlStrlen(entity->content);
3001: if (len <= 2) goto handle_as_char;
3002:
3003: /*
3004: * Redefine its content as an input stream.
3005: */
1.50 daniel 3006: input = xmlNewEntityInputStream(ctxt, entity);
3007: xmlPushInput(ctxt, input);
1.45 daniel 3008: return;
3009:
3010: handle_as_char:
3011: /*
3012: * Just handle the content as a set of chars.
3013: */
1.171 daniel 3014: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3015: (ctxt->sax->characters != NULL))
1.74 daniel 3016: ctxt->sax->characters(ctxt->userData, entity->content, len);
1.45 daniel 3017:
3018: }
3019:
3020: /*
3021: * Forward definition for recursive behaviour.
3022: */
1.77 daniel 3023: void xmlParsePEReference(xmlParserCtxtPtr ctxt);
3024: void xmlParseReference(xmlParserCtxtPtr ctxt);
1.45 daniel 3025:
1.28 daniel 3026: /************************************************************************
3027: * *
3028: * Extra stuff for namespace support *
3029: * Relates to http://www.w3.org/TR/WD-xml-names *
3030: * *
3031: ************************************************************************/
3032:
1.50 daniel 3033: /**
3034: * xmlNamespaceParseNCName:
3035: * @ctxt: an XML parser context
3036: *
3037: * parse an XML namespace name.
1.28 daniel 3038: *
3039: * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
3040: *
3041: * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3042: * CombiningChar | Extender
1.68 daniel 3043: *
3044: * Returns the namespace name or NULL
1.28 daniel 3045: */
3046:
1.123 daniel 3047: xmlChar *
1.55 daniel 3048: xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
1.152 daniel 3049: xmlChar buf[XML_MAX_NAMELEN + 5];
3050: int len = 0, l;
3051: int cur = CUR_CHAR(l);
1.28 daniel 3052:
1.156 daniel 3053: /* load first the value of the char !!! */
1.152 daniel 3054: if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
1.28 daniel 3055:
1.152 daniel 3056: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
3057: (cur == '.') || (cur == '-') ||
3058: (cur == '_') ||
3059: (IS_COMBINING(cur)) ||
3060: (IS_EXTENDER(cur))) {
3061: COPY_BUF(l,buf,len,cur);
3062: NEXTL(l);
3063: cur = CUR_CHAR(l);
1.91 daniel 3064: if (len >= XML_MAX_NAMELEN) {
3065: fprintf(stderr,
3066: "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
1.152 daniel 3067: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
3068: (cur == '.') || (cur == '-') ||
3069: (cur == '_') ||
3070: (IS_COMBINING(cur)) ||
3071: (IS_EXTENDER(cur))) {
3072: NEXTL(l);
3073: cur = CUR_CHAR(l);
3074: }
1.91 daniel 3075: break;
3076: }
3077: }
3078: return(xmlStrndup(buf, len));
1.28 daniel 3079: }
3080:
1.50 daniel 3081: /**
3082: * xmlNamespaceParseQName:
3083: * @ctxt: an XML parser context
1.123 daniel 3084: * @prefix: a xmlChar **
1.50 daniel 3085: *
3086: * parse an XML qualified name
1.28 daniel 3087: *
3088: * [NS 5] QName ::= (Prefix ':')? LocalPart
3089: *
3090: * [NS 6] Prefix ::= NCName
3091: *
3092: * [NS 7] LocalPart ::= NCName
1.68 daniel 3093: *
1.127 daniel 3094: * Returns the local part, and prefix is updated
1.50 daniel 3095: * to get the Prefix if any.
1.28 daniel 3096: */
3097:
1.123 daniel 3098: xmlChar *
3099: xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, xmlChar **prefix) {
3100: xmlChar *ret = NULL;
1.28 daniel 3101:
3102: *prefix = NULL;
3103: ret = xmlNamespaceParseNCName(ctxt);
1.152 daniel 3104: if (RAW == ':') {
1.28 daniel 3105: *prefix = ret;
1.40 daniel 3106: NEXT;
1.28 daniel 3107: ret = xmlNamespaceParseNCName(ctxt);
3108: }
3109:
3110: return(ret);
3111: }
3112:
1.50 daniel 3113: /**
1.72 daniel 3114: * xmlSplitQName:
1.162 daniel 3115: * @ctxt: an XML parser context
1.72 daniel 3116: * @name: an XML parser context
1.123 daniel 3117: * @prefix: a xmlChar **
1.72 daniel 3118: *
3119: * parse an XML qualified name string
3120: *
3121: * [NS 5] QName ::= (Prefix ':')? LocalPart
3122: *
3123: * [NS 6] Prefix ::= NCName
3124: *
3125: * [NS 7] LocalPart ::= NCName
3126: *
1.127 daniel 3127: * Returns the local part, and prefix is updated
1.72 daniel 3128: * to get the Prefix if any.
3129: */
3130:
1.123 daniel 3131: xmlChar *
1.162 daniel 3132: xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
3133: xmlChar buf[XML_MAX_NAMELEN + 5];
3134: int len = 0;
1.123 daniel 3135: xmlChar *ret = NULL;
3136: const xmlChar *cur = name;
1.162 daniel 3137: int c,l;
1.72 daniel 3138:
3139: *prefix = NULL;
1.113 daniel 3140:
3141: /* xml: prefix is not really a namespace */
3142: if ((cur[0] == 'x') && (cur[1] == 'm') &&
3143: (cur[2] == 'l') && (cur[3] == ':'))
3144: return(xmlStrdup(name));
3145:
1.162 daniel 3146: /* nasty but valid */
3147: if (cur[0] == ':')
3148: return(xmlStrdup(name));
3149:
3150: c = CUR_SCHAR(cur, l);
3151: if (!IS_LETTER(c) && (c != '_')) return(NULL);
1.72 daniel 3152:
1.162 daniel 3153: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3154: (c == '.') || (c == '-') ||
3155: (c == '_') ||
3156: (IS_COMBINING(c)) ||
3157: (IS_EXTENDER(c))) {
3158: COPY_BUF(l,buf,len,c);
3159: cur += l;
3160: c = CUR_SCHAR(cur, l);
3161: }
1.72 daniel 3162:
1.162 daniel 3163: ret = xmlStrndup(buf, len);
1.72 daniel 3164:
1.162 daniel 3165: if (c == ':') {
3166: cur += l;
1.163 daniel 3167: c = CUR_SCHAR(cur, l);
1.162 daniel 3168: if (!IS_LETTER(c) && (c != '_')) return(ret);
1.72 daniel 3169: *prefix = ret;
1.162 daniel 3170: len = 0;
1.72 daniel 3171:
1.162 daniel 3172: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3173: (c == '.') || (c == '-') ||
3174: (c == '_') ||
3175: (IS_COMBINING(c)) ||
3176: (IS_EXTENDER(c))) {
3177: COPY_BUF(l,buf,len,c);
3178: cur += l;
3179: c = CUR_SCHAR(cur, l);
3180: }
1.72 daniel 3181:
1.162 daniel 3182: ret = xmlStrndup(buf, len);
1.72 daniel 3183: }
3184:
3185: return(ret);
3186: }
3187: /**
1.50 daniel 3188: * xmlNamespaceParseNSDef:
3189: * @ctxt: an XML parser context
3190: *
3191: * parse a namespace prefix declaration
1.28 daniel 3192: *
3193: * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
3194: *
3195: * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
1.68 daniel 3196: *
3197: * Returns the namespace name
1.28 daniel 3198: */
3199:
1.123 daniel 3200: xmlChar *
1.55 daniel 3201: xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
1.123 daniel 3202: xmlChar *name = NULL;
1.28 daniel 3203:
1.152 daniel 3204: if ((RAW == 'x') && (NXT(1) == 'm') &&
1.40 daniel 3205: (NXT(2) == 'l') && (NXT(3) == 'n') &&
3206: (NXT(4) == 's')) {
3207: SKIP(5);
1.152 daniel 3208: if (RAW == ':') {
1.40 daniel 3209: NEXT;
1.28 daniel 3210: name = xmlNamespaceParseNCName(ctxt);
3211: }
3212: }
1.39 daniel 3213: return(name);
1.28 daniel 3214: }
3215:
1.50 daniel 3216: /**
3217: * xmlParseQuotedString:
3218: * @ctxt: an XML parser context
3219: *
1.45 daniel 3220: * [OLD] Parse and return a string between quotes or doublequotes
1.110 daniel 3221: * To be removed at next drop of binary compatibility
1.68 daniel 3222: *
3223: * Returns the string parser or NULL.
1.45 daniel 3224: */
1.123 daniel 3225: xmlChar *
1.55 daniel 3226: xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
1.135 daniel 3227: xmlChar *buf = NULL;
1.152 daniel 3228: int len = 0,l;
1.140 daniel 3229: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3230: int c;
1.45 daniel 3231:
1.135 daniel 3232: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3233: if (buf == NULL) {
3234: fprintf(stderr, "malloc of %d byte failed\n", size);
3235: return(NULL);
3236: }
1.152 daniel 3237: if (RAW == '"') {
1.45 daniel 3238: NEXT;
1.152 daniel 3239: c = CUR_CHAR(l);
1.135 daniel 3240: while (IS_CHAR(c) && (c != '"')) {
1.152 daniel 3241: if (len + 5 >= size) {
1.135 daniel 3242: size *= 2;
1.204 ! veillard 3243: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 3244: if (buf == NULL) {
3245: fprintf(stderr, "realloc of %d byte failed\n", size);
3246: return(NULL);
3247: }
3248: }
1.152 daniel 3249: COPY_BUF(l,buf,len,c);
3250: NEXTL(l);
3251: c = CUR_CHAR(l);
1.135 daniel 3252: }
3253: if (c != '"') {
1.123 daniel 3254: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 3255: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3256: ctxt->sax->error(ctxt->userData,
3257: "String not closed \"%.50s\"\n", buf);
1.59 daniel 3258: ctxt->wellFormed = 0;
1.180 daniel 3259: ctxt->disableSAX = 1;
1.55 daniel 3260: } else {
1.45 daniel 3261: NEXT;
3262: }
1.152 daniel 3263: } else if (RAW == '\''){
1.45 daniel 3264: NEXT;
1.135 daniel 3265: c = CUR;
3266: while (IS_CHAR(c) && (c != '\'')) {
3267: if (len + 1 >= size) {
3268: size *= 2;
1.204 ! veillard 3269: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 3270: if (buf == NULL) {
3271: fprintf(stderr, "realloc of %d byte failed\n", size);
3272: return(NULL);
3273: }
3274: }
3275: buf[len++] = c;
3276: NEXT;
3277: c = CUR;
3278: }
1.152 daniel 3279: if (RAW != '\'') {
1.123 daniel 3280: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 3281: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3282: ctxt->sax->error(ctxt->userData,
3283: "String not closed \"%.50s\"\n", buf);
1.59 daniel 3284: ctxt->wellFormed = 0;
1.180 daniel 3285: ctxt->disableSAX = 1;
1.55 daniel 3286: } else {
1.45 daniel 3287: NEXT;
3288: }
3289: }
1.135 daniel 3290: return(buf);
1.45 daniel 3291: }
3292:
1.50 daniel 3293: /**
3294: * xmlParseNamespace:
3295: * @ctxt: an XML parser context
3296: *
1.45 daniel 3297: * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
3298: *
3299: * This is what the older xml-name Working Draft specified, a bunch of
3300: * other stuff may still rely on it, so support is still here as
1.127 daniel 3301: * if it was declared on the root of the Tree:-(
1.110 daniel 3302: *
3303: * To be removed at next drop of binary compatibility
1.45 daniel 3304: */
3305:
1.55 daniel 3306: void
3307: xmlParseNamespace(xmlParserCtxtPtr ctxt) {
1.123 daniel 3308: xmlChar *href = NULL;
3309: xmlChar *prefix = NULL;
1.45 daniel 3310: int garbage = 0;
3311:
3312: /*
3313: * We just skipped "namespace" or "xml:namespace"
3314: */
3315: SKIP_BLANKS;
3316:
1.153 daniel 3317: while (IS_CHAR(RAW) && (RAW != '>')) {
1.45 daniel 3318: /*
3319: * We can have "ns" or "prefix" attributes
3320: * Old encoding as 'href' or 'AS' attributes is still supported
3321: */
1.152 daniel 3322: if ((RAW == 'n') && (NXT(1) == 's')) {
1.45 daniel 3323: garbage = 0;
3324: SKIP(2);
3325: SKIP_BLANKS;
3326:
1.152 daniel 3327: if (RAW != '=') continue;
1.45 daniel 3328: NEXT;
3329: SKIP_BLANKS;
3330:
3331: href = xmlParseQuotedString(ctxt);
3332: SKIP_BLANKS;
1.152 daniel 3333: } else if ((RAW == 'h') && (NXT(1) == 'r') &&
1.45 daniel 3334: (NXT(2) == 'e') && (NXT(3) == 'f')) {
3335: garbage = 0;
3336: SKIP(4);
3337: SKIP_BLANKS;
3338:
1.152 daniel 3339: if (RAW != '=') continue;
1.45 daniel 3340: NEXT;
3341: SKIP_BLANKS;
3342:
3343: href = xmlParseQuotedString(ctxt);
3344: SKIP_BLANKS;
1.152 daniel 3345: } else if ((RAW == 'p') && (NXT(1) == 'r') &&
1.45 daniel 3346: (NXT(2) == 'e') && (NXT(3) == 'f') &&
3347: (NXT(4) == 'i') && (NXT(5) == 'x')) {
3348: garbage = 0;
3349: SKIP(6);
3350: SKIP_BLANKS;
3351:
1.152 daniel 3352: if (RAW != '=') continue;
1.45 daniel 3353: NEXT;
3354: SKIP_BLANKS;
3355:
3356: prefix = xmlParseQuotedString(ctxt);
3357: SKIP_BLANKS;
1.152 daniel 3358: } else if ((RAW == 'A') && (NXT(1) == 'S')) {
1.45 daniel 3359: garbage = 0;
3360: SKIP(2);
3361: SKIP_BLANKS;
3362:
1.152 daniel 3363: if (RAW != '=') continue;
1.45 daniel 3364: NEXT;
3365: SKIP_BLANKS;
3366:
3367: prefix = xmlParseQuotedString(ctxt);
3368: SKIP_BLANKS;
1.152 daniel 3369: } else if ((RAW == '?') && (NXT(1) == '>')) {
1.45 daniel 3370: garbage = 0;
1.91 daniel 3371: NEXT;
1.45 daniel 3372: } else {
3373: /*
3374: * Found garbage when parsing the namespace
3375: */
1.122 daniel 3376: if (!garbage) {
1.55 daniel 3377: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3378: ctxt->sax->error(ctxt->userData,
3379: "xmlParseNamespace found garbage\n");
3380: }
1.123 daniel 3381: ctxt->errNo = XML_ERR_NS_DECL_ERROR;
1.59 daniel 3382: ctxt->wellFormed = 0;
1.180 daniel 3383: ctxt->disableSAX = 1;
1.45 daniel 3384: NEXT;
3385: }
3386: }
3387:
3388: MOVETO_ENDTAG(CUR_PTR);
3389: NEXT;
3390:
3391: /*
3392: * Register the DTD.
1.72 daniel 3393: if (href != NULL)
3394: if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
1.74 daniel 3395: ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
1.45 daniel 3396: */
3397:
1.119 daniel 3398: if (prefix != NULL) xmlFree(prefix);
3399: if (href != NULL) xmlFree(href);
1.45 daniel 3400: }
3401:
1.28 daniel 3402: /************************************************************************
3403: * *
3404: * The parser itself *
3405: * Relates to http://www.w3.org/TR/REC-xml *
3406: * *
3407: ************************************************************************/
1.14 veillard 3408:
1.50 daniel 3409: /**
1.97 daniel 3410: * xmlScanName:
3411: * @ctxt: an XML parser context
3412: *
3413: * Trickery: parse an XML name but without consuming the input flow
3414: * Needed for rollback cases.
3415: *
3416: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3417: * CombiningChar | Extender
3418: *
3419: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3420: *
3421: * [6] Names ::= Name (S Name)*
3422: *
3423: * Returns the Name parsed or NULL
3424: */
3425:
1.123 daniel 3426: xmlChar *
1.97 daniel 3427: xmlScanName(xmlParserCtxtPtr ctxt) {
1.123 daniel 3428: xmlChar buf[XML_MAX_NAMELEN];
1.97 daniel 3429: int len = 0;
3430:
3431: GROW;
1.152 daniel 3432: if (!IS_LETTER(RAW) && (RAW != '_') &&
3433: (RAW != ':')) {
1.97 daniel 3434: return(NULL);
3435: }
3436:
3437: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
3438: (NXT(len) == '.') || (NXT(len) == '-') ||
3439: (NXT(len) == '_') || (NXT(len) == ':') ||
3440: (IS_COMBINING(NXT(len))) ||
3441: (IS_EXTENDER(NXT(len)))) {
3442: buf[len] = NXT(len);
3443: len++;
3444: if (len >= XML_MAX_NAMELEN) {
3445: fprintf(stderr,
3446: "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3447: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
3448: (NXT(len) == '.') || (NXT(len) == '-') ||
3449: (NXT(len) == '_') || (NXT(len) == ':') ||
3450: (IS_COMBINING(NXT(len))) ||
3451: (IS_EXTENDER(NXT(len))))
3452: len++;
3453: break;
3454: }
3455: }
3456: return(xmlStrndup(buf, len));
3457: }
3458:
3459: /**
1.50 daniel 3460: * xmlParseName:
3461: * @ctxt: an XML parser context
3462: *
3463: * parse an XML name.
1.22 daniel 3464: *
3465: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3466: * CombiningChar | Extender
3467: *
3468: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3469: *
3470: * [6] Names ::= Name (S Name)*
1.68 daniel 3471: *
3472: * Returns the Name parsed or NULL
1.1 veillard 3473: */
3474:
1.123 daniel 3475: xmlChar *
1.55 daniel 3476: xmlParseName(xmlParserCtxtPtr ctxt) {
1.160 daniel 3477: xmlChar buf[XML_MAX_NAMELEN + 5];
3478: int len = 0, l;
3479: int c;
1.1 veillard 3480:
1.91 daniel 3481: GROW;
1.160 daniel 3482: c = CUR_CHAR(l);
1.190 daniel 3483: if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3484: (!IS_LETTER(c) && (c != '_') &&
3485: (c != ':'))) {
1.91 daniel 3486: return(NULL);
3487: }
1.40 daniel 3488:
1.190 daniel 3489: while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3490: ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3491: (c == '.') || (c == '-') ||
3492: (c == '_') || (c == ':') ||
3493: (IS_COMBINING(c)) ||
3494: (IS_EXTENDER(c)))) {
1.160 daniel 3495: COPY_BUF(l,buf,len,c);
3496: NEXTL(l);
3497: c = CUR_CHAR(l);
1.91 daniel 3498: if (len >= XML_MAX_NAMELEN) {
3499: fprintf(stderr,
3500: "xmlParseName: reached XML_MAX_NAMELEN limit\n");
1.160 daniel 3501: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3502: (c == '.') || (c == '-') ||
3503: (c == '_') || (c == ':') ||
3504: (IS_COMBINING(c)) ||
3505: (IS_EXTENDER(c))) {
3506: NEXTL(l);
3507: c = CUR_CHAR(l);
1.97 daniel 3508: }
1.91 daniel 3509: break;
3510: }
3511: }
3512: return(xmlStrndup(buf, len));
1.22 daniel 3513: }
3514:
1.50 daniel 3515: /**
1.135 daniel 3516: * xmlParseStringName:
3517: * @ctxt: an XML parser context
3518: * @str: a pointer to an index in the string
3519: *
3520: * parse an XML name.
3521: *
3522: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3523: * CombiningChar | Extender
3524: *
3525: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3526: *
3527: * [6] Names ::= Name (S Name)*
3528: *
3529: * Returns the Name parsed or NULL. The str pointer
3530: * is updated to the current location in the string.
3531: */
3532:
3533: xmlChar *
3534: xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1.176 daniel 3535: xmlChar buf[XML_MAX_NAMELEN + 5];
3536: const xmlChar *cur = *str;
3537: int len = 0, l;
3538: int c;
1.135 daniel 3539:
1.176 daniel 3540: c = CUR_SCHAR(cur, l);
3541: if (!IS_LETTER(c) && (c != '_') &&
3542: (c != ':')) {
1.135 daniel 3543: return(NULL);
3544: }
3545:
1.176 daniel 3546: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3547: (c == '.') || (c == '-') ||
3548: (c == '_') || (c == ':') ||
3549: (IS_COMBINING(c)) ||
3550: (IS_EXTENDER(c))) {
3551: COPY_BUF(l,buf,len,c);
3552: cur += l;
3553: c = CUR_SCHAR(cur, l);
3554: if (len >= XML_MAX_NAMELEN) {
3555: fprintf(stderr,
3556: "xmlParseName: reached XML_MAX_NAMELEN limit\n");
3557: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3558: (c == '.') || (c == '-') ||
3559: (c == '_') || (c == ':') ||
3560: (IS_COMBINING(c)) ||
3561: (IS_EXTENDER(c))) {
3562: cur += l;
3563: c = CUR_SCHAR(cur, l);
3564: }
3565: break;
3566: }
1.135 daniel 3567: }
1.176 daniel 3568: *str = cur;
3569: return(xmlStrndup(buf, len));
1.135 daniel 3570: }
3571:
3572: /**
1.50 daniel 3573: * xmlParseNmtoken:
3574: * @ctxt: an XML parser context
3575: *
3576: * parse an XML Nmtoken.
1.22 daniel 3577: *
3578: * [7] Nmtoken ::= (NameChar)+
3579: *
3580: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1.68 daniel 3581: *
3582: * Returns the Nmtoken parsed or NULL
1.22 daniel 3583: */
3584:
1.123 daniel 3585: xmlChar *
1.55 daniel 3586: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1.123 daniel 3587: xmlChar buf[XML_MAX_NAMELEN];
1.91 daniel 3588: int len = 0;
1.160 daniel 3589: int c,l;
1.22 daniel 3590:
1.91 daniel 3591: GROW;
1.160 daniel 3592: c = CUR_CHAR(l);
3593: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3594: (c == '.') || (c == '-') ||
3595: (c == '_') || (c == ':') ||
3596: (IS_COMBINING(c)) ||
3597: (IS_EXTENDER(c))) {
3598: COPY_BUF(l,buf,len,c);
3599: NEXTL(l);
3600: c = CUR_CHAR(l);
1.91 daniel 3601: if (len >= XML_MAX_NAMELEN) {
3602: fprintf(stderr,
3603: "xmlParseNmtoken: reached XML_MAX_NAMELEN limit\n");
1.160 daniel 3604: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3605: (c == '.') || (c == '-') ||
3606: (c == '_') || (c == ':') ||
3607: (IS_COMBINING(c)) ||
3608: (IS_EXTENDER(c))) {
3609: NEXTL(l);
3610: c = CUR_CHAR(l);
3611: }
1.91 daniel 3612: break;
3613: }
3614: }
1.168 daniel 3615: if (len == 0)
3616: return(NULL);
1.91 daniel 3617: return(xmlStrndup(buf, len));
1.1 veillard 3618: }
3619:
1.50 daniel 3620: /**
3621: * xmlParseEntityValue:
3622: * @ctxt: an XML parser context
1.78 daniel 3623: * @orig: if non-NULL store a copy of the original entity value
1.50 daniel 3624: *
3625: * parse a value for ENTITY decl.
1.24 daniel 3626: *
3627: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3628: * "'" ([^%&'] | PEReference | Reference)* "'"
1.68 daniel 3629: *
1.78 daniel 3630: * Returns the EntityValue parsed with reference substitued or NULL
1.24 daniel 3631: */
3632:
1.123 daniel 3633: xmlChar *
3634: xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1.135 daniel 3635: xmlChar *buf = NULL;
3636: int len = 0;
1.140 daniel 3637: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3638: int c, l;
1.135 daniel 3639: xmlChar stop;
1.123 daniel 3640: xmlChar *ret = NULL;
1.176 daniel 3641: const xmlChar *cur = NULL;
1.98 daniel 3642: xmlParserInputPtr input;
1.24 daniel 3643:
1.152 daniel 3644: if (RAW == '"') stop = '"';
3645: else if (RAW == '\'') stop = '\'';
1.135 daniel 3646: else {
3647: ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
3648: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3649: ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
3650: ctxt->wellFormed = 0;
1.180 daniel 3651: ctxt->disableSAX = 1;
1.135 daniel 3652: return(NULL);
3653: }
3654: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3655: if (buf == NULL) {
3656: fprintf(stderr, "malloc of %d byte failed\n", size);
3657: return(NULL);
3658: }
1.94 daniel 3659:
1.135 daniel 3660: /*
3661: * The content of the entity definition is copied in a buffer.
3662: */
1.94 daniel 3663:
1.135 daniel 3664: ctxt->instate = XML_PARSER_ENTITY_VALUE;
3665: input = ctxt->input;
3666: GROW;
3667: NEXT;
1.152 daniel 3668: c = CUR_CHAR(l);
1.135 daniel 3669: /*
3670: * NOTE: 4.4.5 Included in Literal
3671: * When a parameter entity reference appears in a literal entity
3672: * value, ... a single or double quote character in the replacement
3673: * text is always treated as a normal data character and will not
3674: * terminate the literal.
3675: * In practice it means we stop the loop only when back at parsing
3676: * the initial entity and the quote is found
3677: */
3678: while (IS_CHAR(c) && ((c != stop) || (ctxt->input != input))) {
1.152 daniel 3679: if (len + 5 >= size) {
1.135 daniel 3680: size *= 2;
1.204 ! veillard 3681: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 3682: if (buf == NULL) {
3683: fprintf(stderr, "realloc of %d byte failed\n", size);
3684: return(NULL);
1.94 daniel 3685: }
1.79 daniel 3686: }
1.152 daniel 3687: COPY_BUF(l,buf,len,c);
3688: NEXTL(l);
1.98 daniel 3689: /*
1.135 daniel 3690: * Pop-up of finished entities.
1.98 daniel 3691: */
1.152 daniel 3692: while ((RAW == 0) && (ctxt->inputNr > 1))
1.135 daniel 3693: xmlPopInput(ctxt);
1.152 daniel 3694:
3695: c = CUR_CHAR(l);
1.135 daniel 3696: if (c == 0) {
1.94 daniel 3697: GROW;
1.152 daniel 3698: c = CUR_CHAR(l);
1.79 daniel 3699: }
1.135 daniel 3700: }
3701: buf[len] = 0;
3702:
3703: /*
1.176 daniel 3704: * Raise problem w.r.t. '&' and '%' being used in non-entities
3705: * reference constructs. Note Charref will be handled in
3706: * xmlStringDecodeEntities()
3707: */
3708: cur = buf;
3709: while (*cur != 0) {
3710: if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3711: xmlChar *name;
3712: xmlChar tmp = *cur;
3713:
3714: cur++;
3715: name = xmlParseStringName(ctxt, &cur);
3716: if ((name == NULL) || (*cur != ';')) {
3717: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3718: ctxt->sax->error(ctxt->userData,
3719: "EntityValue: '%c' forbidden except for entities references\n",
3720: tmp);
3721: ctxt->wellFormed = 0;
1.180 daniel 3722: ctxt->disableSAX = 1;
1.176 daniel 3723: ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
3724: }
3725: if ((ctxt->inSubset == 1) && (tmp == '%')) {
3726: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3727: ctxt->sax->error(ctxt->userData,
3728: "EntityValue: PEReferences forbidden in internal subset\n",
3729: tmp);
3730: ctxt->wellFormed = 0;
1.180 daniel 3731: ctxt->disableSAX = 1;
1.176 daniel 3732: ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
3733: }
3734: if (name != NULL)
3735: xmlFree(name);
3736: }
3737: cur++;
3738: }
3739:
3740: /*
1.135 daniel 3741: * Then PEReference entities are substituted.
3742: */
3743: if (c != stop) {
3744: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.55 daniel 3745: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3746: ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
1.59 daniel 3747: ctxt->wellFormed = 0;
1.180 daniel 3748: ctxt->disableSAX = 1;
1.170 daniel 3749: xmlFree(buf);
1.135 daniel 3750: } else {
3751: NEXT;
3752: /*
3753: * NOTE: 4.4.7 Bypassed
3754: * When a general entity reference appears in the EntityValue in
3755: * an entity declaration, it is bypassed and left as is.
1.176 daniel 3756: * so XML_SUBSTITUTE_REF is not set here.
1.135 daniel 3757: */
3758: ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3759: 0, 0, 0);
3760: if (orig != NULL)
3761: *orig = buf;
3762: else
3763: xmlFree(buf);
1.24 daniel 3764: }
3765:
3766: return(ret);
3767: }
3768:
1.50 daniel 3769: /**
3770: * xmlParseAttValue:
3771: * @ctxt: an XML parser context
3772: *
3773: * parse a value for an attribute
1.78 daniel 3774: * Note: the parser won't do substitution of entities here, this
1.113 daniel 3775: * will be handled later in xmlStringGetNodeList
1.29 daniel 3776: *
3777: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3778: * "'" ([^<&'] | Reference)* "'"
1.68 daniel 3779: *
1.129 daniel 3780: * 3.3.3 Attribute-Value Normalization:
3781: * Before the value of an attribute is passed to the application or
3782: * checked for validity, the XML processor must normalize it as follows:
3783: * - a character reference is processed by appending the referenced
3784: * character to the attribute value
3785: * - an entity reference is processed by recursively processing the
3786: * replacement text of the entity
3787: * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3788: * appending #x20 to the normalized value, except that only a single
3789: * #x20 is appended for a "#xD#xA" sequence that is part of an external
3790: * parsed entity or the literal entity value of an internal parsed entity
3791: * - other characters are processed by appending them to the normalized value
1.130 daniel 3792: * If the declared value is not CDATA, then the XML processor must further
3793: * process the normalized attribute value by discarding any leading and
3794: * trailing space (#x20) characters, and by replacing sequences of space
3795: * (#x20) characters by a single space (#x20) character.
3796: * All attributes for which no declaration has been read should be treated
3797: * by a non-validating parser as if declared CDATA.
1.129 daniel 3798: *
3799: * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
1.29 daniel 3800: */
3801:
1.123 daniel 3802: xmlChar *
1.55 daniel 3803: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.129 daniel 3804: xmlChar limit = 0;
1.198 daniel 3805: xmlChar *buf = NULL;
3806: int len = 0;
3807: int buf_size = 0;
3808: int c, l;
1.129 daniel 3809: xmlChar *current = NULL;
3810: xmlEntityPtr ent;
3811:
1.29 daniel 3812:
1.91 daniel 3813: SHRINK;
1.151 daniel 3814: if (NXT(0) == '"') {
1.96 daniel 3815: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.129 daniel 3816: limit = '"';
1.40 daniel 3817: NEXT;
1.151 daniel 3818: } else if (NXT(0) == '\'') {
1.129 daniel 3819: limit = '\'';
1.96 daniel 3820: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.40 daniel 3821: NEXT;
1.29 daniel 3822: } else {
1.123 daniel 3823: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
1.55 daniel 3824: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3825: ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
1.59 daniel 3826: ctxt->wellFormed = 0;
1.180 daniel 3827: ctxt->disableSAX = 1;
1.129 daniel 3828: return(NULL);
1.29 daniel 3829: }
3830:
1.129 daniel 3831: /*
3832: * allocate a translation buffer.
3833: */
1.198 daniel 3834: buf_size = XML_PARSER_BUFFER_SIZE;
3835: buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
3836: if (buf == NULL) {
1.129 daniel 3837: perror("xmlParseAttValue: malloc failed");
3838: return(NULL);
3839: }
3840:
3841: /*
3842: * Ok loop until we reach one of the ending char or a size limit.
3843: */
1.198 daniel 3844: c = CUR_CHAR(l);
3845: while (((NXT(0) != limit) && (c != '<')) || (ctxt->token != 0)) {
3846: if (c == 0) break;
3847: if ((c == '&') && (NXT(1) == '#')) {
1.129 daniel 3848: int val = xmlParseCharRef(ctxt);
1.198 daniel 3849: COPY_BUF(l,buf,len,val);
3850: NEXTL(l);
3851: } else if (c == '&') {
1.129 daniel 3852: ent = xmlParseEntityRef(ctxt);
3853: if ((ent != NULL) &&
3854: (ctxt->replaceEntities != 0)) {
1.185 daniel 3855: xmlChar *rep;
3856:
1.186 daniel 3857: if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3858: rep = xmlStringDecodeEntities(ctxt, ent->content,
1.185 daniel 3859: XML_SUBSTITUTE_REF, 0, 0, 0);
1.186 daniel 3860: if (rep != NULL) {
3861: current = rep;
3862: while (*current != 0) {
1.198 daniel 3863: buf[len++] = *current++;
3864: if (len > buf_size - 10) {
3865: growBuffer(buf);
1.186 daniel 3866: }
1.185 daniel 3867: }
1.186 daniel 3868: xmlFree(rep);
1.129 daniel 3869: }
1.186 daniel 3870: } else {
3871: if (ent->content != NULL)
1.198 daniel 3872: buf[len++] = ent->content[0];
1.129 daniel 3873: }
3874: } else if (ent != NULL) {
3875: int i = xmlStrlen(ent->name);
3876: const xmlChar *cur = ent->name;
3877:
1.186 daniel 3878: /*
3879: * This may look absurd but is needed to detect
3880: * entities problems
3881: */
3882: if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3883: xmlChar *rep;
3884: rep = xmlStringDecodeEntities(ctxt, ent->content,
3885: XML_SUBSTITUTE_REF, 0, 0, 0);
3886: if (rep != NULL)
3887: xmlFree(rep);
3888: }
3889:
3890: /*
3891: * Just output the reference
3892: */
1.198 daniel 3893: buf[len++] = '&';
3894: if (len > buf_size - i - 10) {
3895: growBuffer(buf);
1.129 daniel 3896: }
3897: for (;i > 0;i--)
1.198 daniel 3898: buf[len++] = *cur++;
3899: buf[len++] = ';';
1.129 daniel 3900: }
3901: } else {
1.198 daniel 3902: if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
3903: COPY_BUF(l,buf,len,0x20);
3904: if (len > buf_size - 10) {
3905: growBuffer(buf);
1.129 daniel 3906: }
3907: } else {
1.198 daniel 3908: COPY_BUF(l,buf,len,c);
3909: if (len > buf_size - 10) {
3910: growBuffer(buf);
1.129 daniel 3911: }
3912: }
1.198 daniel 3913: NEXTL(l);
1.129 daniel 3914: }
1.198 daniel 3915: GROW;
3916: c = CUR_CHAR(l);
1.129 daniel 3917: }
1.198 daniel 3918: buf[len++] = 0;
1.152 daniel 3919: if (RAW == '<') {
1.129 daniel 3920: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3921: ctxt->sax->error(ctxt->userData,
3922: "Unescaped '<' not allowed in attributes values\n");
3923: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
3924: ctxt->wellFormed = 0;
1.180 daniel 3925: ctxt->disableSAX = 1;
1.152 daniel 3926: } else if (RAW != limit) {
1.129 daniel 3927: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3928: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
3929: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
3930: ctxt->wellFormed = 0;
1.180 daniel 3931: ctxt->disableSAX = 1;
1.129 daniel 3932: } else
3933: NEXT;
1.198 daniel 3934: return(buf);
1.29 daniel 3935: }
3936:
1.50 daniel 3937: /**
3938: * xmlParseSystemLiteral:
3939: * @ctxt: an XML parser context
3940: *
3941: * parse an XML Literal
1.21 daniel 3942: *
1.22 daniel 3943: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.68 daniel 3944: *
3945: * Returns the SystemLiteral parsed or NULL
1.21 daniel 3946: */
3947:
1.123 daniel 3948: xmlChar *
1.55 daniel 3949: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 3950: xmlChar *buf = NULL;
3951: int len = 0;
1.140 daniel 3952: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3953: int cur, l;
1.135 daniel 3954: xmlChar stop;
1.168 daniel 3955: int state = ctxt->instate;
1.21 daniel 3956:
1.91 daniel 3957: SHRINK;
1.152 daniel 3958: if (RAW == '"') {
1.40 daniel 3959: NEXT;
1.135 daniel 3960: stop = '"';
1.152 daniel 3961: } else if (RAW == '\'') {
1.40 daniel 3962: NEXT;
1.135 daniel 3963: stop = '\'';
1.21 daniel 3964: } else {
1.55 daniel 3965: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3966: ctxt->sax->error(ctxt->userData,
3967: "SystemLiteral \" or ' expected\n");
1.123 daniel 3968: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.59 daniel 3969: ctxt->wellFormed = 0;
1.180 daniel 3970: ctxt->disableSAX = 1;
1.135 daniel 3971: return(NULL);
1.21 daniel 3972: }
3973:
1.135 daniel 3974: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3975: if (buf == NULL) {
3976: fprintf(stderr, "malloc of %d byte failed\n", size);
3977: return(NULL);
3978: }
1.168 daniel 3979: ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
1.152 daniel 3980: cur = CUR_CHAR(l);
1.135 daniel 3981: while ((IS_CHAR(cur)) && (cur != stop)) {
1.152 daniel 3982: if (len + 5 >= size) {
1.135 daniel 3983: size *= 2;
1.204 ! veillard 3984: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 3985: if (buf == NULL) {
3986: fprintf(stderr, "realloc of %d byte failed\n", size);
1.204 ! veillard 3987: ctxt->instate = (xmlParserInputState) state;
1.135 daniel 3988: return(NULL);
3989: }
3990: }
1.152 daniel 3991: COPY_BUF(l,buf,len,cur);
3992: NEXTL(l);
3993: cur = CUR_CHAR(l);
1.135 daniel 3994: if (cur == 0) {
3995: GROW;
3996: SHRINK;
1.152 daniel 3997: cur = CUR_CHAR(l);
1.135 daniel 3998: }
3999: }
4000: buf[len] = 0;
1.204 ! veillard 4001: ctxt->instate = (xmlParserInputState) state;
1.135 daniel 4002: if (!IS_CHAR(cur)) {
4003: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4004: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
4005: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
4006: ctxt->wellFormed = 0;
1.180 daniel 4007: ctxt->disableSAX = 1;
1.135 daniel 4008: } else {
4009: NEXT;
4010: }
4011: return(buf);
1.21 daniel 4012: }
4013:
1.50 daniel 4014: /**
4015: * xmlParsePubidLiteral:
4016: * @ctxt: an XML parser context
1.21 daniel 4017: *
1.50 daniel 4018: * parse an XML public literal
1.68 daniel 4019: *
4020: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4021: *
4022: * Returns the PubidLiteral parsed or NULL.
1.21 daniel 4023: */
4024:
1.123 daniel 4025: xmlChar *
1.55 daniel 4026: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 4027: xmlChar *buf = NULL;
4028: int len = 0;
1.140 daniel 4029: int size = XML_PARSER_BUFFER_SIZE;
1.135 daniel 4030: xmlChar cur;
4031: xmlChar stop;
1.125 daniel 4032:
1.91 daniel 4033: SHRINK;
1.152 daniel 4034: if (RAW == '"') {
1.40 daniel 4035: NEXT;
1.135 daniel 4036: stop = '"';
1.152 daniel 4037: } else if (RAW == '\'') {
1.40 daniel 4038: NEXT;
1.135 daniel 4039: stop = '\'';
1.21 daniel 4040: } else {
1.55 daniel 4041: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4042: ctxt->sax->error(ctxt->userData,
4043: "SystemLiteral \" or ' expected\n");
1.123 daniel 4044: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.59 daniel 4045: ctxt->wellFormed = 0;
1.180 daniel 4046: ctxt->disableSAX = 1;
1.135 daniel 4047: return(NULL);
4048: }
4049: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4050: if (buf == NULL) {
4051: fprintf(stderr, "malloc of %d byte failed\n", size);
4052: return(NULL);
4053: }
4054: cur = CUR;
4055: while ((IS_PUBIDCHAR(cur)) && (cur != stop)) {
4056: if (len + 1 >= size) {
4057: size *= 2;
1.204 ! veillard 4058: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 4059: if (buf == NULL) {
4060: fprintf(stderr, "realloc of %d byte failed\n", size);
4061: return(NULL);
4062: }
4063: }
4064: buf[len++] = cur;
4065: NEXT;
4066: cur = CUR;
4067: if (cur == 0) {
4068: GROW;
4069: SHRINK;
4070: cur = CUR;
4071: }
4072: }
4073: buf[len] = 0;
4074: if (cur != stop) {
4075: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4076: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
4077: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
4078: ctxt->wellFormed = 0;
1.180 daniel 4079: ctxt->disableSAX = 1;
1.135 daniel 4080: } else {
4081: NEXT;
1.21 daniel 4082: }
1.135 daniel 4083: return(buf);
1.21 daniel 4084: }
4085:
1.50 daniel 4086: /**
4087: * xmlParseCharData:
4088: * @ctxt: an XML parser context
4089: * @cdata: int indicating whether we are within a CDATA section
4090: *
4091: * parse a CharData section.
4092: * if we are within a CDATA section ']]>' marks an end of section.
1.27 daniel 4093: *
1.151 daniel 4094: * The right angle bracket (>) may be represented using the string ">",
4095: * and must, for compatibility, be escaped using ">" or a character
4096: * reference when it appears in the string "]]>" in content, when that
4097: * string is not marking the end of a CDATA section.
4098: *
1.27 daniel 4099: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4100: */
4101:
1.55 daniel 4102: void
4103: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1.152 daniel 4104: xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
1.91 daniel 4105: int nbchar = 0;
1.152 daniel 4106: int cur, l;
1.27 daniel 4107:
1.91 daniel 4108: SHRINK;
1.152 daniel 4109: cur = CUR_CHAR(l);
1.190 daniel 4110: while (((cur != '<') || (ctxt->token == '<')) &&
4111: ((cur != '&') || (ctxt->token == '&')) &&
4112: (IS_CHAR(cur))) {
1.97 daniel 4113: if ((cur == ']') && (NXT(1) == ']') &&
1.59 daniel 4114: (NXT(2) == '>')) {
4115: if (cdata) break;
4116: else {
4117: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.151 daniel 4118: ctxt->sax->error(ctxt->userData,
1.59 daniel 4119: "Sequence ']]>' not allowed in content\n");
1.123 daniel 4120: ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
1.151 daniel 4121: /* Should this be relaxed ??? I see a "must here */
4122: ctxt->wellFormed = 0;
1.180 daniel 4123: ctxt->disableSAX = 1;
1.59 daniel 4124: }
4125: }
1.152 daniel 4126: COPY_BUF(l,buf,nbchar,cur);
4127: if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
1.91 daniel 4128: /*
4129: * Ok the segment is to be consumed as chars.
4130: */
1.171 daniel 4131: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 4132: if (areBlanks(ctxt, buf, nbchar)) {
4133: if (ctxt->sax->ignorableWhitespace != NULL)
4134: ctxt->sax->ignorableWhitespace(ctxt->userData,
4135: buf, nbchar);
4136: } else {
4137: if (ctxt->sax->characters != NULL)
4138: ctxt->sax->characters(ctxt->userData, buf, nbchar);
4139: }
4140: }
4141: nbchar = 0;
4142: }
1.152 daniel 4143: NEXTL(l);
4144: cur = CUR_CHAR(l);
1.27 daniel 4145: }
1.91 daniel 4146: if (nbchar != 0) {
4147: /*
4148: * Ok the segment is to be consumed as chars.
4149: */
1.171 daniel 4150: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 4151: if (areBlanks(ctxt, buf, nbchar)) {
4152: if (ctxt->sax->ignorableWhitespace != NULL)
4153: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4154: } else {
4155: if (ctxt->sax->characters != NULL)
4156: ctxt->sax->characters(ctxt->userData, buf, nbchar);
4157: }
4158: }
1.45 daniel 4159: }
1.27 daniel 4160: }
4161:
1.50 daniel 4162: /**
4163: * xmlParseExternalID:
4164: * @ctxt: an XML parser context
1.123 daniel 4165: * @publicID: a xmlChar** receiving PubidLiteral
1.67 daniel 4166: * @strict: indicate whether we should restrict parsing to only
4167: * production [75], see NOTE below
1.50 daniel 4168: *
1.67 daniel 4169: * Parse an External ID or a Public ID
4170: *
4171: * NOTE: Productions [75] and [83] interract badly since [75] can generate
4172: * 'PUBLIC' S PubidLiteral S SystemLiteral
1.22 daniel 4173: *
4174: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4175: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1.67 daniel 4176: *
4177: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4178: *
1.68 daniel 4179: * Returns the function returns SystemLiteral and in the second
1.67 daniel 4180: * case publicID receives PubidLiteral, is strict is off
4181: * it is possible to return NULL and have publicID set.
1.22 daniel 4182: */
4183:
1.123 daniel 4184: xmlChar *
4185: xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4186: xmlChar *URI = NULL;
1.22 daniel 4187:
1.91 daniel 4188: SHRINK;
1.152 daniel 4189: if ((RAW == 'S') && (NXT(1) == 'Y') &&
1.40 daniel 4190: (NXT(2) == 'S') && (NXT(3) == 'T') &&
4191: (NXT(4) == 'E') && (NXT(5) == 'M')) {
4192: SKIP(6);
1.59 daniel 4193: if (!IS_BLANK(CUR)) {
4194: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4195: ctxt->sax->error(ctxt->userData,
1.59 daniel 4196: "Space required after 'SYSTEM'\n");
1.123 daniel 4197: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4198: ctxt->wellFormed = 0;
1.180 daniel 4199: ctxt->disableSAX = 1;
1.59 daniel 4200: }
1.42 daniel 4201: SKIP_BLANKS;
1.39 daniel 4202: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 4203: if (URI == NULL) {
1.55 daniel 4204: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4205: ctxt->sax->error(ctxt->userData,
1.39 daniel 4206: "xmlParseExternalID: SYSTEM, no URI\n");
1.123 daniel 4207: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.59 daniel 4208: ctxt->wellFormed = 0;
1.180 daniel 4209: ctxt->disableSAX = 1;
1.59 daniel 4210: }
1.152 daniel 4211: } else if ((RAW == 'P') && (NXT(1) == 'U') &&
1.40 daniel 4212: (NXT(2) == 'B') && (NXT(3) == 'L') &&
4213: (NXT(4) == 'I') && (NXT(5) == 'C')) {
4214: SKIP(6);
1.59 daniel 4215: if (!IS_BLANK(CUR)) {
4216: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4217: ctxt->sax->error(ctxt->userData,
1.59 daniel 4218: "Space required after 'PUBLIC'\n");
1.123 daniel 4219: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4220: ctxt->wellFormed = 0;
1.180 daniel 4221: ctxt->disableSAX = 1;
1.59 daniel 4222: }
1.42 daniel 4223: SKIP_BLANKS;
1.39 daniel 4224: *publicID = xmlParsePubidLiteral(ctxt);
1.59 daniel 4225: if (*publicID == NULL) {
1.55 daniel 4226: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4227: ctxt->sax->error(ctxt->userData,
1.39 daniel 4228: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.123 daniel 4229: ctxt->errNo = XML_ERR_PUBID_REQUIRED;
1.59 daniel 4230: ctxt->wellFormed = 0;
1.180 daniel 4231: ctxt->disableSAX = 1;
1.59 daniel 4232: }
1.67 daniel 4233: if (strict) {
4234: /*
4235: * We don't handle [83] so "S SystemLiteral" is required.
4236: */
4237: if (!IS_BLANK(CUR)) {
4238: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4239: ctxt->sax->error(ctxt->userData,
1.67 daniel 4240: "Space required after the Public Identifier\n");
1.123 daniel 4241: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 4242: ctxt->wellFormed = 0;
1.180 daniel 4243: ctxt->disableSAX = 1;
1.67 daniel 4244: }
4245: } else {
4246: /*
4247: * We handle [83] so we return immediately, if
4248: * "S SystemLiteral" is not detected. From a purely parsing
4249: * point of view that's a nice mess.
4250: */
1.135 daniel 4251: const xmlChar *ptr;
4252: GROW;
4253:
4254: ptr = CUR_PTR;
1.67 daniel 4255: if (!IS_BLANK(*ptr)) return(NULL);
4256:
4257: while (IS_BLANK(*ptr)) ptr++;
1.173 daniel 4258: if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
1.59 daniel 4259: }
1.42 daniel 4260: SKIP_BLANKS;
1.39 daniel 4261: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 4262: if (URI == NULL) {
1.55 daniel 4263: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4264: ctxt->sax->error(ctxt->userData,
1.39 daniel 4265: "xmlParseExternalID: PUBLIC, no URI\n");
1.123 daniel 4266: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.59 daniel 4267: ctxt->wellFormed = 0;
1.180 daniel 4268: ctxt->disableSAX = 1;
1.59 daniel 4269: }
1.22 daniel 4270: }
1.39 daniel 4271: return(URI);
1.22 daniel 4272: }
4273:
1.50 daniel 4274: /**
4275: * xmlParseComment:
1.69 daniel 4276: * @ctxt: an XML parser context
1.50 daniel 4277: *
1.3 veillard 4278: * Skip an XML (SGML) comment <!-- .... -->
1.38 daniel 4279: * The spec says that "For compatibility, the string "--" (double-hyphen)
4280: * must not occur within comments. "
1.22 daniel 4281: *
4282: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 4283: */
1.72 daniel 4284: void
1.114 daniel 4285: xmlParseComment(xmlParserCtxtPtr ctxt) {
1.135 daniel 4286: xmlChar *buf = NULL;
1.195 daniel 4287: int len;
1.140 daniel 4288: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 4289: int q, ql;
4290: int r, rl;
4291: int cur, l;
1.140 daniel 4292: xmlParserInputState state;
1.187 daniel 4293: xmlParserInputPtr input = ctxt->input;
1.3 veillard 4294:
4295: /*
1.22 daniel 4296: * Check that there is a comment right here.
1.3 veillard 4297: */
1.152 daniel 4298: if ((RAW != '<') || (NXT(1) != '!') ||
1.72 daniel 4299: (NXT(2) != '-') || (NXT(3) != '-')) return;
1.3 veillard 4300:
1.140 daniel 4301: state = ctxt->instate;
1.97 daniel 4302: ctxt->instate = XML_PARSER_COMMENT;
1.91 daniel 4303: SHRINK;
1.40 daniel 4304: SKIP(4);
1.135 daniel 4305: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4306: if (buf == NULL) {
4307: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 4308: ctxt->instate = state;
1.135 daniel 4309: return;
4310: }
1.152 daniel 4311: q = CUR_CHAR(ql);
4312: NEXTL(ql);
4313: r = CUR_CHAR(rl);
4314: NEXTL(rl);
4315: cur = CUR_CHAR(l);
1.195 daniel 4316: len = 0;
1.135 daniel 4317: while (IS_CHAR(cur) &&
4318: ((cur != '>') ||
4319: (r != '-') || (q != '-'))) {
1.195 daniel 4320: if ((r == '-') && (q == '-') && (len > 1)) {
1.55 daniel 4321: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4322: ctxt->sax->error(ctxt->userData,
1.38 daniel 4323: "Comment must not contain '--' (double-hyphen)`\n");
1.123 daniel 4324: ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
1.59 daniel 4325: ctxt->wellFormed = 0;
1.180 daniel 4326: ctxt->disableSAX = 1;
1.59 daniel 4327: }
1.152 daniel 4328: if (len + 5 >= size) {
1.135 daniel 4329: size *= 2;
1.204 ! veillard 4330: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 4331: if (buf == NULL) {
4332: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 4333: ctxt->instate = state;
1.135 daniel 4334: return;
4335: }
4336: }
1.152 daniel 4337: COPY_BUF(ql,buf,len,q);
1.135 daniel 4338: q = r;
1.152 daniel 4339: ql = rl;
1.135 daniel 4340: r = cur;
1.152 daniel 4341: rl = l;
4342: NEXTL(l);
4343: cur = CUR_CHAR(l);
1.135 daniel 4344: if (cur == 0) {
4345: SHRINK;
4346: GROW;
1.152 daniel 4347: cur = CUR_CHAR(l);
1.135 daniel 4348: }
1.3 veillard 4349: }
1.135 daniel 4350: buf[len] = 0;
4351: if (!IS_CHAR(cur)) {
1.55 daniel 4352: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4353: ctxt->sax->error(ctxt->userData,
1.135 daniel 4354: "Comment not terminated \n<!--%.50s\n", buf);
1.123 daniel 4355: ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
1.59 daniel 4356: ctxt->wellFormed = 0;
1.180 daniel 4357: ctxt->disableSAX = 1;
1.178 daniel 4358: xmlFree(buf);
1.3 veillard 4359: } else {
1.187 daniel 4360: if (input != ctxt->input) {
4361: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4362: ctxt->sax->error(ctxt->userData,
4363: "Comment doesn't start and stop in the same entity\n");
4364: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4365: ctxt->wellFormed = 0;
4366: ctxt->disableSAX = 1;
4367: }
1.40 daniel 4368: NEXT;
1.171 daniel 4369: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4370: (!ctxt->disableSAX))
1.135 daniel 4371: ctxt->sax->comment(ctxt->userData, buf);
4372: xmlFree(buf);
1.3 veillard 4373: }
1.140 daniel 4374: ctxt->instate = state;
1.3 veillard 4375: }
4376:
1.50 daniel 4377: /**
4378: * xmlParsePITarget:
4379: * @ctxt: an XML parser context
4380: *
4381: * parse the name of a PI
1.22 daniel 4382: *
4383: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1.68 daniel 4384: *
4385: * Returns the PITarget name or NULL
1.22 daniel 4386: */
4387:
1.123 daniel 4388: xmlChar *
1.55 daniel 4389: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1.123 daniel 4390: xmlChar *name;
1.22 daniel 4391:
4392: name = xmlParseName(ctxt);
1.139 daniel 4393: if ((name != NULL) &&
1.22 daniel 4394: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 4395: ((name[1] == 'm') || (name[1] == 'M')) &&
4396: ((name[2] == 'l') || (name[2] == 'L'))) {
1.139 daniel 4397: int i;
1.177 daniel 4398: if ((name[0] == 'x') && (name[1] == 'm') &&
4399: (name[2] == 'l') && (name[3] == 0)) {
1.151 daniel 4400: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4401: ctxt->sax->error(ctxt->userData,
4402: "XML declaration allowed only at the start of the document\n");
4403: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
4404: ctxt->wellFormed = 0;
1.180 daniel 4405: ctxt->disableSAX = 1;
1.151 daniel 4406: return(name);
4407: } else if (name[3] == 0) {
4408: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4409: ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
4410: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
4411: ctxt->wellFormed = 0;
1.180 daniel 4412: ctxt->disableSAX = 1;
1.151 daniel 4413: return(name);
4414: }
1.139 daniel 4415: for (i = 0;;i++) {
4416: if (xmlW3CPIs[i] == NULL) break;
4417: if (!xmlStrcmp(name, (const xmlChar *)xmlW3CPIs[i]))
4418: return(name);
4419: }
4420: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
4421: ctxt->sax->warning(ctxt->userData,
1.122 daniel 4422: "xmlParsePItarget: invalid name prefix 'xml'\n");
1.123 daniel 4423: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
1.122 daniel 4424: }
1.22 daniel 4425: }
4426: return(name);
4427: }
4428:
1.50 daniel 4429: /**
4430: * xmlParsePI:
4431: * @ctxt: an XML parser context
4432: *
4433: * parse an XML Processing Instruction.
1.22 daniel 4434: *
4435: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.68 daniel 4436: *
1.69 daniel 4437: * The processing is transfered to SAX once parsed.
1.3 veillard 4438: */
4439:
1.55 daniel 4440: void
4441: xmlParsePI(xmlParserCtxtPtr ctxt) {
1.135 daniel 4442: xmlChar *buf = NULL;
4443: int len = 0;
1.140 daniel 4444: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 4445: int cur, l;
1.123 daniel 4446: xmlChar *target;
1.140 daniel 4447: xmlParserInputState state;
1.22 daniel 4448:
1.152 daniel 4449: if ((RAW == '<') && (NXT(1) == '?')) {
1.187 daniel 4450: xmlParserInputPtr input = ctxt->input;
1.140 daniel 4451: state = ctxt->instate;
4452: ctxt->instate = XML_PARSER_PI;
1.3 veillard 4453: /*
4454: * this is a Processing Instruction.
4455: */
1.40 daniel 4456: SKIP(2);
1.91 daniel 4457: SHRINK;
1.3 veillard 4458:
4459: /*
1.22 daniel 4460: * Parse the target name and check for special support like
4461: * namespace.
1.3 veillard 4462: */
1.22 daniel 4463: target = xmlParsePITarget(ctxt);
4464: if (target != NULL) {
1.156 daniel 4465: if ((RAW == '?') && (NXT(1) == '>')) {
1.187 daniel 4466: if (input != ctxt->input) {
4467: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4468: ctxt->sax->error(ctxt->userData,
4469: "PI declaration doesn't start and stop in the same entity\n");
4470: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4471: ctxt->wellFormed = 0;
4472: ctxt->disableSAX = 1;
4473: }
1.156 daniel 4474: SKIP(2);
4475:
4476: /*
4477: * SAX: PI detected.
4478: */
1.171 daniel 4479: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.156 daniel 4480: (ctxt->sax->processingInstruction != NULL))
4481: ctxt->sax->processingInstruction(ctxt->userData,
4482: target, NULL);
4483: ctxt->instate = state;
1.170 daniel 4484: xmlFree(target);
1.156 daniel 4485: return;
4486: }
1.135 daniel 4487: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4488: if (buf == NULL) {
4489: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 4490: ctxt->instate = state;
1.135 daniel 4491: return;
4492: }
4493: cur = CUR;
4494: if (!IS_BLANK(cur)) {
1.114 daniel 4495: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4496: ctxt->sax->error(ctxt->userData,
4497: "xmlParsePI: PI %s space expected\n", target);
1.123 daniel 4498: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.114 daniel 4499: ctxt->wellFormed = 0;
1.180 daniel 4500: ctxt->disableSAX = 1;
1.114 daniel 4501: }
4502: SKIP_BLANKS;
1.152 daniel 4503: cur = CUR_CHAR(l);
1.135 daniel 4504: while (IS_CHAR(cur) &&
4505: ((cur != '?') || (NXT(1) != '>'))) {
1.152 daniel 4506: if (len + 5 >= size) {
1.135 daniel 4507: size *= 2;
1.204 ! veillard 4508: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 4509: if (buf == NULL) {
4510: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 4511: ctxt->instate = state;
1.135 daniel 4512: return;
4513: }
4514: }
1.152 daniel 4515: COPY_BUF(l,buf,len,cur);
4516: NEXTL(l);
4517: cur = CUR_CHAR(l);
1.135 daniel 4518: if (cur == 0) {
4519: SHRINK;
4520: GROW;
1.152 daniel 4521: cur = CUR_CHAR(l);
1.135 daniel 4522: }
4523: }
4524: buf[len] = 0;
1.152 daniel 4525: if (cur != '?') {
1.72 daniel 4526: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4527: ctxt->sax->error(ctxt->userData,
1.72 daniel 4528: "xmlParsePI: PI %s never end ...\n", target);
1.123 daniel 4529: ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
1.72 daniel 4530: ctxt->wellFormed = 0;
1.180 daniel 4531: ctxt->disableSAX = 1;
1.22 daniel 4532: } else {
1.187 daniel 4533: if (input != ctxt->input) {
4534: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4535: ctxt->sax->error(ctxt->userData,
4536: "PI declaration doesn't start and stop in the same entity\n");
4537: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4538: ctxt->wellFormed = 0;
4539: ctxt->disableSAX = 1;
4540: }
1.72 daniel 4541: SKIP(2);
1.44 daniel 4542:
1.72 daniel 4543: /*
4544: * SAX: PI detected.
4545: */
1.171 daniel 4546: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.72 daniel 4547: (ctxt->sax->processingInstruction != NULL))
1.99 daniel 4548: ctxt->sax->processingInstruction(ctxt->userData,
1.135 daniel 4549: target, buf);
1.22 daniel 4550: }
1.135 daniel 4551: xmlFree(buf);
1.119 daniel 4552: xmlFree(target);
1.3 veillard 4553: } else {
1.55 daniel 4554: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.99 daniel 4555: ctxt->sax->error(ctxt->userData,
4556: "xmlParsePI : no target name\n");
1.123 daniel 4557: ctxt->errNo = XML_ERR_PI_NOT_STARTED;
1.59 daniel 4558: ctxt->wellFormed = 0;
1.180 daniel 4559: ctxt->disableSAX = 1;
1.22 daniel 4560: }
1.140 daniel 4561: ctxt->instate = state;
1.22 daniel 4562: }
4563: }
4564:
1.50 daniel 4565: /**
4566: * xmlParseNotationDecl:
4567: * @ctxt: an XML parser context
4568: *
4569: * parse a notation declaration
1.22 daniel 4570: *
4571: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4572: *
4573: * Hence there is actually 3 choices:
4574: * 'PUBLIC' S PubidLiteral
4575: * 'PUBLIC' S PubidLiteral S SystemLiteral
4576: * and 'SYSTEM' S SystemLiteral
1.50 daniel 4577: *
1.67 daniel 4578: * See the NOTE on xmlParseExternalID().
1.22 daniel 4579: */
4580:
1.55 daniel 4581: void
4582: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4583: xmlChar *name;
4584: xmlChar *Pubid;
4585: xmlChar *Systemid;
1.22 daniel 4586:
1.152 daniel 4587: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4588: (NXT(2) == 'N') && (NXT(3) == 'O') &&
4589: (NXT(4) == 'T') && (NXT(5) == 'A') &&
4590: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1.67 daniel 4591: (NXT(8) == 'O') && (NXT(9) == 'N')) {
1.187 daniel 4592: xmlParserInputPtr input = ctxt->input;
1.91 daniel 4593: SHRINK;
1.40 daniel 4594: SKIP(10);
1.67 daniel 4595: if (!IS_BLANK(CUR)) {
4596: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4597: ctxt->sax->error(ctxt->userData,
4598: "Space required after '<!NOTATION'\n");
1.123 daniel 4599: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 4600: ctxt->wellFormed = 0;
1.180 daniel 4601: ctxt->disableSAX = 1;
1.67 daniel 4602: return;
4603: }
4604: SKIP_BLANKS;
1.22 daniel 4605:
4606: name = xmlParseName(ctxt);
4607: if (name == NULL) {
1.55 daniel 4608: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4609: ctxt->sax->error(ctxt->userData,
4610: "NOTATION: Name expected here\n");
1.123 daniel 4611: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.67 daniel 4612: ctxt->wellFormed = 0;
1.180 daniel 4613: ctxt->disableSAX = 1;
1.67 daniel 4614: return;
4615: }
4616: if (!IS_BLANK(CUR)) {
4617: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4618: ctxt->sax->error(ctxt->userData,
1.67 daniel 4619: "Space required after the NOTATION name'\n");
1.123 daniel 4620: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4621: ctxt->wellFormed = 0;
1.180 daniel 4622: ctxt->disableSAX = 1;
1.22 daniel 4623: return;
4624: }
1.42 daniel 4625: SKIP_BLANKS;
1.67 daniel 4626:
1.22 daniel 4627: /*
1.67 daniel 4628: * Parse the IDs.
1.22 daniel 4629: */
1.160 daniel 4630: Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
1.67 daniel 4631: SKIP_BLANKS;
4632:
1.152 daniel 4633: if (RAW == '>') {
1.187 daniel 4634: if (input != ctxt->input) {
4635: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4636: ctxt->sax->error(ctxt->userData,
4637: "Notation declaration doesn't start and stop in the same entity\n");
4638: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4639: ctxt->wellFormed = 0;
4640: ctxt->disableSAX = 1;
4641: }
1.40 daniel 4642: NEXT;
1.171 daniel 4643: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4644: (ctxt->sax->notationDecl != NULL))
1.74 daniel 4645: ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
1.67 daniel 4646: } else {
4647: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4648: ctxt->sax->error(ctxt->userData,
1.67 daniel 4649: "'>' required to close NOTATION declaration\n");
1.123 daniel 4650: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.67 daniel 4651: ctxt->wellFormed = 0;
1.180 daniel 4652: ctxt->disableSAX = 1;
1.67 daniel 4653: }
1.119 daniel 4654: xmlFree(name);
4655: if (Systemid != NULL) xmlFree(Systemid);
4656: if (Pubid != NULL) xmlFree(Pubid);
1.22 daniel 4657: }
4658: }
4659:
1.50 daniel 4660: /**
4661: * xmlParseEntityDecl:
4662: * @ctxt: an XML parser context
4663: *
4664: * parse <!ENTITY declarations
1.22 daniel 4665: *
4666: * [70] EntityDecl ::= GEDecl | PEDecl
4667: *
4668: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4669: *
4670: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4671: *
4672: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4673: *
4674: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 4675: *
4676: * [76] NDataDecl ::= S 'NDATA' S Name
1.99 daniel 4677: *
4678: * [ VC: Notation Declared ]
1.116 daniel 4679: * The Name must match the declared name of a notation.
1.22 daniel 4680: */
4681:
1.55 daniel 4682: void
4683: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4684: xmlChar *name = NULL;
4685: xmlChar *value = NULL;
4686: xmlChar *URI = NULL, *literal = NULL;
4687: xmlChar *ndata = NULL;
1.39 daniel 4688: int isParameter = 0;
1.123 daniel 4689: xmlChar *orig = NULL;
1.22 daniel 4690:
1.94 daniel 4691: GROW;
1.152 daniel 4692: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4693: (NXT(2) == 'E') && (NXT(3) == 'N') &&
4694: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1.59 daniel 4695: (NXT(6) == 'T') && (NXT(7) == 'Y')) {
1.187 daniel 4696: xmlParserInputPtr input = ctxt->input;
1.96 daniel 4697: ctxt->instate = XML_PARSER_ENTITY_DECL;
1.91 daniel 4698: SHRINK;
1.40 daniel 4699: SKIP(8);
1.59 daniel 4700: if (!IS_BLANK(CUR)) {
4701: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4702: ctxt->sax->error(ctxt->userData,
4703: "Space required after '<!ENTITY'\n");
1.123 daniel 4704: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4705: ctxt->wellFormed = 0;
1.180 daniel 4706: ctxt->disableSAX = 1;
1.59 daniel 4707: }
4708: SKIP_BLANKS;
1.40 daniel 4709:
1.152 daniel 4710: if (RAW == '%') {
1.40 daniel 4711: NEXT;
1.59 daniel 4712: if (!IS_BLANK(CUR)) {
4713: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4714: ctxt->sax->error(ctxt->userData,
4715: "Space required after '%'\n");
1.123 daniel 4716: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4717: ctxt->wellFormed = 0;
1.180 daniel 4718: ctxt->disableSAX = 1;
1.59 daniel 4719: }
1.42 daniel 4720: SKIP_BLANKS;
1.39 daniel 4721: isParameter = 1;
1.22 daniel 4722: }
4723:
4724: name = xmlParseName(ctxt);
1.24 daniel 4725: if (name == NULL) {
1.55 daniel 4726: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4727: ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
1.123 daniel 4728: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 4729: ctxt->wellFormed = 0;
1.180 daniel 4730: ctxt->disableSAX = 1;
1.24 daniel 4731: return;
4732: }
1.59 daniel 4733: if (!IS_BLANK(CUR)) {
4734: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4735: ctxt->sax->error(ctxt->userData,
1.59 daniel 4736: "Space required after the entity name\n");
1.123 daniel 4737: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4738: ctxt->wellFormed = 0;
1.180 daniel 4739: ctxt->disableSAX = 1;
1.59 daniel 4740: }
1.42 daniel 4741: SKIP_BLANKS;
1.24 daniel 4742:
1.22 daniel 4743: /*
1.68 daniel 4744: * handle the various case of definitions...
1.22 daniel 4745: */
1.39 daniel 4746: if (isParameter) {
1.152 daniel 4747: if ((RAW == '"') || (RAW == '\''))
1.78 daniel 4748: value = xmlParseEntityValue(ctxt, &orig);
1.39 daniel 4749: if (value) {
1.171 daniel 4750: if ((ctxt->sax != NULL) &&
4751: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4752: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4753: XML_INTERNAL_PARAMETER_ENTITY,
4754: NULL, NULL, value);
4755: }
1.24 daniel 4756: else {
1.67 daniel 4757: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 4758: if ((URI == NULL) && (literal == NULL)) {
4759: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4760: ctxt->sax->error(ctxt->userData,
4761: "Entity value required\n");
4762: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
4763: ctxt->wellFormed = 0;
1.180 daniel 4764: ctxt->disableSAX = 1;
1.169 daniel 4765: }
1.39 daniel 4766: if (URI) {
1.193 daniel 4767: xmlURIPtr uri;
4768:
4769: uri = xmlParseURI((const char *) URI);
4770: if (uri == NULL) {
4771: if ((ctxt->sax != NULL) &&
4772: (!ctxt->disableSAX) &&
4773: (ctxt->sax->error != NULL))
4774: ctxt->sax->error(ctxt->userData,
4775: "Invalid URI: %s\n", URI);
4776: ctxt->wellFormed = 0;
4777: ctxt->errNo = XML_ERR_INVALID_URI;
4778: } else {
4779: if (uri->fragment != NULL) {
4780: if ((ctxt->sax != NULL) &&
4781: (!ctxt->disableSAX) &&
4782: (ctxt->sax->error != NULL))
4783: ctxt->sax->error(ctxt->userData,
4784: "Fragment not allowed: %s\n", URI);
4785: ctxt->wellFormed = 0;
4786: ctxt->errNo = XML_ERR_URI_FRAGMENT;
4787: } else {
4788: if ((ctxt->sax != NULL) &&
4789: (!ctxt->disableSAX) &&
4790: (ctxt->sax->entityDecl != NULL))
4791: ctxt->sax->entityDecl(ctxt->userData, name,
4792: XML_EXTERNAL_PARAMETER_ENTITY,
4793: literal, URI, NULL);
4794: }
4795: xmlFreeURI(uri);
4796: }
1.39 daniel 4797: }
1.24 daniel 4798: }
4799: } else {
1.152 daniel 4800: if ((RAW == '"') || (RAW == '\'')) {
1.78 daniel 4801: value = xmlParseEntityValue(ctxt, &orig);
1.171 daniel 4802: if ((ctxt->sax != NULL) &&
4803: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4804: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4805: XML_INTERNAL_GENERAL_ENTITY,
4806: NULL, NULL, value);
4807: } else {
1.67 daniel 4808: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 4809: if ((URI == NULL) && (literal == NULL)) {
4810: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4811: ctxt->sax->error(ctxt->userData,
4812: "Entity value required\n");
4813: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
4814: ctxt->wellFormed = 0;
1.180 daniel 4815: ctxt->disableSAX = 1;
1.169 daniel 4816: }
1.193 daniel 4817: if (URI) {
4818: xmlURIPtr uri;
4819:
4820: uri = xmlParseURI((const char *)URI);
4821: if (uri == NULL) {
4822: if ((ctxt->sax != NULL) &&
4823: (!ctxt->disableSAX) &&
4824: (ctxt->sax->error != NULL))
4825: ctxt->sax->error(ctxt->userData,
4826: "Invalid URI: %s\n", URI);
4827: ctxt->wellFormed = 0;
4828: ctxt->errNo = XML_ERR_INVALID_URI;
4829: } else {
4830: if (uri->fragment != NULL) {
4831: if ((ctxt->sax != NULL) &&
4832: (!ctxt->disableSAX) &&
4833: (ctxt->sax->error != NULL))
4834: ctxt->sax->error(ctxt->userData,
4835: "Fragment not allowed: %s\n", URI);
4836: ctxt->wellFormed = 0;
4837: ctxt->errNo = XML_ERR_URI_FRAGMENT;
4838: }
4839: xmlFreeURI(uri);
4840: }
4841: }
1.152 daniel 4842: if ((RAW != '>') && (!IS_BLANK(CUR))) {
1.59 daniel 4843: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4844: ctxt->sax->error(ctxt->userData,
1.59 daniel 4845: "Space required before 'NDATA'\n");
1.123 daniel 4846: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4847: ctxt->wellFormed = 0;
1.180 daniel 4848: ctxt->disableSAX = 1;
1.59 daniel 4849: }
1.42 daniel 4850: SKIP_BLANKS;
1.152 daniel 4851: if ((RAW == 'N') && (NXT(1) == 'D') &&
1.40 daniel 4852: (NXT(2) == 'A') && (NXT(3) == 'T') &&
4853: (NXT(4) == 'A')) {
4854: SKIP(5);
1.59 daniel 4855: if (!IS_BLANK(CUR)) {
4856: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4857: ctxt->sax->error(ctxt->userData,
1.59 daniel 4858: "Space required after 'NDATA'\n");
1.123 daniel 4859: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4860: ctxt->wellFormed = 0;
1.180 daniel 4861: ctxt->disableSAX = 1;
1.59 daniel 4862: }
1.42 daniel 4863: SKIP_BLANKS;
1.24 daniel 4864: ndata = xmlParseName(ctxt);
1.171 daniel 4865: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.116 daniel 4866: (ctxt->sax->unparsedEntityDecl != NULL))
4867: ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
1.39 daniel 4868: literal, URI, ndata);
4869: } else {
1.171 daniel 4870: if ((ctxt->sax != NULL) &&
4871: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4872: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4873: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4874: literal, URI, NULL);
1.24 daniel 4875: }
4876: }
4877: }
1.42 daniel 4878: SKIP_BLANKS;
1.152 daniel 4879: if (RAW != '>') {
1.55 daniel 4880: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4881: ctxt->sax->error(ctxt->userData,
1.31 daniel 4882: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.123 daniel 4883: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.59 daniel 4884: ctxt->wellFormed = 0;
1.180 daniel 4885: ctxt->disableSAX = 1;
1.187 daniel 4886: } else {
4887: if (input != ctxt->input) {
4888: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4889: ctxt->sax->error(ctxt->userData,
4890: "Entity declaration doesn't start and stop in the same entity\n");
4891: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4892: ctxt->wellFormed = 0;
4893: ctxt->disableSAX = 1;
4894: }
1.40 daniel 4895: NEXT;
1.187 daniel 4896: }
1.78 daniel 4897: if (orig != NULL) {
4898: /*
1.98 daniel 4899: * Ugly mechanism to save the raw entity value.
1.78 daniel 4900: */
4901: xmlEntityPtr cur = NULL;
4902:
1.98 daniel 4903: if (isParameter) {
4904: if ((ctxt->sax != NULL) &&
4905: (ctxt->sax->getParameterEntity != NULL))
1.120 daniel 4906: cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.98 daniel 4907: } else {
4908: if ((ctxt->sax != NULL) &&
4909: (ctxt->sax->getEntity != NULL))
1.120 daniel 4910: cur = ctxt->sax->getEntity(ctxt->userData, name);
1.98 daniel 4911: }
4912: if (cur != NULL) {
4913: if (cur->orig != NULL)
1.119 daniel 4914: xmlFree(orig);
1.98 daniel 4915: else
4916: cur->orig = orig;
4917: } else
1.119 daniel 4918: xmlFree(orig);
1.78 daniel 4919: }
1.119 daniel 4920: if (name != NULL) xmlFree(name);
4921: if (value != NULL) xmlFree(value);
4922: if (URI != NULL) xmlFree(URI);
4923: if (literal != NULL) xmlFree(literal);
4924: if (ndata != NULL) xmlFree(ndata);
1.22 daniel 4925: }
4926: }
4927:
1.50 daniel 4928: /**
1.59 daniel 4929: * xmlParseDefaultDecl:
4930: * @ctxt: an XML parser context
4931: * @value: Receive a possible fixed default value for the attribute
4932: *
4933: * Parse an attribute default declaration
4934: *
4935: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4936: *
1.99 daniel 4937: * [ VC: Required Attribute ]
1.117 daniel 4938: * if the default declaration is the keyword #REQUIRED, then the
4939: * attribute must be specified for all elements of the type in the
4940: * attribute-list declaration.
1.99 daniel 4941: *
4942: * [ VC: Attribute Default Legal ]
1.102 daniel 4943: * The declared default value must meet the lexical constraints of
4944: * the declared attribute type c.f. xmlValidateAttributeDecl()
1.99 daniel 4945: *
4946: * [ VC: Fixed Attribute Default ]
1.117 daniel 4947: * if an attribute has a default value declared with the #FIXED
4948: * keyword, instances of that attribute must match the default value.
1.99 daniel 4949: *
4950: * [ WFC: No < in Attribute Values ]
4951: * handled in xmlParseAttValue()
4952: *
1.59 daniel 4953: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4954: * or XML_ATTRIBUTE_FIXED.
4955: */
4956:
4957: int
1.123 daniel 4958: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
1.59 daniel 4959: int val;
1.123 daniel 4960: xmlChar *ret;
1.59 daniel 4961:
4962: *value = NULL;
1.152 daniel 4963: if ((RAW == '#') && (NXT(1) == 'R') &&
1.59 daniel 4964: (NXT(2) == 'E') && (NXT(3) == 'Q') &&
4965: (NXT(4) == 'U') && (NXT(5) == 'I') &&
4966: (NXT(6) == 'R') && (NXT(7) == 'E') &&
4967: (NXT(8) == 'D')) {
4968: SKIP(9);
4969: return(XML_ATTRIBUTE_REQUIRED);
4970: }
1.152 daniel 4971: if ((RAW == '#') && (NXT(1) == 'I') &&
1.59 daniel 4972: (NXT(2) == 'M') && (NXT(3) == 'P') &&
4973: (NXT(4) == 'L') && (NXT(5) == 'I') &&
4974: (NXT(6) == 'E') && (NXT(7) == 'D')) {
4975: SKIP(8);
4976: return(XML_ATTRIBUTE_IMPLIED);
4977: }
4978: val = XML_ATTRIBUTE_NONE;
1.152 daniel 4979: if ((RAW == '#') && (NXT(1) == 'F') &&
1.59 daniel 4980: (NXT(2) == 'I') && (NXT(3) == 'X') &&
4981: (NXT(4) == 'E') && (NXT(5) == 'D')) {
4982: SKIP(6);
4983: val = XML_ATTRIBUTE_FIXED;
4984: if (!IS_BLANK(CUR)) {
4985: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4986: ctxt->sax->error(ctxt->userData,
4987: "Space required after '#FIXED'\n");
1.123 daniel 4988: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4989: ctxt->wellFormed = 0;
1.180 daniel 4990: ctxt->disableSAX = 1;
1.59 daniel 4991: }
4992: SKIP_BLANKS;
4993: }
4994: ret = xmlParseAttValue(ctxt);
1.96 daniel 4995: ctxt->instate = XML_PARSER_DTD;
1.59 daniel 4996: if (ret == NULL) {
4997: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4998: ctxt->sax->error(ctxt->userData,
1.59 daniel 4999: "Attribute default value declaration error\n");
5000: ctxt->wellFormed = 0;
1.180 daniel 5001: ctxt->disableSAX = 1;
1.59 daniel 5002: } else
5003: *value = ret;
5004: return(val);
5005: }
5006:
5007: /**
1.66 daniel 5008: * xmlParseNotationType:
5009: * @ctxt: an XML parser context
5010: *
5011: * parse an Notation attribute type.
5012: *
1.99 daniel 5013: * Note: the leading 'NOTATION' S part has already being parsed...
5014: *
1.66 daniel 5015: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5016: *
1.99 daniel 5017: * [ VC: Notation Attributes ]
1.117 daniel 5018: * Values of this type must match one of the notation names included
1.99 daniel 5019: * in the declaration; all notation names in the declaration must be declared.
1.66 daniel 5020: *
5021: * Returns: the notation attribute tree built while parsing
5022: */
5023:
5024: xmlEnumerationPtr
5025: xmlParseNotationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 5026: xmlChar *name;
1.66 daniel 5027: xmlEnumerationPtr ret = NULL, last = NULL, cur;
5028:
1.152 daniel 5029: if (RAW != '(') {
1.66 daniel 5030: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5031: ctxt->sax->error(ctxt->userData,
5032: "'(' required to start 'NOTATION'\n");
1.123 daniel 5033: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.66 daniel 5034: ctxt->wellFormed = 0;
1.180 daniel 5035: ctxt->disableSAX = 1;
1.66 daniel 5036: return(NULL);
5037: }
1.91 daniel 5038: SHRINK;
1.66 daniel 5039: do {
5040: NEXT;
5041: SKIP_BLANKS;
5042: name = xmlParseName(ctxt);
5043: if (name == NULL) {
5044: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5045: ctxt->sax->error(ctxt->userData,
1.66 daniel 5046: "Name expected in NOTATION declaration\n");
1.123 daniel 5047: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.66 daniel 5048: ctxt->wellFormed = 0;
1.180 daniel 5049: ctxt->disableSAX = 1;
1.66 daniel 5050: return(ret);
5051: }
5052: cur = xmlCreateEnumeration(name);
1.119 daniel 5053: xmlFree(name);
1.66 daniel 5054: if (cur == NULL) return(ret);
5055: if (last == NULL) ret = last = cur;
5056: else {
5057: last->next = cur;
5058: last = cur;
5059: }
5060: SKIP_BLANKS;
1.152 daniel 5061: } while (RAW == '|');
5062: if (RAW != ')') {
1.66 daniel 5063: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5064: ctxt->sax->error(ctxt->userData,
1.66 daniel 5065: "')' required to finish NOTATION declaration\n");
1.123 daniel 5066: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.66 daniel 5067: ctxt->wellFormed = 0;
1.180 daniel 5068: ctxt->disableSAX = 1;
1.170 daniel 5069: if ((last != NULL) && (last != ret))
5070: xmlFreeEnumeration(last);
1.66 daniel 5071: return(ret);
5072: }
5073: NEXT;
5074: return(ret);
5075: }
5076:
5077: /**
5078: * xmlParseEnumerationType:
5079: * @ctxt: an XML parser context
5080: *
5081: * parse an Enumeration attribute type.
5082: *
5083: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5084: *
1.99 daniel 5085: * [ VC: Enumeration ]
1.117 daniel 5086: * Values of this type must match one of the Nmtoken tokens in
1.99 daniel 5087: * the declaration
5088: *
1.66 daniel 5089: * Returns: the enumeration attribute tree built while parsing
5090: */
5091:
5092: xmlEnumerationPtr
5093: xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 5094: xmlChar *name;
1.66 daniel 5095: xmlEnumerationPtr ret = NULL, last = NULL, cur;
5096:
1.152 daniel 5097: if (RAW != '(') {
1.66 daniel 5098: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5099: ctxt->sax->error(ctxt->userData,
1.66 daniel 5100: "'(' required to start ATTLIST enumeration\n");
1.123 daniel 5101: ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
1.66 daniel 5102: ctxt->wellFormed = 0;
1.180 daniel 5103: ctxt->disableSAX = 1;
1.66 daniel 5104: return(NULL);
5105: }
1.91 daniel 5106: SHRINK;
1.66 daniel 5107: do {
5108: NEXT;
5109: SKIP_BLANKS;
5110: name = xmlParseNmtoken(ctxt);
5111: if (name == NULL) {
5112: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5113: ctxt->sax->error(ctxt->userData,
1.66 daniel 5114: "NmToken expected in ATTLIST enumeration\n");
1.123 daniel 5115: ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
1.66 daniel 5116: ctxt->wellFormed = 0;
1.180 daniel 5117: ctxt->disableSAX = 1;
1.66 daniel 5118: return(ret);
5119: }
5120: cur = xmlCreateEnumeration(name);
1.119 daniel 5121: xmlFree(name);
1.66 daniel 5122: if (cur == NULL) return(ret);
5123: if (last == NULL) ret = last = cur;
5124: else {
5125: last->next = cur;
5126: last = cur;
5127: }
5128: SKIP_BLANKS;
1.152 daniel 5129: } while (RAW == '|');
5130: if (RAW != ')') {
1.66 daniel 5131: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5132: ctxt->sax->error(ctxt->userData,
1.66 daniel 5133: "')' required to finish ATTLIST enumeration\n");
1.123 daniel 5134: ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
1.66 daniel 5135: ctxt->wellFormed = 0;
1.180 daniel 5136: ctxt->disableSAX = 1;
1.66 daniel 5137: return(ret);
5138: }
5139: NEXT;
5140: return(ret);
5141: }
5142:
5143: /**
1.50 daniel 5144: * xmlParseEnumeratedType:
5145: * @ctxt: an XML parser context
1.66 daniel 5146: * @tree: the enumeration tree built while parsing
1.50 daniel 5147: *
1.66 daniel 5148: * parse an Enumerated attribute type.
1.22 daniel 5149: *
5150: * [57] EnumeratedType ::= NotationType | Enumeration
5151: *
5152: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5153: *
1.50 daniel 5154: *
1.66 daniel 5155: * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
1.22 daniel 5156: */
5157:
1.66 daniel 5158: int
5159: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.152 daniel 5160: if ((RAW == 'N') && (NXT(1) == 'O') &&
1.66 daniel 5161: (NXT(2) == 'T') && (NXT(3) == 'A') &&
5162: (NXT(4) == 'T') && (NXT(5) == 'I') &&
5163: (NXT(6) == 'O') && (NXT(7) == 'N')) {
5164: SKIP(8);
5165: if (!IS_BLANK(CUR)) {
5166: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5167: ctxt->sax->error(ctxt->userData,
5168: "Space required after 'NOTATION'\n");
1.123 daniel 5169: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.66 daniel 5170: ctxt->wellFormed = 0;
1.180 daniel 5171: ctxt->disableSAX = 1;
1.66 daniel 5172: return(0);
5173: }
5174: SKIP_BLANKS;
5175: *tree = xmlParseNotationType(ctxt);
5176: if (*tree == NULL) return(0);
5177: return(XML_ATTRIBUTE_NOTATION);
5178: }
5179: *tree = xmlParseEnumerationType(ctxt);
5180: if (*tree == NULL) return(0);
5181: return(XML_ATTRIBUTE_ENUMERATION);
1.22 daniel 5182: }
5183:
1.50 daniel 5184: /**
5185: * xmlParseAttributeType:
5186: * @ctxt: an XML parser context
1.66 daniel 5187: * @tree: the enumeration tree built while parsing
1.50 daniel 5188: *
1.59 daniel 5189: * parse the Attribute list def for an element
1.22 daniel 5190: *
5191: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5192: *
5193: * [55] StringType ::= 'CDATA'
5194: *
5195: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5196: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1.50 daniel 5197: *
1.102 daniel 5198: * Validity constraints for attribute values syntax are checked in
5199: * xmlValidateAttributeValue()
5200: *
1.99 daniel 5201: * [ VC: ID ]
1.117 daniel 5202: * Values of type ID must match the Name production. A name must not
1.99 daniel 5203: * appear more than once in an XML document as a value of this type;
5204: * i.e., ID values must uniquely identify the elements which bear them.
5205: *
5206: * [ VC: One ID per Element Type ]
1.117 daniel 5207: * No element type may have more than one ID attribute specified.
1.99 daniel 5208: *
5209: * [ VC: ID Attribute Default ]
1.117 daniel 5210: * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
1.99 daniel 5211: *
5212: * [ VC: IDREF ]
1.102 daniel 5213: * Values of type IDREF must match the Name production, and values
1.140 daniel 5214: * of type IDREFS must match Names; each IDREF Name must match the value
1.117 daniel 5215: * of an ID attribute on some element in the XML document; i.e. IDREF
1.99 daniel 5216: * values must match the value of some ID attribute.
5217: *
5218: * [ VC: Entity Name ]
1.102 daniel 5219: * Values of type ENTITY must match the Name production, values
1.140 daniel 5220: * of type ENTITIES must match Names; each Entity Name must match the
1.117 daniel 5221: * name of an unparsed entity declared in the DTD.
1.99 daniel 5222: *
5223: * [ VC: Name Token ]
1.102 daniel 5224: * Values of type NMTOKEN must match the Nmtoken production; values
1.99 daniel 5225: * of type NMTOKENS must match Nmtokens.
5226: *
1.69 daniel 5227: * Returns the attribute type
1.22 daniel 5228: */
1.59 daniel 5229: int
1.66 daniel 5230: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.91 daniel 5231: SHRINK;
1.152 daniel 5232: if ((RAW == 'C') && (NXT(1) == 'D') &&
1.40 daniel 5233: (NXT(2) == 'A') && (NXT(3) == 'T') &&
5234: (NXT(4) == 'A')) {
5235: SKIP(5);
1.66 daniel 5236: return(XML_ATTRIBUTE_CDATA);
1.152 daniel 5237: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.40 daniel 5238: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.97 daniel 5239: (NXT(4) == 'F') && (NXT(5) == 'S')) {
5240: SKIP(6);
5241: return(XML_ATTRIBUTE_IDREFS);
1.152 daniel 5242: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.97 daniel 5243: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.40 daniel 5244: (NXT(4) == 'F')) {
5245: SKIP(5);
1.59 daniel 5246: return(XML_ATTRIBUTE_IDREF);
1.152 daniel 5247: } else if ((RAW == 'I') && (NXT(1) == 'D')) {
1.66 daniel 5248: SKIP(2);
5249: return(XML_ATTRIBUTE_ID);
1.152 daniel 5250: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 5251: (NXT(2) == 'T') && (NXT(3) == 'I') &&
5252: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
5253: SKIP(6);
1.59 daniel 5254: return(XML_ATTRIBUTE_ENTITY);
1.152 daniel 5255: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 5256: (NXT(2) == 'T') && (NXT(3) == 'I') &&
5257: (NXT(4) == 'T') && (NXT(5) == 'I') &&
5258: (NXT(6) == 'E') && (NXT(7) == 'S')) {
5259: SKIP(8);
1.59 daniel 5260: return(XML_ATTRIBUTE_ENTITIES);
1.152 daniel 5261: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.40 daniel 5262: (NXT(2) == 'T') && (NXT(3) == 'O') &&
5263: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.66 daniel 5264: (NXT(6) == 'N') && (NXT(7) == 'S')) {
5265: SKIP(8);
5266: return(XML_ATTRIBUTE_NMTOKENS);
1.152 daniel 5267: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.66 daniel 5268: (NXT(2) == 'T') && (NXT(3) == 'O') &&
5269: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.40 daniel 5270: (NXT(6) == 'N')) {
5271: SKIP(7);
1.59 daniel 5272: return(XML_ATTRIBUTE_NMTOKEN);
1.22 daniel 5273: }
1.66 daniel 5274: return(xmlParseEnumeratedType(ctxt, tree));
1.22 daniel 5275: }
5276:
1.50 daniel 5277: /**
5278: * xmlParseAttributeListDecl:
5279: * @ctxt: an XML parser context
5280: *
5281: * : parse the Attribute list def for an element
1.22 daniel 5282: *
5283: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5284: *
5285: * [53] AttDef ::= S Name S AttType S DefaultDecl
1.50 daniel 5286: *
1.22 daniel 5287: */
1.55 daniel 5288: void
5289: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 5290: xmlChar *elemName;
5291: xmlChar *attrName;
1.103 daniel 5292: xmlEnumerationPtr tree;
1.22 daniel 5293:
1.152 daniel 5294: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 5295: (NXT(2) == 'A') && (NXT(3) == 'T') &&
5296: (NXT(4) == 'T') && (NXT(5) == 'L') &&
5297: (NXT(6) == 'I') && (NXT(7) == 'S') &&
1.59 daniel 5298: (NXT(8) == 'T')) {
1.187 daniel 5299: xmlParserInputPtr input = ctxt->input;
5300:
1.40 daniel 5301: SKIP(9);
1.59 daniel 5302: if (!IS_BLANK(CUR)) {
5303: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5304: ctxt->sax->error(ctxt->userData,
5305: "Space required after '<!ATTLIST'\n");
1.123 daniel 5306: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5307: ctxt->wellFormed = 0;
1.180 daniel 5308: ctxt->disableSAX = 1;
1.59 daniel 5309: }
1.42 daniel 5310: SKIP_BLANKS;
1.59 daniel 5311: elemName = xmlParseName(ctxt);
5312: if (elemName == NULL) {
1.55 daniel 5313: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5314: ctxt->sax->error(ctxt->userData,
5315: "ATTLIST: no name for Element\n");
1.123 daniel 5316: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5317: ctxt->wellFormed = 0;
1.180 daniel 5318: ctxt->disableSAX = 1;
1.22 daniel 5319: return;
5320: }
1.42 daniel 5321: SKIP_BLANKS;
1.152 daniel 5322: while (RAW != '>') {
1.123 daniel 5323: const xmlChar *check = CUR_PTR;
1.59 daniel 5324: int type;
5325: int def;
1.123 daniel 5326: xmlChar *defaultValue = NULL;
1.59 daniel 5327:
1.103 daniel 5328: tree = NULL;
1.59 daniel 5329: attrName = xmlParseName(ctxt);
5330: if (attrName == NULL) {
5331: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5332: ctxt->sax->error(ctxt->userData,
5333: "ATTLIST: no name for Attribute\n");
1.123 daniel 5334: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5335: ctxt->wellFormed = 0;
1.180 daniel 5336: ctxt->disableSAX = 1;
1.59 daniel 5337: break;
5338: }
1.97 daniel 5339: GROW;
1.59 daniel 5340: if (!IS_BLANK(CUR)) {
5341: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5342: ctxt->sax->error(ctxt->userData,
1.59 daniel 5343: "Space required after the attribute name\n");
1.123 daniel 5344: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5345: ctxt->wellFormed = 0;
1.180 daniel 5346: ctxt->disableSAX = 1;
1.170 daniel 5347: if (attrName != NULL)
5348: xmlFree(attrName);
5349: if (defaultValue != NULL)
5350: xmlFree(defaultValue);
1.59 daniel 5351: break;
5352: }
5353: SKIP_BLANKS;
5354:
1.66 daniel 5355: type = xmlParseAttributeType(ctxt, &tree);
1.170 daniel 5356: if (type <= 0) {
5357: if (attrName != NULL)
5358: xmlFree(attrName);
5359: if (defaultValue != NULL)
5360: xmlFree(defaultValue);
5361: break;
5362: }
1.22 daniel 5363:
1.97 daniel 5364: GROW;
1.59 daniel 5365: if (!IS_BLANK(CUR)) {
5366: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5367: ctxt->sax->error(ctxt->userData,
1.59 daniel 5368: "Space required after the attribute type\n");
1.123 daniel 5369: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5370: ctxt->wellFormed = 0;
1.180 daniel 5371: ctxt->disableSAX = 1;
1.170 daniel 5372: if (attrName != NULL)
5373: xmlFree(attrName);
5374: if (defaultValue != NULL)
5375: xmlFree(defaultValue);
5376: if (tree != NULL)
5377: xmlFreeEnumeration(tree);
1.59 daniel 5378: break;
5379: }
1.42 daniel 5380: SKIP_BLANKS;
1.59 daniel 5381:
5382: def = xmlParseDefaultDecl(ctxt, &defaultValue);
1.170 daniel 5383: if (def <= 0) {
5384: if (attrName != NULL)
5385: xmlFree(attrName);
5386: if (defaultValue != NULL)
5387: xmlFree(defaultValue);
5388: if (tree != NULL)
5389: xmlFreeEnumeration(tree);
5390: break;
5391: }
1.59 daniel 5392:
1.97 daniel 5393: GROW;
1.152 daniel 5394: if (RAW != '>') {
1.59 daniel 5395: if (!IS_BLANK(CUR)) {
5396: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5397: ctxt->sax->error(ctxt->userData,
1.59 daniel 5398: "Space required after the attribute default value\n");
1.123 daniel 5399: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5400: ctxt->wellFormed = 0;
1.180 daniel 5401: ctxt->disableSAX = 1;
1.170 daniel 5402: if (attrName != NULL)
5403: xmlFree(attrName);
5404: if (defaultValue != NULL)
5405: xmlFree(defaultValue);
5406: if (tree != NULL)
5407: xmlFreeEnumeration(tree);
1.59 daniel 5408: break;
5409: }
5410: SKIP_BLANKS;
5411: }
1.40 daniel 5412: if (check == CUR_PTR) {
1.55 daniel 5413: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5414: ctxt->sax->error(ctxt->userData,
1.59 daniel 5415: "xmlParseAttributeListDecl: detected internal error\n");
1.123 daniel 5416: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.170 daniel 5417: if (attrName != NULL)
5418: xmlFree(attrName);
5419: if (defaultValue != NULL)
5420: xmlFree(defaultValue);
5421: if (tree != NULL)
5422: xmlFreeEnumeration(tree);
1.22 daniel 5423: break;
5424: }
1.171 daniel 5425: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5426: (ctxt->sax->attributeDecl != NULL))
1.74 daniel 5427: ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
1.66 daniel 5428: type, def, defaultValue, tree);
1.59 daniel 5429: if (attrName != NULL)
1.119 daniel 5430: xmlFree(attrName);
1.59 daniel 5431: if (defaultValue != NULL)
1.119 daniel 5432: xmlFree(defaultValue);
1.97 daniel 5433: GROW;
1.22 daniel 5434: }
1.187 daniel 5435: if (RAW == '>') {
5436: if (input != ctxt->input) {
5437: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5438: ctxt->sax->error(ctxt->userData,
5439: "Attribute list declaration doesn't start and stop in the same entity\n");
5440: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5441: ctxt->wellFormed = 0;
5442: ctxt->disableSAX = 1;
5443: }
1.40 daniel 5444: NEXT;
1.187 daniel 5445: }
1.22 daniel 5446:
1.119 daniel 5447: xmlFree(elemName);
1.22 daniel 5448: }
5449: }
5450:
1.50 daniel 5451: /**
1.61 daniel 5452: * xmlParseElementMixedContentDecl:
5453: * @ctxt: an XML parser context
5454: *
5455: * parse the declaration for a Mixed Element content
5456: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5457: *
5458: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5459: * '(' S? '#PCDATA' S? ')'
5460: *
1.99 daniel 5461: * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5462: *
5463: * [ VC: No Duplicate Types ]
1.117 daniel 5464: * The same name must not appear more than once in a single
5465: * mixed-content declaration.
1.99 daniel 5466: *
1.61 daniel 5467: * returns: the list of the xmlElementContentPtr describing the element choices
5468: */
5469: xmlElementContentPtr
1.62 daniel 5470: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
1.64 daniel 5471: xmlElementContentPtr ret = NULL, cur = NULL, n;
1.123 daniel 5472: xmlChar *elem = NULL;
1.61 daniel 5473:
1.97 daniel 5474: GROW;
1.152 daniel 5475: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 5476: (NXT(2) == 'C') && (NXT(3) == 'D') &&
5477: (NXT(4) == 'A') && (NXT(5) == 'T') &&
5478: (NXT(6) == 'A')) {
5479: SKIP(7);
5480: SKIP_BLANKS;
1.91 daniel 5481: SHRINK;
1.152 daniel 5482: if (RAW == ')') {
1.187 daniel 5483: ctxt->entity = ctxt->input;
1.63 daniel 5484: NEXT;
5485: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
1.152 daniel 5486: if (RAW == '*') {
1.136 daniel 5487: ret->ocur = XML_ELEMENT_CONTENT_MULT;
5488: NEXT;
5489: }
1.63 daniel 5490: return(ret);
5491: }
1.152 daniel 5492: if ((RAW == '(') || (RAW == '|')) {
1.61 daniel 5493: ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
5494: if (ret == NULL) return(NULL);
1.99 daniel 5495: }
1.152 daniel 5496: while (RAW == '|') {
1.64 daniel 5497: NEXT;
1.61 daniel 5498: if (elem == NULL) {
5499: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5500: if (ret == NULL) return(NULL);
5501: ret->c1 = cur;
1.64 daniel 5502: cur = ret;
1.61 daniel 5503: } else {
1.64 daniel 5504: n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5505: if (n == NULL) return(NULL);
5506: n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
5507: cur->c2 = n;
5508: cur = n;
1.119 daniel 5509: xmlFree(elem);
1.61 daniel 5510: }
5511: SKIP_BLANKS;
5512: elem = xmlParseName(ctxt);
5513: if (elem == NULL) {
5514: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5515: ctxt->sax->error(ctxt->userData,
1.61 daniel 5516: "xmlParseElementMixedContentDecl : Name expected\n");
1.123 daniel 5517: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.61 daniel 5518: ctxt->wellFormed = 0;
1.180 daniel 5519: ctxt->disableSAX = 1;
1.61 daniel 5520: xmlFreeElementContent(cur);
5521: return(NULL);
5522: }
5523: SKIP_BLANKS;
1.97 daniel 5524: GROW;
1.61 daniel 5525: }
1.152 daniel 5526: if ((RAW == ')') && (NXT(1) == '*')) {
1.66 daniel 5527: if (elem != NULL) {
1.61 daniel 5528: cur->c2 = xmlNewElementContent(elem,
5529: XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 5530: xmlFree(elem);
1.66 daniel 5531: }
1.65 daniel 5532: ret->ocur = XML_ELEMENT_CONTENT_MULT;
1.187 daniel 5533: ctxt->entity = ctxt->input;
1.64 daniel 5534: SKIP(2);
1.61 daniel 5535: } else {
1.119 daniel 5536: if (elem != NULL) xmlFree(elem);
1.61 daniel 5537: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5538: ctxt->sax->error(ctxt->userData,
1.63 daniel 5539: "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
1.123 daniel 5540: ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
1.61 daniel 5541: ctxt->wellFormed = 0;
1.180 daniel 5542: ctxt->disableSAX = 1;
1.61 daniel 5543: xmlFreeElementContent(ret);
5544: return(NULL);
5545: }
5546:
5547: } else {
5548: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5549: ctxt->sax->error(ctxt->userData,
1.61 daniel 5550: "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
1.123 daniel 5551: ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
1.61 daniel 5552: ctxt->wellFormed = 0;
1.180 daniel 5553: ctxt->disableSAX = 1;
1.61 daniel 5554: }
5555: return(ret);
5556: }
5557:
5558: /**
5559: * xmlParseElementChildrenContentDecl:
1.50 daniel 5560: * @ctxt: an XML parser context
5561: *
1.61 daniel 5562: * parse the declaration for a Mixed Element content
5563: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
1.22 daniel 5564: *
1.61 daniel 5565: *
1.22 daniel 5566: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5567: *
5568: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5569: *
5570: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5571: *
5572: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5573: *
1.99 daniel 5574: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5575: * TODO Parameter-entity replacement text must be properly nested
5576: * with parenthetized groups. That is to say, if either of the
5577: * opening or closing parentheses in a choice, seq, or Mixed
5578: * construct is contained in the replacement text for a parameter
5579: * entity, both must be contained in the same replacement text. For
5580: * interoperability, if a parameter-entity reference appears in a
5581: * choice, seq, or Mixed construct, its replacement text should not
5582: * be empty, and neither the first nor last non-blank character of
5583: * the replacement text should be a connector (| or ,).
5584: *
1.62 daniel 5585: * returns: the tree of xmlElementContentPtr describing the element
1.61 daniel 5586: * hierarchy.
5587: */
5588: xmlElementContentPtr
1.62 daniel 5589: xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
1.63 daniel 5590: xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
1.123 daniel 5591: xmlChar *elem;
5592: xmlChar type = 0;
1.62 daniel 5593:
5594: SKIP_BLANKS;
1.94 daniel 5595: GROW;
1.152 daniel 5596: if (RAW == '(') {
1.63 daniel 5597: /* Recurse on first child */
1.62 daniel 5598: NEXT;
5599: SKIP_BLANKS;
5600: cur = ret = xmlParseElementChildrenContentDecl(ctxt);
5601: SKIP_BLANKS;
1.101 daniel 5602: GROW;
1.62 daniel 5603: } else {
5604: elem = xmlParseName(ctxt);
5605: if (elem == NULL) {
5606: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5607: ctxt->sax->error(ctxt->userData,
1.62 daniel 5608: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.123 daniel 5609: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 5610: ctxt->wellFormed = 0;
1.180 daniel 5611: ctxt->disableSAX = 1;
1.62 daniel 5612: return(NULL);
5613: }
5614: cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.101 daniel 5615: GROW;
1.152 daniel 5616: if (RAW == '?') {
1.104 daniel 5617: cur->ocur = XML_ELEMENT_CONTENT_OPT;
1.62 daniel 5618: NEXT;
1.152 daniel 5619: } else if (RAW == '*') {
1.104 daniel 5620: cur->ocur = XML_ELEMENT_CONTENT_MULT;
1.62 daniel 5621: NEXT;
1.152 daniel 5622: } else if (RAW == '+') {
1.104 daniel 5623: cur->ocur = XML_ELEMENT_CONTENT_PLUS;
1.62 daniel 5624: NEXT;
5625: } else {
1.104 daniel 5626: cur->ocur = XML_ELEMENT_CONTENT_ONCE;
1.62 daniel 5627: }
1.119 daniel 5628: xmlFree(elem);
1.101 daniel 5629: GROW;
1.62 daniel 5630: }
5631: SKIP_BLANKS;
1.91 daniel 5632: SHRINK;
1.152 daniel 5633: while (RAW != ')') {
1.63 daniel 5634: /*
5635: * Each loop we parse one separator and one element.
5636: */
1.152 daniel 5637: if (RAW == ',') {
1.62 daniel 5638: if (type == 0) type = CUR;
5639:
5640: /*
5641: * Detect "Name | Name , Name" error
5642: */
5643: else if (type != CUR) {
5644: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5645: ctxt->sax->error(ctxt->userData,
1.62 daniel 5646: "xmlParseElementChildrenContentDecl : '%c' expected\n",
5647: type);
1.123 daniel 5648: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 5649: ctxt->wellFormed = 0;
1.180 daniel 5650: ctxt->disableSAX = 1;
1.170 daniel 5651: if ((op != NULL) && (op != ret))
5652: xmlFreeElementContent(op);
5653: if ((last != NULL) && (last != ret))
5654: xmlFreeElementContent(last);
5655: if (ret != NULL)
5656: xmlFreeElementContent(ret);
1.62 daniel 5657: return(NULL);
5658: }
1.64 daniel 5659: NEXT;
1.62 daniel 5660:
1.63 daniel 5661: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
5662: if (op == NULL) {
5663: xmlFreeElementContent(ret);
5664: return(NULL);
5665: }
5666: if (last == NULL) {
5667: op->c1 = ret;
1.65 daniel 5668: ret = cur = op;
1.63 daniel 5669: } else {
5670: cur->c2 = op;
5671: op->c1 = last;
5672: cur =op;
1.65 daniel 5673: last = NULL;
1.63 daniel 5674: }
1.152 daniel 5675: } else if (RAW == '|') {
1.62 daniel 5676: if (type == 0) type = CUR;
5677:
5678: /*
1.63 daniel 5679: * Detect "Name , Name | Name" error
1.62 daniel 5680: */
5681: else if (type != CUR) {
5682: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5683: ctxt->sax->error(ctxt->userData,
1.62 daniel 5684: "xmlParseElementChildrenContentDecl : '%c' expected\n",
5685: type);
1.123 daniel 5686: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 5687: ctxt->wellFormed = 0;
1.180 daniel 5688: ctxt->disableSAX = 1;
1.170 daniel 5689: if ((op != NULL) && (op != ret))
5690: xmlFreeElementContent(op);
5691: if ((last != NULL) && (last != ret))
5692: xmlFreeElementContent(last);
5693: if (ret != NULL)
5694: xmlFreeElementContent(ret);
1.62 daniel 5695: return(NULL);
5696: }
1.64 daniel 5697: NEXT;
1.62 daniel 5698:
1.63 daniel 5699: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5700: if (op == NULL) {
1.170 daniel 5701: if ((op != NULL) && (op != ret))
5702: xmlFreeElementContent(op);
5703: if ((last != NULL) && (last != ret))
5704: xmlFreeElementContent(last);
5705: if (ret != NULL)
5706: xmlFreeElementContent(ret);
1.63 daniel 5707: return(NULL);
5708: }
5709: if (last == NULL) {
5710: op->c1 = ret;
1.65 daniel 5711: ret = cur = op;
1.63 daniel 5712: } else {
5713: cur->c2 = op;
5714: op->c1 = last;
5715: cur =op;
1.65 daniel 5716: last = NULL;
1.63 daniel 5717: }
1.62 daniel 5718: } else {
5719: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5720: ctxt->sax->error(ctxt->userData,
1.62 daniel 5721: "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
5722: ctxt->wellFormed = 0;
1.180 daniel 5723: ctxt->disableSAX = 1;
1.123 daniel 5724: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
1.170 daniel 5725: if ((op != NULL) && (op != ret))
5726: xmlFreeElementContent(op);
5727: if ((last != NULL) && (last != ret))
5728: xmlFreeElementContent(last);
5729: if (ret != NULL)
5730: xmlFreeElementContent(ret);
1.62 daniel 5731: return(NULL);
5732: }
1.101 daniel 5733: GROW;
1.62 daniel 5734: SKIP_BLANKS;
1.101 daniel 5735: GROW;
1.152 daniel 5736: if (RAW == '(') {
1.63 daniel 5737: /* Recurse on second child */
1.62 daniel 5738: NEXT;
5739: SKIP_BLANKS;
1.65 daniel 5740: last = xmlParseElementChildrenContentDecl(ctxt);
1.62 daniel 5741: SKIP_BLANKS;
5742: } else {
5743: elem = xmlParseName(ctxt);
5744: if (elem == NULL) {
5745: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5746: ctxt->sax->error(ctxt->userData,
1.122 daniel 5747: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.123 daniel 5748: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 5749: ctxt->wellFormed = 0;
1.180 daniel 5750: ctxt->disableSAX = 1;
1.170 daniel 5751: if ((op != NULL) && (op != ret))
5752: xmlFreeElementContent(op);
5753: if ((last != NULL) && (last != ret))
5754: xmlFreeElementContent(last);
5755: if (ret != NULL)
5756: xmlFreeElementContent(ret);
1.62 daniel 5757: return(NULL);
5758: }
1.65 daniel 5759: last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 5760: xmlFree(elem);
1.152 daniel 5761: if (RAW == '?') {
1.105 daniel 5762: last->ocur = XML_ELEMENT_CONTENT_OPT;
5763: NEXT;
1.152 daniel 5764: } else if (RAW == '*') {
1.105 daniel 5765: last->ocur = XML_ELEMENT_CONTENT_MULT;
5766: NEXT;
1.152 daniel 5767: } else if (RAW == '+') {
1.105 daniel 5768: last->ocur = XML_ELEMENT_CONTENT_PLUS;
5769: NEXT;
5770: } else {
5771: last->ocur = XML_ELEMENT_CONTENT_ONCE;
5772: }
1.63 daniel 5773: }
5774: SKIP_BLANKS;
1.97 daniel 5775: GROW;
1.64 daniel 5776: }
1.65 daniel 5777: if ((cur != NULL) && (last != NULL)) {
5778: cur->c2 = last;
1.62 daniel 5779: }
1.187 daniel 5780: ctxt->entity = ctxt->input;
1.62 daniel 5781: NEXT;
1.152 daniel 5782: if (RAW == '?') {
1.62 daniel 5783: ret->ocur = XML_ELEMENT_CONTENT_OPT;
5784: NEXT;
1.152 daniel 5785: } else if (RAW == '*') {
1.62 daniel 5786: ret->ocur = XML_ELEMENT_CONTENT_MULT;
5787: NEXT;
1.152 daniel 5788: } else if (RAW == '+') {
1.62 daniel 5789: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
5790: NEXT;
5791: }
5792: return(ret);
1.61 daniel 5793: }
5794:
5795: /**
5796: * xmlParseElementContentDecl:
5797: * @ctxt: an XML parser context
5798: * @name: the name of the element being defined.
5799: * @result: the Element Content pointer will be stored here if any
1.22 daniel 5800: *
1.61 daniel 5801: * parse the declaration for an Element content either Mixed or Children,
5802: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5803: *
5804: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1.50 daniel 5805: *
1.61 daniel 5806: * returns: the type of element content XML_ELEMENT_TYPE_xxx
1.22 daniel 5807: */
5808:
1.61 daniel 5809: int
1.123 daniel 5810: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
1.61 daniel 5811: xmlElementContentPtr *result) {
5812:
5813: xmlElementContentPtr tree = NULL;
1.187 daniel 5814: xmlParserInputPtr input = ctxt->input;
1.61 daniel 5815: int res;
5816:
5817: *result = NULL;
5818:
1.152 daniel 5819: if (RAW != '(') {
1.61 daniel 5820: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5821: ctxt->sax->error(ctxt->userData,
1.61 daniel 5822: "xmlParseElementContentDecl : '(' expected\n");
1.123 daniel 5823: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.61 daniel 5824: ctxt->wellFormed = 0;
1.180 daniel 5825: ctxt->disableSAX = 1;
1.61 daniel 5826: return(-1);
5827: }
5828: NEXT;
1.97 daniel 5829: GROW;
1.61 daniel 5830: SKIP_BLANKS;
1.152 daniel 5831: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 5832: (NXT(2) == 'C') && (NXT(3) == 'D') &&
5833: (NXT(4) == 'A') && (NXT(5) == 'T') &&
5834: (NXT(6) == 'A')) {
1.62 daniel 5835: tree = xmlParseElementMixedContentDecl(ctxt);
1.61 daniel 5836: res = XML_ELEMENT_TYPE_MIXED;
5837: } else {
1.62 daniel 5838: tree = xmlParseElementChildrenContentDecl(ctxt);
1.61 daniel 5839: res = XML_ELEMENT_TYPE_ELEMENT;
5840: }
1.187 daniel 5841: if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
5842: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5843: ctxt->sax->error(ctxt->userData,
5844: "Element content declaration doesn't start and stop in the same entity\n");
5845: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5846: ctxt->wellFormed = 0;
5847: ctxt->disableSAX = 1;
5848: }
1.61 daniel 5849: SKIP_BLANKS;
1.63 daniel 5850: /****************************
1.152 daniel 5851: if (RAW != ')') {
1.61 daniel 5852: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5853: ctxt->sax->error(ctxt->userData,
1.61 daniel 5854: "xmlParseElementContentDecl : ')' expected\n");
5855: ctxt->wellFormed = 0;
1.180 daniel 5856: ctxt->disableSAX = 1;
1.61 daniel 5857: return(-1);
5858: }
1.63 daniel 5859: ****************************/
5860: *result = tree;
1.61 daniel 5861: return(res);
1.22 daniel 5862: }
5863:
1.50 daniel 5864: /**
5865: * xmlParseElementDecl:
5866: * @ctxt: an XML parser context
5867: *
5868: * parse an Element declaration.
1.22 daniel 5869: *
5870: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5871: *
1.99 daniel 5872: * [ VC: Unique Element Type Declaration ]
1.117 daniel 5873: * No element type may be declared more than once
1.69 daniel 5874: *
5875: * Returns the type of the element, or -1 in case of error
1.22 daniel 5876: */
1.59 daniel 5877: int
1.55 daniel 5878: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 5879: xmlChar *name;
1.59 daniel 5880: int ret = -1;
1.61 daniel 5881: xmlElementContentPtr content = NULL;
1.22 daniel 5882:
1.97 daniel 5883: GROW;
1.152 daniel 5884: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 5885: (NXT(2) == 'E') && (NXT(3) == 'L') &&
5886: (NXT(4) == 'E') && (NXT(5) == 'M') &&
5887: (NXT(6) == 'E') && (NXT(7) == 'N') &&
1.59 daniel 5888: (NXT(8) == 'T')) {
1.187 daniel 5889: xmlParserInputPtr input = ctxt->input;
5890:
1.40 daniel 5891: SKIP(9);
1.59 daniel 5892: if (!IS_BLANK(CUR)) {
5893: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5894: ctxt->sax->error(ctxt->userData,
1.59 daniel 5895: "Space required after 'ELEMENT'\n");
1.123 daniel 5896: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5897: ctxt->wellFormed = 0;
1.180 daniel 5898: ctxt->disableSAX = 1;
1.59 daniel 5899: }
1.42 daniel 5900: SKIP_BLANKS;
1.22 daniel 5901: name = xmlParseName(ctxt);
5902: if (name == NULL) {
1.55 daniel 5903: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5904: ctxt->sax->error(ctxt->userData,
1.59 daniel 5905: "xmlParseElementDecl: no name for Element\n");
1.123 daniel 5906: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5907: ctxt->wellFormed = 0;
1.180 daniel 5908: ctxt->disableSAX = 1;
1.59 daniel 5909: return(-1);
5910: }
5911: if (!IS_BLANK(CUR)) {
5912: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5913: ctxt->sax->error(ctxt->userData,
1.59 daniel 5914: "Space required after the element name\n");
1.123 daniel 5915: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5916: ctxt->wellFormed = 0;
1.180 daniel 5917: ctxt->disableSAX = 1;
1.22 daniel 5918: }
1.42 daniel 5919: SKIP_BLANKS;
1.152 daniel 5920: if ((RAW == 'E') && (NXT(1) == 'M') &&
1.40 daniel 5921: (NXT(2) == 'P') && (NXT(3) == 'T') &&
5922: (NXT(4) == 'Y')) {
5923: SKIP(5);
1.22 daniel 5924: /*
5925: * Element must always be empty.
5926: */
1.59 daniel 5927: ret = XML_ELEMENT_TYPE_EMPTY;
1.152 daniel 5928: } else if ((RAW == 'A') && (NXT(1) == 'N') &&
1.40 daniel 5929: (NXT(2) == 'Y')) {
5930: SKIP(3);
1.22 daniel 5931: /*
5932: * Element is a generic container.
5933: */
1.59 daniel 5934: ret = XML_ELEMENT_TYPE_ANY;
1.152 daniel 5935: } else if (RAW == '(') {
1.61 daniel 5936: ret = xmlParseElementContentDecl(ctxt, name, &content);
1.22 daniel 5937: } else {
1.98 daniel 5938: /*
5939: * [ WFC: PEs in Internal Subset ] error handling.
5940: */
1.152 daniel 5941: if ((RAW == '%') && (ctxt->external == 0) &&
1.98 daniel 5942: (ctxt->inputNr == 1)) {
5943: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5944: ctxt->sax->error(ctxt->userData,
5945: "PEReference: forbidden within markup decl in internal subset\n");
1.123 daniel 5946: ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
1.98 daniel 5947: } else {
5948: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5949: ctxt->sax->error(ctxt->userData,
5950: "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
1.123 daniel 5951: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.98 daniel 5952: }
1.61 daniel 5953: ctxt->wellFormed = 0;
1.180 daniel 5954: ctxt->disableSAX = 1;
1.119 daniel 5955: if (name != NULL) xmlFree(name);
1.61 daniel 5956: return(-1);
1.22 daniel 5957: }
1.142 daniel 5958:
5959: SKIP_BLANKS;
5960: /*
5961: * Pop-up of finished entities.
5962: */
1.152 daniel 5963: while ((RAW == 0) && (ctxt->inputNr > 1))
1.142 daniel 5964: xmlPopInput(ctxt);
1.42 daniel 5965: SKIP_BLANKS;
1.142 daniel 5966:
1.152 daniel 5967: if (RAW != '>') {
1.55 daniel 5968: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5969: ctxt->sax->error(ctxt->userData,
1.31 daniel 5970: "xmlParseElementDecl: expected '>' at the end\n");
1.123 daniel 5971: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.59 daniel 5972: ctxt->wellFormed = 0;
1.180 daniel 5973: ctxt->disableSAX = 1;
1.61 daniel 5974: } else {
1.187 daniel 5975: if (input != ctxt->input) {
5976: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5977: ctxt->sax->error(ctxt->userData,
5978: "Element declaration doesn't start and stop in the same entity\n");
5979: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5980: ctxt->wellFormed = 0;
5981: ctxt->disableSAX = 1;
5982: }
5983:
1.40 daniel 5984: NEXT;
1.171 daniel 5985: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5986: (ctxt->sax->elementDecl != NULL))
1.76 daniel 5987: ctxt->sax->elementDecl(ctxt->userData, name, ret,
5988: content);
1.61 daniel 5989: }
1.84 daniel 5990: if (content != NULL) {
5991: xmlFreeElementContent(content);
5992: }
1.61 daniel 5993: if (name != NULL) {
1.119 daniel 5994: xmlFree(name);
1.61 daniel 5995: }
1.22 daniel 5996: }
1.59 daniel 5997: return(ret);
1.22 daniel 5998: }
5999:
1.50 daniel 6000: /**
6001: * xmlParseMarkupDecl:
6002: * @ctxt: an XML parser context
6003: *
6004: * parse Markup declarations
1.22 daniel 6005: *
6006: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6007: * NotationDecl | PI | Comment
6008: *
1.98 daniel 6009: * [ VC: Proper Declaration/PE Nesting ]
6010: * TODO Parameter-entity replacement text must be properly nested with
6011: * markup declarations. That is to say, if either the first character
6012: * or the last character of a markup declaration (markupdecl above) is
6013: * contained in the replacement text for a parameter-entity reference,
6014: * both must be contained in the same replacement text.
6015: *
6016: * [ WFC: PEs in Internal Subset ]
6017: * In the internal DTD subset, parameter-entity references can occur
6018: * only where markup declarations can occur, not within markup declarations.
6019: * (This does not apply to references that occur in external parameter
6020: * entities or to the external subset.)
1.22 daniel 6021: */
1.55 daniel 6022: void
6023: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1.94 daniel 6024: GROW;
1.22 daniel 6025: xmlParseElementDecl(ctxt);
6026: xmlParseAttributeListDecl(ctxt);
6027: xmlParseEntityDecl(ctxt);
6028: xmlParseNotationDecl(ctxt);
6029: xmlParsePI(ctxt);
1.114 daniel 6030: xmlParseComment(ctxt);
1.98 daniel 6031: /*
6032: * This is only for internal subset. On external entities,
6033: * the replacement is done before parsing stage
6034: */
6035: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6036: xmlParsePEReference(ctxt);
1.97 daniel 6037: ctxt->instate = XML_PARSER_DTD;
1.22 daniel 6038: }
6039:
1.50 daniel 6040: /**
1.76 daniel 6041: * xmlParseTextDecl:
6042: * @ctxt: an XML parser context
6043: *
6044: * parse an XML declaration header for external entities
6045: *
6046: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
1.176 daniel 6047: *
6048: * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
1.76 daniel 6049: */
6050:
1.172 daniel 6051: void
1.76 daniel 6052: xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 6053: xmlChar *version;
1.76 daniel 6054:
6055: /*
6056: * We know that '<?xml' is here.
6057: */
1.193 daniel 6058: if ((RAW == '<') && (NXT(1) == '?') &&
6059: (NXT(2) == 'x') && (NXT(3) == 'm') &&
6060: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6061: SKIP(5);
6062: } else {
6063: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6064: ctxt->sax->error(ctxt->userData,
6065: "Text declaration '<?xml' required\n");
6066: ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
6067: ctxt->wellFormed = 0;
6068: ctxt->disableSAX = 1;
6069:
6070: return;
6071: }
1.76 daniel 6072:
6073: if (!IS_BLANK(CUR)) {
6074: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6075: ctxt->sax->error(ctxt->userData,
6076: "Space needed after '<?xml'\n");
1.123 daniel 6077: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 6078: ctxt->wellFormed = 0;
1.180 daniel 6079: ctxt->disableSAX = 1;
1.76 daniel 6080: }
6081: SKIP_BLANKS;
6082:
6083: /*
6084: * We may have the VersionInfo here.
6085: */
6086: version = xmlParseVersionInfo(ctxt);
6087: if (version == NULL)
6088: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.172 daniel 6089: ctxt->input->version = version;
1.76 daniel 6090:
6091: /*
6092: * We must have the encoding declaration
6093: */
6094: if (!IS_BLANK(CUR)) {
6095: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6096: ctxt->sax->error(ctxt->userData, "Space needed here\n");
1.123 daniel 6097: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 6098: ctxt->wellFormed = 0;
1.180 daniel 6099: ctxt->disableSAX = 1;
1.76 daniel 6100: }
1.195 daniel 6101: xmlParseEncodingDecl(ctxt);
1.193 daniel 6102: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6103: /*
6104: * The XML REC instructs us to stop parsing right here
6105: */
6106: return;
6107: }
1.76 daniel 6108:
6109: SKIP_BLANKS;
1.152 daniel 6110: if ((RAW == '?') && (NXT(1) == '>')) {
1.76 daniel 6111: SKIP(2);
1.152 daniel 6112: } else if (RAW == '>') {
1.76 daniel 6113: /* Deprecated old WD ... */
6114: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6115: ctxt->sax->error(ctxt->userData,
6116: "XML declaration must end-up with '?>'\n");
1.123 daniel 6117: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 6118: ctxt->wellFormed = 0;
1.180 daniel 6119: ctxt->disableSAX = 1;
1.76 daniel 6120: NEXT;
6121: } else {
6122: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6123: ctxt->sax->error(ctxt->userData,
6124: "parsing XML declaration: '?>' expected\n");
1.123 daniel 6125: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 6126: ctxt->wellFormed = 0;
1.180 daniel 6127: ctxt->disableSAX = 1;
1.76 daniel 6128: MOVETO_ENDTAG(CUR_PTR);
6129: NEXT;
6130: }
6131: }
6132:
6133: /*
6134: * xmlParseConditionalSections
6135: * @ctxt: an XML parser context
6136: *
6137: * TODO : Conditionnal section are not yet supported !
6138: *
6139: * [61] conditionalSect ::= includeSect | ignoreSect
6140: * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6141: * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6142: * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6143: * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6144: */
6145:
6146: void
6147: xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
1.165 daniel 6148: SKIP(3);
6149: SKIP_BLANKS;
1.168 daniel 6150: if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
6151: (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
6152: (NXT(6) == 'E')) {
1.165 daniel 6153: SKIP(7);
1.168 daniel 6154: SKIP_BLANKS;
6155: if (RAW != '[') {
6156: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6157: ctxt->sax->error(ctxt->userData,
6158: "XML conditional section '[' expected\n");
6159: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
6160: ctxt->wellFormed = 0;
1.180 daniel 6161: ctxt->disableSAX = 1;
1.168 daniel 6162: } else {
6163: NEXT;
6164: }
1.165 daniel 6165: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6166: (NXT(2) != '>'))) {
6167: const xmlChar *check = CUR_PTR;
6168: int cons = ctxt->input->consumed;
6169: int tok = ctxt->token;
6170:
6171: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6172: xmlParseConditionalSections(ctxt);
6173: } else if (IS_BLANK(CUR)) {
6174: NEXT;
6175: } else if (RAW == '%') {
6176: xmlParsePEReference(ctxt);
6177: } else
6178: xmlParseMarkupDecl(ctxt);
6179:
6180: /*
6181: * Pop-up of finished entities.
6182: */
6183: while ((RAW == 0) && (ctxt->inputNr > 1))
6184: xmlPopInput(ctxt);
6185:
6186: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
6187: (tok == ctxt->token)) {
6188: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6189: ctxt->sax->error(ctxt->userData,
6190: "Content error in the external subset\n");
6191: ctxt->wellFormed = 0;
1.180 daniel 6192: ctxt->disableSAX = 1;
1.165 daniel 6193: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
6194: break;
6195: }
6196: }
1.168 daniel 6197: } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
6198: (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
1.171 daniel 6199: int state;
6200:
1.168 daniel 6201: SKIP(6);
6202: SKIP_BLANKS;
6203: if (RAW != '[') {
6204: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6205: ctxt->sax->error(ctxt->userData,
6206: "XML conditional section '[' expected\n");
6207: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
6208: ctxt->wellFormed = 0;
1.180 daniel 6209: ctxt->disableSAX = 1;
1.168 daniel 6210: } else {
6211: NEXT;
6212: }
1.171 daniel 6213:
1.143 daniel 6214: /*
1.171 daniel 6215: * Parse up to the end of the conditionnal section
6216: * But disable SAX event generating DTD building in the meantime
1.143 daniel 6217: */
1.171 daniel 6218: state = ctxt->disableSAX;
1.165 daniel 6219: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6220: (NXT(2) != '>'))) {
1.171 daniel 6221: const xmlChar *check = CUR_PTR;
6222: int cons = ctxt->input->consumed;
6223: int tok = ctxt->token;
6224:
6225: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6226: xmlParseConditionalSections(ctxt);
6227: } else if (IS_BLANK(CUR)) {
6228: NEXT;
6229: } else if (RAW == '%') {
6230: xmlParsePEReference(ctxt);
6231: } else
6232: xmlParseMarkupDecl(ctxt);
6233:
1.165 daniel 6234: /*
6235: * Pop-up of finished entities.
6236: */
6237: while ((RAW == 0) && (ctxt->inputNr > 1))
6238: xmlPopInput(ctxt);
1.143 daniel 6239:
1.171 daniel 6240: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
6241: (tok == ctxt->token)) {
6242: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6243: ctxt->sax->error(ctxt->userData,
6244: "Content error in the external subset\n");
6245: ctxt->wellFormed = 0;
1.180 daniel 6246: ctxt->disableSAX = 1;
1.171 daniel 6247: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
6248: break;
6249: }
1.165 daniel 6250: }
1.171 daniel 6251: ctxt->disableSAX = state;
1.168 daniel 6252: } else {
6253: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6254: ctxt->sax->error(ctxt->userData,
6255: "XML conditional section INCLUDE or IGNORE keyword expected\n");
6256: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
6257: ctxt->wellFormed = 0;
1.180 daniel 6258: ctxt->disableSAX = 1;
1.143 daniel 6259: }
6260:
1.152 daniel 6261: if (RAW == 0)
1.143 daniel 6262: SHRINK;
6263:
1.152 daniel 6264: if (RAW == 0) {
1.76 daniel 6265: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6266: ctxt->sax->error(ctxt->userData,
6267: "XML conditional section not closed\n");
1.123 daniel 6268: ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
1.76 daniel 6269: ctxt->wellFormed = 0;
1.180 daniel 6270: ctxt->disableSAX = 1;
1.143 daniel 6271: } else {
6272: SKIP(3);
1.76 daniel 6273: }
6274: }
6275:
6276: /**
1.124 daniel 6277: * xmlParseExternalSubset:
1.76 daniel 6278: * @ctxt: an XML parser context
1.124 daniel 6279: * @ExternalID: the external identifier
6280: * @SystemID: the system identifier (or URL)
1.76 daniel 6281: *
6282: * parse Markup declarations from an external subset
6283: *
6284: * [30] extSubset ::= textDecl? extSubsetDecl
6285: *
6286: * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6287: */
6288: void
1.123 daniel 6289: xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6290: const xmlChar *SystemID) {
1.132 daniel 6291: GROW;
1.152 daniel 6292: if ((RAW == '<') && (NXT(1) == '?') &&
1.76 daniel 6293: (NXT(2) == 'x') && (NXT(3) == 'm') &&
6294: (NXT(4) == 'l')) {
1.172 daniel 6295: xmlParseTextDecl(ctxt);
1.193 daniel 6296: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6297: /*
6298: * The XML REC instructs us to stop parsing right here
6299: */
6300: ctxt->instate = XML_PARSER_EOF;
6301: return;
6302: }
1.76 daniel 6303: }
1.79 daniel 6304: if (ctxt->myDoc == NULL) {
1.116 daniel 6305: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
1.79 daniel 6306: }
6307: if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6308: xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6309:
1.96 daniel 6310: ctxt->instate = XML_PARSER_DTD;
1.101 daniel 6311: ctxt->external = 1;
1.152 daniel 6312: while (((RAW == '<') && (NXT(1) == '?')) ||
6313: ((RAW == '<') && (NXT(1) == '!')) ||
1.164 daniel 6314: IS_BLANK(CUR)) {
1.123 daniel 6315: const xmlChar *check = CUR_PTR;
1.115 daniel 6316: int cons = ctxt->input->consumed;
1.164 daniel 6317: int tok = ctxt->token;
1.115 daniel 6318:
1.152 daniel 6319: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
1.76 daniel 6320: xmlParseConditionalSections(ctxt);
6321: } else if (IS_BLANK(CUR)) {
6322: NEXT;
1.152 daniel 6323: } else if (RAW == '%') {
1.76 daniel 6324: xmlParsePEReference(ctxt);
6325: } else
6326: xmlParseMarkupDecl(ctxt);
1.77 daniel 6327:
6328: /*
6329: * Pop-up of finished entities.
6330: */
1.166 daniel 6331: while ((RAW == 0) && (ctxt->inputNr > 1))
1.77 daniel 6332: xmlPopInput(ctxt);
6333:
1.164 daniel 6334: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
6335: (tok == ctxt->token)) {
1.115 daniel 6336: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6337: ctxt->sax->error(ctxt->userData,
6338: "Content error in the external subset\n");
6339: ctxt->wellFormed = 0;
1.180 daniel 6340: ctxt->disableSAX = 1;
1.123 daniel 6341: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.115 daniel 6342: break;
6343: }
1.76 daniel 6344: }
6345:
1.152 daniel 6346: if (RAW != 0) {
1.76 daniel 6347: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6348: ctxt->sax->error(ctxt->userData,
6349: "Extra content at the end of the document\n");
1.123 daniel 6350: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.76 daniel 6351: ctxt->wellFormed = 0;
1.180 daniel 6352: ctxt->disableSAX = 1;
1.76 daniel 6353: }
6354:
6355: }
6356:
6357: /**
1.77 daniel 6358: * xmlParseReference:
6359: * @ctxt: an XML parser context
6360: *
6361: * parse and handle entity references in content, depending on the SAX
6362: * interface, this may end-up in a call to characters() if this is a
1.79 daniel 6363: * CharRef, a predefined entity, if there is no reference() callback,
6364: * or if the parser was asked to switch to that mode.
 *
 * Summary of the visible flow:
 *  - returns at once when the current char is not '&';
 *  - when more than one input is stacked (ctxt->inputNr > 1) the '&' is
 *    passed through as a single character and consumed;
 *  - "&#...;" is decoded by xmlParseCharRef() and delivered through
 *    characters() or, for values above 0xFF when ctxt->charset is not
 *    UTF-8, through reference() as "#x.." / "#..";
 *  - any other reference is resolved by xmlParseEntityRef(); while
 *    ent->children is still NULL for a non-predefined entity the function
 *    tries to fill it, then the entity is reported via reference(), pushed
 *    as a new input when ctxt->replaceEntities is set, or its content is
 *    sent to characters().
 *
 * Nothing is returned; problems are reported through ctxt->sax->error()
 * and recorded in ctxt->errNo, ctxt->wellFormed and ctxt->disableSAX.
1.77 daniel 6365: *
6366: * [67] Reference ::= EntityRef | CharRef
6367: */
6368: void
6369: xmlParseReference(xmlParserCtxtPtr ctxt) {
6370: xmlEntityPtr ent;
1.123 daniel 6371: xmlChar *val;
1.152 daniel 6372: if (RAW != '&') return;
1.77 daniel 6373:
/*
 * Inside a stacked input the '&' is not interpreted again: it is emitted
 * as plain character data and then consumed, either by clearing the
 * pending ctxt->token or by skipping one char of input.
 */
1.113 daniel 6374: if (ctxt->inputNr > 1) {
1.123 daniel 6375: xmlChar cur[2] = { '&' , 0 } ;
1.113 daniel 6376:
1.171 daniel 6377: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6378: (!ctxt->disableSAX))
1.113 daniel 6379: ctxt->sax->characters(ctxt->userData, cur, 1);
6380: if (ctxt->token == '&')
6381: ctxt->token = 0;
6382: else {
6383: SKIP(1);
6384: }
6385: return;
6386: }
1.77 daniel 6387: if (NXT(1) == '#') {
1.152 daniel 6388: int i = 0;
1.153 daniel 6389: xmlChar out[10];
/* hex remembers the char after "&#" so the reference can be re-serialized
 * in the same notation (hexadecimal or decimal) further down. */
6390: int hex = NXT(2);
/*
 * NOTE(review): this int val shadows the xmlChar *val declared at
 * function scope.
 * NOTE(review): val is used without any failure check. xmlParseCharRef()
 * is not visible here - if it reports an error by returning 0, a NUL byte
 * is forwarded to characters() below. TODO confirm.
 */
1.77 daniel 6391: int val = xmlParseCharRef(ctxt);
1.152 daniel 6392:
1.198 daniel 6393: if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
1.153 daniel 6394: /*
6395: * So we are using non-UTF-8 buffers
6396: * Check that the char fit on 8bits, if not
6397: * generate a CharRef.
6398: */
6399: if (val <= 0xFF) {
6400: out[0] = val;
6401: out[1] = 0;
1.171 daniel 6402: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6403: (!ctxt->disableSAX))
1.153 daniel 6404: ctxt->sax->characters(ctxt->userData, out, 1);
6405: } else {
/*
 * out[] holds 10 bytes: "#x" plus up to 7 hex digits, or "#" plus up
 * to 8 decimal digits, plus the terminator. Presumably
 * xmlParseCharRef() keeps val inside that range - verify.
 */
6406: if ((hex == 'x') || (hex == 'X'))
6407: sprintf((char *)out, "#x%X", val);
6408: else
6409: sprintf((char *)out, "#%d", val);
1.171 daniel 6410: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6411: (!ctxt->disableSAX))
1.153 daniel 6412: ctxt->sax->reference(ctxt->userData, out);
6413: }
6414: } else {
6415: /*
6416: * Just encode the value in UTF-8
6417: */
6418: COPY_BUF(0 ,out, i, val);
6419: out[i] = 0;
1.171 daniel 6420: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6421: (!ctxt->disableSAX))
1.153 daniel 6422: ctxt->sax->characters(ctxt->userData, out, i);
6423: }
1.77 daniel 6424: } else {
6425: ent = xmlParseEntityRef(ctxt);
6426: if (ent == NULL) return;
6427: if ((ent->name != NULL) &&
1.159 daniel 6428: (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
1.180 daniel 6429: xmlNodePtr list = NULL;
6430: int ret;
6431:
6432:
6433: /*
6434: * The first reference to the entity triggers a parsing phase
6435: * where the ent->children is filled with the result from
6436: * the parsing.
6437: */
6438: if (ent->children == NULL) {
6439: xmlChar *value;
6440: value = ent->content;
6441:
6442: /*
6443: * Check that this entity is well formed
 *
 * NOTE(review): value[1] is read before value[0]; for an empty
 * (zero length) content string that reads one byte past the
 * terminator. Whether content can be empty here is not visible
 * in this function - confirm.
6444: */
6445: if ((value != NULL) &&
6446: (value[1] == 0) && (value[0] == '<') &&
6447: (!xmlStrcmp(ent->name, BAD_CAST "lt"))) {
6448: /*
6449: * TODO: get definite answer on this !!!
6450: * Lots of entity decls are used to declare a single
6451: * char
6452: * <!ENTITY lt "<">
6453: * Which seems to be valid since
6454: * 2.4: The ampersand character (&) and the left angle
6455: * bracket (<) may appear in their literal form only
6456: * when used ... They are also legal within the literal
6457: * entity value of an internal entity declaration;
6458: * see "4.3.2 Well-Formed Parsed Entities".
6459: * IMHO 2.4 and 4.3.2 are directly in contradiction.
6460: * Looking at the OASIS test suite and James Clark
6461: * tests, this is broken. However the XML REC uses
6462: * it. Is the XML REC not well-formed ????
6463: * This is a hack to avoid this problem
6464: */
6465: list = xmlNewDocText(ctxt->myDoc, value);
6466: if (list != NULL) {
6467: if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6468: (ent->children == NULL)) {
6469: ent->children = list;
6470: ent->last = list;
6471: list->parent = (xmlNodePtr) ent;
6472: } else {
6473: xmlFreeNodeList(list);
6474: }
6475: } else if (list != NULL) {
/* NOTE(review): unreachable - this is the else of the identical
 * (list != NULL) test just above. */
6476: xmlFreeNodeList(list);
6477: }
1.181 daniel 6478: } else {
1.180 daniel 6479: /*
6480: * 4.3.2: An internal general parsed entity is well-formed
6481: * if its replacement text matches the production labeled
6482: * content.
 *
 * ctxt->depth is incremented around the nested parse and handed
 * to it as the depth argument, then restored.
 *
 * NOTE(review): value may still be NULL on this path (the test
 * above only excludes NULL for the "lt" special case) and is
 * passed to xmlParseBalancedChunkMemory() unchecked.
6483: */
1.185 daniel 6484: if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6485: ctxt->depth++;
1.180 daniel 6486: ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
1.185 daniel 6487: ctxt->sax, NULL, ctxt->depth,
6488: value, &list);
6489: ctxt->depth--;
6490: } else if (ent->etype ==
6491: XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6492: ctxt->depth++;
1.180 daniel 6493: ret = xmlParseExternalEntity(ctxt->myDoc,
1.185 daniel 6494: ctxt->sax, NULL, ctxt->depth,
6495: ent->SystemID, ent->ExternalID, &list);
6496: ctxt->depth--;
6497: } else {
1.180 daniel 6498: ret = -1;
6499: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6500: ctxt->sax->error(ctxt->userData,
6501: "Internal: invalid entity type\n");
6502: }
/*
 * Dispatch on the result of the nested parse:
 *  - XML_ERR_ENTITY_LOOP: report the loop and stop SAX delivery;
 *  - 0 with a node list: attach the list to an internal general
 *    entity (setting each node parent and ent->last), else free it;
 *  - any other positive code: reported with the fixed message
 *    "Entity value required" and stored in ctxt->errNo;
 *  - otherwise: free whatever list was produced.
 */
1.185 daniel 6503: if (ret == XML_ERR_ENTITY_LOOP) {
6504: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6505: ctxt->sax->error(ctxt->userData,
6506: "Detected entity reference loop\n");
6507: ctxt->wellFormed = 0;
6508: ctxt->disableSAX = 1;
6509: ctxt->errNo = XML_ERR_ENTITY_LOOP;
6510: } else if ((ret == 0) && (list != NULL)) {
1.180 daniel 6511: if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6512: (ent->children == NULL)) {
6513: ent->children = list;
6514: while (list != NULL) {
6515: list->parent = (xmlNodePtr) ent;
6516: if (list->next == NULL)
6517: ent->last = list;
6518: list = list->next;
6519: }
6520: } else {
6521: xmlFreeNodeList(list);
6522: }
6523: } else if (ret > 0) {
6524: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6525: ctxt->sax->error(ctxt->userData,
6526: "Entity value required\n");
6527: ctxt->errNo = ret;
6528: ctxt->wellFormed = 0;
6529: ctxt->disableSAX = 1;
6530: } else if (list != NULL) {
6531: xmlFreeNodeList(list);
6532: }
6533: }
6534: }
1.113 daniel 6535: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
1.171 daniel 6536: (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
1.113 daniel 6537: /*
6538: * Create a node.
6539: */
6540: ctxt->sax->reference(ctxt->userData, ent->name);
6541: return;
6542: } else if (ctxt->replaceEntities) {
6543: xmlParserInputPtr input;
1.79 daniel 6544:
/*
 * Substitution mode: the entity becomes the current input.
 * NOTE(review): input is not checked for NULL before xmlPushInput()
 * and before the input->standalone access below; whether
 * xmlNewEntityInputStream() can return NULL is not visible here.
 */
1.113 daniel 6545: input = xmlNewEntityInputStream(ctxt, ent);
6546: xmlPushInput(ctxt, input);
/* An external parsed entity may start with a text declaration
 * ("<?xml" followed by a blank); parse it before the content. */
1.167 daniel 6547: if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
6548: (RAW == '<') && (NXT(1) == '?') &&
6549: (NXT(2) == 'x') && (NXT(3) == 'm') &&
6550: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 6551: xmlParseTextDecl(ctxt);
1.193 daniel 6552: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6553: /*
6554: * The XML REC instructs us to stop parsing right here
6555: */
6556: ctxt->instate = XML_PARSER_EOF;
6557: return;
6558: }
1.199 daniel 6559: if (input->standalone == 1) {
1.167 daniel 6560: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6561: ctxt->sax->error(ctxt->userData,
6562: "external parsed entities cannot be standalone\n");
6563: ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
6564: ctxt->wellFormed = 0;
1.180 daniel 6565: ctxt->disableSAX = 1;
1.167 daniel 6566: }
6567: }
1.179 daniel 6568: /*
6569: * !!! TODO: build the tree under the entity first
6570: * 1234
6571: */
1.113 daniel 6572: return;
6573: }
1.77 daniel 6574: }
/*
 * Reached for predefined (or unnamed) entities, and also when neither
 * branch above returned: the raw entity content is delivered as text.
 */
6575: val = ent->content;
6576: if (val == NULL) return;
6577: /*
6578: * inline the entity.
6579: */
1.171 daniel 6580: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6581: (!ctxt->disableSAX))
1.77 daniel 6582: ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6583: }
1.24 daniel 6584: }
6585:
1.50 daniel 6586: /**
6587: * xmlParseEntityRef:
6588: * @ctxt: an XML parser context
6589: *
6590: * parse an entity reference ('&' Name ';') from the current input and
 * look the named entity up, first through the SAX getEntity() callback and
 * then in the predefined set. The parsed name is freed before returning.
 * Errors and warnings go through ctxt->sax->error() / warning() and are
 * recorded in ctxt->errNo, ctxt->wellFormed and ctxt->disableSAX.
1.24 daniel 6591: *
6592: * [68] EntityRef ::= '&' Name ';'
1.68 daniel 6593: *
1.98 daniel 6594: * [ WFC: Entity Declared ]
6595: * In a document without any DTD, a document with only an internal DTD
6596: * subset which contains no parameter entity references, or a document
6597: * with "standalone='yes'", the Name given in the entity reference
6598: * must match that in an entity declaration, except that well-formed
6599: * documents need not declare any of the following entities: amp, lt,
6600: * gt, apos, quot. The declaration of a parameter entity must precede
6601: * any reference to it. Similarly, the declaration of a general entity
6602: * must precede any reference to it which appears in a default value in an
6603: * attribute-list declaration. Note that if entities are declared in the
6604: * external subset or in external parameter entities, a non-validating
6605: * processor is not obligated to read and process their declarations;
6606: * for such documents, the rule that an entity must be declared is a
6607: * well-formedness constraint only if standalone='yes'.
6608: *
6609: * [ WFC: Parsed Entity ]
6610: * An entity reference must not contain the name of an unparsed entity
6611: *
1.77 daniel 6612: * Returns the xmlEntityPtr if found, or NULL otherwise.
 * Note that an entity which was found is still returned when one of the
 * well-formedness checks below flagged an error on it.
1.24 daniel 6613: */
1.77 daniel 6614: xmlEntityPtr
1.55 daniel 6615: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1.123 daniel 6616: xmlChar *name;
1.72 daniel 6617: xmlEntityPtr ent = NULL;
1.24 daniel 6618:
1.91 daniel 6619: GROW;
1.111 daniel 6620:
/* Nothing is consumed and NULL is returned unless the input is at '&'. */
1.152 daniel 6621: if (RAW == '&') {
1.40 daniel 6622: NEXT;
1.24 daniel 6623: name = xmlParseName(ctxt);
6624: if (name == NULL) {
1.55 daniel 6625: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 6626: ctxt->sax->error(ctxt->userData,
6627: "xmlParseEntityRef: no name\n");
1.123 daniel 6628: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 6629: ctxt->wellFormed = 0;
1.180 daniel 6630: ctxt->disableSAX = 1;
1.24 daniel 6631: } else {
1.152 daniel 6632: if (RAW == ';') {
1.40 daniel 6633: NEXT;
1.24 daniel 6634: /*
1.77 daniel 6635: * Ask first SAX for entity resolution, otherwise try the
6636: * predefined set.
 *
 * NOTE(review): when ctxt->sax is NULL no lookup happens at
 * all, not even in the predefined set, so ent stays NULL.
6637: */
6638: if (ctxt->sax != NULL) {
6639: if (ctxt->sax->getEntity != NULL)
6640: ent = ctxt->sax->getEntity(ctxt->userData, name);
6641: if (ent == NULL)
6642: ent = xmlGetPredefinedEntity(name);
6643: }
6644: /*
1.98 daniel 6645: * [ WFC: Entity Declared ]
6646: * In a document without any DTD, a document with only an
6647: * internal DTD subset which contains no parameter entity
6648: * references, or a document with "standalone='yes'", the
6649: * Name given in the entity reference must match that in an
6650: * entity declaration, except that well-formed documents
6651: * need not declare any of the following entities: amp, lt,
6652: * gt, apos, quot.
6653: * The declaration of a parameter entity must precede any
6654: * reference to it.
6655: * Similarly, the declaration of a general entity must
6656: * precede any reference to it which appears in a default
6657: * value in an attribute-list declaration. Note that if
6658: * entities are declared in the external subset or in
6659: * external parameter entities, a non-validating processor
6660: * is not obligated to read and process their declarations;
6661: * for such documents, the rule that an entity must be
6662: * declared is a well-formedness constraint only if
6663: * standalone='yes'.
 *
 * In code: an unknown entity is an error when the document
 * is standalone, or has neither an external subset nor any
 * PE reference seen so far; otherwise it is only a warning.
1.59 daniel 6664: */
1.77 daniel 6665: if (ent == NULL) {
1.98 daniel 6666: if ((ctxt->standalone == 1) ||
6667: ((ctxt->hasExternalSubset == 0) &&
6668: (ctxt->hasPErefs == 0))) {
6669: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 6670: ctxt->sax->error(ctxt->userData,
6671: "Entity '%s' not defined\n", name);
1.123 daniel 6672: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.77 daniel 6673: ctxt->wellFormed = 0;
1.180 daniel 6674: ctxt->disableSAX = 1;
1.77 daniel 6675: } else {
1.98 daniel 6676: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6677: ctxt->sax->warning(ctxt->userData,
6678: "Entity '%s' not defined\n", name);
1.123 daniel 6679: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
1.59 daniel 6680: }
1.77 daniel 6681: }
1.59 daniel 6682:
6683: /*
1.98 daniel 6684: * [ WFC: Parsed Entity ]
6685: * An entity reference must not contain the name of an
6686: * unparsed entity
6687: */
1.159 daniel 6688: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.98 daniel 6689: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6690: ctxt->sax->error(ctxt->userData,
6691: "Entity reference to unparsed entity %s\n", name);
1.123 daniel 6692: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 6693: ctxt->wellFormed = 0;
1.180 daniel 6694: ctxt->disableSAX = 1;
1.98 daniel 6695: }
6696:
6697: /*
6698: * [ WFC: No External Entity References ]
6699: * Attribute values cannot contain direct or indirect
6700: * entity references to external entities.
6701: */
6702: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 6703: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.98 daniel 6704: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6705: ctxt->sax->error(ctxt->userData,
6706: "Attribute references external entity '%s'\n", name);
1.123 daniel 6707: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
1.98 daniel 6708: ctxt->wellFormed = 0;
1.180 daniel 6709: ctxt->disableSAX = 1;
1.98 daniel 6710: }
6711: /*
6712: * [ WFC: No < in Attribute Values ]
6713: * The replacement text of any entity referred to directly or
6714: * indirectly in an attribute value (other than "&lt;") must
6715: * not contain a <.
 *
 * NOTE(review): the (ent != NULL) test below is redundant,
 * this else-if chain is only reached when ent is not NULL.
 * Only ent->content itself is searched for '<' here.
1.59 daniel 6716: */
1.98 daniel 6717: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.116 daniel 6718: (ent != NULL) &&
6719: (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
1.98 daniel 6720: (ent->content != NULL) &&
6721: (xmlStrchr(ent->content, '<'))) {
6722: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6723: ctxt->sax->error(ctxt->userData,
6724: "'<' in entity '%s' is not allowed in attributes values\n", name);
1.123 daniel 6725: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.98 daniel 6726: ctxt->wellFormed = 0;
1.180 daniel 6727: ctxt->disableSAX = 1;
1.98 daniel 6728: }
6729:
6730: /*
6731: * Internal check, no parameter entities here ...
6732: */
6733: else {
1.159 daniel 6734: switch (ent->etype) {
1.59 daniel 6735: case XML_INTERNAL_PARAMETER_ENTITY:
6736: case XML_EXTERNAL_PARAMETER_ENTITY:
6737: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6738: ctxt->sax->error(ctxt->userData,
1.59 daniel 6739: "Attempt to reference the parameter entity '%s'\n", name);
1.123 daniel 6740: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
1.59 daniel 6741: ctxt->wellFormed = 0;
1.180 daniel 6742: ctxt->disableSAX = 1;
6743: break;
6744: default:
1.59 daniel 6745: break;
6746: }
6747: }
6748:
6749: /*
1.98 daniel 6750: * [ WFC: No Recursion ]
1.117 daniel 6751: * TODO A parsed entity must not contain a recursive reference
6752: * to itself, either directly or indirectly.
1.59 daniel 6753: */
1.77 daniel 6754:
1.24 daniel 6755: } else {
1.55 daniel 6756: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6757: ctxt->sax->error(ctxt->userData,
1.59 daniel 6758: "xmlParseEntityRef: expecting ';'\n");
1.123 daniel 6759: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.59 daniel 6760: ctxt->wellFormed = 0;
1.180 daniel 6761: ctxt->disableSAX = 1;
1.24 daniel 6762: }
/* The name is released on both the ';' and the missing ';' path. */
1.119 daniel 6763: xmlFree(name);
1.24 daniel 6764: }
6765: }
1.77 daniel 6766: return(ent);
1.24 daniel 6767: }
1.135 daniel 6768: /**
6769: * xmlParseStringEntityRef:
6770: * @ctxt: an XML parser context
6771: * @str: a pointer to an index in the string
6772: *
6773: * parse an entity reference, but this version parses it from
6774: * a string value instead of the parser input. It applies the same lookup
 * and well-formedness checks as the code of xmlParseEntityRef().
 * A NULL @str or *@str returns NULL at once; @ctxt is not checked.
6775: *
6776: * [68] EntityRef ::= '&' Name ';'
6777: *
6778: * [ WFC: Entity Declared ]
6779: * In a document without any DTD, a document with only an internal DTD
6780: * subset which contains no parameter entity references, or a document
6781: * with "standalone='yes'", the Name given in the entity reference
6782: * must match that in an entity declaration, except that well-formed
6783: * documents need not declare any of the following entities: amp, lt,
6784: * gt, apos, quot. The declaration of a parameter entity must precede
6785: * any reference to it. Similarly, the declaration of a general entity
6786: * must precede any reference to it which appears in a default value in an
6787: * attribute-list declaration. Note that if entities are declared in the
6788: * external subset or in external parameter entities, a non-validating
6789: * processor is not obligated to read and process their declarations;
6790: * for such documents, the rule that an entity must be declared is a
6791: * well-formedness constraint only if standalone='yes'.
6792: *
6793: * [ WFC: Parsed Entity ]
6794: * An entity reference must not contain the name of an unparsed entity
6795: *
6796: * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6797: * is updated to the current location in the string, including on the
 * error paths; it is left where it was when the string does not start
 * with '&'.
6798: */
6799: xmlEntityPtr
6800: xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6801: xmlChar *name;
6802: const xmlChar *ptr;
6803: xmlChar cur;
6804: xmlEntityPtr ent = NULL;
6805:
1.156 daniel 6806: if ((str == NULL) || (*str == NULL))
6807: return(NULL);
1.135 daniel 6808: ptr = *str;
6809: cur = *ptr;
6810: if (cur == '&') {
6811: ptr++;
/* NOTE(review): dead store, cur is never read after this point. */
6812: cur = *ptr;
/* xmlParseStringName() receives &ptr; the code below relies on ptr
 * then pointing at the char that follows the name. */
6813: name = xmlParseStringName(ctxt, &ptr);
6814: if (name == NULL) {
/* NOTE(review): the messages in this function name
 * xmlParseEntityRef rather than xmlParseStringEntityRef. */
6815: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6816: ctxt->sax->error(ctxt->userData,
6817: "xmlParseEntityRef: no name\n");
6818: ctxt->errNo = XML_ERR_NAME_REQUIRED;
6819: ctxt->wellFormed = 0;
1.180 daniel 6820: ctxt->disableSAX = 1;
1.135 daniel 6821: } else {
1.185 daniel 6822: if (*ptr == ';') {
6823: ptr++;
1.135 daniel 6824: /*
6825: * Ask first SAX for entity resolution, otherwise try the
6826: * predefined set.
 *
 * NOTE(review): when ctxt->sax is NULL no lookup happens at
 * all, not even in the predefined set, so ent stays NULL.
6827: */
6828: if (ctxt->sax != NULL) {
6829: if (ctxt->sax->getEntity != NULL)
6830: ent = ctxt->sax->getEntity(ctxt->userData, name);
6831: if (ent == NULL)
6832: ent = xmlGetPredefinedEntity(name);
6833: }
6834: /*
6835: * [ WFC: Entity Declared ]
6836: * In a document without any DTD, a document with only an
6837: * internal DTD subset which contains no parameter entity
6838: * references, or a document with "standalone='yes'", the
6839: * Name given in the entity reference must match that in an
6840: * entity declaration, except that well-formed documents
6841: * need not declare any of the following entities: amp, lt,
6842: * gt, apos, quot.
6843: * The declaration of a parameter entity must precede any
6844: * reference to it.
6845: * Similarly, the declaration of a general entity must
6846: * precede any reference to it which appears in a default
6847: * value in an attribute-list declaration. Note that if
6848: * entities are declared in the external subset or in
6849: * external parameter entities, a non-validating processor
6850: * is not obligated to read and process their declarations;
6851: * for such documents, the rule that an entity must be
6852: * declared is a well-formedness constraint only if
6853: * standalone='yes'.
 *
 * In code: an unknown entity is an error when the document
 * is standalone, or has neither an external subset nor any
 * PE reference seen so far; otherwise it is only a warning.
6854: */
6855: if (ent == NULL) {
6856: if ((ctxt->standalone == 1) ||
6857: ((ctxt->hasExternalSubset == 0) &&
6858: (ctxt->hasPErefs == 0))) {
6859: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6860: ctxt->sax->error(ctxt->userData,
6861: "Entity '%s' not defined\n", name);
6862: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6863: ctxt->wellFormed = 0;
1.180 daniel 6864: ctxt->disableSAX = 1;
1.135 daniel 6865: } else {
6866: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6867: ctxt->sax->warning(ctxt->userData,
6868: "Entity '%s' not defined\n", name);
6869: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
6870: }
6871: }
6872:
6873: /*
6874: * [ WFC: Parsed Entity ]
6875: * An entity reference must not contain the name of an
6876: * unparsed entity
6877: */
1.159 daniel 6878: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.135 daniel 6879: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6880: ctxt->sax->error(ctxt->userData,
6881: "Entity reference to unparsed entity %s\n", name);
6882: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
6883: ctxt->wellFormed = 0;
1.180 daniel 6884: ctxt->disableSAX = 1;
1.135 daniel 6885: }
6886:
6887: /*
6888: * [ WFC: No External Entity References ]
6889: * Attribute values cannot contain direct or indirect
6890: * entity references to external entities.
6891: */
6892: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 6893: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.135 daniel 6894: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6895: ctxt->sax->error(ctxt->userData,
6896: "Attribute references external entity '%s'\n", name);
6897: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
6898: ctxt->wellFormed = 0;
1.180 daniel 6899: ctxt->disableSAX = 1;
1.135 daniel 6900: }
6901: /*
6902: * [ WFC: No < in Attribute Values ]
6903: * The replacement text of any entity referred to directly or
6904: * indirectly in an attribute value (other than "&lt;") must
6905: * not contain a <.
 *
 * NOTE(review): the (ent != NULL) test below is redundant,
 * this else-if chain is only reached when ent is not NULL.
 * Only ent->content itself is searched for '<' here.
6906: */
6907: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6908: (ent != NULL) &&
6909: (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
6910: (ent->content != NULL) &&
6911: (xmlStrchr(ent->content, '<'))) {
6912: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6913: ctxt->sax->error(ctxt->userData,
6914: "'<' in entity '%s' is not allowed in attributes values\n", name);
6915: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
6916: ctxt->wellFormed = 0;
1.180 daniel 6917: ctxt->disableSAX = 1;
1.135 daniel 6918: }
6919:
6920: /*
6921: * Internal check, no parameter entities here ...
6922: */
6923: else {
1.159 daniel 6924: switch (ent->etype) {
1.135 daniel 6925: case XML_INTERNAL_PARAMETER_ENTITY:
6926: case XML_EXTERNAL_PARAMETER_ENTITY:
6927: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6928: ctxt->sax->error(ctxt->userData,
6929: "Attempt to reference the parameter entity '%s'\n", name);
6930: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
6931: ctxt->wellFormed = 0;
1.180 daniel 6932: ctxt->disableSAX = 1;
6933: break;
6934: default:
1.135 daniel 6935: break;
6936: }
6937: }
6938:
6939: /*
6940: * [ WFC: No Recursion ]
6941: * TODO A parsed entity must not contain a recursive reference
6942: * to itself, either directly or indirectly.
6943: */
6944:
6945: } else {
6946: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6947: ctxt->sax->error(ctxt->userData,
6948: "xmlParseEntityRef: expecting ';'\n");
6949: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6950: ctxt->wellFormed = 0;
1.180 daniel 6951: ctxt->disableSAX = 1;
1.135 daniel 6952: }
/* The name is released on both the ';' and the missing ';' path. */
6953: xmlFree(name);
6954: }
6955: }
/* Hand the scan position back to the caller, errors included. */
1.185 daniel 6956: *str = ptr;
1.135 daniel 6957: return(ent);
6958: }
1.24 daniel 6959:
1.50 daniel 6960: /**
6961: * xmlParsePEReference:
6962: * @ctxt: an XML parser context
6963: *
6964: * parse a parameter entity reference ('%' Name ';') from the current input.
1.77 daniel 6965: * The entity content is handled directly by pushing its content as
6966: * a new input stream.
 *
 * The entity is looked up through the SAX getParameterEntity() callback
 * only. Nothing is returned; errors and warnings go through
 * ctxt->sax->error() / warning() and are recorded in ctxt->errNo,
 * ctxt->wellFormed, ctxt->disableSAX or ctxt->valid. The parsed name is
 * freed before returning.
1.22 daniel 6967: *
6968: * [69] PEReference ::= '%' Name ';'
1.68 daniel 6969: *
1.98 daniel 6970: * [ WFC: No Recursion ]
6971: * TODO A parsed entity must not contain a recursive
6972: * reference to itself, either directly or indirectly.
6973: *
6974: * [ WFC: Entity Declared ]
6975: * In a document without any DTD, a document with only an internal DTD
6976: * subset which contains no parameter entity references, or a document
6977: * with "standalone='yes'", ... ... The declaration of a parameter
6978: * entity must precede any reference to it...
6979: *
6980: * [ VC: Entity Declared ]
6981: * In a document with an external subset or external parameter entities
6982: * with "standalone='no'", ... ... The declaration of a parameter entity
6983: * must precede any reference to it...
6984: *
6985: * [ WFC: In DTD ]
6986: * Parameter-entity references may only appear in the DTD.
6987: * NOTE: misleading but this is handled.
1.22 daniel 6988: */
1.77 daniel 6989: void
1.55 daniel 6990: xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 6991: xmlChar *name;
1.72 daniel 6992: xmlEntityPtr entity = NULL;
1.50 daniel 6993: xmlParserInputPtr input;
1.22 daniel 6994:
/* Nothing happens unless the input is at '%'. */
1.152 daniel 6995: if (RAW == '%') {
1.40 daniel 6996: NEXT;
1.22 daniel 6997: name = xmlParseName(ctxt);
6998: if (name == NULL) {
1.55 daniel 6999: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7000: ctxt->sax->error(ctxt->userData,
7001: "xmlParsePEReference: no name\n");
1.123 daniel 7002: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 7003: ctxt->wellFormed = 0;
1.180 daniel 7004: ctxt->disableSAX = 1;
1.22 daniel 7005: } else {
1.152 daniel 7006: if (RAW == ';') {
1.40 daniel 7007: NEXT;
1.98 daniel 7008: if ((ctxt->sax != NULL) &&
7009: (ctxt->sax->getParameterEntity != NULL))
7010: entity = ctxt->sax->getParameterEntity(ctxt->userData,
7011: name);
1.45 daniel 7012: if (entity == NULL) {
1.98 daniel 7013: /*
7014: * [ WFC: Entity Declared ]
7015: * In a document without any DTD, a document with only an
7016: * internal DTD subset which contains no parameter entity
7017: * references, or a document with "standalone='yes'", ...
7018: * ... The declaration of a parameter entity must precede
7019: * any reference to it...
7020: */
7021: if ((ctxt->standalone == 1) ||
7022: ((ctxt->hasExternalSubset == 0) &&
7023: (ctxt->hasPErefs == 0))) {
7024: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7025: ctxt->sax->error(ctxt->userData,
7026: "PEReference: %%%s; not found\n", name);
1.123 daniel 7027: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.98 daniel 7028: ctxt->wellFormed = 0;
1.180 daniel 7029: ctxt->disableSAX = 1;
1.98 daniel 7030: } else {
7031: /*
7032: * [ VC: Entity Declared ]
7033: * In a document with an external subset or external
7034: * parameter entities with "standalone='no'", ...
7035: * ... The declaration of a parameter entity must precede
7036: * any reference to it...
 *
 * Only validity is affected here: ctxt->valid is
 * cleared, ctxt->wellFormed is left alone.
7037: */
7038: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7039: ctxt->sax->warning(ctxt->userData,
7040: "PEReference: %%%s; not found\n", name);
7041: ctxt->valid = 0;
7042: }
1.50 daniel 7043: } else {
1.98 daniel 7044: /*
7045: * Internal checking in case the entity quest barfed
7046: */
1.159 daniel 7047: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7048: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.98 daniel 7049: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7050: ctxt->sax->warning(ctxt->userData,
7051: "Internal: %%%s; is not a parameter entity\n", name);
7052: } else {
1.164 daniel 7053: /*
7054: * TODO !!!
7055: * handle the extra spaces added before and after
7056: * c.f. http://www.w3.org/TR/REC-xml#as-PE
 *
 * NOTE(review): input is not checked for NULL before
 * xmlPushInput(); whether xmlNewEntityInputStream()
 * can return NULL is not visible here.
7057: */
1.98 daniel 7058: input = xmlNewEntityInputStream(ctxt, entity);
7059: xmlPushInput(ctxt, input);
/* An external parameter entity may start with a text declaration
 * ("<?xml" followed by a blank); parse it before the content. */
1.164 daniel 7060: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7061: (RAW == '<') && (NXT(1) == '?') &&
7062: (NXT(2) == 'x') && (NXT(3) == 'm') &&
7063: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 7064: xmlParseTextDecl(ctxt);
1.193 daniel 7065: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7066: /*
7067: * The XML REC instructs us to stop parsing
7068: * right here
 *
 * NOTE(review): this early exit frees name but
 * skips the ctxt->hasPErefs = 1 update below.
7069: */
7070: ctxt->instate = XML_PARSER_EOF;
7071: xmlFree(name);
7072: return;
7073: }
1.164 daniel 7074: }
/* Leave a blank as pending token when none is set; presumably the
 * leading space required around an included PE - confirm. */
7075: if (ctxt->token == 0)
7076: ctxt->token = ' ';
1.98 daniel 7077: }
1.45 daniel 7078: }
/* Recorded whether or not the entity was found. */
1.98 daniel 7079: ctxt->hasPErefs = 1;
1.22 daniel 7080: } else {
1.55 daniel 7081: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7082: ctxt->sax->error(ctxt->userData,
1.59 daniel 7083: "xmlParsePEReference: expecting ';'\n");
1.123 daniel 7084: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.59 daniel 7085: ctxt->wellFormed = 0;
1.180 daniel 7086: ctxt->disableSAX = 1;
1.22 daniel 7087: }
1.119 daniel 7088: xmlFree(name);
1.3 veillard 7089: }
7090: }
7091: }
7092:
1.50 daniel 7093: /**
1.135 daniel 7094: * xmlParseStringPEReference:
7095: * @ctxt: an XML parser context
7096: * @str: a pointer to an index in the string
7097: *
7098: * parse a parameter entity reference from a string value. Unlike the
 * code of xmlParsePEReference() this variant only looks the entity up
 * (through the SAX getParameterEntity() callback); it pushes no input.
 * A NULL @str or *@str returns NULL at once; @ctxt is not checked.
7099: *
7100: * [69] PEReference ::= '%' Name ';'
7101: *
7102: * [ WFC: No Recursion ]
7103: * TODO A parsed entity must not contain a recursive
7104: * reference to itself, either directly or indirectly.
7105: *
7106: * [ WFC: Entity Declared ]
7107: * In a document without any DTD, a document with only an internal DTD
7108: * subset which contains no parameter entity references, or a document
7109: * with "standalone='yes'", ... ... The declaration of a parameter
7110: * entity must precede any reference to it...
7111: *
7112: * [ VC: Entity Declared ]
7113: * In a document with an external subset or external parameter entities
7114: * with "standalone='no'", ... ... The declaration of a parameter entity
7115: * must precede any reference to it...
7116: *
7117: * [ WFC: In DTD ]
7118: * Parameter-entity references may only appear in the DTD.
7119: * NOTE: misleading but this is handled.
7120: *
7121: * Returns the xmlEntityPtr if found, or NULL otherwise.
7122: * str is updated to the current value of the index, including on the
 * error paths; it is left where it was when the string does not start
 * with '%'.
7123: */
7124: xmlEntityPtr
7125: xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7126: const xmlChar *ptr;
7127: xmlChar cur;
7128: xmlChar *name;
7129: xmlEntityPtr entity = NULL;
7130:
7131: if ((str == NULL) || (*str == NULL)) return(NULL);
7132: ptr = *str;
7133: cur = *ptr;
7134: if (cur == '%') {
7135: ptr++;
/* NOTE(review): dead store, cur is reassigned before its next read. */
7136: cur = *ptr;
/* xmlParseStringName() receives &ptr; the code below relies on ptr
 * then pointing at the char that follows the name. */
7137: name = xmlParseStringName(ctxt, &ptr);
7138: if (name == NULL) {
7139: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7140: ctxt->sax->error(ctxt->userData,
7141: "xmlParseStringPEReference: no name\n");
7142: ctxt->errNo = XML_ERR_NAME_REQUIRED;
7143: ctxt->wellFormed = 0;
1.180 daniel 7144: ctxt->disableSAX = 1;
1.135 daniel 7145: } else {
7146: cur = *ptr;
7147: if (cur == ';') {
7148: ptr++;
/* NOTE(review): dead store, cur is not read again. */
7149: cur = *ptr;
7150: if ((ctxt->sax != NULL) &&
7151: (ctxt->sax->getParameterEntity != NULL))
7152: entity = ctxt->sax->getParameterEntity(ctxt->userData,
7153: name);
7154: if (entity == NULL) {
7155: /*
7156: * [ WFC: Entity Declared ]
7157: * In a document without any DTD, a document with only an
7158: * internal DTD subset which contains no parameter entity
7159: * references, or a document with "standalone='yes'", ...
7160: * ... The declaration of a parameter entity must precede
7161: * any reference to it...
7162: */
7163: if ((ctxt->standalone == 1) ||
7164: ((ctxt->hasExternalSubset == 0) &&
7165: (ctxt->hasPErefs == 0))) {
7166: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7167: ctxt->sax->error(ctxt->userData,
7168: "PEReference: %%%s; not found\n", name);
7169: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
7170: ctxt->wellFormed = 0;
1.180 daniel 7171: ctxt->disableSAX = 1;
1.135 daniel 7172: } else {
7173: /*
7174: * [ VC: Entity Declared ]
7175: * In a document with an external subset or external
7176: * parameter entities with "standalone='no'", ...
7177: * ... The declaration of a parameter entity must
7178: * precede any reference to it...
 *
 * Only validity is affected here: ctxt->valid is
 * cleared, ctxt->wellFormed is left alone.
7179: */
7180: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7181: ctxt->sax->warning(ctxt->userData,
7182: "PEReference: %%%s; not found\n", name);
7183: ctxt->valid = 0;
7184: }
7185: } else {
7186: /*
7187: * Internal checking in case the entity quest barfed
 *
 * NOTE(review): a non-parameter entity only triggers a
 * warning; it is still returned to the caller.
7188: */
1.159 daniel 7189: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7190: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.135 daniel 7191: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7192: ctxt->sax->warning(ctxt->userData,
7193: "Internal: %%%s; is not a parameter entity\n", name);
7194: }
7195: }
/* Recorded whether or not the entity was found. */
7196: ctxt->hasPErefs = 1;
7197: } else {
7198: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7199: ctxt->sax->error(ctxt->userData,
7200: "xmlParseStringPEReference: expecting ';'\n");
7201: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
7202: ctxt->wellFormed = 0;
1.180 daniel 7203: ctxt->disableSAX = 1;
1.135 daniel 7204: }
/* The name is released on both the ';' and the missing ';' path. */
7205: xmlFree(name);
7206: }
7207: }
/* Hand the scan position back to the caller, errors included. */
7208: *str = ptr;
7209: return(entity);
7210: }
7211:
7212: /**
1.181 daniel 7213: * xmlParseDocTypeDecl:
1.50 daniel 7214: * @ctxt: an XML parser context
7215: *
7216: * parse a DOCTYPE declaration
1.21 daniel 7217: *
1.22 daniel 7218: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7219: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.98 daniel 7220: *
7221: * [ VC: Root Element Type ]
1.99 daniel 7222: * The Name in the document type declaration must match the element
1.98 daniel 7223: * type of the root element.
1.21 daniel 7224: */
7225:
1.55 daniel 7226: void
7227: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.165 daniel 7228: xmlChar *name = NULL;
1.123 daniel 7229: xmlChar *ExternalID = NULL;
7230: xmlChar *URI = NULL;
1.21 daniel 7231:
7232: /*
7233: * We know that '<!DOCTYPE' has been detected.
7234: */
1.40 daniel 7235: SKIP(9);
1.21 daniel 7236:
1.42 daniel 7237: SKIP_BLANKS;
1.21 daniel 7238:
7239: /*
7240: * Parse the DOCTYPE name.
7241: */
7242: name = xmlParseName(ctxt);
7243: if (name == NULL) {
1.55 daniel 7244: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7245: ctxt->sax->error(ctxt->userData,
7246: "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.59 daniel 7247: ctxt->wellFormed = 0;
1.180 daniel 7248: ctxt->disableSAX = 1;
1.123 daniel 7249: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.21 daniel 7250: }
1.165 daniel 7251: ctxt->intSubName = name;
1.21 daniel 7252:
1.42 daniel 7253: SKIP_BLANKS;
1.21 daniel 7254:
7255: /*
1.22 daniel 7256: * Check for SystemID and ExternalID
7257: */
1.67 daniel 7258: URI = xmlParseExternalID(ctxt, &ExternalID, 1);
1.98 daniel 7259:
7260: if ((URI != NULL) || (ExternalID != NULL)) {
7261: ctxt->hasExternalSubset = 1;
7262: }
1.165 daniel 7263: ctxt->extSubURI = URI;
7264: ctxt->extSubSystem = ExternalID;
1.98 daniel 7265:
1.42 daniel 7266: SKIP_BLANKS;
1.36 daniel 7267:
1.76 daniel 7268: /*
1.165 daniel 7269: * Create and update the internal subset.
1.76 daniel 7270: */
1.171 daniel 7271: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7272: (!ctxt->disableSAX))
1.74 daniel 7273: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
1.22 daniel 7274:
7275: /*
1.140 daniel 7276: * Is there any internal subset declarations ?
7277: * they are handled separately in xmlParseInternalSubset()
7278: */
1.152 daniel 7279: if (RAW == '[')
1.140 daniel 7280: return;
7281:
7282: /*
7283: * We should be at the end of the DOCTYPE declaration.
7284: */
1.152 daniel 7285: if (RAW != '>') {
1.140 daniel 7286: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7287: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
7288: ctxt->wellFormed = 0;
1.180 daniel 7289: ctxt->disableSAX = 1;
1.140 daniel 7290: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
7291: }
7292: NEXT;
7293: }
7294:
7295: /**
1.181 daniel 7296: * xmlParseInternalsubset:
1.140 daniel 7297: * @ctxt: an XML parser context
7298: *
7299: * parse the internal subset declaration
7300: *
7301: * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7302: */
7303:
7304: void
7305: xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7306: /*
1.22 daniel 7307: * Is there any DTD definition ?
7308: */
1.152 daniel 7309: if (RAW == '[') {
1.96 daniel 7310: ctxt->instate = XML_PARSER_DTD;
1.40 daniel 7311: NEXT;
1.22 daniel 7312: /*
7313: * Parse the succession of Markup declarations and
7314: * PEReferences.
7315: * Subsequence (markupdecl | PEReference | S)*
7316: */
1.152 daniel 7317: while (RAW != ']') {
1.123 daniel 7318: const xmlChar *check = CUR_PTR;
1.115 daniel 7319: int cons = ctxt->input->consumed;
1.22 daniel 7320:
1.42 daniel 7321: SKIP_BLANKS;
1.22 daniel 7322: xmlParseMarkupDecl(ctxt);
1.50 daniel 7323: xmlParsePEReference(ctxt);
1.22 daniel 7324:
1.115 daniel 7325: /*
7326: * Pop-up of finished entities.
7327: */
1.152 daniel 7328: while ((RAW == 0) && (ctxt->inputNr > 1))
1.115 daniel 7329: xmlPopInput(ctxt);
7330:
1.118 daniel 7331: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
1.55 daniel 7332: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7333: ctxt->sax->error(ctxt->userData,
1.140 daniel 7334: "xmlParseInternalSubset: error detected in Markup declaration\n");
1.59 daniel 7335: ctxt->wellFormed = 0;
1.180 daniel 7336: ctxt->disableSAX = 1;
1.123 daniel 7337: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.22 daniel 7338: break;
7339: }
7340: }
1.152 daniel 7341: if (RAW == ']') NEXT;
1.22 daniel 7342: }
7343:
7344: /*
7345: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 7346: */
1.152 daniel 7347: if (RAW != '>') {
1.55 daniel 7348: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7349: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
1.59 daniel 7350: ctxt->wellFormed = 0;
1.180 daniel 7351: ctxt->disableSAX = 1;
1.123 daniel 7352: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
1.21 daniel 7353: }
1.40 daniel 7354: NEXT;
1.21 daniel 7355: }
7356:
1.50 daniel 7357: /**
7358: * xmlParseAttribute:
7359: * @ctxt: an XML parser context
1.123 daniel 7360: * @value: a xmlChar ** used to store the value of the attribute
1.50 daniel 7361: *
7362: * parse an attribute
1.3 veillard 7363: *
1.22 daniel 7364: * [41] Attribute ::= Name Eq AttValue
7365: *
1.98 daniel 7366: * [ WFC: No External Entity References ]
7367: * Attribute values cannot contain direct or indirect entity references
7368: * to external entities.
7369: *
7370: * [ WFC: No < in Attribute Values ]
7371: * The replacement text of any entity referred to directly or indirectly in
7372: * an attribute value (other than "<") must not contain a <.
7373: *
7374: * [ VC: Attribute Value Type ]
1.117 daniel 7375: * The attribute must have been declared; the value must be of the type
1.99 daniel 7376: * declared for it.
1.98 daniel 7377: *
1.22 daniel 7378: * [25] Eq ::= S? '=' S?
7379: *
1.29 daniel 7380: * With namespace:
7381: *
7382: * [NS 11] Attribute ::= QName Eq AttValue
1.43 daniel 7383: *
7384: * Also the case QName == xmlns:??? is handled independently as a namespace
7385: * definition.
1.69 daniel 7386: *
1.72 daniel 7387: * Returns the attribute name, and the value in *value.
1.3 veillard 7388: */
7389:
1.123 daniel 7390: xmlChar *
7391: xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
7392: xmlChar *name, *val;
1.3 veillard 7393:
1.72 daniel 7394: *value = NULL;
7395: name = xmlParseName(ctxt);
1.22 daniel 7396: if (name == NULL) {
1.55 daniel 7397: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7398: ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
1.59 daniel 7399: ctxt->wellFormed = 0;
1.180 daniel 7400: ctxt->disableSAX = 1;
1.123 daniel 7401: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.52 daniel 7402: return(NULL);
1.3 veillard 7403: }
7404:
7405: /*
1.29 daniel 7406: * read the value
1.3 veillard 7407: */
1.42 daniel 7408: SKIP_BLANKS;
1.152 daniel 7409: if (RAW == '=') {
1.40 daniel 7410: NEXT;
1.42 daniel 7411: SKIP_BLANKS;
1.72 daniel 7412: val = xmlParseAttValue(ctxt);
1.96 daniel 7413: ctxt->instate = XML_PARSER_CONTENT;
1.29 daniel 7414: } else {
1.55 daniel 7415: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7416: ctxt->sax->error(ctxt->userData,
1.59 daniel 7417: "Specification mandate value for attribute %s\n", name);
1.123 daniel 7418: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
1.59 daniel 7419: ctxt->wellFormed = 0;
1.180 daniel 7420: ctxt->disableSAX = 1;
1.170 daniel 7421: xmlFree(name);
1.52 daniel 7422: return(NULL);
1.43 daniel 7423: }
7424:
1.172 daniel 7425: /*
7426: * Check that xml:lang conforms to the specification
7427: */
7428: if (!xmlStrcmp(name, BAD_CAST "xml:lang")) {
7429: if (!xmlCheckLanguageID(val)) {
7430: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7431: ctxt->sax->error(ctxt->userData,
7432: "Invalid value for xml:lang : %s\n", val);
7433: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
7434: ctxt->wellFormed = 0;
1.180 daniel 7435: ctxt->disableSAX = 1;
1.172 daniel 7436: }
7437: }
7438:
1.176 daniel 7439: /*
7440: * Check that xml:space conforms to the specification
7441: */
7442: if (!xmlStrcmp(name, BAD_CAST "xml:space")) {
7443: if (!xmlStrcmp(val, BAD_CAST "default"))
7444: *(ctxt->space) = 0;
7445: else if (!xmlStrcmp(val, BAD_CAST "preserve"))
7446: *(ctxt->space) = 1;
7447: else {
7448: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7449: ctxt->sax->error(ctxt->userData,
7450: "Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
7451: val);
7452: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
7453: ctxt->wellFormed = 0;
1.180 daniel 7454: ctxt->disableSAX = 1;
1.176 daniel 7455: }
7456: }
7457:
1.72 daniel 7458: *value = val;
7459: return(name);
1.3 veillard 7460: }
7461:
1.50 daniel 7462: /**
7463: * xmlParseStartTag:
7464: * @ctxt: an XML parser context
7465: *
7466: * parse a start of tag either for rule element or
7467: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 7468: *
7469: * [40] STag ::= '<' Name (S Attribute)* S? '>'
7470: *
1.98 daniel 7471: * [ WFC: Unique Att Spec ]
7472: * No attribute name may appear more than once in the same start-tag or
7473: * empty-element tag.
7474: *
1.29 daniel 7475: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7476: *
1.98 daniel 7477: * [ WFC: Unique Att Spec ]
7478: * No attribute name may appear more than once in the same start-tag or
7479: * empty-element tag.
7480: *
1.29 daniel 7481: * With namespace:
7482: *
7483: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7484: *
7485: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.83 daniel 7486: *
1.192 daniel 7487: * Returns the element name parsed
1.2 veillard 7488: */
7489:
1.123 daniel 7490: xmlChar *
1.69 daniel 7491: xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 7492: xmlChar *name;
7493: xmlChar *attname;
7494: xmlChar *attvalue;
7495: const xmlChar **atts = NULL;
1.72 daniel 7496: int nbatts = 0;
7497: int maxatts = 0;
7498: int i;
1.2 veillard 7499:
1.152 daniel 7500: if (RAW != '<') return(NULL);
1.40 daniel 7501: NEXT;
1.3 veillard 7502:
1.72 daniel 7503: name = xmlParseName(ctxt);
1.59 daniel 7504: if (name == NULL) {
7505: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7506: ctxt->sax->error(ctxt->userData,
1.59 daniel 7507: "xmlParseStartTag: invalid element name\n");
1.123 daniel 7508: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 7509: ctxt->wellFormed = 0;
1.180 daniel 7510: ctxt->disableSAX = 1;
1.83 daniel 7511: return(NULL);
1.50 daniel 7512: }
7513:
7514: /*
1.3 veillard 7515: * Now parse the attributes, it ends up with the ending
7516: *
7517: * (S Attribute)* S?
7518: */
1.42 daniel 7519: SKIP_BLANKS;
1.91 daniel 7520: GROW;
1.168 daniel 7521:
1.153 daniel 7522: while ((IS_CHAR(RAW)) &&
1.152 daniel 7523: (RAW != '>') &&
7524: ((RAW != '/') || (NXT(1) != '>'))) {
1.123 daniel 7525: const xmlChar *q = CUR_PTR;
1.91 daniel 7526: int cons = ctxt->input->consumed;
1.29 daniel 7527:
1.72 daniel 7528: attname = xmlParseAttribute(ctxt, &attvalue);
7529: if ((attname != NULL) && (attvalue != NULL)) {
7530: /*
1.98 daniel 7531: * [ WFC: Unique Att Spec ]
7532: * No attribute name may appear more than once in the same
7533: * start-tag or empty-element tag.
1.72 daniel 7534: */
7535: for (i = 0; i < nbatts;i += 2) {
7536: if (!xmlStrcmp(atts[i], attname)) {
7537: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 7538: ctxt->sax->error(ctxt->userData,
7539: "Attribute %s redefined\n",
7540: attname);
1.72 daniel 7541: ctxt->wellFormed = 0;
1.180 daniel 7542: ctxt->disableSAX = 1;
1.123 daniel 7543: ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
1.119 daniel 7544: xmlFree(attname);
7545: xmlFree(attvalue);
1.98 daniel 7546: goto failed;
1.72 daniel 7547: }
7548: }
7549:
7550: /*
7551: * Add the pair to atts
7552: */
7553: if (atts == NULL) {
7554: maxatts = 10;
1.123 daniel 7555: atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
1.72 daniel 7556: if (atts == NULL) {
1.86 daniel 7557: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 7558: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 7559: return(NULL);
1.72 daniel 7560: }
1.127 daniel 7561: } else if (nbatts + 4 > maxatts) {
1.72 daniel 7562: maxatts *= 2;
1.123 daniel 7563: atts = (const xmlChar **) xmlRealloc(atts,
7564: maxatts * sizeof(xmlChar *));
1.72 daniel 7565: if (atts == NULL) {
1.86 daniel 7566: fprintf(stderr, "realloc of %ld byte failed\n",
1.123 daniel 7567: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 7568: return(NULL);
1.72 daniel 7569: }
7570: }
7571: atts[nbatts++] = attname;
7572: atts[nbatts++] = attvalue;
7573: atts[nbatts] = NULL;
7574: atts[nbatts + 1] = NULL;
1.176 daniel 7575: } else {
7576: if (attname != NULL)
7577: xmlFree(attname);
7578: if (attvalue != NULL)
7579: xmlFree(attvalue);
1.72 daniel 7580: }
7581:
1.116 daniel 7582: failed:
1.168 daniel 7583:
7584: if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7585: break;
7586: if (!IS_BLANK(RAW)) {
7587: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7588: ctxt->sax->error(ctxt->userData,
7589: "attributes construct error\n");
7590: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7591: ctxt->wellFormed = 0;
1.180 daniel 7592: ctxt->disableSAX = 1;
1.168 daniel 7593: }
1.42 daniel 7594: SKIP_BLANKS;
1.91 daniel 7595: if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
1.55 daniel 7596: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7597: ctxt->sax->error(ctxt->userData,
1.31 daniel 7598: "xmlParseStartTag: problem parsing attributes\n");
1.123 daniel 7599: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 7600: ctxt->wellFormed = 0;
1.180 daniel 7601: ctxt->disableSAX = 1;
1.29 daniel 7602: break;
1.3 veillard 7603: }
1.91 daniel 7604: GROW;
1.3 veillard 7605: }
7606:
1.43 daniel 7607: /*
1.72 daniel 7608: * SAX: Start of Element !
1.43 daniel 7609: */
1.171 daniel 7610: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
7611: (!ctxt->disableSAX))
1.74 daniel 7612: ctxt->sax->startElement(ctxt->userData, name, atts);
1.43 daniel 7613:
1.72 daniel 7614: if (atts != NULL) {
1.123 daniel 7615: for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
1.119 daniel 7616: xmlFree(atts);
1.72 daniel 7617: }
1.83 daniel 7618: return(name);
1.3 veillard 7619: }
7620:
1.50 daniel 7621: /**
7622: * xmlParseEndTag:
7623: * @ctxt: an XML parser context
7624: *
7625: * parse an end of tag
1.27 daniel 7626: *
7627: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 7628: *
7629: * With namespace
7630: *
1.72 daniel 7631: * [NS 9] ETag ::= '</' QName S? '>'
1.7 veillard 7632: */
7633:
1.55 daniel 7634: void
1.140 daniel 7635: xmlParseEndTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 7636: xmlChar *name;
1.140 daniel 7637: xmlChar *oldname;
1.7 veillard 7638:
1.91 daniel 7639: GROW;
1.152 daniel 7640: if ((RAW != '<') || (NXT(1) != '/')) {
1.55 daniel 7641: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7642: ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
1.59 daniel 7643: ctxt->wellFormed = 0;
1.180 daniel 7644: ctxt->disableSAX = 1;
1.123 daniel 7645: ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
1.27 daniel 7646: return;
7647: }
1.40 daniel 7648: SKIP(2);
1.7 veillard 7649:
1.72 daniel 7650: name = xmlParseName(ctxt);
1.7 veillard 7651:
7652: /*
7653: * We should definitely be at the ending "S? '>'" part
7654: */
1.91 daniel 7655: GROW;
1.42 daniel 7656: SKIP_BLANKS;
1.153 daniel 7657: if ((!IS_CHAR(RAW)) || (RAW != '>')) {
1.55 daniel 7658: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7659: ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
1.123 daniel 7660: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.59 daniel 7661: ctxt->wellFormed = 0;
1.180 daniel 7662: ctxt->disableSAX = 1;
1.7 veillard 7663: } else
1.40 daniel 7664: NEXT;
1.7 veillard 7665:
1.72 daniel 7666: /*
1.98 daniel 7667: * [ WFC: Element Type Match ]
7668: * The Name in an element's end-tag must match the element type in the
7669: * start-tag.
7670: *
1.83 daniel 7671: */
1.147 daniel 7672: if ((name == NULL) || (ctxt->name == NULL) ||
7673: (xmlStrcmp(name, ctxt->name))) {
7674: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
7675: if ((name != NULL) && (ctxt->name != NULL)) {
7676: ctxt->sax->error(ctxt->userData,
7677: "Opening and ending tag mismatch: %s and %s\n",
7678: ctxt->name, name);
7679: } else if (ctxt->name != NULL) {
7680: ctxt->sax->error(ctxt->userData,
7681: "Ending tag eror for: %s\n", ctxt->name);
7682: } else {
7683: ctxt->sax->error(ctxt->userData,
7684: "Ending tag error: internal error ???\n");
7685: }
1.122 daniel 7686:
1.147 daniel 7687: }
1.123 daniel 7688: ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
1.83 daniel 7689: ctxt->wellFormed = 0;
1.180 daniel 7690: ctxt->disableSAX = 1;
1.83 daniel 7691: }
7692:
7693: /*
1.72 daniel 7694: * SAX: End of Tag
7695: */
1.171 daniel 7696: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7697: (!ctxt->disableSAX))
1.74 daniel 7698: ctxt->sax->endElement(ctxt->userData, name);
1.72 daniel 7699:
7700: if (name != NULL)
1.119 daniel 7701: xmlFree(name);
1.140 daniel 7702: oldname = namePop(ctxt);
1.176 daniel 7703: spacePop(ctxt);
1.140 daniel 7704: if (oldname != NULL) {
7705: #ifdef DEBUG_STACK
7706: fprintf(stderr,"Close: popped %s\n", oldname);
7707: #endif
7708: xmlFree(oldname);
7709: }
1.7 veillard 7710: return;
7711: }
7712:
1.50 daniel 7713: /**
7714: * xmlParseCDSect:
7715: * @ctxt: an XML parser context
7716: *
7717: * Parse escaped pure raw content.
1.29 daniel 7718: *
7719: * [18] CDSect ::= CDStart CData CDEnd
7720: *
7721: * [19] CDStart ::= '<![CDATA['
7722: *
7723: * [20] Data ::= (Char* - (Char* ']]>' Char*))
7724: *
7725: * [21] CDEnd ::= ']]>'
1.3 veillard 7726: */
1.55 daniel 7727: void
7728: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.135 daniel 7729: xmlChar *buf = NULL;
7730: int len = 0;
1.140 daniel 7731: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 7732: int r, rl;
7733: int s, sl;
7734: int cur, l;
1.3 veillard 7735:
1.106 daniel 7736: if ((NXT(0) == '<') && (NXT(1) == '!') &&
1.40 daniel 7737: (NXT(2) == '[') && (NXT(3) == 'C') &&
7738: (NXT(4) == 'D') && (NXT(5) == 'A') &&
7739: (NXT(6) == 'T') && (NXT(7) == 'A') &&
7740: (NXT(8) == '[')) {
7741: SKIP(9);
1.29 daniel 7742: } else
1.45 daniel 7743: return;
1.109 daniel 7744:
7745: ctxt->instate = XML_PARSER_CDATA_SECTION;
1.152 daniel 7746: r = CUR_CHAR(rl);
7747: if (!IS_CHAR(r)) {
1.55 daniel 7748: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7749: ctxt->sax->error(ctxt->userData,
1.135 daniel 7750: "CData section not finished\n");
1.59 daniel 7751: ctxt->wellFormed = 0;
1.180 daniel 7752: ctxt->disableSAX = 1;
1.123 daniel 7753: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.109 daniel 7754: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 7755: return;
1.3 veillard 7756: }
1.152 daniel 7757: NEXTL(rl);
7758: s = CUR_CHAR(sl);
7759: if (!IS_CHAR(s)) {
1.55 daniel 7760: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7761: ctxt->sax->error(ctxt->userData,
1.135 daniel 7762: "CData section not finished\n");
1.123 daniel 7763: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.59 daniel 7764: ctxt->wellFormed = 0;
1.180 daniel 7765: ctxt->disableSAX = 1;
1.109 daniel 7766: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 7767: return;
1.3 veillard 7768: }
1.152 daniel 7769: NEXTL(sl);
7770: cur = CUR_CHAR(l);
1.135 daniel 7771: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7772: if (buf == NULL) {
7773: fprintf(stderr, "malloc of %d byte failed\n", size);
7774: return;
7775: }
1.108 veillard 7776: while (IS_CHAR(cur) &&
1.110 daniel 7777: ((r != ']') || (s != ']') || (cur != '>'))) {
1.152 daniel 7778: if (len + 5 >= size) {
1.135 daniel 7779: size *= 2;
1.204 ! veillard 7780: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 7781: if (buf == NULL) {
7782: fprintf(stderr, "realloc of %d byte failed\n", size);
7783: return;
7784: }
7785: }
1.152 daniel 7786: COPY_BUF(rl,buf,len,r);
1.110 daniel 7787: r = s;
1.152 daniel 7788: rl = sl;
1.110 daniel 7789: s = cur;
1.152 daniel 7790: sl = l;
7791: NEXTL(l);
7792: cur = CUR_CHAR(l);
1.3 veillard 7793: }
1.135 daniel 7794: buf[len] = 0;
1.109 daniel 7795: ctxt->instate = XML_PARSER_CONTENT;
1.152 daniel 7796: if (cur != '>') {
1.55 daniel 7797: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7798: ctxt->sax->error(ctxt->userData,
1.135 daniel 7799: "CData section not finished\n%.50s\n", buf);
1.123 daniel 7800: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.59 daniel 7801: ctxt->wellFormed = 0;
1.180 daniel 7802: ctxt->disableSAX = 1;
1.135 daniel 7803: xmlFree(buf);
1.45 daniel 7804: return;
1.3 veillard 7805: }
1.152 daniel 7806: NEXTL(l);
1.16 daniel 7807:
1.45 daniel 7808: /*
1.135 daniel 7809: * Ok the buffer is to be consumed as cdata.
1.45 daniel 7810: */
1.171 daniel 7811: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.107 daniel 7812: if (ctxt->sax->cdataBlock != NULL)
1.135 daniel 7813: ctxt->sax->cdataBlock(ctxt->userData, buf, len);
1.45 daniel 7814: }
1.135 daniel 7815: xmlFree(buf);
1.2 veillard 7816: }
7817:
1.50 daniel 7818: /**
7819: * xmlParseContent:
7820: * @ctxt: an XML parser context
7821: *
7822: * Parse a content:
1.2 veillard 7823: *
1.27 daniel 7824: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 7825: */
7826:
1.55 daniel 7827: void
7828: xmlParseContent(xmlParserCtxtPtr ctxt) {
1.97 daniel 7829: GROW;
1.176 daniel 7830: while (((RAW != 0) || (ctxt->token != 0)) &&
7831: ((RAW != '<') || (NXT(1) != '/'))) {
1.123 daniel 7832: const xmlChar *test = CUR_PTR;
1.91 daniel 7833: int cons = ctxt->input->consumed;
1.123 daniel 7834: xmlChar tok = ctxt->token;
1.27 daniel 7835:
7836: /*
1.152 daniel 7837: * Handle possible processed charrefs.
7838: */
7839: if (ctxt->token != 0) {
7840: xmlParseCharData(ctxt, 0);
7841: }
7842: /*
1.27 daniel 7843: * First case : a Processing Instruction.
7844: */
1.152 daniel 7845: else if ((RAW == '<') && (NXT(1) == '?')) {
1.27 daniel 7846: xmlParsePI(ctxt);
7847: }
1.72 daniel 7848:
1.27 daniel 7849: /*
7850: * Second case : a CDSection
7851: */
1.152 daniel 7852: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7853: (NXT(2) == '[') && (NXT(3) == 'C') &&
7854: (NXT(4) == 'D') && (NXT(5) == 'A') &&
7855: (NXT(6) == 'T') && (NXT(7) == 'A') &&
7856: (NXT(8) == '[')) {
1.45 daniel 7857: xmlParseCDSect(ctxt);
1.27 daniel 7858: }
1.72 daniel 7859:
1.27 daniel 7860: /*
7861: * Third case : a comment
7862: */
1.152 daniel 7863: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7864: (NXT(2) == '-') && (NXT(3) == '-')) {
1.114 daniel 7865: xmlParseComment(ctxt);
1.97 daniel 7866: ctxt->instate = XML_PARSER_CONTENT;
1.27 daniel 7867: }
1.72 daniel 7868:
1.27 daniel 7869: /*
7870: * Fourth case : a sub-element.
7871: */
1.152 daniel 7872: else if (RAW == '<') {
1.72 daniel 7873: xmlParseElement(ctxt);
1.45 daniel 7874: }
1.72 daniel 7875:
1.45 daniel 7876: /*
1.50 daniel 7877: * Fifth case : a reference. If if has not been resolved,
7878: * parsing returns it's Name, create the node
1.45 daniel 7879: */
1.97 daniel 7880:
1.152 daniel 7881: else if (RAW == '&') {
1.77 daniel 7882: xmlParseReference(ctxt);
1.27 daniel 7883: }
1.72 daniel 7884:
1.27 daniel 7885: /*
7886: * Last case, text. Note that References are handled directly.
7887: */
7888: else {
1.45 daniel 7889: xmlParseCharData(ctxt, 0);
1.3 veillard 7890: }
1.14 veillard 7891:
1.91 daniel 7892: GROW;
1.14 veillard 7893: /*
1.45 daniel 7894: * Pop-up of finished entities.
1.14 veillard 7895: */
1.152 daniel 7896: while ((RAW == 0) && (ctxt->inputNr > 1))
1.69 daniel 7897: xmlPopInput(ctxt);
1.135 daniel 7898: SHRINK;
1.45 daniel 7899:
1.113 daniel 7900: if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
7901: (tok == ctxt->token)) {
1.55 daniel 7902: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7903: ctxt->sax->error(ctxt->userData,
1.59 daniel 7904: "detected an error in element content\n");
1.123 daniel 7905: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 7906: ctxt->wellFormed = 0;
1.180 daniel 7907: ctxt->disableSAX = 1;
1.29 daniel 7908: break;
7909: }
1.3 veillard 7910: }
1.2 veillard 7911: }
7912:
1.50 daniel 7913: /**
7914: * xmlParseElement:
7915: * @ctxt: an XML parser context
7916: *
7917: * parse an XML element, this is highly recursive
1.26 daniel 7918: *
7919: * [39] element ::= EmptyElemTag | STag content ETag
7920: *
1.98 daniel 7921: * [ WFC: Element Type Match ]
7922: * The Name in an element's end-tag must match the element type in the
7923: * start-tag.
7924: *
7925: * [ VC: Element Valid ]
1.117 daniel 7926: * An element is valid if there is a declaration matching elementdecl
1.99 daniel 7927: * where the Name matches the element type and one of the following holds:
7928: * - The declaration matches EMPTY and the element has no content.
7929: * - The declaration matches children and the sequence of child elements
7930: * belongs to the language generated by the regular expression in the
7931: * content model, with optional white space (characters matching the
7932: * nonterminal S) between each pair of child elements.
7933: * - The declaration matches Mixed and the content consists of character
7934: * data and child elements whose types match names in the content model.
7935: * - The declaration matches ANY, and the types of any child elements have
7936: * been declared.
1.2 veillard 7937: */
1.26 daniel 7938:
1.72 daniel 7939: void
1.69 daniel 7940: xmlParseElement(xmlParserCtxtPtr ctxt) {
1.123 daniel 7941: const xmlChar *openTag = CUR_PTR;
7942: xmlChar *name;
1.140 daniel 7943: xmlChar *oldname;
1.32 daniel 7944: xmlParserNodeInfo node_info;
1.118 daniel 7945: xmlNodePtr ret;
1.2 veillard 7946:
1.32 daniel 7947: /* Capture start position */
1.118 daniel 7948: if (ctxt->record_info) {
7949: node_info.begin_pos = ctxt->input->consumed +
7950: (CUR_PTR - ctxt->input->base);
7951: node_info.begin_line = ctxt->input->line;
7952: }
1.32 daniel 7953:
1.176 daniel 7954: if (ctxt->spaceNr == 0)
7955: spacePush(ctxt, -1);
7956: else
7957: spacePush(ctxt, *ctxt->space);
7958:
1.83 daniel 7959: name = xmlParseStartTag(ctxt);
7960: if (name == NULL) {
1.176 daniel 7961: spacePop(ctxt);
1.83 daniel 7962: return;
7963: }
1.140 daniel 7964: namePush(ctxt, name);
1.118 daniel 7965: ret = ctxt->node;
1.2 veillard 7966:
7967: /*
1.99 daniel 7968: * [ VC: Root Element Type ]
7969: * The Name in the document type declaration must match the element
7970: * type of the root element.
7971: */
1.105 daniel 7972: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 7973: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.102 daniel 7974: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
1.99 daniel 7975:
7976: /*
1.2 veillard 7977: * Check for an Empty Element.
7978: */
1.152 daniel 7979: if ((RAW == '/') && (NXT(1) == '>')) {
1.40 daniel 7980: SKIP(2);
1.171 daniel 7981: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7982: (!ctxt->disableSAX))
1.83 daniel 7983: ctxt->sax->endElement(ctxt->userData, name);
1.140 daniel 7984: oldname = namePop(ctxt);
1.176 daniel 7985: spacePop(ctxt);
1.140 daniel 7986: if (oldname != NULL) {
7987: #ifdef DEBUG_STACK
7988: fprintf(stderr,"Close: popped %s\n", oldname);
7989: #endif
7990: xmlFree(oldname);
7991: }
1.72 daniel 7992: return;
1.2 veillard 7993: }
1.152 daniel 7994: if (RAW == '>') {
1.91 daniel 7995: NEXT;
7996: } else {
1.55 daniel 7997: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7998: ctxt->sax->error(ctxt->userData,
7999: "Couldn't find end of Start Tag\n%.30s\n",
1.57 daniel 8000: openTag);
1.59 daniel 8001: ctxt->wellFormed = 0;
1.180 daniel 8002: ctxt->disableSAX = 1;
1.123 daniel 8003: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.45 daniel 8004:
8005: /*
8006: * end of parsing of this node.
8007: */
8008: nodePop(ctxt);
1.140 daniel 8009: oldname = namePop(ctxt);
1.176 daniel 8010: spacePop(ctxt);
1.140 daniel 8011: if (oldname != NULL) {
8012: #ifdef DEBUG_STACK
8013: fprintf(stderr,"Close: popped %s\n", oldname);
8014: #endif
8015: xmlFree(oldname);
8016: }
1.118 daniel 8017:
8018: /*
8019: * Capture end position and add node
8020: */
8021: if ( ret != NULL && ctxt->record_info ) {
8022: node_info.end_pos = ctxt->input->consumed +
8023: (CUR_PTR - ctxt->input->base);
8024: node_info.end_line = ctxt->input->line;
8025: node_info.node = ret;
8026: xmlParserAddNodeInfo(ctxt, &node_info);
8027: }
1.72 daniel 8028: return;
1.2 veillard 8029: }
8030:
8031: /*
8032: * Parse the content of the element:
8033: */
1.45 daniel 8034: xmlParseContent(ctxt);
1.153 daniel 8035: if (!IS_CHAR(RAW)) {
1.55 daniel 8036: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8037: ctxt->sax->error(ctxt->userData,
1.57 daniel 8038: "Premature end of data in tag %.30s\n", openTag);
1.59 daniel 8039: ctxt->wellFormed = 0;
1.180 daniel 8040: ctxt->disableSAX = 1;
1.123 daniel 8041: ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
1.45 daniel 8042:
8043: /*
8044: * end of parsing of this node.
8045: */
8046: nodePop(ctxt);
1.140 daniel 8047: oldname = namePop(ctxt);
1.176 daniel 8048: spacePop(ctxt);
1.140 daniel 8049: if (oldname != NULL) {
8050: #ifdef DEBUG_STACK
8051: fprintf(stderr,"Close: popped %s\n", oldname);
8052: #endif
8053: xmlFree(oldname);
8054: }
1.72 daniel 8055: return;
1.2 veillard 8056: }
8057:
8058: /*
1.27 daniel 8059: * parse the end of tag: '</' should be here.
1.2 veillard 8060: */
1.140 daniel 8061: xmlParseEndTag(ctxt);
1.118 daniel 8062:
8063: /*
8064: * Capture end position and add node
8065: */
8066: if ( ret != NULL && ctxt->record_info ) {
8067: node_info.end_pos = ctxt->input->consumed +
8068: (CUR_PTR - ctxt->input->base);
8069: node_info.end_line = ctxt->input->line;
8070: node_info.node = ret;
8071: xmlParserAddNodeInfo(ctxt, &node_info);
8072: }
1.2 veillard 8073: }
8074:
1.50 daniel 8075: /**
8076: * xmlParseVersionNum:
8077: * @ctxt: an XML parser context
8078: *
8079: * parse the XML version value.
1.29 daniel 8080: *
8081: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
1.68 daniel 8082: *
8083: * Returns the string giving the XML version number, or NULL
1.29 daniel 8084: */
1.123 daniel 8085: xmlChar *
1.55 daniel 8086: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.135 daniel 8087: xmlChar *buf = NULL;
8088: int len = 0;
8089: int size = 10;
8090: xmlChar cur;
1.29 daniel 8091:
1.135 daniel 8092: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
8093: if (buf == NULL) {
8094: fprintf(stderr, "malloc of %d byte failed\n", size);
8095: return(NULL);
8096: }
8097: cur = CUR;
1.152 daniel 8098: while (((cur >= 'a') && (cur <= 'z')) ||
8099: ((cur >= 'A') && (cur <= 'Z')) ||
8100: ((cur >= '0') && (cur <= '9')) ||
8101: (cur == '_') || (cur == '.') ||
8102: (cur == ':') || (cur == '-')) {
1.135 daniel 8103: if (len + 1 >= size) {
8104: size *= 2;
1.204 ! veillard 8105: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 8106: if (buf == NULL) {
8107: fprintf(stderr, "realloc of %d byte failed\n", size);
8108: return(NULL);
8109: }
8110: }
8111: buf[len++] = cur;
8112: NEXT;
8113: cur=CUR;
8114: }
8115: buf[len] = 0;
8116: return(buf);
1.29 daniel 8117: }
8118:
1.50 daniel 8119: /**
8120: * xmlParseVersionInfo:
8121: * @ctxt: an XML parser context
8122: *
8123: * parse the XML version.
1.29 daniel 8124: *
8125: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8126: *
8127: * [25] Eq ::= S? '=' S?
1.50 daniel 8128: *
1.68 daniel 8129: * Returns the version string, e.g. "1.0"
1.29 daniel 8130: */
8131:
1.123 daniel 8132: xmlChar *
1.55 daniel 8133: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
1.123 daniel 8134: xmlChar *version = NULL;
8135: const xmlChar *q;
1.29 daniel 8136:
1.152 daniel 8137: if ((RAW == 'v') && (NXT(1) == 'e') &&
1.40 daniel 8138: (NXT(2) == 'r') && (NXT(3) == 's') &&
8139: (NXT(4) == 'i') && (NXT(5) == 'o') &&
8140: (NXT(6) == 'n')) {
8141: SKIP(7);
1.42 daniel 8142: SKIP_BLANKS;
1.152 daniel 8143: if (RAW != '=') {
1.55 daniel 8144: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8145: ctxt->sax->error(ctxt->userData,
8146: "xmlParseVersionInfo : expected '='\n");
1.59 daniel 8147: ctxt->wellFormed = 0;
1.180 daniel 8148: ctxt->disableSAX = 1;
1.123 daniel 8149: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.31 daniel 8150: return(NULL);
8151: }
1.40 daniel 8152: NEXT;
1.42 daniel 8153: SKIP_BLANKS;
1.152 daniel 8154: if (RAW == '"') {
1.40 daniel 8155: NEXT;
8156: q = CUR_PTR;
1.29 daniel 8157: version = xmlParseVersionNum(ctxt);
1.152 daniel 8158: if (RAW != '"') {
1.55 daniel 8159: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8160: ctxt->sax->error(ctxt->userData,
8161: "String not closed\n%.50s\n", q);
1.59 daniel 8162: ctxt->wellFormed = 0;
1.180 daniel 8163: ctxt->disableSAX = 1;
1.123 daniel 8164: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8165: } else
1.40 daniel 8166: NEXT;
1.152 daniel 8167: } else if (RAW == '\''){
1.40 daniel 8168: NEXT;
8169: q = CUR_PTR;
1.29 daniel 8170: version = xmlParseVersionNum(ctxt);
1.152 daniel 8171: if (RAW != '\'') {
1.55 daniel 8172: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8173: ctxt->sax->error(ctxt->userData,
8174: "String not closed\n%.50s\n", q);
1.123 daniel 8175: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.59 daniel 8176: ctxt->wellFormed = 0;
1.180 daniel 8177: ctxt->disableSAX = 1;
1.55 daniel 8178: } else
1.40 daniel 8179: NEXT;
1.31 daniel 8180: } else {
1.55 daniel 8181: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8182: ctxt->sax->error(ctxt->userData,
1.59 daniel 8183: "xmlParseVersionInfo : expected ' or \"\n");
1.122 daniel 8184: ctxt->wellFormed = 0;
1.180 daniel 8185: ctxt->disableSAX = 1;
1.123 daniel 8186: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.29 daniel 8187: }
8188: }
8189: return(version);
8190: }
8191:
1.50 daniel 8192: /**
8193: * xmlParseEncName:
8194: * @ctxt: an XML parser context
8195: *
8196: * parse the XML encoding name
1.29 daniel 8197: *
8198: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1.50 daniel 8199: *
1.68 daniel 8200: * Returns the encoding name value or NULL
1.29 daniel 8201: */
1.123 daniel 8202: xmlChar *
1.55 daniel 8203: xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.135 daniel 8204: xmlChar *buf = NULL;
8205: int len = 0;
8206: int size = 10;
8207: xmlChar cur;
1.29 daniel 8208:
1.135 daniel 8209: cur = CUR;
8210: if (((cur >= 'a') && (cur <= 'z')) ||
8211: ((cur >= 'A') && (cur <= 'Z'))) {
8212: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
8213: if (buf == NULL) {
8214: fprintf(stderr, "malloc of %d byte failed\n", size);
8215: return(NULL);
8216: }
8217:
8218: buf[len++] = cur;
1.40 daniel 8219: NEXT;
1.135 daniel 8220: cur = CUR;
1.152 daniel 8221: while (((cur >= 'a') && (cur <= 'z')) ||
8222: ((cur >= 'A') && (cur <= 'Z')) ||
8223: ((cur >= '0') && (cur <= '9')) ||
8224: (cur == '.') || (cur == '_') ||
8225: (cur == '-')) {
1.135 daniel 8226: if (len + 1 >= size) {
8227: size *= 2;
1.204 ! veillard 8228: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 8229: if (buf == NULL) {
8230: fprintf(stderr, "realloc of %d byte failed\n", size);
8231: return(NULL);
8232: }
8233: }
8234: buf[len++] = cur;
8235: NEXT;
8236: cur = CUR;
8237: if (cur == 0) {
8238: SHRINK;
8239: GROW;
8240: cur = CUR;
8241: }
8242: }
8243: buf[len] = 0;
1.29 daniel 8244: } else {
1.55 daniel 8245: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8246: ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
1.59 daniel 8247: ctxt->wellFormed = 0;
1.180 daniel 8248: ctxt->disableSAX = 1;
1.123 daniel 8249: ctxt->errNo = XML_ERR_ENCODING_NAME;
1.29 daniel 8250: }
1.135 daniel 8251: return(buf);
1.29 daniel 8252: }
8253:
1.50 daniel 8254: /**
8255: * xmlParseEncodingDecl:
8256: * @ctxt: an XML parser context
8257: *
8258: * parse the XML encoding declaration
1.29 daniel 8259: *
8260: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
1.50 daniel 8261: *
8262: * TODO: this should setup the conversion filters.
8263: *
1.68 daniel 8264: * Returns the encoding value or NULL
1.29 daniel 8265: */
8266:
1.123 daniel 8267: xmlChar *
1.55 daniel 8268: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 8269: xmlChar *encoding = NULL;
8270: const xmlChar *q;
1.29 daniel 8271:
1.42 daniel 8272: SKIP_BLANKS;
1.152 daniel 8273: if ((RAW == 'e') && (NXT(1) == 'n') &&
1.40 daniel 8274: (NXT(2) == 'c') && (NXT(3) == 'o') &&
8275: (NXT(4) == 'd') && (NXT(5) == 'i') &&
8276: (NXT(6) == 'n') && (NXT(7) == 'g')) {
8277: SKIP(8);
1.42 daniel 8278: SKIP_BLANKS;
1.152 daniel 8279: if (RAW != '=') {
1.55 daniel 8280: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8281: ctxt->sax->error(ctxt->userData,
8282: "xmlParseEncodingDecl : expected '='\n");
1.59 daniel 8283: ctxt->wellFormed = 0;
1.180 daniel 8284: ctxt->disableSAX = 1;
1.123 daniel 8285: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.31 daniel 8286: return(NULL);
8287: }
1.40 daniel 8288: NEXT;
1.42 daniel 8289: SKIP_BLANKS;
1.152 daniel 8290: if (RAW == '"') {
1.40 daniel 8291: NEXT;
8292: q = CUR_PTR;
1.29 daniel 8293: encoding = xmlParseEncName(ctxt);
1.152 daniel 8294: if (RAW != '"') {
1.55 daniel 8295: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8296: ctxt->sax->error(ctxt->userData,
8297: "String not closed\n%.50s\n", q);
1.59 daniel 8298: ctxt->wellFormed = 0;
1.180 daniel 8299: ctxt->disableSAX = 1;
1.123 daniel 8300: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8301: } else
1.40 daniel 8302: NEXT;
1.152 daniel 8303: } else if (RAW == '\''){
1.40 daniel 8304: NEXT;
8305: q = CUR_PTR;
1.29 daniel 8306: encoding = xmlParseEncName(ctxt);
1.152 daniel 8307: if (RAW != '\'') {
1.55 daniel 8308: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8309: ctxt->sax->error(ctxt->userData,
8310: "String not closed\n%.50s\n", q);
1.59 daniel 8311: ctxt->wellFormed = 0;
1.180 daniel 8312: ctxt->disableSAX = 1;
1.123 daniel 8313: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8314: } else
1.40 daniel 8315: NEXT;
1.152 daniel 8316: } else if (RAW == '"'){
1.55 daniel 8317: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8318: ctxt->sax->error(ctxt->userData,
1.59 daniel 8319: "xmlParseEncodingDecl : expected ' or \"\n");
8320: ctxt->wellFormed = 0;
1.180 daniel 8321: ctxt->disableSAX = 1;
1.123 daniel 8322: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.29 daniel 8323: }
1.193 daniel 8324: if (encoding != NULL) {
8325: xmlCharEncoding enc;
8326: xmlCharEncodingHandlerPtr handler;
8327:
1.195 daniel 8328: if (ctxt->input->encoding != NULL)
8329: xmlFree((xmlChar *) ctxt->input->encoding);
8330: ctxt->input->encoding = encoding;
8331:
1.193 daniel 8332: enc = xmlParseCharEncoding((const char *) encoding);
8333: /*
8334: * registered set of known encodings
8335: */
8336: if (enc != XML_CHAR_ENCODING_ERROR) {
8337: xmlSwitchEncoding(ctxt, enc);
8338: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8339: xmlFree(encoding);
8340: return(NULL);
8341: }
8342: } else {
8343: /*
8344: * fallback for unknown encodings
8345: */
8346: handler = xmlFindCharEncodingHandler((const char *) encoding);
8347: if (handler != NULL) {
8348: xmlSwitchToEncoding(ctxt, handler);
8349: } else {
8350: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
8351: xmlFree(encoding);
8352: return(NULL);
8353: }
8354: }
8355: }
1.29 daniel 8356: }
8357: return(encoding);
8358: }
8359:
1.50 daniel 8360: /**
8361: * xmlParseSDDecl:
8362: * @ctxt: an XML parser context
8363: *
8364: * parse the XML standalone declaration
1.29 daniel 8365: *
8366: * [32] SDDecl ::= S 'standalone' Eq
8367: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
1.99 daniel 8368: *
8369: * [ VC: Standalone Document Declaration ]
8370: * TODO The standalone document declaration must have the value "no"
8371: * if any external markup declarations contain declarations of:
8372: * - attributes with default values, if elements to which these
8373: * attributes apply appear in the document without specifications
8374: * of values for these attributes, or
8375: * - entities (other than amp, lt, gt, apos, quot), if references
8376: * to those entities appear in the document, or
8377: * - attributes with values subject to normalization, where the
8378: * attribute appears in the document with a value which will change
8379: * as a result of normalization, or
8380: * - element types with element content, if white space occurs directly
8381: * within any instance of those types.
1.68 daniel 8382: *
8383: * Returns 1 if standalone, 0 otherwise
1.29 daniel 8384: */
8385:
1.55 daniel 8386: int
8387: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 8388: int standalone = -1;
8389:
1.42 daniel 8390: SKIP_BLANKS;
1.152 daniel 8391: if ((RAW == 's') && (NXT(1) == 't') &&
1.40 daniel 8392: (NXT(2) == 'a') && (NXT(3) == 'n') &&
8393: (NXT(4) == 'd') && (NXT(5) == 'a') &&
8394: (NXT(6) == 'l') && (NXT(7) == 'o') &&
8395: (NXT(8) == 'n') && (NXT(9) == 'e')) {
8396: SKIP(10);
1.81 daniel 8397: SKIP_BLANKS;
1.152 daniel 8398: if (RAW != '=') {
1.55 daniel 8399: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8400: ctxt->sax->error(ctxt->userData,
1.59 daniel 8401: "XML standalone declaration : expected '='\n");
1.123 daniel 8402: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.59 daniel 8403: ctxt->wellFormed = 0;
1.180 daniel 8404: ctxt->disableSAX = 1;
1.32 daniel 8405: return(standalone);
8406: }
1.40 daniel 8407: NEXT;
1.42 daniel 8408: SKIP_BLANKS;
1.152 daniel 8409: if (RAW == '\''){
1.40 daniel 8410: NEXT;
1.152 daniel 8411: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 8412: standalone = 0;
1.40 daniel 8413: SKIP(2);
1.152 daniel 8414: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 8415: (NXT(2) == 's')) {
1.29 daniel 8416: standalone = 1;
1.40 daniel 8417: SKIP(3);
1.29 daniel 8418: } else {
1.55 daniel 8419: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8420: ctxt->sax->error(ctxt->userData,
8421: "standalone accepts only 'yes' or 'no'\n");
1.123 daniel 8422: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.59 daniel 8423: ctxt->wellFormed = 0;
1.180 daniel 8424: ctxt->disableSAX = 1;
1.29 daniel 8425: }
1.152 daniel 8426: if (RAW != '\'') {
1.55 daniel 8427: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8428: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.123 daniel 8429: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.59 daniel 8430: ctxt->wellFormed = 0;
1.180 daniel 8431: ctxt->disableSAX = 1;
1.55 daniel 8432: } else
1.40 daniel 8433: NEXT;
1.152 daniel 8434: } else if (RAW == '"'){
1.40 daniel 8435: NEXT;
1.152 daniel 8436: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 8437: standalone = 0;
1.40 daniel 8438: SKIP(2);
1.152 daniel 8439: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 8440: (NXT(2) == 's')) {
1.29 daniel 8441: standalone = 1;
1.40 daniel 8442: SKIP(3);
1.29 daniel 8443: } else {
1.55 daniel 8444: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8445: ctxt->sax->error(ctxt->userData,
1.59 daniel 8446: "standalone accepts only 'yes' or 'no'\n");
1.123 daniel 8447: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.59 daniel 8448: ctxt->wellFormed = 0;
1.180 daniel 8449: ctxt->disableSAX = 1;
1.29 daniel 8450: }
1.152 daniel 8451: if (RAW != '"') {
1.55 daniel 8452: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8453: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 8454: ctxt->wellFormed = 0;
1.180 daniel 8455: ctxt->disableSAX = 1;
1.123 daniel 8456: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8457: } else
1.40 daniel 8458: NEXT;
1.37 daniel 8459: } else {
1.55 daniel 8460: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8461: ctxt->sax->error(ctxt->userData,
8462: "Standalone value not found\n");
1.59 daniel 8463: ctxt->wellFormed = 0;
1.180 daniel 8464: ctxt->disableSAX = 1;
1.123 daniel 8465: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.37 daniel 8466: }
1.29 daniel 8467: }
8468: return(standalone);
8469: }
8470:
1.50 daniel 8471: /**
8472: * xmlParseXMLDecl:
8473: * @ctxt: an XML parser context
8474: *
8475: * parse an XML declaration header
1.29 daniel 8476: *
8477: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 8478: */
8479:
1.55 daniel 8480: void
8481: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 8482: xmlChar *version;
1.1 veillard 8483:
8484: /*
1.19 daniel 8485: * We know that '<?xml' is here.
1.1 veillard 8486: */
1.40 daniel 8487: SKIP(5);
1.1 veillard 8488:
1.153 daniel 8489: if (!IS_BLANK(RAW)) {
1.59 daniel 8490: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8491: ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
1.123 daniel 8492: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 8493: ctxt->wellFormed = 0;
1.180 daniel 8494: ctxt->disableSAX = 1;
1.59 daniel 8495: }
1.42 daniel 8496: SKIP_BLANKS;
1.1 veillard 8497:
8498: /*
1.29 daniel 8499: * We should have the VersionInfo here.
1.1 veillard 8500: */
1.29 daniel 8501: version = xmlParseVersionInfo(ctxt);
8502: if (version == NULL)
1.45 daniel 8503: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.72 daniel 8504: ctxt->version = xmlStrdup(version);
1.119 daniel 8505: xmlFree(version);
1.29 daniel 8506:
8507: /*
8508: * We may have the encoding declaration
8509: */
1.153 daniel 8510: if (!IS_BLANK(RAW)) {
1.152 daniel 8511: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 8512: SKIP(2);
8513: return;
8514: }
8515: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8516: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.123 daniel 8517: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 8518: ctxt->wellFormed = 0;
1.180 daniel 8519: ctxt->disableSAX = 1;
1.59 daniel 8520: }
1.195 daniel 8521: xmlParseEncodingDecl(ctxt);
1.193 daniel 8522: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8523: /*
8524: * The XML REC instructs us to stop parsing right here
8525: */
8526: return;
8527: }
1.1 veillard 8528:
8529: /*
1.29 daniel 8530: * We may have the standalone status.
1.1 veillard 8531: */
1.164 daniel 8532: if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
1.152 daniel 8533: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 8534: SKIP(2);
8535: return;
8536: }
8537: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8538: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 8539: ctxt->wellFormed = 0;
1.180 daniel 8540: ctxt->disableSAX = 1;
1.123 daniel 8541: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 8542: }
8543: SKIP_BLANKS;
1.167 daniel 8544: ctxt->input->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 8545:
1.42 daniel 8546: SKIP_BLANKS;
1.152 daniel 8547: if ((RAW == '?') && (NXT(1) == '>')) {
1.40 daniel 8548: SKIP(2);
1.152 daniel 8549: } else if (RAW == '>') {
1.31 daniel 8550: /* Deprecated old WD ... */
1.55 daniel 8551: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8552: ctxt->sax->error(ctxt->userData,
8553: "XML declaration must end-up with '?>'\n");
1.59 daniel 8554: ctxt->wellFormed = 0;
1.180 daniel 8555: ctxt->disableSAX = 1;
1.123 daniel 8556: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.40 daniel 8557: NEXT;
1.29 daniel 8558: } else {
1.55 daniel 8559: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8560: ctxt->sax->error(ctxt->userData,
8561: "parsing XML declaration: '?>' expected\n");
1.59 daniel 8562: ctxt->wellFormed = 0;
1.180 daniel 8563: ctxt->disableSAX = 1;
1.123 daniel 8564: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.40 daniel 8565: MOVETO_ENDTAG(CUR_PTR);
8566: NEXT;
1.29 daniel 8567: }
1.1 veillard 8568: }
8569:
1.50 daniel 8570: /**
8571: * xmlParseMisc:
8572: * @ctxt: an XML parser context
8573: *
8574: * parse an XML Misc* optionnal field.
1.21 daniel 8575: *
1.22 daniel 8576: * [27] Misc ::= Comment | PI | S
1.1 veillard 8577: */
8578:
1.55 daniel 8579: void
8580: xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.152 daniel 8581: while (((RAW == '<') && (NXT(1) == '?')) ||
8582: ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 8583: (NXT(2) == '-') && (NXT(3) == '-')) ||
8584: IS_BLANK(CUR)) {
1.152 daniel 8585: if ((RAW == '<') && (NXT(1) == '?')) {
1.16 daniel 8586: xmlParsePI(ctxt);
1.40 daniel 8587: } else if (IS_BLANK(CUR)) {
8588: NEXT;
1.1 veillard 8589: } else
1.114 daniel 8590: xmlParseComment(ctxt);
1.1 veillard 8591: }
8592: }
8593:
1.50 daniel 8594: /**
1.181 daniel 8595: * xmlParseDocument:
1.50 daniel 8596: * @ctxt: an XML parser context
8597: *
8598: * parse an XML document (and build a tree if using the standard SAX
8599: * interface).
1.21 daniel 8600: *
1.22 daniel 8601: * [1] document ::= prolog element Misc*
1.29 daniel 8602: *
8603: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.50 daniel 8604: *
1.68 daniel 8605: * Returns 0, -1 in case of error. the parser context is augmented
1.50 daniel 8606: * as a result of the parsing.
1.1 veillard 8607: */
8608:
1.55 daniel 8609: int
8610: xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.156 daniel 8611: xmlChar start[4];
8612: xmlCharEncoding enc;
8613:
1.45 daniel 8614: xmlDefaultSAXHandlerInit();
8615:
1.91 daniel 8616: GROW;
8617:
1.14 veillard 8618: /*
1.44 daniel 8619: * SAX: beginning of the document processing.
8620: */
1.72 daniel 8621: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
1.74 daniel 8622: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
1.44 daniel 8623:
1.156 daniel 8624: /*
8625: * Get the 4 first bytes and decode the charset
8626: * if enc != XML_CHAR_ENCODING_NONE
8627: * plug some encoding conversion routines.
8628: */
8629: start[0] = RAW;
8630: start[1] = NXT(1);
8631: start[2] = NXT(2);
8632: start[3] = NXT(3);
8633: enc = xmlDetectCharEncoding(start, 4);
8634: if (enc != XML_CHAR_ENCODING_NONE) {
8635: xmlSwitchEncoding(ctxt, enc);
8636: }
8637:
1.1 veillard 8638:
1.59 daniel 8639: if (CUR == 0) {
8640: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8641: ctxt->sax->error(ctxt->userData, "Document is empty\n");
1.123 daniel 8642: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 8643: ctxt->wellFormed = 0;
1.180 daniel 8644: ctxt->disableSAX = 1;
1.59 daniel 8645: }
1.1 veillard 8646:
8647: /*
8648: * Check for the XMLDecl in the Prolog.
8649: */
1.91 daniel 8650: GROW;
1.152 daniel 8651: if ((RAW == '<') && (NXT(1) == '?') &&
1.40 daniel 8652: (NXT(2) == 'x') && (NXT(3) == 'm') &&
1.142 daniel 8653: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.196 daniel 8654:
8655: /*
8656: * Note that we will switch encoding on the fly.
8657: */
1.19 daniel 8658: xmlParseXMLDecl(ctxt);
1.193 daniel 8659: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8660: /*
8661: * The XML REC instructs us to stop parsing right here
8662: */
8663: return(-1);
8664: }
1.167 daniel 8665: ctxt->standalone = ctxt->input->standalone;
1.42 daniel 8666: SKIP_BLANKS;
1.1 veillard 8667: } else {
1.72 daniel 8668: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.1 veillard 8669: }
1.171 daniel 8670: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
1.74 daniel 8671: ctxt->sax->startDocument(ctxt->userData);
1.1 veillard 8672:
8673: /*
8674: * The Misc part of the Prolog
8675: */
1.91 daniel 8676: GROW;
1.16 daniel 8677: xmlParseMisc(ctxt);
1.1 veillard 8678:
8679: /*
1.29 daniel 8680: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 8681: * (doctypedecl Misc*)?
8682: */
1.91 daniel 8683: GROW;
1.152 daniel 8684: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 8685: (NXT(2) == 'D') && (NXT(3) == 'O') &&
8686: (NXT(4) == 'C') && (NXT(5) == 'T') &&
8687: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
8688: (NXT(8) == 'E')) {
1.165 daniel 8689:
1.166 daniel 8690: ctxt->inSubset = 1;
1.22 daniel 8691: xmlParseDocTypeDecl(ctxt);
1.152 daniel 8692: if (RAW == '[') {
1.140 daniel 8693: ctxt->instate = XML_PARSER_DTD;
8694: xmlParseInternalSubset(ctxt);
8695: }
1.165 daniel 8696:
8697: /*
8698: * Create and update the external subset.
8699: */
1.166 daniel 8700: ctxt->inSubset = 2;
1.171 daniel 8701: if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8702: (!ctxt->disableSAX))
1.165 daniel 8703: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8704: ctxt->extSubSystem, ctxt->extSubURI);
1.166 daniel 8705: ctxt->inSubset = 0;
1.165 daniel 8706:
8707:
1.96 daniel 8708: ctxt->instate = XML_PARSER_PROLOG;
1.22 daniel 8709: xmlParseMisc(ctxt);
1.21 daniel 8710: }
8711:
8712: /*
8713: * Time to start parsing the tree itself
1.1 veillard 8714: */
1.91 daniel 8715: GROW;
1.152 daniel 8716: if (RAW != '<') {
1.59 daniel 8717: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8718: ctxt->sax->error(ctxt->userData,
1.151 daniel 8719: "Start tag expected, '<' not found\n");
1.140 daniel 8720: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 8721: ctxt->wellFormed = 0;
1.180 daniel 8722: ctxt->disableSAX = 1;
1.140 daniel 8723: ctxt->instate = XML_PARSER_EOF;
8724: } else {
8725: ctxt->instate = XML_PARSER_CONTENT;
8726: xmlParseElement(ctxt);
8727: ctxt->instate = XML_PARSER_EPILOG;
8728:
8729:
8730: /*
8731: * The Misc part at the end
8732: */
8733: xmlParseMisc(ctxt);
8734:
1.152 daniel 8735: if (RAW != 0) {
1.140 daniel 8736: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8737: ctxt->sax->error(ctxt->userData,
8738: "Extra content at the end of the document\n");
8739: ctxt->wellFormed = 0;
1.180 daniel 8740: ctxt->disableSAX = 1;
1.140 daniel 8741: ctxt->errNo = XML_ERR_DOCUMENT_END;
8742: }
8743: ctxt->instate = XML_PARSER_EOF;
1.59 daniel 8744: }
8745:
1.44 daniel 8746: /*
8747: * SAX: end of the document processing.
8748: */
1.171 daniel 8749: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8750: (!ctxt->disableSAX))
1.74 daniel 8751: ctxt->sax->endDocument(ctxt->userData);
1.151 daniel 8752:
1.59 daniel 8753: if (! ctxt->wellFormed) return(-1);
1.16 daniel 8754: return(0);
8755: }
8756:
1.98 daniel 8757: /************************************************************************
8758: * *
1.128 daniel 8759: * Progressive parsing interfaces *
8760: * *
8761: ************************************************************************/
8762:
8763: /**
8764: * xmlParseLookupSequence:
8765: * @ctxt: an XML parser context
8766: * @first: the first char to lookup
1.140 daniel 8767: * @next: the next char to lookup or zero
8768: * @third: the next char to lookup or zero
1.128 daniel 8769: *
1.140 daniel 8770: * Try to find if a sequence (first, next, third) or just (first next) or
8771: * (first) is available in the input stream.
8772: * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8773: * to avoid rescanning sequences of bytes, it DOES change the state of the
8774: * parser, do not use liberally.
1.128 daniel 8775: *
1.140 daniel 8776: * Returns the index to the current parsing point if the full sequence
8777: * is available, -1 otherwise.
1.128 daniel 8778: */
8779: int
1.140 daniel 8780: xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8781: xmlChar next, xmlChar third) {
8782: int base, len;
8783: xmlParserInputPtr in;
8784: const xmlChar *buf;
8785:
8786: in = ctxt->input;
8787: if (in == NULL) return(-1);
8788: base = in->cur - in->base;
8789: if (base < 0) return(-1);
8790: if (ctxt->checkIndex > base)
8791: base = ctxt->checkIndex;
8792: if (in->buf == NULL) {
8793: buf = in->base;
8794: len = in->length;
8795: } else {
8796: buf = in->buf->buffer->content;
8797: len = in->buf->buffer->use;
8798: }
8799: /* take into account the sequence length */
8800: if (third) len -= 2;
8801: else if (next) len --;
8802: for (;base < len;base++) {
8803: if (buf[base] == first) {
8804: if (third != 0) {
8805: if ((buf[base + 1] != next) ||
8806: (buf[base + 2] != third)) continue;
8807: } else if (next != 0) {
8808: if (buf[base + 1] != next) continue;
8809: }
8810: ctxt->checkIndex = 0;
8811: #ifdef DEBUG_PUSH
8812: if (next == 0)
8813: fprintf(stderr, "PP: lookup '%c' found at %d\n",
8814: first, base);
8815: else if (third == 0)
8816: fprintf(stderr, "PP: lookup '%c%c' found at %d\n",
8817: first, next, base);
8818: else
8819: fprintf(stderr, "PP: lookup '%c%c%c' found at %d\n",
8820: first, next, third, base);
8821: #endif
8822: return(base - (in->cur - in->base));
8823: }
8824: }
8825: ctxt->checkIndex = base;
8826: #ifdef DEBUG_PUSH
8827: if (next == 0)
8828: fprintf(stderr, "PP: lookup '%c' failed\n", first);
8829: else if (third == 0)
8830: fprintf(stderr, "PP: lookup '%c%c' failed\n", first, next);
8831: else
8832: fprintf(stderr, "PP: lookup '%c%c%c' failed\n", first, next, third);
8833: #endif
8834: return(-1);
1.128 daniel 8835: }
8836:
8837: /**
1.143 daniel 8838: * xmlParseTryOrFinish:
1.128 daniel 8839: * @ctxt: an XML parser context
1.143 daniel 8840: * @terminate: last chunk indicator
1.128 daniel 8841: *
8842: * Try to progress on parsing
8843: *
8844: * Returns zero if no parsing was possible
8845: */
8846: int
1.143 daniel 8847: xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
1.128 daniel 8848: int ret = 0;
1.140 daniel 8849: int avail;
8850: xmlChar cur, next;
8851:
8852: #ifdef DEBUG_PUSH
8853: switch (ctxt->instate) {
8854: case XML_PARSER_EOF:
8855: fprintf(stderr, "PP: try EOF\n"); break;
8856: case XML_PARSER_START:
8857: fprintf(stderr, "PP: try START\n"); break;
8858: case XML_PARSER_MISC:
8859: fprintf(stderr, "PP: try MISC\n");break;
8860: case XML_PARSER_COMMENT:
8861: fprintf(stderr, "PP: try COMMENT\n");break;
8862: case XML_PARSER_PROLOG:
8863: fprintf(stderr, "PP: try PROLOG\n");break;
8864: case XML_PARSER_START_TAG:
8865: fprintf(stderr, "PP: try START_TAG\n");break;
8866: case XML_PARSER_CONTENT:
8867: fprintf(stderr, "PP: try CONTENT\n");break;
8868: case XML_PARSER_CDATA_SECTION:
8869: fprintf(stderr, "PP: try CDATA_SECTION\n");break;
8870: case XML_PARSER_END_TAG:
8871: fprintf(stderr, "PP: try END_TAG\n");break;
8872: case XML_PARSER_ENTITY_DECL:
8873: fprintf(stderr, "PP: try ENTITY_DECL\n");break;
8874: case XML_PARSER_ENTITY_VALUE:
8875: fprintf(stderr, "PP: try ENTITY_VALUE\n");break;
8876: case XML_PARSER_ATTRIBUTE_VALUE:
8877: fprintf(stderr, "PP: try ATTRIBUTE_VALUE\n");break;
8878: case XML_PARSER_DTD:
8879: fprintf(stderr, "PP: try DTD\n");break;
8880: case XML_PARSER_EPILOG:
8881: fprintf(stderr, "PP: try EPILOG\n");break;
8882: case XML_PARSER_PI:
8883: fprintf(stderr, "PP: try PI\n");break;
8884: }
8885: #endif
1.128 daniel 8886:
8887: while (1) {
1.140 daniel 8888: /*
8889: * Pop-up of finished entities.
8890: */
1.152 daniel 8891: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 8892: xmlPopInput(ctxt);
8893:
1.184 daniel 8894: if (ctxt->input ==NULL) break;
8895: if (ctxt->input->buf == NULL)
8896: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8897: else
1.184 daniel 8898: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8899: if (avail < 1)
8900: goto done;
1.128 daniel 8901: switch (ctxt->instate) {
8902: case XML_PARSER_EOF:
1.140 daniel 8903: /*
8904: * Document parsing is done !
8905: */
8906: goto done;
8907: case XML_PARSER_START:
8908: /*
8909: * Very first chars read from the document flow.
8910: */
1.184 daniel 8911: cur = ctxt->input->cur[0];
1.140 daniel 8912: if (IS_BLANK(cur)) {
8913: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8914: ctxt->sax->setDocumentLocator(ctxt->userData,
8915: &xmlDefaultSAXLocator);
8916: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8917: ctxt->sax->error(ctxt->userData,
8918: "Extra spaces at the beginning of the document are not allowed\n");
8919: ctxt->errNo = XML_ERR_DOCUMENT_START;
8920: ctxt->wellFormed = 0;
1.180 daniel 8921: ctxt->disableSAX = 1;
1.140 daniel 8922: SKIP_BLANKS;
8923: ret++;
1.184 daniel 8924: if (ctxt->input->buf == NULL)
8925: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8926: else
1.184 daniel 8927: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8928: }
8929: if (avail < 2)
8930: goto done;
8931:
1.184 daniel 8932: cur = ctxt->input->cur[0];
8933: next = ctxt->input->cur[1];
1.140 daniel 8934: if (cur == 0) {
8935: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8936: ctxt->sax->setDocumentLocator(ctxt->userData,
8937: &xmlDefaultSAXLocator);
8938: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8939: ctxt->sax->error(ctxt->userData, "Document is empty\n");
8940: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8941: ctxt->wellFormed = 0;
1.180 daniel 8942: ctxt->disableSAX = 1;
1.140 daniel 8943: ctxt->instate = XML_PARSER_EOF;
8944: #ifdef DEBUG_PUSH
8945: fprintf(stderr, "PP: entering EOF\n");
8946: #endif
8947: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8948: ctxt->sax->endDocument(ctxt->userData);
8949: goto done;
8950: }
8951: if ((cur == '<') && (next == '?')) {
8952: /* PI or XML decl */
8953: if (avail < 5) return(ret);
1.143 daniel 8954: if ((!terminate) &&
8955: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8956: return(ret);
8957: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8958: ctxt->sax->setDocumentLocator(ctxt->userData,
8959: &xmlDefaultSAXLocator);
1.184 daniel 8960: if ((ctxt->input->cur[2] == 'x') &&
8961: (ctxt->input->cur[3] == 'm') &&
8962: (ctxt->input->cur[4] == 'l') &&
8963: (IS_BLANK(ctxt->input->cur[5]))) {
1.140 daniel 8964: ret += 5;
8965: #ifdef DEBUG_PUSH
8966: fprintf(stderr, "PP: Parsing XML Decl\n");
8967: #endif
8968: xmlParseXMLDecl(ctxt);
1.193 daniel 8969: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8970: /*
8971: * The XML REC instructs us to stop parsing right
8972: * here
8973: */
8974: ctxt->instate = XML_PARSER_EOF;
8975: return(0);
8976: }
1.167 daniel 8977: ctxt->standalone = ctxt->input->standalone;
1.164 daniel 8978: if ((ctxt->encoding == NULL) &&
8979: (ctxt->input->encoding != NULL))
8980: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1.171 daniel 8981: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8982: (!ctxt->disableSAX))
1.140 daniel 8983: ctxt->sax->startDocument(ctxt->userData);
8984: ctxt->instate = XML_PARSER_MISC;
8985: #ifdef DEBUG_PUSH
8986: fprintf(stderr, "PP: entering MISC\n");
8987: #endif
8988: } else {
8989: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 daniel 8990: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8991: (!ctxt->disableSAX))
1.140 daniel 8992: ctxt->sax->startDocument(ctxt->userData);
8993: ctxt->instate = XML_PARSER_MISC;
8994: #ifdef DEBUG_PUSH
8995: fprintf(stderr, "PP: entering MISC\n");
8996: #endif
8997: }
8998: } else {
8999: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9000: ctxt->sax->setDocumentLocator(ctxt->userData,
9001: &xmlDefaultSAXLocator);
9002: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 daniel 9003: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9004: (!ctxt->disableSAX))
1.140 daniel 9005: ctxt->sax->startDocument(ctxt->userData);
9006: ctxt->instate = XML_PARSER_MISC;
9007: #ifdef DEBUG_PUSH
9008: fprintf(stderr, "PP: entering MISC\n");
9009: #endif
9010: }
9011: break;
9012: case XML_PARSER_MISC:
9013: SKIP_BLANKS;
1.184 daniel 9014: if (ctxt->input->buf == NULL)
9015: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9016: else
1.184 daniel 9017: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9018: if (avail < 2)
9019: goto done;
1.184 daniel 9020: cur = ctxt->input->cur[0];
9021: next = ctxt->input->cur[1];
1.140 daniel 9022: if ((cur == '<') && (next == '?')) {
1.143 daniel 9023: if ((!terminate) &&
9024: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 9025: goto done;
9026: #ifdef DEBUG_PUSH
9027: fprintf(stderr, "PP: Parsing PI\n");
9028: #endif
9029: xmlParsePI(ctxt);
9030: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 9031: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 9032: if ((!terminate) &&
9033: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 9034: goto done;
9035: #ifdef DEBUG_PUSH
9036: fprintf(stderr, "PP: Parsing Comment\n");
9037: #endif
9038: xmlParseComment(ctxt);
9039: ctxt->instate = XML_PARSER_MISC;
9040: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 9041: (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
9042: (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
9043: (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
9044: (ctxt->input->cur[8] == 'E')) {
1.143 daniel 9045: if ((!terminate) &&
9046: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 9047: goto done;
9048: #ifdef DEBUG_PUSH
9049: fprintf(stderr, "PP: Parsing internal subset\n");
9050: #endif
1.166 daniel 9051: ctxt->inSubset = 1;
1.140 daniel 9052: xmlParseDocTypeDecl(ctxt);
1.152 daniel 9053: if (RAW == '[') {
1.140 daniel 9054: ctxt->instate = XML_PARSER_DTD;
9055: #ifdef DEBUG_PUSH
9056: fprintf(stderr, "PP: entering DTD\n");
9057: #endif
9058: } else {
1.166 daniel 9059: /*
9060: * Create and update the external subset.
9061: */
9062: ctxt->inSubset = 2;
1.171 daniel 9063: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 9064: (ctxt->sax->externalSubset != NULL))
9065: ctxt->sax->externalSubset(ctxt->userData,
9066: ctxt->intSubName, ctxt->extSubSystem,
9067: ctxt->extSubURI);
9068: ctxt->inSubset = 0;
1.140 daniel 9069: ctxt->instate = XML_PARSER_PROLOG;
9070: #ifdef DEBUG_PUSH
9071: fprintf(stderr, "PP: entering PROLOG\n");
9072: #endif
9073: }
9074: } else if ((cur == '<') && (next == '!') &&
9075: (avail < 9)) {
9076: goto done;
9077: } else {
9078: ctxt->instate = XML_PARSER_START_TAG;
9079: #ifdef DEBUG_PUSH
9080: fprintf(stderr, "PP: entering START_TAG\n");
9081: #endif
9082: }
9083: break;
1.128 daniel 9084: case XML_PARSER_PROLOG:
1.140 daniel 9085: SKIP_BLANKS;
1.184 daniel 9086: if (ctxt->input->buf == NULL)
9087: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9088: else
1.184 daniel 9089: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9090: if (avail < 2)
9091: goto done;
1.184 daniel 9092: cur = ctxt->input->cur[0];
9093: next = ctxt->input->cur[1];
1.140 daniel 9094: if ((cur == '<') && (next == '?')) {
1.143 daniel 9095: if ((!terminate) &&
9096: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 9097: goto done;
9098: #ifdef DEBUG_PUSH
9099: fprintf(stderr, "PP: Parsing PI\n");
9100: #endif
9101: xmlParsePI(ctxt);
9102: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 9103: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 9104: if ((!terminate) &&
9105: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 9106: goto done;
9107: #ifdef DEBUG_PUSH
9108: fprintf(stderr, "PP: Parsing Comment\n");
9109: #endif
9110: xmlParseComment(ctxt);
9111: ctxt->instate = XML_PARSER_PROLOG;
9112: } else if ((cur == '<') && (next == '!') &&
9113: (avail < 4)) {
9114: goto done;
9115: } else {
9116: ctxt->instate = XML_PARSER_START_TAG;
9117: #ifdef DEBUG_PUSH
9118: fprintf(stderr, "PP: entering START_TAG\n");
9119: #endif
9120: }
9121: break;
9122: case XML_PARSER_EPILOG:
9123: SKIP_BLANKS;
1.184 daniel 9124: if (ctxt->input->buf == NULL)
9125: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9126: else
1.184 daniel 9127: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9128: if (avail < 2)
9129: goto done;
1.184 daniel 9130: cur = ctxt->input->cur[0];
9131: next = ctxt->input->cur[1];
1.140 daniel 9132: if ((cur == '<') && (next == '?')) {
1.143 daniel 9133: if ((!terminate) &&
9134: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 9135: goto done;
9136: #ifdef DEBUG_PUSH
9137: fprintf(stderr, "PP: Parsing PI\n");
9138: #endif
9139: xmlParsePI(ctxt);
9140: ctxt->instate = XML_PARSER_EPILOG;
9141: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 9142: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 9143: if ((!terminate) &&
9144: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 9145: goto done;
9146: #ifdef DEBUG_PUSH
9147: fprintf(stderr, "PP: Parsing Comment\n");
9148: #endif
9149: xmlParseComment(ctxt);
9150: ctxt->instate = XML_PARSER_EPILOG;
9151: } else if ((cur == '<') && (next == '!') &&
9152: (avail < 4)) {
9153: goto done;
9154: } else {
9155: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9156: ctxt->sax->error(ctxt->userData,
9157: "Extra content at the end of the document\n");
9158: ctxt->wellFormed = 0;
1.180 daniel 9159: ctxt->disableSAX = 1;
1.140 daniel 9160: ctxt->errNo = XML_ERR_DOCUMENT_END;
9161: ctxt->instate = XML_PARSER_EOF;
9162: #ifdef DEBUG_PUSH
9163: fprintf(stderr, "PP: entering EOF\n");
9164: #endif
1.171 daniel 9165: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9166: (!ctxt->disableSAX))
1.140 daniel 9167: ctxt->sax->endDocument(ctxt->userData);
9168: goto done;
9169: }
9170: break;
9171: case XML_PARSER_START_TAG: {
9172: xmlChar *name, *oldname;
9173:
1.184 daniel 9174: if ((avail < 2) && (ctxt->inputNr == 1))
1.140 daniel 9175: goto done;
1.184 daniel 9176: cur = ctxt->input->cur[0];
1.140 daniel 9177: if (cur != '<') {
9178: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9179: ctxt->sax->error(ctxt->userData,
9180: "Start tag expect, '<' not found\n");
9181: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
9182: ctxt->wellFormed = 0;
1.180 daniel 9183: ctxt->disableSAX = 1;
1.140 daniel 9184: ctxt->instate = XML_PARSER_EOF;
9185: #ifdef DEBUG_PUSH
9186: fprintf(stderr, "PP: entering EOF\n");
9187: #endif
1.171 daniel 9188: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9189: (!ctxt->disableSAX))
1.140 daniel 9190: ctxt->sax->endDocument(ctxt->userData);
9191: goto done;
9192: }
1.143 daniel 9193: if ((!terminate) &&
9194: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 9195: goto done;
1.176 daniel 9196: if (ctxt->spaceNr == 0)
9197: spacePush(ctxt, -1);
9198: else
9199: spacePush(ctxt, *ctxt->space);
1.140 daniel 9200: name = xmlParseStartTag(ctxt);
9201: if (name == NULL) {
1.176 daniel 9202: spacePop(ctxt);
1.140 daniel 9203: ctxt->instate = XML_PARSER_EOF;
9204: #ifdef DEBUG_PUSH
9205: fprintf(stderr, "PP: entering EOF\n");
9206: #endif
1.171 daniel 9207: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9208: (!ctxt->disableSAX))
1.140 daniel 9209: ctxt->sax->endDocument(ctxt->userData);
9210: goto done;
9211: }
9212: namePush(ctxt, xmlStrdup(name));
9213:
9214: /*
9215: * [ VC: Root Element Type ]
9216: * The Name in the document type declaration must match
9217: * the element type of the root element.
9218: */
9219: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 9220: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.140 daniel 9221: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9222:
9223: /*
9224: * Check for an Empty Element.
9225: */
1.152 daniel 9226: if ((RAW == '/') && (NXT(1) == '>')) {
1.140 daniel 9227: SKIP(2);
1.171 daniel 9228: if ((ctxt->sax != NULL) &&
9229: (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
1.140 daniel 9230: ctxt->sax->endElement(ctxt->userData, name);
9231: xmlFree(name);
9232: oldname = namePop(ctxt);
1.176 daniel 9233: spacePop(ctxt);
1.140 daniel 9234: if (oldname != NULL) {
9235: #ifdef DEBUG_STACK
9236: fprintf(stderr,"Close: popped %s\n", oldname);
9237: #endif
9238: xmlFree(oldname);
9239: }
9240: if (ctxt->name == NULL) {
9241: ctxt->instate = XML_PARSER_EPILOG;
9242: #ifdef DEBUG_PUSH
9243: fprintf(stderr, "PP: entering EPILOG\n");
9244: #endif
9245: } else {
9246: ctxt->instate = XML_PARSER_CONTENT;
9247: #ifdef DEBUG_PUSH
9248: fprintf(stderr, "PP: entering CONTENT\n");
9249: #endif
9250: }
9251: break;
9252: }
1.152 daniel 9253: if (RAW == '>') {
1.140 daniel 9254: NEXT;
9255: } else {
9256: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9257: ctxt->sax->error(ctxt->userData,
9258: "Couldn't find end of Start Tag %s\n",
9259: name);
9260: ctxt->wellFormed = 0;
1.180 daniel 9261: ctxt->disableSAX = 1;
1.140 daniel 9262: ctxt->errNo = XML_ERR_GT_REQUIRED;
9263:
9264: /*
9265: * end of parsing of this node.
9266: */
9267: nodePop(ctxt);
9268: oldname = namePop(ctxt);
1.176 daniel 9269: spacePop(ctxt);
1.140 daniel 9270: if (oldname != NULL) {
9271: #ifdef DEBUG_STACK
9272: fprintf(stderr,"Close: popped %s\n", oldname);
9273: #endif
9274: xmlFree(oldname);
9275: }
9276: }
9277: xmlFree(name);
9278: ctxt->instate = XML_PARSER_CONTENT;
9279: #ifdef DEBUG_PUSH
9280: fprintf(stderr, "PP: entering CONTENT\n");
9281: #endif
9282: break;
9283: }
1.128 daniel 9284: case XML_PARSER_CONTENT:
1.140 daniel 9285: /*
9286: * Handle preparsed entities and charRef
9287: */
9288: if (ctxt->token != 0) {
9289: xmlChar cur[2] = { 0 , 0 } ;
9290:
9291: cur[0] = (xmlChar) ctxt->token;
1.171 daniel 9292: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9293: (ctxt->sax->characters != NULL))
1.140 daniel 9294: ctxt->sax->characters(ctxt->userData, cur, 1);
9295: ctxt->token = 0;
9296: }
1.184 daniel 9297: if ((avail < 2) && (ctxt->inputNr == 1))
1.140 daniel 9298: goto done;
1.184 daniel 9299: cur = ctxt->input->cur[0];
9300: next = ctxt->input->cur[1];
1.140 daniel 9301: if ((cur == '<') && (next == '?')) {
1.143 daniel 9302: if ((!terminate) &&
9303: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 9304: goto done;
9305: #ifdef DEBUG_PUSH
9306: fprintf(stderr, "PP: Parsing PI\n");
9307: #endif
9308: xmlParsePI(ctxt);
9309: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 9310: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 9311: if ((!terminate) &&
9312: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 9313: goto done;
9314: #ifdef DEBUG_PUSH
9315: fprintf(stderr, "PP: Parsing Comment\n");
9316: #endif
9317: xmlParseComment(ctxt);
9318: ctxt->instate = XML_PARSER_CONTENT;
1.184 daniel 9319: } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9320: (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
9321: (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
9322: (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
9323: (ctxt->input->cur[8] == '[')) {
1.140 daniel 9324: SKIP(9);
9325: ctxt->instate = XML_PARSER_CDATA_SECTION;
9326: #ifdef DEBUG_PUSH
9327: fprintf(stderr, "PP: entering CDATA_SECTION\n");
9328: #endif
9329: break;
9330: } else if ((cur == '<') && (next == '!') &&
9331: (avail < 9)) {
9332: goto done;
9333: } else if ((cur == '<') && (next == '/')) {
9334: ctxt->instate = XML_PARSER_END_TAG;
9335: #ifdef DEBUG_PUSH
9336: fprintf(stderr, "PP: entering END_TAG\n");
9337: #endif
9338: break;
9339: } else if (cur == '<') {
9340: ctxt->instate = XML_PARSER_START_TAG;
9341: #ifdef DEBUG_PUSH
9342: fprintf(stderr, "PP: entering START_TAG\n");
9343: #endif
9344: break;
9345: } else if (cur == '&') {
1.143 daniel 9346: if ((!terminate) &&
9347: (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
1.140 daniel 9348: goto done;
9349: #ifdef DEBUG_PUSH
9350: fprintf(stderr, "PP: Parsing Reference\n");
9351: #endif
9352: /* TODO: check generation of subtrees if noent !!! */
9353: xmlParseReference(ctxt);
9354: } else {
1.156 daniel 9355: /* TODO Avoid the extra copy, handle directly !!! */
1.140 daniel 9356: /*
1.181 daniel 9357: * Goal of the following test is:
1.140 daniel 9358: * - minimize calls to the SAX 'character' callback
9359: * when they are mergeable
9360: * - handle an problem for isBlank when we only parse
9361: * a sequence of blank chars and the next one is
9362: * not available to check against '<' presence.
9363: * - tries to homogenize the differences in SAX
9364: * callbacks beween the push and pull versions
9365: * of the parser.
9366: */
9367: if ((ctxt->inputNr == 1) &&
9368: (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
1.143 daniel 9369: if ((!terminate) &&
9370: (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
1.140 daniel 9371: goto done;
9372: }
9373: ctxt->checkIndex = 0;
9374: #ifdef DEBUG_PUSH
9375: fprintf(stderr, "PP: Parsing char data\n");
9376: #endif
9377: xmlParseCharData(ctxt, 0);
9378: }
9379: /*
9380: * Pop-up of finished entities.
9381: */
1.152 daniel 9382: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 9383: xmlPopInput(ctxt);
9384: break;
9385: case XML_PARSER_CDATA_SECTION: {
9386: /*
9387: * The Push mode need to have the SAX callback for
9388: * cdataBlock merge back contiguous callbacks.
9389: */
9390: int base;
9391:
9392: base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9393: if (base < 0) {
9394: if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
1.171 daniel 9395: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.140 daniel 9396: if (ctxt->sax->cdataBlock != NULL)
1.184 daniel 9397: ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
1.140 daniel 9398: XML_PARSER_BIG_BUFFER_SIZE);
9399: }
9400: SKIP(XML_PARSER_BIG_BUFFER_SIZE);
9401: ctxt->checkIndex = 0;
9402: }
9403: goto done;
9404: } else {
1.171 daniel 9405: if ((ctxt->sax != NULL) && (base > 0) &&
9406: (!ctxt->disableSAX)) {
1.140 daniel 9407: if (ctxt->sax->cdataBlock != NULL)
9408: ctxt->sax->cdataBlock(ctxt->userData,
1.184 daniel 9409: ctxt->input->cur, base);
1.140 daniel 9410: }
9411: SKIP(base + 3);
9412: ctxt->checkIndex = 0;
9413: ctxt->instate = XML_PARSER_CONTENT;
9414: #ifdef DEBUG_PUSH
9415: fprintf(stderr, "PP: entering CONTENT\n");
9416: #endif
9417: }
9418: break;
9419: }
1.141 daniel 9420: case XML_PARSER_END_TAG:
1.140 daniel 9421: if (avail < 2)
9422: goto done;
1.143 daniel 9423: if ((!terminate) &&
9424: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 9425: goto done;
9426: xmlParseEndTag(ctxt);
9427: if (ctxt->name == NULL) {
9428: ctxt->instate = XML_PARSER_EPILOG;
9429: #ifdef DEBUG_PUSH
9430: fprintf(stderr, "PP: entering EPILOG\n");
9431: #endif
9432: } else {
9433: ctxt->instate = XML_PARSER_CONTENT;
9434: #ifdef DEBUG_PUSH
9435: fprintf(stderr, "PP: entering CONTENT\n");
9436: #endif
9437: }
9438: break;
9439: case XML_PARSER_DTD: {
9440: /*
9441: * Sorry but progressive parsing of the internal subset
9442: * is not expected to be supported. We first check that
9443: * the full content of the internal subset is available and
9444: * the parsing is launched only at that point.
9445: * Internal subset ends up with "']' S? '>'" in an unescaped
9446: * section and not in a ']]>' sequence which are conditional
9447: * sections (whoever argued to keep that crap in XML deserve
9448: * a place in hell !).
9449: */
9450: int base, i;
9451: xmlChar *buf;
9452: xmlChar quote = 0;
9453:
1.184 daniel 9454: base = ctxt->input->cur - ctxt->input->base;
1.140 daniel 9455: if (base < 0) return(0);
9456: if (ctxt->checkIndex > base)
9457: base = ctxt->checkIndex;
1.184 daniel 9458: buf = ctxt->input->buf->buffer->content;
1.202 daniel 9459: for (;(unsigned int) base < ctxt->input->buf->buffer->use;
9460: base++) {
1.140 daniel 9461: if (quote != 0) {
9462: if (buf[base] == quote)
9463: quote = 0;
9464: continue;
9465: }
9466: if (buf[base] == '"') {
9467: quote = '"';
9468: continue;
9469: }
9470: if (buf[base] == '\'') {
9471: quote = '\'';
9472: continue;
9473: }
9474: if (buf[base] == ']') {
1.202 daniel 9475: if ((unsigned int) base +1 >=
9476: ctxt->input->buf->buffer->use)
1.140 daniel 9477: break;
9478: if (buf[base + 1] == ']') {
9479: /* conditional crap, skip both ']' ! */
9480: base++;
9481: continue;
9482: }
1.202 daniel 9483: for (i = 0;
9484: (unsigned int) base + i < ctxt->input->buf->buffer->use;
9485: i++) {
1.140 daniel 9486: if (buf[base + i] == '>')
9487: goto found_end_int_subset;
9488: }
9489: break;
9490: }
9491: }
9492: /*
9493: * We didn't found the end of the Internal subset
9494: */
9495: if (quote == 0)
9496: ctxt->checkIndex = base;
9497: #ifdef DEBUG_PUSH
9498: if (next == 0)
9499: fprintf(stderr, "PP: lookup of int subset end filed\n");
9500: #endif
9501: goto done;
9502:
9503: found_end_int_subset:
9504: xmlParseInternalSubset(ctxt);
1.166 daniel 9505: ctxt->inSubset = 2;
1.171 daniel 9506: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 9507: (ctxt->sax->externalSubset != NULL))
9508: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9509: ctxt->extSubSystem, ctxt->extSubURI);
9510: ctxt->inSubset = 0;
1.140 daniel 9511: ctxt->instate = XML_PARSER_PROLOG;
9512: ctxt->checkIndex = 0;
9513: #ifdef DEBUG_PUSH
9514: fprintf(stderr, "PP: entering PROLOG\n");
9515: #endif
9516: break;
9517: }
9518: case XML_PARSER_COMMENT:
9519: fprintf(stderr, "PP: internal error, state == COMMENT\n");
9520: ctxt->instate = XML_PARSER_CONTENT;
9521: #ifdef DEBUG_PUSH
9522: fprintf(stderr, "PP: entering CONTENT\n");
9523: #endif
9524: break;
9525: case XML_PARSER_PI:
9526: fprintf(stderr, "PP: internal error, state == PI\n");
9527: ctxt->instate = XML_PARSER_CONTENT;
9528: #ifdef DEBUG_PUSH
9529: fprintf(stderr, "PP: entering CONTENT\n");
9530: #endif
9531: break;
1.128 daniel 9532: case XML_PARSER_ENTITY_DECL:
1.140 daniel 9533: fprintf(stderr, "PP: internal error, state == ENTITY_DECL\n");
9534: ctxt->instate = XML_PARSER_DTD;
9535: #ifdef DEBUG_PUSH
9536: fprintf(stderr, "PP: entering DTD\n");
9537: #endif
9538: break;
1.128 daniel 9539: case XML_PARSER_ENTITY_VALUE:
1.140 daniel 9540: fprintf(stderr, "PP: internal error, state == ENTITY_VALUE\n");
9541: ctxt->instate = XML_PARSER_CONTENT;
9542: #ifdef DEBUG_PUSH
9543: fprintf(stderr, "PP: entering DTD\n");
9544: #endif
9545: break;
1.128 daniel 9546: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 9547: fprintf(stderr, "PP: internal error, state == ATTRIBUTE_VALUE\n");
1.168 daniel 9548: ctxt->instate = XML_PARSER_START_TAG;
9549: #ifdef DEBUG_PUSH
9550: fprintf(stderr, "PP: entering START_TAG\n");
9551: #endif
9552: break;
9553: case XML_PARSER_SYSTEM_LITERAL:
9554: fprintf(stderr, "PP: internal error, state == SYSTEM_LITERAL\n");
1.140 daniel 9555: ctxt->instate = XML_PARSER_START_TAG;
9556: #ifdef DEBUG_PUSH
9557: fprintf(stderr, "PP: entering START_TAG\n");
9558: #endif
9559: break;
1.128 daniel 9560: }
9561: }
1.140 daniel 9562: done:
9563: #ifdef DEBUG_PUSH
9564: fprintf(stderr, "PP: done %d\n", ret);
9565: #endif
1.128 daniel 9566: return(ret);
9567: }
9568:
9569: /**
1.143 daniel 9570: * xmlParseTry:
9571: * @ctxt: an XML parser context
9572: *
9573: * Try to progress on parsing
9574: *
9575: * Returns zero if no parsing was possible
9576: */
9577: int
9578: xmlParseTry(xmlParserCtxtPtr ctxt) {
9579: return(xmlParseTryOrFinish(ctxt, 0));
9580: }
9581:
9582: /**
1.128 daniel 9583: * xmlParseChunk:
9584: * @ctxt: an XML parser context
9585: * @chunk: an char array
9586: * @size: the size in byte of the chunk
9587: * @terminate: last chunk indicator
9588: *
9589: * Parse a Chunk of memory
9590: *
9591: * Returns zero if no error, the xmlParserErrors otherwise.
9592: */
1.140 daniel 9593: int
1.128 daniel 9594: xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
9595: int terminate) {
1.132 daniel 9596: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
1.140 daniel 9597: (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
9598: int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9599: int cur = ctxt->input->cur - ctxt->input->base;
9600:
1.132 daniel 9601: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
1.140 daniel 9602: ctxt->input->base = ctxt->input->buf->buffer->content + base;
9603: ctxt->input->cur = ctxt->input->base + cur;
9604: #ifdef DEBUG_PUSH
9605: fprintf(stderr, "PP: pushed %d\n", size);
9606: #endif
9607:
1.150 daniel 9608: if ((terminate) || (ctxt->input->buf->buffer->use > 80))
9609: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 9610: } else if (ctxt->instate != XML_PARSER_EOF)
1.143 daniel 9611: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 9612: if (terminate) {
1.151 daniel 9613: /*
9614: * Check for termination
9615: */
1.140 daniel 9616: if ((ctxt->instate != XML_PARSER_EOF) &&
9617: (ctxt->instate != XML_PARSER_EPILOG)) {
9618: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9619: ctxt->sax->error(ctxt->userData,
9620: "Extra content at the end of the document\n");
9621: ctxt->wellFormed = 0;
1.180 daniel 9622: ctxt->disableSAX = 1;
1.140 daniel 9623: ctxt->errNo = XML_ERR_DOCUMENT_END;
9624: }
9625: if (ctxt->instate != XML_PARSER_EOF) {
1.171 daniel 9626: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9627: (!ctxt->disableSAX))
1.140 daniel 9628: ctxt->sax->endDocument(ctxt->userData);
9629: }
9630: ctxt->instate = XML_PARSER_EOF;
1.128 daniel 9631: }
9632: return((xmlParserErrors) ctxt->errNo);
9633: }
9634:
9635: /************************************************************************
9636: * *
1.98 daniel 9637: * I/O front end functions to the parser *
9638: * *
9639: ************************************************************************/
1.201 daniel 9640:
9641: /**
9642: * xmlCreatePushParserCtxt:
9643: * @ctxt: an XML parser context
9644: *
9645: * Blocks further parser processing
9646: */
9647: void
9648: xmlStopParser(xmlParserCtxtPtr ctxt) {
9649: ctxt->instate = XML_PARSER_EOF;
9650: if (ctxt->input != NULL)
9651: ctxt->input->cur = BAD_CAST"";
9652: }
1.98 daniel 9653:
1.50 daniel 9654: /**
1.181 daniel 9655: * xmlCreatePushParserCtxt:
1.140 daniel 9656: * @sax: a SAX handler
9657: * @user_data: The user data returned on SAX callbacks
9658: * @chunk: a pointer to an array of chars
9659: * @size: number of chars in the array
9660: * @filename: an optional file name or URI
9661: *
9662: * Create a parser context for using the XML parser in push mode
9663: * To allow content encoding detection, @size should be >= 4
9664: * The value of @filename is used for fetching external entities
9665: * and error/warning reports.
9666: *
9667: * Returns the new parser context or NULL
9668: */
9669: xmlParserCtxtPtr
9670: xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9671: const char *chunk, int size, const char *filename) {
9672: xmlParserCtxtPtr ctxt;
9673: xmlParserInputPtr inputStream;
9674: xmlParserInputBufferPtr buf;
9675: xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9676:
9677: /*
1.156 daniel 9678: * plug some encoding conversion routines
1.140 daniel 9679: */
9680: if ((chunk != NULL) && (size >= 4))
1.156 daniel 9681: enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
1.140 daniel 9682:
9683: buf = xmlAllocParserInputBuffer(enc);
9684: if (buf == NULL) return(NULL);
9685:
9686: ctxt = xmlNewParserCtxt();
9687: if (ctxt == NULL) {
9688: xmlFree(buf);
9689: return(NULL);
9690: }
9691: if (sax != NULL) {
9692: if (ctxt->sax != &xmlDefaultSAXHandler)
9693: xmlFree(ctxt->sax);
9694: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9695: if (ctxt->sax == NULL) {
9696: xmlFree(buf);
9697: xmlFree(ctxt);
9698: return(NULL);
9699: }
9700: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9701: if (user_data != NULL)
9702: ctxt->userData = user_data;
9703: }
9704: if (filename == NULL) {
9705: ctxt->directory = NULL;
9706: } else {
9707: ctxt->directory = xmlParserGetDirectory(filename);
9708: }
9709:
9710: inputStream = xmlNewInputStream(ctxt);
9711: if (inputStream == NULL) {
9712: xmlFreeParserCtxt(ctxt);
9713: return(NULL);
9714: }
9715:
9716: if (filename == NULL)
9717: inputStream->filename = NULL;
9718: else
9719: inputStream->filename = xmlMemStrdup(filename);
9720: inputStream->buf = buf;
9721: inputStream->base = inputStream->buf->buffer->content;
9722: inputStream->cur = inputStream->buf->buffer->content;
1.156 daniel 9723: if (enc != XML_CHAR_ENCODING_NONE) {
9724: xmlSwitchEncoding(ctxt, enc);
9725: }
1.140 daniel 9726:
9727: inputPush(ctxt, inputStream);
9728:
9729: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9730: (ctxt->input->buf != NULL)) {
9731: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9732: #ifdef DEBUG_PUSH
9733: fprintf(stderr, "PP: pushed %d\n", size);
9734: #endif
9735: }
1.190 daniel 9736:
9737: return(ctxt);
9738: }
9739:
9740: /**
9741: * xmlCreateIOParserCtxt:
9742: * @sax: a SAX handler
9743: * @user_data: The user data returned on SAX callbacks
9744: * @ioread: an I/O read function
9745: * @ioclose: an I/O close function
9746: * @ioctx: an I/O handler
9747: * @enc: the charset encoding if known
9748: *
9749: * Create a parser context for using the XML parser with an existing
9750: * I/O stream
9751: *
9752: * Returns the new parser context or NULL
9753: */
9754: xmlParserCtxtPtr
9755: xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9756: xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9757: void *ioctx, xmlCharEncoding enc) {
9758: xmlParserCtxtPtr ctxt;
9759: xmlParserInputPtr inputStream;
9760: xmlParserInputBufferPtr buf;
9761:
9762: buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9763: if (buf == NULL) return(NULL);
9764:
9765: ctxt = xmlNewParserCtxt();
9766: if (ctxt == NULL) {
9767: xmlFree(buf);
9768: return(NULL);
9769: }
9770: if (sax != NULL) {
9771: if (ctxt->sax != &xmlDefaultSAXHandler)
9772: xmlFree(ctxt->sax);
9773: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9774: if (ctxt->sax == NULL) {
9775: xmlFree(buf);
9776: xmlFree(ctxt);
9777: return(NULL);
9778: }
9779: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9780: if (user_data != NULL)
9781: ctxt->userData = user_data;
9782: }
9783:
9784: inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9785: if (inputStream == NULL) {
9786: xmlFreeParserCtxt(ctxt);
9787: return(NULL);
9788: }
9789: inputPush(ctxt, inputStream);
1.140 daniel 9790:
9791: return(ctxt);
9792: }
9793:
9794: /**
1.181 daniel 9795: * xmlCreateDocParserCtxt:
1.123 daniel 9796: * @cur: a pointer to an array of xmlChar
1.50 daniel 9797: *
1.192 daniel 9798: * Creates a parser context for an XML in-memory document.
1.69 daniel 9799: *
9800: * Returns the new parser context or NULL
1.16 daniel 9801: */
1.69 daniel 9802: xmlParserCtxtPtr
1.123 daniel 9803: xmlCreateDocParserCtxt(xmlChar *cur) {
1.16 daniel 9804: xmlParserCtxtPtr ctxt;
1.40 daniel 9805: xmlParserInputPtr input;
1.16 daniel 9806:
1.97 daniel 9807: ctxt = xmlNewParserCtxt();
1.16 daniel 9808: if (ctxt == NULL) {
9809: return(NULL);
9810: }
1.96 daniel 9811: input = xmlNewInputStream(ctxt);
1.40 daniel 9812: if (input == NULL) {
1.97 daniel 9813: xmlFreeParserCtxt(ctxt);
1.40 daniel 9814: return(NULL);
9815: }
9816:
9817: input->base = cur;
9818: input->cur = cur;
9819:
9820: inputPush(ctxt, input);
1.69 daniel 9821: return(ctxt);
9822: }
9823:
9824: /**
1.181 daniel 9825: * xmlSAXParseDoc:
1.69 daniel 9826: * @sax: the SAX handler block
1.123 daniel 9827: * @cur: a pointer to an array of xmlChar
1.69 daniel 9828: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9829: * documents
9830: *
9831: * parse an XML in-memory document and build a tree.
9832: * It use the given SAX function block to handle the parsing callback.
9833: * If sax is NULL, fallback to the default DOM tree building routines.
9834: *
9835: * Returns the resulting document tree
9836: */
9837:
9838: xmlDocPtr
1.123 daniel 9839: xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
1.69 daniel 9840: xmlDocPtr ret;
9841: xmlParserCtxtPtr ctxt;
9842:
9843: if (cur == NULL) return(NULL);
1.16 daniel 9844:
9845:
1.69 daniel 9846: ctxt = xmlCreateDocParserCtxt(cur);
9847: if (ctxt == NULL) return(NULL);
1.74 daniel 9848: if (sax != NULL) {
9849: ctxt->sax = sax;
9850: ctxt->userData = NULL;
9851: }
1.69 daniel 9852:
1.16 daniel 9853: xmlParseDocument(ctxt);
1.72 daniel 9854: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 9855: else {
9856: ret = NULL;
1.72 daniel 9857: xmlFreeDoc(ctxt->myDoc);
9858: ctxt->myDoc = NULL;
1.59 daniel 9859: }
1.86 daniel 9860: if (sax != NULL)
9861: ctxt->sax = NULL;
1.69 daniel 9862: xmlFreeParserCtxt(ctxt);
1.16 daniel 9863:
1.1 veillard 9864: return(ret);
9865: }
9866:
1.50 daniel 9867: /**
1.181 daniel 9868: * xmlParseDoc:
1.123 daniel 9869: * @cur: a pointer to an array of xmlChar
1.55 daniel 9870: *
9871: * parse an XML in-memory document and build a tree.
9872: *
1.68 daniel 9873: * Returns the resulting document tree
1.55 daniel 9874: */
9875:
1.69 daniel 9876: xmlDocPtr
1.123 daniel 9877: xmlParseDoc(xmlChar *cur) {
1.59 daniel 9878: return(xmlSAXParseDoc(NULL, cur, 0));
1.76 daniel 9879: }
9880:
9881: /**
1.181 daniel 9882: * xmlSAXParseDTD:
1.76 daniel 9883: * @sax: the SAX handler block
9884: * @ExternalID: a NAME* containing the External ID of the DTD
9885: * @SystemID: a NAME* containing the URL to the DTD
9886: *
9887: * Load and parse an external subset.
9888: *
9889: * Returns the resulting xmlDtdPtr or NULL in case of error.
9890: */
9891:
9892: xmlDtdPtr
1.123 daniel 9893: xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9894: const xmlChar *SystemID) {
1.76 daniel 9895: xmlDtdPtr ret = NULL;
9896: xmlParserCtxtPtr ctxt;
1.83 daniel 9897: xmlParserInputPtr input = NULL;
1.76 daniel 9898: xmlCharEncoding enc;
9899:
9900: if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9901:
1.97 daniel 9902: ctxt = xmlNewParserCtxt();
1.76 daniel 9903: if (ctxt == NULL) {
9904: return(NULL);
9905: }
9906:
9907: /*
9908: * Set-up the SAX context
9909: */
9910: if (ctxt == NULL) return(NULL);
9911: if (sax != NULL) {
1.93 veillard 9912: if (ctxt->sax != NULL)
1.119 daniel 9913: xmlFree(ctxt->sax);
1.76 daniel 9914: ctxt->sax = sax;
9915: ctxt->userData = NULL;
9916: }
9917:
9918: /*
9919: * Ask the Entity resolver to load the damn thing
9920: */
9921:
9922: if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9923: input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9924: if (input == NULL) {
1.86 daniel 9925: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 9926: xmlFreeParserCtxt(ctxt);
9927: return(NULL);
9928: }
9929:
9930: /*
1.156 daniel 9931: * plug some encoding conversion routines here.
1.76 daniel 9932: */
9933: xmlPushInput(ctxt, input);
1.156 daniel 9934: enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
1.76 daniel 9935: xmlSwitchEncoding(ctxt, enc);
9936:
1.95 veillard 9937: if (input->filename == NULL)
1.156 daniel 9938: input->filename = (char *) xmlStrdup(SystemID);
1.76 daniel 9939: input->line = 1;
9940: input->col = 1;
9941: input->base = ctxt->input->cur;
9942: input->cur = ctxt->input->cur;
9943: input->free = NULL;
9944:
9945: /*
9946: * let's parse that entity knowing it's an external subset.
9947: */
1.191 daniel 9948: ctxt->inSubset = 2;
9949: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9950: ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9951: ExternalID, SystemID);
1.79 daniel 9952: xmlParseExternalSubset(ctxt, ExternalID, SystemID);
1.76 daniel 9953:
9954: if (ctxt->myDoc != NULL) {
9955: if (ctxt->wellFormed) {
1.191 daniel 9956: ret = ctxt->myDoc->extSubset;
9957: ctxt->myDoc->extSubset = NULL;
1.76 daniel 9958: } else {
9959: ret = NULL;
9960: }
9961: xmlFreeDoc(ctxt->myDoc);
9962: ctxt->myDoc = NULL;
9963: }
1.86 daniel 9964: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 9965: xmlFreeParserCtxt(ctxt);
9966:
9967: return(ret);
9968: }
9969:
9970: /**
1.181 daniel 9971: * xmlParseDTD:
1.76 daniel 9972: * @ExternalID: a NAME* containing the External ID of the DTD
9973: * @SystemID: a NAME* containing the URL to the DTD
9974: *
9975: * Load and parse an external subset.
9976: *
9977: * Returns the resulting xmlDtdPtr or NULL in case of error.
9978: */
9979:
9980: xmlDtdPtr
1.123 daniel 9981: xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
1.76 daniel 9982: return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
1.59 daniel 9983: }
9984:
9985: /**
1.181 daniel 9986: * xmlSAXParseBalancedChunk:
1.144 daniel 9987: * @ctx: an XML parser context (possibly NULL)
9988: * @sax: the SAX handler bloc (possibly NULL)
9989: * @user_data: The user data returned on SAX callbacks (possibly NULL)
9990: * @input: a parser input stream
9991: * @enc: the encoding
9992: *
9993: * Parse a well-balanced chunk of an XML document
9994: * The user has to provide SAX callback block whose routines will be
9995: * called by the parser
9996: * The allowed sequence for the Well Balanced Chunk is the one defined by
9997: * the content production in the XML grammar:
9998: *
9999: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10000: *
1.176 daniel 10001: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
1.144 daniel 10002: * the error code otherwise
10003: */
10004:
10005: int
10006: xmlSAXParseBalancedChunk(xmlParserCtxtPtr ctx, xmlSAXHandlerPtr sax,
10007: void *user_data, xmlParserInputPtr input,
10008: xmlCharEncoding enc) {
10009: xmlParserCtxtPtr ctxt;
10010: int ret;
10011:
10012: if (input == NULL) return(-1);
10013:
10014: if (ctx != NULL)
10015: ctxt = ctx;
10016: else {
10017: ctxt = xmlNewParserCtxt();
10018: if (ctxt == NULL)
10019: return(-1);
10020: if (sax == NULL)
10021: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10022: }
10023:
10024: /*
10025: * Set-up the SAX context
10026: */
10027: if (sax != NULL) {
10028: if (ctxt->sax != NULL)
10029: xmlFree(ctxt->sax);
10030: ctxt->sax = sax;
10031: ctxt->userData = user_data;
10032: }
10033:
10034: /*
10035: * plug some encoding conversion routines here.
10036: */
10037: xmlPushInput(ctxt, input);
10038: if (enc != XML_CHAR_ENCODING_NONE)
10039: xmlSwitchEncoding(ctxt, enc);
10040:
10041: /*
10042: * let's parse that entity knowing it's an external subset.
10043: */
10044: xmlParseContent(ctxt);
10045: ret = ctxt->errNo;
10046:
10047: if (ctx == NULL) {
10048: if (sax != NULL)
10049: ctxt->sax = NULL;
10050: else
10051: xmlFreeDoc(ctxt->myDoc);
10052: xmlFreeParserCtxt(ctxt);
10053: }
10054: return(ret);
10055: }
10056:
10057: /**
1.181 daniel 10058: * xmlParseExternalEntity:
10059: * @doc: the document the chunk pertains to
10060: * @sax: the SAX handler bloc (possibly NULL)
10061: * @user_data: The user data returned on SAX callbacks (possibly NULL)
1.185 daniel 10062: * @depth: Used for loop detection, use 0
1.181 daniel 10063: * @URL: the URL for the entity to load
10064: * @ID: the System ID for the entity to load
10065: * @list: the return value for the set of parsed nodes
10066: *
10067: * Parse an external general entity
10068: * An external general parsed entity is well-formed if it matches the
10069: * production labeled extParsedEnt.
10070: *
10071: * [78] extParsedEnt ::= TextDecl? content
10072: *
10073: * Returns 0 if the entity is well formed, -1 in case of args problem and
10074: * the parser error code otherwise
10075: */
10076:
10077: int
10078: xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
1.185 daniel 10079: int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
1.181 daniel 10080: xmlParserCtxtPtr ctxt;
10081: xmlDocPtr newDoc;
10082: xmlSAXHandlerPtr oldsax = NULL;
10083: int ret = 0;
10084:
1.185 daniel 10085: if (depth > 40) {
10086: return(XML_ERR_ENTITY_LOOP);
10087: }
10088:
10089:
1.181 daniel 10090:
10091: if (list != NULL)
10092: *list = NULL;
10093: if ((URL == NULL) && (ID == NULL))
10094: return(-1);
10095:
10096:
10097: ctxt = xmlCreateEntityParserCtxt(URL, ID, doc->URL);
10098: if (ctxt == NULL) return(-1);
10099: ctxt->userData = ctxt;
10100: if (sax != NULL) {
10101: oldsax = ctxt->sax;
10102: ctxt->sax = sax;
10103: if (user_data != NULL)
10104: ctxt->userData = user_data;
10105: }
10106: newDoc = xmlNewDoc(BAD_CAST "1.0");
10107: if (newDoc == NULL) {
10108: xmlFreeParserCtxt(ctxt);
10109: return(-1);
10110: }
10111: if (doc != NULL) {
10112: newDoc->intSubset = doc->intSubset;
10113: newDoc->extSubset = doc->extSubset;
10114: }
10115: if (doc->URL != NULL) {
10116: newDoc->URL = xmlStrdup(doc->URL);
10117: }
10118: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10119: if (newDoc->children == NULL) {
10120: if (sax != NULL)
10121: ctxt->sax = oldsax;
10122: xmlFreeParserCtxt(ctxt);
10123: newDoc->intSubset = NULL;
10124: newDoc->extSubset = NULL;
10125: xmlFreeDoc(newDoc);
10126: return(-1);
10127: }
10128: nodePush(ctxt, newDoc->children);
10129: if (doc == NULL) {
10130: ctxt->myDoc = newDoc;
10131: } else {
10132: ctxt->myDoc = doc;
10133: newDoc->children->doc = doc;
10134: }
10135:
10136: /*
10137: * Parse a possible text declaration first
10138: */
10139: GROW;
10140: if ((RAW == '<') && (NXT(1) == '?') &&
10141: (NXT(2) == 'x') && (NXT(3) == 'm') &&
10142: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
10143: xmlParseTextDecl(ctxt);
10144: }
10145:
10146: /*
10147: * Doing validity checking on chunk doesn't make sense
10148: */
10149: ctxt->instate = XML_PARSER_CONTENT;
10150: ctxt->validate = 0;
1.185 daniel 10151: ctxt->depth = depth;
1.181 daniel 10152:
10153: xmlParseContent(ctxt);
10154:
10155: if ((RAW == '<') && (NXT(1) == '/')) {
10156: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10157: ctxt->sax->error(ctxt->userData,
10158: "chunk is not well balanced\n");
10159: ctxt->wellFormed = 0;
10160: ctxt->disableSAX = 1;
10161: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10162: } else if (RAW != 0) {
10163: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10164: ctxt->sax->error(ctxt->userData,
10165: "extra content at the end of well balanced chunk\n");
10166: ctxt->wellFormed = 0;
10167: ctxt->disableSAX = 1;
10168: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10169: }
10170: if (ctxt->node != newDoc->children) {
10171: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10172: ctxt->sax->error(ctxt->userData,
10173: "chunk is not well balanced\n");
10174: ctxt->wellFormed = 0;
10175: ctxt->disableSAX = 1;
10176: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10177: }
10178:
10179: if (!ctxt->wellFormed) {
10180: if (ctxt->errNo == 0)
10181: ret = 1;
10182: else
10183: ret = ctxt->errNo;
10184: } else {
10185: if (list != NULL) {
10186: xmlNodePtr cur;
10187:
10188: /*
10189: * Return the newly created nodeset after unlinking it from
10190: * they pseudo parent.
10191: */
10192: cur = newDoc->children->children;
10193: *list = cur;
10194: while (cur != NULL) {
10195: cur->parent = NULL;
10196: cur = cur->next;
10197: }
10198: newDoc->children->children = NULL;
10199: }
10200: ret = 0;
10201: }
10202: if (sax != NULL)
10203: ctxt->sax = oldsax;
10204: xmlFreeParserCtxt(ctxt);
10205: newDoc->intSubset = NULL;
10206: newDoc->extSubset = NULL;
10207: xmlFreeDoc(newDoc);
10208:
10209: return(ret);
10210: }
10211:
10212: /**
10213: * xmlParseBalancedChunk:
1.176 daniel 10214: * @doc: the document the chunk pertains to
10215: * @sax: the SAX handler bloc (possibly NULL)
10216: * @user_data: The user data returned on SAX callbacks (possibly NULL)
1.185 daniel 10217: * @depth: Used for loop detection, use 0
1.176 daniel 10218: * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10219: * @list: the return value for the set of parsed nodes
10220: *
10221: * Parse a well-balanced chunk of an XML document
10222: * called by the parser
10223: * The allowed sequence for the Well Balanced Chunk is the one defined by
10224: * the content production in the XML grammar:
1.144 daniel 10225: *
1.175 daniel 10226: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10227: *
1.176 daniel 10228: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10229: * the parser error code otherwise
1.144 daniel 10230: */
10231:
1.175 daniel 10232: int
10233: xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
1.185 daniel 10234: void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
1.176 daniel 10235: xmlParserCtxtPtr ctxt;
1.175 daniel 10236: xmlDocPtr newDoc;
1.181 daniel 10237: xmlSAXHandlerPtr oldsax = NULL;
1.175 daniel 10238: int size;
1.176 daniel 10239: int ret = 0;
1.175 daniel 10240:
1.185 daniel 10241: if (depth > 40) {
10242: return(XML_ERR_ENTITY_LOOP);
10243: }
10244:
1.175 daniel 10245:
1.176 daniel 10246: if (list != NULL)
10247: *list = NULL;
10248: if (string == NULL)
10249: return(-1);
10250:
10251: size = xmlStrlen(string);
10252:
1.183 daniel 10253: ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
1.176 daniel 10254: if (ctxt == NULL) return(-1);
10255: ctxt->userData = ctxt;
1.175 daniel 10256: if (sax != NULL) {
1.176 daniel 10257: oldsax = ctxt->sax;
10258: ctxt->sax = sax;
10259: if (user_data != NULL)
10260: ctxt->userData = user_data;
1.175 daniel 10261: }
10262: newDoc = xmlNewDoc(BAD_CAST "1.0");
1.176 daniel 10263: if (newDoc == NULL) {
10264: xmlFreeParserCtxt(ctxt);
10265: return(-1);
10266: }
1.175 daniel 10267: if (doc != NULL) {
10268: newDoc->intSubset = doc->intSubset;
10269: newDoc->extSubset = doc->extSubset;
10270: }
1.176 daniel 10271: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10272: if (newDoc->children == NULL) {
10273: if (sax != NULL)
10274: ctxt->sax = oldsax;
10275: xmlFreeParserCtxt(ctxt);
10276: newDoc->intSubset = NULL;
10277: newDoc->extSubset = NULL;
10278: xmlFreeDoc(newDoc);
10279: return(-1);
10280: }
10281: nodePush(ctxt, newDoc->children);
10282: if (doc == NULL) {
10283: ctxt->myDoc = newDoc;
10284: } else {
10285: ctxt->myDoc = doc;
10286: newDoc->children->doc = doc;
10287: }
10288: ctxt->instate = XML_PARSER_CONTENT;
1.185 daniel 10289: ctxt->depth = depth;
1.176 daniel 10290:
10291: /*
10292: * Doing validity checking on chunk doesn't make sense
10293: */
10294: ctxt->validate = 0;
10295:
1.175 daniel 10296: xmlParseContent(ctxt);
1.176 daniel 10297:
10298: if ((RAW == '<') && (NXT(1) == '/')) {
10299: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10300: ctxt->sax->error(ctxt->userData,
10301: "chunk is not well balanced\n");
10302: ctxt->wellFormed = 0;
1.180 daniel 10303: ctxt->disableSAX = 1;
1.176 daniel 10304: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10305: } else if (RAW != 0) {
10306: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10307: ctxt->sax->error(ctxt->userData,
10308: "extra content at the end of well balanced chunk\n");
10309: ctxt->wellFormed = 0;
1.180 daniel 10310: ctxt->disableSAX = 1;
1.176 daniel 10311: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10312: }
10313: if (ctxt->node != newDoc->children) {
10314: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10315: ctxt->sax->error(ctxt->userData,
10316: "chunk is not well balanced\n");
10317: ctxt->wellFormed = 0;
1.180 daniel 10318: ctxt->disableSAX = 1;
1.176 daniel 10319: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10320: }
1.175 daniel 10321:
1.176 daniel 10322: if (!ctxt->wellFormed) {
10323: if (ctxt->errNo == 0)
10324: ret = 1;
10325: else
10326: ret = ctxt->errNo;
10327: } else {
10328: if (list != NULL) {
10329: xmlNodePtr cur;
1.175 daniel 10330:
1.176 daniel 10331: /*
10332: * Return the newly created nodeset after unlinking it from
10333: * they pseudo parent.
10334: */
10335: cur = newDoc->children->children;
10336: *list = cur;
10337: while (cur != NULL) {
10338: cur->parent = NULL;
10339: cur = cur->next;
10340: }
10341: newDoc->children->children = NULL;
10342: }
10343: ret = 0;
1.175 daniel 10344: }
1.176 daniel 10345: if (sax != NULL)
10346: ctxt->sax = oldsax;
1.175 daniel 10347: xmlFreeParserCtxt(ctxt);
10348: newDoc->intSubset = NULL;
10349: newDoc->extSubset = NULL;
1.176 daniel 10350: xmlFreeDoc(newDoc);
1.175 daniel 10351:
1.176 daniel 10352: return(ret);
1.144 daniel 10353: }
10354:
10355: /**
1.181 daniel 10356: * xmlParseBalancedChunkFile:
1.144 daniel 10357: * @doc: the document the chunk pertains to
10358: *
10359: * Parse a well-balanced chunk of an XML document contained in a file
10360: *
10361: * Returns the resulting list of nodes resulting from the parsing,
10362: * they are not added to @node
10363: */
10364:
10365: xmlNodePtr
10366: xmlParseBalancedChunkFile(xmlDocPtr doc, xmlNodePtr node) {
1.156 daniel 10367: /* TODO !!! */
10368: return(NULL);
1.144 daniel 10369: }
10370:
10371: /**
1.181 daniel 10372: * xmlRecoverDoc:
1.123 daniel 10373: * @cur: a pointer to an array of xmlChar
1.59 daniel 10374: *
10375: * parse an XML in-memory document and build a tree.
10376: * In the case the document is not Well Formed, a tree is built anyway
10377: *
1.68 daniel 10378: * Returns the resulting document tree
1.59 daniel 10379: */
10380:
1.69 daniel 10381: xmlDocPtr
1.123 daniel 10382: xmlRecoverDoc(xmlChar *cur) {
1.59 daniel 10383: return(xmlSAXParseDoc(NULL, cur, 1));
1.55 daniel 10384: }
10385:
10386: /**
1.181 daniel 10387: * xmlCreateEntityParserCtxt:
10388: * @URL: the entity URL
10389: * @ID: the entity PUBLIC ID
10390: * @base: a posible base for the target URI
10391: *
10392: * Create a parser context for an external entity
10393: * Automatic support for ZLIB/Compress compressed document is provided
10394: * by default if found at compile-time.
10395: *
10396: * Returns the new parser context or NULL
10397: */
10398: xmlParserCtxtPtr
10399: xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
10400: const xmlChar *base) {
10401: xmlParserCtxtPtr ctxt;
10402: xmlParserInputPtr inputStream;
10403: char *directory = NULL;
10404:
10405: ctxt = xmlNewParserCtxt();
10406: if (ctxt == NULL) {
10407: return(NULL);
10408: }
10409:
1.182 daniel 10410: inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
1.181 daniel 10411: if (inputStream == NULL) {
10412: xmlFreeParserCtxt(ctxt);
10413: return(NULL);
10414: }
10415:
10416: inputPush(ctxt, inputStream);
10417:
10418: if ((ctxt->directory == NULL) && (directory == NULL))
1.182 daniel 10419: directory = xmlParserGetDirectory((char *)URL);
1.181 daniel 10420: if ((ctxt->directory == NULL) && (directory != NULL))
10421: ctxt->directory = directory;
10422:
10423: return(ctxt);
10424: }
10425:
10426: /**
10427: * xmlCreateFileParserCtxt:
1.50 daniel 10428: * @filename: the filename
10429: *
1.69 daniel 10430: * Create a parser context for a file content.
10431: * Automatic support for ZLIB/Compress compressed document is provided
10432: * by default if found at compile-time.
1.50 daniel 10433: *
1.69 daniel 10434: * Returns the new parser context or NULL
1.9 httpng 10435: */
1.69 daniel 10436: xmlParserCtxtPtr
10437: xmlCreateFileParserCtxt(const char *filename)
10438: {
10439: xmlParserCtxtPtr ctxt;
1.40 daniel 10440: xmlParserInputPtr inputStream;
1.91 daniel 10441: xmlParserInputBufferPtr buf;
1.111 daniel 10442: char *directory = NULL;
1.9 httpng 10443:
1.91 daniel 10444: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
10445: if (buf == NULL) return(NULL);
1.9 httpng 10446:
1.97 daniel 10447: ctxt = xmlNewParserCtxt();
1.16 daniel 10448: if (ctxt == NULL) {
10449: return(NULL);
10450: }
1.97 daniel 10451:
1.96 daniel 10452: inputStream = xmlNewInputStream(ctxt);
1.40 daniel 10453: if (inputStream == NULL) {
1.97 daniel 10454: xmlFreeParserCtxt(ctxt);
1.40 daniel 10455: return(NULL);
10456: }
10457:
1.119 daniel 10458: inputStream->filename = xmlMemStrdup(filename);
1.91 daniel 10459: inputStream->buf = buf;
10460: inputStream->base = inputStream->buf->buffer->content;
10461: inputStream->cur = inputStream->buf->buffer->content;
1.16 daniel 10462:
1.40 daniel 10463: inputPush(ctxt, inputStream);
1.110 daniel 10464: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 10465: directory = xmlParserGetDirectory(filename);
10466: if ((ctxt->directory == NULL) && (directory != NULL))
1.110 daniel 10467: ctxt->directory = directory;
1.106 daniel 10468:
1.69 daniel 10469: return(ctxt);
10470: }
10471:
10472: /**
1.181 daniel 10473: * xmlSAXParseFile:
1.69 daniel 10474: * @sax: the SAX handler block
10475: * @filename: the filename
10476: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10477: * documents
10478: *
10479: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10480: * compressed document is provided by default if found at compile-time.
10481: * It use the given SAX function block to handle the parsing callback.
10482: * If sax is NULL, fallback to the default DOM tree building routines.
10483: *
10484: * Returns the resulting document tree
10485: */
10486:
1.79 daniel 10487: xmlDocPtr
10488: xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
1.69 daniel 10489: int recovery) {
10490: xmlDocPtr ret;
10491: xmlParserCtxtPtr ctxt;
1.111 daniel 10492: char *directory = NULL;
1.69 daniel 10493:
10494: ctxt = xmlCreateFileParserCtxt(filename);
10495: if (ctxt == NULL) return(NULL);
1.74 daniel 10496: if (sax != NULL) {
1.93 veillard 10497: if (ctxt->sax != NULL)
1.119 daniel 10498: xmlFree(ctxt->sax);
1.74 daniel 10499: ctxt->sax = sax;
10500: ctxt->userData = NULL;
10501: }
1.106 daniel 10502:
1.110 daniel 10503: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 10504: directory = xmlParserGetDirectory(filename);
10505: if ((ctxt->directory == NULL) && (directory != NULL))
1.156 daniel 10506: ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
1.16 daniel 10507:
10508: xmlParseDocument(ctxt);
1.40 daniel 10509:
1.72 daniel 10510: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 10511: else {
10512: ret = NULL;
1.72 daniel 10513: xmlFreeDoc(ctxt->myDoc);
10514: ctxt->myDoc = NULL;
1.59 daniel 10515: }
1.86 daniel 10516: if (sax != NULL)
10517: ctxt->sax = NULL;
1.69 daniel 10518: xmlFreeParserCtxt(ctxt);
1.20 daniel 10519:
10520: return(ret);
10521: }
10522:
1.55 daniel 10523: /**
1.181 daniel 10524: * xmlParseFile:
1.55 daniel 10525: * @filename: the filename
10526: *
10527: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10528: * compressed document is provided by default if found at compile-time.
10529: *
1.68 daniel 10530: * Returns the resulting document tree
1.55 daniel 10531: */
10532:
1.79 daniel 10533: xmlDocPtr
10534: xmlParseFile(const char *filename) {
1.59 daniel 10535: return(xmlSAXParseFile(NULL, filename, 0));
10536: }
10537:
10538: /**
1.181 daniel 10539: * xmlRecoverFile:
1.59 daniel 10540: * @filename: the filename
10541: *
10542: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10543: * compressed document is provided by default if found at compile-time.
10544: * In the case the document is not Well Formed, a tree is built anyway
10545: *
1.68 daniel 10546: * Returns the resulting document tree
1.59 daniel 10547: */
10548:
1.79 daniel 10549: xmlDocPtr
10550: xmlRecoverFile(const char *filename) {
1.59 daniel 10551: return(xmlSAXParseFile(NULL, filename, 1));
1.55 daniel 10552: }
1.32 daniel 10553:
1.50 daniel 10554: /**
1.181 daniel 10555: * xmlCreateMemoryParserCtxt:
10556: * @buffer: a pointer to a zero terminated char array
10557: * @size: the size of the array (without the trailing 0)
1.50 daniel 10558: *
1.69 daniel 10559: * Create a parser context for an XML in-memory document.
1.50 daniel 10560: *
1.69 daniel 10561: * Returns the new parser context or NULL
1.20 daniel 10562: */
1.69 daniel 10563: xmlParserCtxtPtr
10564: xmlCreateMemoryParserCtxt(char *buffer, int size) {
1.20 daniel 10565: xmlParserCtxtPtr ctxt;
1.40 daniel 10566: xmlParserInputPtr input;
10567:
1.179 daniel 10568: if (buffer[size] != 0)
1.181 daniel 10569: return(NULL);
1.40 daniel 10570:
1.97 daniel 10571: ctxt = xmlNewParserCtxt();
1.181 daniel 10572: if (ctxt == NULL)
1.20 daniel 10573: return(NULL);
1.97 daniel 10574:
1.96 daniel 10575: input = xmlNewInputStream(ctxt);
1.40 daniel 10576: if (input == NULL) {
1.97 daniel 10577: xmlFreeParserCtxt(ctxt);
1.40 daniel 10578: return(NULL);
10579: }
1.20 daniel 10580:
1.40 daniel 10581: input->filename = NULL;
10582: input->line = 1;
10583: input->col = 1;
1.96 daniel 10584: input->buf = NULL;
1.91 daniel 10585: input->consumed = 0;
1.75 daniel 10586:
1.116 daniel 10587: input->base = BAD_CAST buffer;
10588: input->cur = BAD_CAST buffer;
1.69 daniel 10589: input->free = NULL;
1.20 daniel 10590:
1.40 daniel 10591: inputPush(ctxt, input);
1.69 daniel 10592: return(ctxt);
10593: }
10594:
10595: /**
1.181 daniel 10596: * xmlSAXParseMemory:
1.69 daniel 10597: * @sax: the SAX handler block
10598: * @buffer: an pointer to a char array
1.127 daniel 10599: * @size: the size of the array
10600: * @recovery: work in recovery mode, i.e. tries to read not Well Formed
1.69 daniel 10601: * documents
10602: *
10603: * parse an XML in-memory block and use the given SAX function block
10604: * to handle the parsing callback. If sax is NULL, fallback to the default
10605: * DOM tree building routines.
10606: *
10607: * Returns the resulting document tree
10608: */
10609: xmlDocPtr
10610: xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
10611: xmlDocPtr ret;
10612: xmlParserCtxtPtr ctxt;
10613:
10614: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10615: if (ctxt == NULL) return(NULL);
1.74 daniel 10616: if (sax != NULL) {
10617: ctxt->sax = sax;
10618: ctxt->userData = NULL;
10619: }
1.20 daniel 10620:
10621: xmlParseDocument(ctxt);
1.40 daniel 10622:
1.72 daniel 10623: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 10624: else {
10625: ret = NULL;
1.72 daniel 10626: xmlFreeDoc(ctxt->myDoc);
10627: ctxt->myDoc = NULL;
1.59 daniel 10628: }
1.86 daniel 10629: if (sax != NULL)
10630: ctxt->sax = NULL;
1.69 daniel 10631: xmlFreeParserCtxt(ctxt);
1.16 daniel 10632:
1.9 httpng 10633: return(ret);
1.17 daniel 10634: }
10635:
1.55 daniel 10636: /**
1.181 daniel 10637: * xmlParseMemory:
1.68 daniel 10638: * @buffer: an pointer to a char array
1.55 daniel 10639: * @size: the size of the array
10640: *
10641: * parse an XML in-memory block and build a tree.
10642: *
1.68 daniel 10643: * Returns the resulting document tree
1.55 daniel 10644: */
10645:
10646: xmlDocPtr xmlParseMemory(char *buffer, int size) {
1.59 daniel 10647: return(xmlSAXParseMemory(NULL, buffer, size, 0));
10648: }
10649:
10650: /**
1.181 daniel 10651: * xmlRecoverMemory:
1.68 daniel 10652: * @buffer: an pointer to a char array
1.59 daniel 10653: * @size: the size of the array
10654: *
10655: * parse an XML in-memory block and build a tree.
10656: * In the case the document is not Well Formed, a tree is built anyway
10657: *
1.68 daniel 10658: * Returns the resulting document tree
1.59 daniel 10659: */
10660:
10661: xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
10662: return(xmlSAXParseMemory(NULL, buffer, size, 1));
1.17 daniel 10663: }
10664:
10665:
1.50 daniel 10666: /**
10667: * xmlSetupParserForBuffer:
10668: * @ctxt: an XML parser context
1.123 daniel 10669: * @buffer: a xmlChar * buffer
1.50 daniel 10670: * @filename: a file name
10671: *
1.19 daniel 10672: * Setup the parser context to parse a new buffer; Clears any prior
10673: * contents from the parser context. The buffer parameter must not be
10674: * NULL, but the filename parameter can be
10675: */
1.55 daniel 10676: void
1.123 daniel 10677: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
1.17 daniel 10678: const char* filename)
10679: {
1.96 daniel 10680: xmlParserInputPtr input;
1.40 daniel 10681:
1.96 daniel 10682: input = xmlNewInputStream(ctxt);
10683: if (input == NULL) {
10684: perror("malloc");
1.119 daniel 10685: xmlFree(ctxt);
1.145 daniel 10686: return;
1.96 daniel 10687: }
10688:
10689: xmlClearParserCtxt(ctxt);
10690: if (filename != NULL)
1.119 daniel 10691: input->filename = xmlMemStrdup(filename);
1.96 daniel 10692: input->base = buffer;
10693: input->cur = buffer;
10694: inputPush(ctxt, input);
1.17 daniel 10695: }
10696:
1.123 daniel 10697: /**
10698: * xmlSAXUserParseFile:
10699: * @sax: a SAX handler
10700: * @user_data: The user data returned on SAX callbacks
10701: * @filename: a file name
10702: *
10703: * parse an XML file and call the given SAX handler routines.
10704: * Automatic support for ZLIB/Compress compressed document is provided
10705: *
10706: * Returns 0 in case of success or a error number otherwise
10707: */
1.131 daniel 10708: int
10709: xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10710: const char *filename) {
1.123 daniel 10711: int ret = 0;
10712: xmlParserCtxtPtr ctxt;
10713:
10714: ctxt = xmlCreateFileParserCtxt(filename);
10715: if (ctxt == NULL) return -1;
1.134 daniel 10716: if (ctxt->sax != &xmlDefaultSAXHandler)
10717: xmlFree(ctxt->sax);
1.123 daniel 10718: ctxt->sax = sax;
1.140 daniel 10719: if (user_data != NULL)
10720: ctxt->userData = user_data;
1.123 daniel 10721:
10722: xmlParseDocument(ctxt);
10723:
10724: if (ctxt->wellFormed)
10725: ret = 0;
10726: else {
10727: if (ctxt->errNo != 0)
10728: ret = ctxt->errNo;
10729: else
10730: ret = -1;
10731: }
10732: if (sax != NULL)
10733: ctxt->sax = NULL;
10734: xmlFreeParserCtxt(ctxt);
10735:
10736: return ret;
10737: }
10738:
10739: /**
10740: * xmlSAXUserParseMemory:
10741: * @sax: a SAX handler
10742: * @user_data: The user data returned on SAX callbacks
10743: * @buffer: an in-memory XML document input
1.127 daniel 10744: * @size: the length of the XML document in bytes
1.123 daniel 10745: *
10746: * A better SAX parsing routine.
10747: * parse an XML in-memory buffer and call the given SAX handler routines.
10748: *
10749: * Returns 0 in case of success or a error number otherwise
10750: */
10751: int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
10752: char *buffer, int size) {
10753: int ret = 0;
10754: xmlParserCtxtPtr ctxt;
10755:
10756: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10757: if (ctxt == NULL) return -1;
10758: ctxt->sax = sax;
10759: ctxt->userData = user_data;
10760:
10761: xmlParseDocument(ctxt);
10762:
10763: if (ctxt->wellFormed)
10764: ret = 0;
10765: else {
10766: if (ctxt->errNo != 0)
10767: ret = ctxt->errNo;
10768: else
10769: ret = -1;
10770: }
10771: if (sax != NULL)
10772: ctxt->sax = NULL;
10773: xmlFreeParserCtxt(ctxt);
10774:
10775: return ret;
10776: }
10777:
1.32 daniel 10778:
1.98 daniel 10779: /************************************************************************
10780: * *
1.127 daniel 10781: * Miscellaneous *
1.98 daniel 10782: * *
10783: ************************************************************************/
10784:
/**
 * xmlCleanupParser:
 *
 * Cleanup function for the XML parser. It tries to reclaim all
 * parsing related global memory allocated for the parser processing:
 * the character encoding handlers and the predefined entities.
 * It doesn't deallocate any document related memory. Calling this
 * function should not prevent reusing the parser.
 */

void
xmlCleanupParser(void)
{
    /* encoding handlers first, then the predefined entities */
    xmlCleanupCharEncodingHandlers();
    xmlCleanupPredefinedEntities();
}
1.98 daniel 10799:
1.50 daniel 10800: /**
10801: * xmlParserFindNodeInfo:
10802: * @ctxt: an XML parser context
10803: * @node: an XML node within the tree
10804: *
10805: * Find the parser node info struct for a given node
10806: *
1.68 daniel 10807: * Returns an xmlParserNodeInfo block pointer or NULL
1.32 daniel 10808: */
10809: const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
10810: const xmlNode* node)
10811: {
10812: unsigned long pos;
10813:
10814: /* Find position where node should be at */
10815: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
10816: if ( ctx->node_seq.buffer[pos].node == node )
10817: return &ctx->node_seq.buffer[pos];
10818: else
10819: return NULL;
10820: }
10821:
10822:
1.50 daniel 10823: /**
1.181 daniel 10824: * xmlInitNodeInfoSeq:
1.50 daniel 10825: * @seq: a node info sequence pointer
10826: *
10827: * -- Initialize (set to initial state) node info sequence
1.32 daniel 10828: */
1.55 daniel 10829: void
10830: xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 10831: {
10832: seq->length = 0;
10833: seq->maximum = 0;
10834: seq->buffer = NULL;
10835: }
10836:
1.50 daniel 10837: /**
1.181 daniel 10838: * xmlClearNodeInfoSeq:
1.50 daniel 10839: * @seq: a node info sequence pointer
10840: *
10841: * -- Clear (release memory and reinitialize) node
1.32 daniel 10842: * info sequence
10843: */
1.55 daniel 10844: void
10845: xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 10846: {
10847: if ( seq->buffer != NULL )
1.119 daniel 10848: xmlFree(seq->buffer);
1.32 daniel 10849: xmlInitNodeInfoSeq(seq);
10850: }
10851:
10852:
1.50 daniel 10853: /**
10854: * xmlParserFindNodeInfoIndex:
10855: * @seq: a node info sequence pointer
10856: * @node: an XML node pointer
10857: *
10858: *
1.32 daniel 10859: * xmlParserFindNodeInfoIndex : Find the index that the info record for
10860: * the given node is or should be at in a sorted sequence
1.68 daniel 10861: *
10862: * Returns a long indicating the position of the record
1.32 daniel 10863: */
10864: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
10865: const xmlNode* node)
10866: {
10867: unsigned long upper, lower, middle;
10868: int found = 0;
10869:
10870: /* Do a binary search for the key */
10871: lower = 1;
10872: upper = seq->length;
10873: middle = 0;
10874: while ( lower <= upper && !found) {
10875: middle = lower + (upper - lower) / 2;
10876: if ( node == seq->buffer[middle - 1].node )
10877: found = 1;
10878: else if ( node < seq->buffer[middle - 1].node )
10879: upper = middle - 1;
10880: else
10881: lower = middle + 1;
10882: }
10883:
10884: /* Return position */
10885: if ( middle == 0 || seq->buffer[middle - 1].node < node )
10886: return middle;
10887: else
10888: return middle - 1;
10889: }
10890:
10891:
1.50 daniel 10892: /**
10893: * xmlParserAddNodeInfo:
10894: * @ctxt: an XML parser context
1.68 daniel 10895: * @info: a node info sequence pointer
1.50 daniel 10896: *
10897: * Insert node info record into the sorted sequence
1.32 daniel 10898: */
1.55 daniel 10899: void
10900: xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
1.68 daniel 10901: const xmlParserNodeInfo* info)
1.32 daniel 10902: {
10903: unsigned long pos;
10904: static unsigned int block_size = 5;
10905:
10906: /* Find pos and check to see if node is already in the sequence */
1.55 daniel 10907: pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
10908: if ( pos < ctxt->node_seq.length
10909: && ctxt->node_seq.buffer[pos].node == info->node ) {
10910: ctxt->node_seq.buffer[pos] = *info;
1.32 daniel 10911: }
10912:
10913: /* Otherwise, we need to add new node to buffer */
10914: else {
10915: /* Expand buffer by 5 if needed */
1.55 daniel 10916: if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
1.32 daniel 10917: xmlParserNodeInfo* tmp_buffer;
1.55 daniel 10918: unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
10919: *(ctxt->node_seq.maximum + block_size));
1.32 daniel 10920:
1.55 daniel 10921: if ( ctxt->node_seq.buffer == NULL )
1.119 daniel 10922: tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
1.32 daniel 10923: else
1.119 daniel 10924: tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
1.32 daniel 10925:
10926: if ( tmp_buffer == NULL ) {
1.55 daniel 10927: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 10928: ctxt->sax->error(ctxt->userData, "Out of memory\n");
1.123 daniel 10929: ctxt->errNo = XML_ERR_NO_MEMORY;
1.32 daniel 10930: return;
10931: }
1.55 daniel 10932: ctxt->node_seq.buffer = tmp_buffer;
10933: ctxt->node_seq.maximum += block_size;
1.32 daniel 10934: }
10935:
10936: /* If position is not at end, move elements out of the way */
1.55 daniel 10937: if ( pos != ctxt->node_seq.length ) {
1.32 daniel 10938: unsigned long i;
10939:
1.55 daniel 10940: for ( i = ctxt->node_seq.length; i > pos; i-- )
10941: ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
1.32 daniel 10942: }
10943:
10944: /* Copy element and increase length */
1.55 daniel 10945: ctxt->node_seq.buffer[pos] = *info;
10946: ctxt->node_seq.length++;
1.32 daniel 10947: }
10948: }
1.77 daniel 10949:
1.98 daniel 10950:
10951: /**
1.181 daniel 10952: * xmlSubstituteEntitiesDefault:
1.98 daniel 10953: * @val: int 0 or 1
10954: *
10955: * Set and return the previous value for default entity support.
10956: * Initially the parser always keep entity references instead of substituting
10957: * entity values in the output. This function has to be used to change the
10958: * default parser behaviour
10959: * SAX::subtituteEntities() has to be used for changing that on a file by
10960: * file basis.
10961: *
10962: * Returns the last value for 0 for no substitution, 1 for substitution.
10963: */
10964:
10965: int
10966: xmlSubstituteEntitiesDefault(int val) {
10967: int old = xmlSubstituteEntitiesDefaultValue;
10968:
10969: xmlSubstituteEntitiesDefaultValue = val;
1.180 daniel 10970: return(old);
10971: }
10972:
10973: /**
10974: * xmlKeepBlanksDefault:
10975: * @val: int 0 or 1
10976: *
10977: * Set and return the previous value for default blanks text nodes support.
10978: * The 1.x version of the parser used an heuristic to try to detect
10979: * ignorable white spaces. As a result the SAX callback was generating
10980: * ignorableWhitespace() callbacks instead of characters() one, and when
10981: * using the DOM output text nodes containing those blanks were not generated.
10982: * The 2.x and later version will switch to the XML standard way and
10983: * ignorableWhitespace() are only generated when running the parser in
10984: * validating mode and when the current element doesn't allow CDATA or
10985: * mixed content.
10986: * This function is provided as a way to force the standard behaviour
10987: * on 1.X libs and to switch back to the old mode for compatibility when
10988: * running 1.X client code on 2.X . Upgrade of 1.X code should be done
10989: * by using xmlIsBlankNode() commodity function to detect the "empty"
10990: * nodes generated.
10991: * This value also affect autogeneration of indentation when saving code
10992: * if blanks sections are kept, indentation is not generated.
10993: *
10994: * Returns the last value for 0 for no substitution, 1 for substitution.
10995: */
10996:
10997: int
10998: xmlKeepBlanksDefault(int val) {
10999: int old = xmlKeepBlanksDefaultValue;
11000:
11001: xmlKeepBlanksDefaultValue = val;
11002: xmlIndentTreeOutput = !val;
1.98 daniel 11003: return(old);
11004: }
1.77 daniel 11005:
Webmaster