Annotation of XML/parser.c, revision 1.200
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.15 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.60 daniel 6: * Daniel.Veillard@w3.org
1.1 veillard 7: */
8:
1.26 daniel 9: #ifdef WIN32
1.138 daniel 10: #include "win32config.h"
1.26 daniel 11: #else
1.121 daniel 12: #include "config.h"
1.26 daniel 13: #endif
1.121 daniel 14:
1.1 veillard 15: #include <stdio.h>
1.121 daniel 16: #include <string.h> /* for memset() only */
17: #ifdef HAVE_CTYPE_H
1.1 veillard 18: #include <ctype.h>
1.121 daniel 19: #endif
20: #ifdef HAVE_STDLIB_H
1.50 daniel 21: #include <stdlib.h>
1.121 daniel 22: #endif
23: #ifdef HAVE_SYS_STAT_H
1.9 httpng 24: #include <sys/stat.h>
1.121 daniel 25: #endif
1.9 httpng 26: #ifdef HAVE_FCNTL_H
27: #include <fcntl.h>
28: #endif
1.10 httpng 29: #ifdef HAVE_UNISTD_H
30: #include <unistd.h>
31: #endif
1.20 daniel 32: #ifdef HAVE_ZLIB_H
33: #include <zlib.h>
34: #endif
1.1 veillard 35:
1.188 daniel 36: #include <libxml/xmlmemory.h>
37: #include <libxml/tree.h>
38: #include <libxml/parser.h>
39: #include <libxml/entities.h>
40: #include <libxml/encoding.h>
41: #include <libxml/valid.h>
42: #include <libxml/parserInternals.h>
43: #include <libxml/xmlIO.h>
1.193 daniel 44: #include <libxml/uri.h>
1.122 daniel 45: #include "xml-error.h"
1.1 veillard 46:
1.140 daniel 47: #define XML_PARSER_BIG_BUFFER_SIZE 1000
48: #define XML_PARSER_BUFFER_SIZE 100
49:
1.160 daniel 50: int xmlGetWarningsDefaultValue = 1;
1.86 daniel 51:
1.139 daniel 52: /*
53: * List of XML prefixed PI allowed by W3C specs
54: */
55:
56: const char *xmlW3CPIs[] = {
57: "xml-stylesheet",
58: NULL
59: };
1.91 daniel 60:
1.151 daniel 61: void xmlParserHandleReference(xmlParserCtxtPtr ctxt);
62: void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
63: xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
64: const xmlChar **str);
1.200 ! daniel 65:
! 66: /*
! 67: * Version handling
! 68: */
! 69: const char *xmlParserVersion = LIBXML_VERSION_STRING;
! 70:
! 71: /*
! 72: * xmlCheckVersion:
! 73: * @version: the include version number
! 74: *
! 75: * check the compiled lib version against the include one.
! 76: * This can warn or immediately kill the application
! 77: */
! 78: void
! 79: xmlCheckVersion(int version) {
! 80: int myversion = LIBXML_VERSION;
! 81:
! 82: if ((myversion / 10000) != (version / 10000)) {
! 83: fprintf(stderr,
! 84: "Fatal: program compiled against libxml %d using libxml %d\n",
! 85: (version / 10000), (myversion / 10000));
! 86: exit(1);
! 87: }
! 88: if ((myversion / 100) < (version / 100)) {
! 89: fprintf(stderr,
! 90: "Warning: program compiled against libxml %d using older %d\n",
! 91: (version / 100), (myversion / 100));
! 92: }
! 93: }
! 94:
! 95:
1.91 daniel 96: /************************************************************************
97: * *
98: * Input handling functions for progressive parsing *
99: * *
100: ************************************************************************/
101:
102: /* #define DEBUG_INPUT */
1.140 daniel 103: /* #define DEBUG_STACK */
104: /* #define DEBUG_PUSH */
105:
1.91 daniel 106:
1.110 daniel 107: #define INPUT_CHUNK 250
108: /* we need to keep enough input to show errors in context */
109: #define LINE_LEN 80
1.91 daniel 110:
111: #ifdef DEBUG_INPUT
112: #define CHECK_BUFFER(in) check_buffer(in)
113:
114: void check_buffer(xmlParserInputPtr in) {
115: if (in->base != in->buf->buffer->content) {
116: fprintf(stderr, "xmlParserInput: base mismatch problem\n");
117: }
118: if (in->cur < in->base) {
119: fprintf(stderr, "xmlParserInput: cur < base problem\n");
120: }
121: if (in->cur > in->base + in->buf->buffer->use) {
122: fprintf(stderr, "xmlParserInput: cur > base + use problem\n");
123: }
124: fprintf(stderr,"buffer %x : content %x, cur %d, use %d, size %d\n",
125: (int) in, (int) in->buf->buffer->content, in->cur - in->base,
126: in->buf->buffer->use, in->buf->buffer->size);
127: }
128:
1.110 daniel 129: #else
130: #define CHECK_BUFFER(in)
131: #endif
132:
1.91 daniel 133:
134: /**
135: * xmlParserInputRead:
136: * @in: an XML parser input
137: * @len: an indicative size for the lookahead
138: *
139: * This function refresh the input for the parser. It doesn't try to
140: * preserve pointers to the input buffer, and discard already read data
141: *
1.123 daniel 142: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
1.91 daniel 143: * end of this entity
144: */
145: int
146: xmlParserInputRead(xmlParserInputPtr in, int len) {
147: int ret;
148: int used;
149: int index;
150:
151: #ifdef DEBUG_INPUT
152: fprintf(stderr, "Read\n");
153: #endif
154: if (in->buf == NULL) return(-1);
155: if (in->base == NULL) return(-1);
156: if (in->cur == NULL) return(-1);
157: if (in->buf->buffer == NULL) return(-1);
158:
159: CHECK_BUFFER(in);
160:
161: used = in->cur - in->buf->buffer->content;
162: ret = xmlBufferShrink(in->buf->buffer, used);
163: if (ret > 0) {
164: in->cur -= ret;
165: in->consumed += ret;
166: }
167: ret = xmlParserInputBufferRead(in->buf, len);
168: if (in->base != in->buf->buffer->content) {
169: /*
170: * the buffer has been realloced
171: */
172: index = in->cur - in->base;
173: in->base = in->buf->buffer->content;
174: in->cur = &in->buf->buffer->content[index];
175: }
176:
177: CHECK_BUFFER(in);
178:
179: return(ret);
180: }
181:
182: /**
183: * xmlParserInputGrow:
184: * @in: an XML parser input
185: * @len: an indicative size for the lookahead
186: *
187: * This function increase the input for the parser. It tries to
188: * preserve pointers to the input buffer, and keep already read data
189: *
1.123 daniel 190: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
1.91 daniel 191: * end of this entity
192: */
193: int
194: xmlParserInputGrow(xmlParserInputPtr in, int len) {
195: int ret;
196: int index;
197:
198: #ifdef DEBUG_INPUT
199: fprintf(stderr, "Grow\n");
200: #endif
201: if (in->buf == NULL) return(-1);
202: if (in->base == NULL) return(-1);
203: if (in->cur == NULL) return(-1);
204: if (in->buf->buffer == NULL) return(-1);
205:
206: CHECK_BUFFER(in);
207:
208: index = in->cur - in->base;
209: if (in->buf->buffer->use > index + INPUT_CHUNK) {
210:
211: CHECK_BUFFER(in);
212:
213: return(0);
214: }
1.189 daniel 215: if (in->buf->readcallback != NULL)
1.140 daniel 216: ret = xmlParserInputBufferGrow(in->buf, len);
217: else
218: return(0);
1.135 daniel 219:
220: /*
221: * NOTE : in->base may be a "dandling" i.e. freed pointer in this
222: * block, but we use it really as an integer to do some
223: * pointer arithmetic. Insure will raise it as a bug but in
224: * that specific case, that's not !
225: */
1.91 daniel 226: if (in->base != in->buf->buffer->content) {
227: /*
228: * the buffer has been realloced
229: */
230: index = in->cur - in->base;
231: in->base = in->buf->buffer->content;
232: in->cur = &in->buf->buffer->content[index];
233: }
234:
235: CHECK_BUFFER(in);
236:
237: return(ret);
238: }
239:
240: /**
241: * xmlParserInputShrink:
242: * @in: an XML parser input
243: *
244: * This function removes used input for the parser.
245: */
246: void
247: xmlParserInputShrink(xmlParserInputPtr in) {
248: int used;
249: int ret;
250: int index;
251:
252: #ifdef DEBUG_INPUT
253: fprintf(stderr, "Shrink\n");
254: #endif
255: if (in->buf == NULL) return;
256: if (in->base == NULL) return;
257: if (in->cur == NULL) return;
258: if (in->buf->buffer == NULL) return;
259:
260: CHECK_BUFFER(in);
261:
262: used = in->cur - in->buf->buffer->content;
263: if (used > INPUT_CHUNK) {
1.110 daniel 264: ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1.91 daniel 265: if (ret > 0) {
266: in->cur -= ret;
267: in->consumed += ret;
268: }
269: }
270:
271: CHECK_BUFFER(in);
272:
273: if (in->buf->buffer->use > INPUT_CHUNK) {
274: return;
275: }
276: xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
277: if (in->base != in->buf->buffer->content) {
278: /*
279: * the buffer has been realloced
280: */
281: index = in->cur - in->base;
282: in->base = in->buf->buffer->content;
283: in->cur = &in->buf->buffer->content[index];
284: }
285:
286: CHECK_BUFFER(in);
287: }
288:
1.45 daniel 289: /************************************************************************
290: * *
291: * Parser stacks related functions and macros *
292: * *
293: ************************************************************************/
1.79 daniel 294:
295: int xmlSubstituteEntitiesDefaultValue = 0;
1.100 daniel 296: int xmlDoValidityCheckingDefaultValue = 0;
1.180 daniel 297: int xmlKeepBlanksDefaultValue = 1;
1.135 daniel 298: xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
299: const xmlChar ** str);
1.79 daniel 300:
1.1 veillard 301: /*
1.40 daniel 302: * Generic function for accessing stacks in the Parser Context
1.1 veillard 303: */
304:
1.140 daniel 305: #define PUSH_AND_POP(scope, type, name) \
306: scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
1.31 daniel 307: if (ctxt->name##Nr >= ctxt->name##Max) { \
308: ctxt->name##Max *= 2; \
1.119 daniel 309: ctxt->name##Tab = (void *) xmlRealloc(ctxt->name##Tab, \
1.40 daniel 310: ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
311: if (ctxt->name##Tab == NULL) { \
1.31 daniel 312: fprintf(stderr, "realloc failed !\n"); \
1.145 daniel 313: return(0); \
1.31 daniel 314: } \
315: } \
1.40 daniel 316: ctxt->name##Tab[ctxt->name##Nr] = value; \
317: ctxt->name = value; \
318: return(ctxt->name##Nr++); \
1.31 daniel 319: } \
1.140 daniel 320: scope type name##Pop(xmlParserCtxtPtr ctxt) { \
1.69 daniel 321: type ret; \
1.40 daniel 322: if (ctxt->name##Nr <= 0) return(0); \
323: ctxt->name##Nr--; \
1.50 daniel 324: if (ctxt->name##Nr > 0) \
325: ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
326: else \
327: ctxt->name = NULL; \
1.69 daniel 328: ret = ctxt->name##Tab[ctxt->name##Nr]; \
329: ctxt->name##Tab[ctxt->name##Nr] = 0; \
330: return(ret); \
1.31 daniel 331: } \
332:
1.140 daniel 333: PUSH_AND_POP(extern, xmlParserInputPtr, input)
334: PUSH_AND_POP(extern, xmlNodePtr, node)
335: PUSH_AND_POP(extern, xmlChar*, name)
1.40 daniel 336:
1.176 daniel 337: int spacePush(xmlParserCtxtPtr ctxt, int val) {
338: if (ctxt->spaceNr >= ctxt->spaceMax) {
339: ctxt->spaceMax *= 2;
340: ctxt->spaceTab = (void *) xmlRealloc(ctxt->spaceTab,
341: ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
342: if (ctxt->spaceTab == NULL) {
343: fprintf(stderr, "realloc failed !\n");
344: return(0);
345: }
346: }
347: ctxt->spaceTab[ctxt->spaceNr] = val;
348: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
349: return(ctxt->spaceNr++);
350: }
351:
352: int spacePop(xmlParserCtxtPtr ctxt) {
353: int ret;
354: if (ctxt->spaceNr <= 0) return(0);
355: ctxt->spaceNr--;
356: if (ctxt->spaceNr > 0)
357: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
358: else
359: ctxt->space = NULL;
360: ret = ctxt->spaceTab[ctxt->spaceNr];
361: ctxt->spaceTab[ctxt->spaceNr] = -1;
362: return(ret);
363: }
364:
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. They all expect a variable named ctxt (the parser
 * context) to be in scope.
 *
 * Dirty macros, i.e. one needs to make assumptions on the context to use
 * them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   RAW     returns the current xmlChar value, or -1 when a token is
 *           pending in ctxt->token.
 *   CUR     returns the pending ctxt->token if there is one, otherwise
 *           the current xmlChar value, i.e. an 8 bit value if compiled
 *           in ISO-Latin or UTF-8.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substrings.
 *   SKIP(n) Skip n xmlChars, and must also be used only to skip ASCII
 *           defined strings within the parser.
 *
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up finished entities on the fly.
 *   NEXTL(l) Skip l xmlChars, updating the line and column counters.
 *   COPY_BUF(l,b,i,v) append the char v of length l to buffer b at index i
 *           and advance i accordingly.
 *   CUR_CHAR(l) Return the current char as an int and store its length
 *           in l.
 *   CUR_SCHAR(s, l) Same as CUR_CHAR but reading from the string s.
 *
 * SKIP, SHRINK and GROW expand to several statements; they are wrapped in
 * do { } while (0) so that they behave as a single statement, e.g. as the
 * unbraced body of an if. They must be followed by a semicolon.
 *
 * NEXT, SKIP_BLANKS, CUR_CHAR, CUR_SCHAR, NEXTL and COPY_BUF keep the
 * trailing semicolon they always had in their expansion, since call sites
 * may rely on it; NEXTL and COPY_BUF are therefore still NOT safe as the
 * unbraced body of an if/else.
 */

#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)

#define SHRINK do {							\
    xmlParserInputShrink(ctxt->input);					\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)

#define GROW do {							\
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)

#define SKIP_BLANKS xmlSkipBlankChars(ctxt);

#define NEXT xmlNextChar(ctxt);

#define NEXTL(l)							\
    if (*(ctxt->input->cur) == '\n') {					\
        ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->token = 0; ctxt->input->cur += (l);				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l));
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l));

#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyChar((l),&(b)[(i)],(v));
1.151 daniel 432:
433: /**
434: * xmlNextChar:
435: * @ctxt: the XML parser context
436: *
437: * Skip to the next char input char.
438: */
1.55 daniel 439:
1.151 daniel 440: void
441: xmlNextChar(xmlParserCtxtPtr ctxt) {
1.176 daniel 442: /*
443: * TODO: 2.11 End-of-Line Handling
444: * the literal two-character sequence "#xD#xA" or a standalone
445: * literal #xD, an XML processor must pass to the application
446: * the single character #xA.
447: */
1.151 daniel 448: if (ctxt->token != 0) ctxt->token = 0;
449: else {
450: if ((*ctxt->input->cur == 0) &&
451: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
452: (ctxt->instate != XML_PARSER_COMMENT)) {
453: /*
454: * If we are at the end of the current entity and
455: * the context allows it, we pop consumed entities
456: * automatically.
457: * TODO: the auto closing should be blocked in other cases
458: */
459: xmlPopInput(ctxt);
460: } else {
461: if (*(ctxt->input->cur) == '\n') {
462: ctxt->input->line++; ctxt->input->col = 1;
463: } else ctxt->input->col++;
1.198 daniel 464: if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1.151 daniel 465: /*
466: * We are supposed to handle UTF8, check it's valid
467: * From rfc2044: encoding of the Unicode values on UTF-8:
468: *
469: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
470: * 0000 0000-0000 007F 0xxxxxxx
471: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
472: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
473: *
1.160 daniel 474: * Check for the 0x110000 limit too
1.151 daniel 475: */
476: const unsigned char *cur = ctxt->input->cur;
477: unsigned char c;
1.91 daniel 478:
1.151 daniel 479: c = *cur;
480: if (c & 0x80) {
481: if (cur[1] == 0)
482: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
483: if ((cur[1] & 0xc0) != 0x80)
484: goto encoding_error;
485: if ((c & 0xe0) == 0xe0) {
486: unsigned int val;
487:
488: if (cur[2] == 0)
489: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
490: if ((cur[2] & 0xc0) != 0x80)
491: goto encoding_error;
492: if ((c & 0xf0) == 0xf0) {
493: if (cur[3] == 0)
494: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
495: if (((c & 0xf8) != 0xf0) ||
496: ((cur[3] & 0xc0) != 0x80))
497: goto encoding_error;
498: /* 4-byte code */
499: ctxt->input->cur += 4;
500: val = (cur[0] & 0x7) << 18;
501: val |= (cur[1] & 0x3f) << 12;
502: val |= (cur[2] & 0x3f) << 6;
503: val |= cur[3] & 0x3f;
504: } else {
505: /* 3-byte code */
506: ctxt->input->cur += 3;
507: val = (cur[0] & 0xf) << 12;
508: val |= (cur[1] & 0x3f) << 6;
509: val |= cur[2] & 0x3f;
510: }
511: if (((val > 0xd7ff) && (val < 0xe000)) ||
512: ((val > 0xfffd) && (val < 0x10000)) ||
1.160 daniel 513: (val >= 0x110000)) {
1.151 daniel 514: if ((ctxt->sax != NULL) &&
515: (ctxt->sax->error != NULL))
516: ctxt->sax->error(ctxt->userData,
1.196 daniel 517: "Char 0x%X out of allowed range\n", val);
1.151 daniel 518: ctxt->errNo = XML_ERR_INVALID_ENCODING;
519: ctxt->wellFormed = 0;
1.180 daniel 520: ctxt->disableSAX = 1;
1.151 daniel 521: }
522: } else
523: /* 2-byte code */
524: ctxt->input->cur += 2;
525: } else
526: /* 1-byte code */
527: ctxt->input->cur++;
528: } else {
529: /*
530: * Assume it's a fixed lenght encoding (1) with
531: * a compatibke encoding for the ASCII set, since
532: * XML constructs only use < 128 chars
533: */
534: ctxt->input->cur++;
535: }
536: ctxt->nbChars++;
537: if (*ctxt->input->cur == 0)
538: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
539: }
540: }
1.154 daniel 541: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
542: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
1.168 daniel 543: if ((*ctxt->input->cur == 0) &&
544: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
545: xmlPopInput(ctxt);
1.151 daniel 546: return;
547: encoding_error:
548: /*
549: * If we detect an UTF8 error that probably mean that the
550: * input encoding didn't get properly advertized in the
551: * declaration header. Report the error and switch the encoding
552: * to ISO-Latin-1 (if you don't like this policy, just declare the
553: * encoding !)
554: */
1.198 daniel 555: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1.151 daniel 556: ctxt->sax->error(ctxt->userData,
557: "Input is not proper UTF-8, indicate encoding !\n");
1.198 daniel 558: ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
559: ctxt->input->cur[0], ctxt->input->cur[1],
560: ctxt->input->cur[2], ctxt->input->cur[3]);
561: }
1.151 daniel 562: ctxt->errNo = XML_ERR_INVALID_ENCODING;
563:
1.198 daniel 564: ctxt->charset = XML_CHAR_ENCODING_8859_1;
1.151 daniel 565: ctxt->input->cur++;
566: return;
567: }
1.42 daniel 568:
1.152 daniel 569: /**
570: * xmlCurrentChar:
571: * @ctxt: the XML parser context
572: * @len: pointer to the length of the char read
573: *
574: * The current char value, if using UTF-8 this may actaully span multiple
1.180 daniel 575: * bytes in the input buffer. Implement the end of line normalization:
576: * 2.11 End-of-Line Handling
577: * Wherever an external parsed entity or the literal entity value
578: * of an internal parsed entity contains either the literal two-character
579: * sequence "#xD#xA" or a standalone literal #xD, an XML processor
580: * must pass to the application the single character #xA.
581: * This behavior can conveniently be produced by normalizing all
582: * line breaks to #xA on input, before parsing.)
1.152 daniel 583: *
584: * Returns the current char value and its lenght
585: */
586:
587: int
588: xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
589: if (ctxt->token != 0) {
590: *len = 0;
591: return(ctxt->token);
592: }
1.198 daniel 593: if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1.152 daniel 594: /*
595: * We are supposed to handle UTF8, check it's valid
596: * From rfc2044: encoding of the Unicode values on UTF-8:
597: *
598: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
599: * 0000 0000-0000 007F 0xxxxxxx
600: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
601: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
602: *
1.160 daniel 603: * Check for the 0x110000 limit too
1.152 daniel 604: */
605: const unsigned char *cur = ctxt->input->cur;
606: unsigned char c;
607: unsigned int val;
608:
609: c = *cur;
610: if (c & 0x80) {
611: if (cur[1] == 0)
612: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
613: if ((cur[1] & 0xc0) != 0x80)
614: goto encoding_error;
615: if ((c & 0xe0) == 0xe0) {
616:
617: if (cur[2] == 0)
618: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
619: if ((cur[2] & 0xc0) != 0x80)
620: goto encoding_error;
621: if ((c & 0xf0) == 0xf0) {
622: if (cur[3] == 0)
623: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
624: if (((c & 0xf8) != 0xf0) ||
625: ((cur[3] & 0xc0) != 0x80))
626: goto encoding_error;
627: /* 4-byte code */
628: *len = 4;
629: val = (cur[0] & 0x7) << 18;
630: val |= (cur[1] & 0x3f) << 12;
631: val |= (cur[2] & 0x3f) << 6;
632: val |= cur[3] & 0x3f;
633: } else {
634: /* 3-byte code */
635: *len = 3;
636: val = (cur[0] & 0xf) << 12;
637: val |= (cur[1] & 0x3f) << 6;
638: val |= cur[2] & 0x3f;
639: }
640: } else {
641: /* 2-byte code */
642: *len = 2;
643: val = (cur[0] & 0x1f) << 6;
1.168 daniel 644: val |= cur[1] & 0x3f;
1.152 daniel 645: }
646: if (!IS_CHAR(val)) {
647: if ((ctxt->sax != NULL) &&
648: (ctxt->sax->error != NULL))
649: ctxt->sax->error(ctxt->userData,
1.196 daniel 650: "Char 0x%X out of allowed range\n", val);
1.152 daniel 651: ctxt->errNo = XML_ERR_INVALID_ENCODING;
652: ctxt->wellFormed = 0;
1.180 daniel 653: ctxt->disableSAX = 1;
1.152 daniel 654: }
655: return(val);
656: } else {
657: /* 1-byte code */
658: *len = 1;
1.180 daniel 659: if (*ctxt->input->cur == 0xD) {
660: if (ctxt->input->cur[1] == 0xA) {
661: ctxt->nbChars++;
662: ctxt->input->cur++;
663: }
664: return(0xA);
665: }
1.152 daniel 666: return((int) *ctxt->input->cur);
667: }
668: }
669: /*
670: * Assume it's a fixed lenght encoding (1) with
671: * a compatibke encoding for the ASCII set, since
672: * XML constructs only use < 128 chars
673: */
674: *len = 1;
1.180 daniel 675: if (*ctxt->input->cur == 0xD) {
676: if (ctxt->input->cur[1] == 0xA) {
677: ctxt->nbChars++;
678: ctxt->input->cur++;
679: }
680: return(0xA);
681: }
1.152 daniel 682: return((int) *ctxt->input->cur);
683: encoding_error:
684: /*
685: * If we detect an UTF8 error that probably mean that the
686: * input encoding didn't get properly advertized in the
687: * declaration header. Report the error and switch the encoding
688: * to ISO-Latin-1 (if you don't like this policy, just declare the
689: * encoding !)
690: */
1.198 daniel 691: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1.152 daniel 692: ctxt->sax->error(ctxt->userData,
693: "Input is not proper UTF-8, indicate encoding !\n");
1.198 daniel 694: ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
695: ctxt->input->cur[0], ctxt->input->cur[1],
696: ctxt->input->cur[2], ctxt->input->cur[3]);
697: }
1.152 daniel 698: ctxt->errNo = XML_ERR_INVALID_ENCODING;
699:
1.198 daniel 700: ctxt->charset = XML_CHAR_ENCODING_8859_1;
1.152 daniel 701: *len = 1;
702: return((int) *ctxt->input->cur);
703: }
704:
705: /**
1.162 daniel 706: * xmlStringCurrentChar:
707: * @ctxt: the XML parser context
708: * @cur: pointer to the beginning of the char
709: * @len: pointer to the length of the char read
710: *
711: * The current char value, if using UTF-8 this may actaully span multiple
712: * bytes in the input buffer.
713: *
714: * Returns the current char value and its lenght
715: */
716:
717: int
718: xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
1.198 daniel 719: if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1.162 daniel 720: /*
721: * We are supposed to handle UTF8, check it's valid
722: * From rfc2044: encoding of the Unicode values on UTF-8:
723: *
724: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
725: * 0000 0000-0000 007F 0xxxxxxx
726: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
727: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
728: *
729: * Check for the 0x110000 limit too
730: */
731: unsigned char c;
732: unsigned int val;
733:
734: c = *cur;
735: if (c & 0x80) {
736: if ((cur[1] & 0xc0) != 0x80)
737: goto encoding_error;
738: if ((c & 0xe0) == 0xe0) {
739:
740: if ((cur[2] & 0xc0) != 0x80)
741: goto encoding_error;
742: if ((c & 0xf0) == 0xf0) {
743: if (((c & 0xf8) != 0xf0) ||
744: ((cur[3] & 0xc0) != 0x80))
745: goto encoding_error;
746: /* 4-byte code */
747: *len = 4;
748: val = (cur[0] & 0x7) << 18;
749: val |= (cur[1] & 0x3f) << 12;
750: val |= (cur[2] & 0x3f) << 6;
751: val |= cur[3] & 0x3f;
752: } else {
753: /* 3-byte code */
754: *len = 3;
755: val = (cur[0] & 0xf) << 12;
756: val |= (cur[1] & 0x3f) << 6;
757: val |= cur[2] & 0x3f;
758: }
759: } else {
760: /* 2-byte code */
761: *len = 2;
762: val = (cur[0] & 0x1f) << 6;
763: val |= cur[2] & 0x3f;
764: }
765: if (!IS_CHAR(val)) {
766: if ((ctxt->sax != NULL) &&
767: (ctxt->sax->error != NULL))
768: ctxt->sax->error(ctxt->userData,
1.196 daniel 769: "Char 0x%X out of allowed range\n", val);
1.162 daniel 770: ctxt->errNo = XML_ERR_INVALID_ENCODING;
771: ctxt->wellFormed = 0;
1.180 daniel 772: ctxt->disableSAX = 1;
1.162 daniel 773: }
774: return(val);
775: } else {
776: /* 1-byte code */
777: *len = 1;
778: return((int) *cur);
779: }
780: }
781: /*
782: * Assume it's a fixed lenght encoding (1) with
783: * a compatibke encoding for the ASCII set, since
784: * XML constructs only use < 128 chars
785: */
786: *len = 1;
787: return((int) *cur);
788: encoding_error:
789: /*
790: * If we detect an UTF8 error that probably mean that the
791: * input encoding didn't get properly advertized in the
792: * declaration header. Report the error and switch the encoding
793: * to ISO-Latin-1 (if you don't like this policy, just declare the
794: * encoding !)
795: */
1.198 daniel 796: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1.162 daniel 797: ctxt->sax->error(ctxt->userData,
798: "Input is not proper UTF-8, indicate encoding !\n");
1.198 daniel 799: ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
800: ctxt->input->cur[0], ctxt->input->cur[1],
801: ctxt->input->cur[2], ctxt->input->cur[3]);
802: }
1.162 daniel 803: ctxt->errNo = XML_ERR_INVALID_ENCODING;
804:
805: *len = 1;
806: return((int) *cur);
807: }
808:
809: /**
1.152 daniel 810: * xmlCopyChar:
811: * @len: pointer to the length of the char read (or zero)
812: * @array: pointer to an arry of xmlChar
813: * @val: the char value
814: *
815: * append the char value in the array
816: *
817: * Returns the number of xmlChar written
818: */
819:
820: int
821: xmlCopyChar(int len, xmlChar *out, int val) {
822: /*
823: * We are supposed to handle UTF8, check it's valid
824: * From rfc2044: encoding of the Unicode values on UTF-8:
825: *
826: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
827: * 0000 0000-0000 007F 0xxxxxxx
828: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
829: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
830: */
831: if (len == 0) {
832: if (val < 0) len = 0;
1.160 daniel 833: else if (val < 0x80) len = 1;
834: else if (val < 0x800) len = 2;
835: else if (val < 0x10000) len = 3;
836: else if (val < 0x110000) len = 4;
1.152 daniel 837: if (len == 0) {
838: fprintf(stderr, "Internal error, xmlCopyChar 0x%X out of bound\n",
839: val);
840: return(0);
841: }
842: }
843: if (len > 1) {
844: int bits;
845:
846: if (val < 0x80) { *out++= val; bits= -6; }
847: else if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
848: else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6; }
849: else { *out++= (val >> 18) | 0xF0; bits= 12; }
850:
851: for ( ; bits >= 0; bits-= 6)
852: *out++= ((val >> bits) & 0x3F) | 0x80 ;
853:
854: return(len);
855: }
856: *out = (xmlChar) val;
857: return(1);
1.155 daniel 858: }
859:
860: /**
861: * xmlSkipBlankChars:
862: * @ctxt: the XML parser context
863: *
864: * skip all blanks character found at that point in the input streams.
865: * It pops up finished entities in the process if allowable at that point.
866: *
867: * Returns the number of space chars skipped
868: */
869:
870: int
871: xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
872: int cur, res = 0;
873:
874: do {
875: cur = CUR;
876: while (IS_BLANK(cur)) {
877: NEXT;
878: cur = CUR;
879: res++;
880: }
881: while ((cur == 0) && (ctxt->inputNr > 1) &&
882: (ctxt->instate != XML_PARSER_COMMENT)) {
883: xmlPopInput(ctxt);
884: cur = CUR;
885: }
886: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
887: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
888: } while (IS_BLANK(cur));
889: return(res);
1.152 daniel 890: }
891:
1.97 daniel 892: /************************************************************************
893: * *
894: * Commodity functions to handle entities processing *
895: * *
896: ************************************************************************/
1.40 daniel 897:
1.50 daniel 898: /**
899: * xmlPopInput:
900: * @ctxt: an XML parser context
901: *
1.40 daniel 902: * xmlPopInput: the current input pointed by ctxt->input came to an end
903: * pop it and return the next char.
1.45 daniel 904: *
1.123 daniel 905: * Returns the current xmlChar in the parser context
1.40 daniel 906: */
1.123 daniel 907: xmlChar
1.55 daniel 908: xmlPopInput(xmlParserCtxtPtr ctxt) {
1.40 daniel 909: if (ctxt->inputNr == 1) return(0); /* End of main Input */
1.69 daniel 910: xmlFreeInputStream(inputPop(ctxt));
1.97 daniel 911: if ((*ctxt->input->cur == 0) &&
912: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
913: return(xmlPopInput(ctxt));
1.40 daniel 914: return(CUR);
915: }
916:
1.50 daniel 917: /**
918: * xmlPushInput:
919: * @ctxt: an XML parser context
920: * @input: an XML parser input fragment (entity, XML fragment ...).
921: *
1.40 daniel 922: * xmlPushInput: switch to a new input stream which is stacked on top
923: * of the previous one(s).
924: */
1.55 daniel 925: void
926: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1.40 daniel 927: if (input == NULL) return;
928: inputPush(ctxt, input);
1.164 daniel 929: GROW;
1.40 daniel 930: }
931:
1.50 daniel 932: /**
1.69 daniel 933: * xmlFreeInputStream:
1.127 daniel 934: * @input: an xmlParserInputPtr
1.69 daniel 935: *
936: * Free up an input stream.
937: */
938: void
939: xmlFreeInputStream(xmlParserInputPtr input) {
940: if (input == NULL) return;
941:
1.119 daniel 942: if (input->filename != NULL) xmlFree((char *) input->filename);
943: if (input->directory != NULL) xmlFree((char *) input->directory);
1.164 daniel 944: if (input->encoding != NULL) xmlFree((char *) input->encoding);
1.165 daniel 945: if (input->version != NULL) xmlFree((char *) input->version);
1.69 daniel 946: if ((input->free != NULL) && (input->base != NULL))
1.123 daniel 947: input->free((xmlChar *) input->base);
1.93 veillard 948: if (input->buf != NULL)
949: xmlFreeParserInputBuffer(input->buf);
1.69 daniel 950: memset(input, -1, sizeof(xmlParserInput));
1.119 daniel 951: xmlFree(input);
1.69 daniel 952: }
953:
954: /**
1.96 daniel 955: * xmlNewInputStream:
956: * @ctxt: an XML parser context
957: *
958: * Create a new input stream structure
959: * Returns the new input stream or NULL
960: */
961: xmlParserInputPtr
962: xmlNewInputStream(xmlParserCtxtPtr ctxt) {
963: xmlParserInputPtr input;
964:
1.119 daniel 965: input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1.96 daniel 966: if (input == NULL) {
1.190 daniel 967: if (ctxt != NULL) {
968: ctxt->errNo = XML_ERR_NO_MEMORY;
969: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
970: ctxt->sax->error(ctxt->userData,
971: "malloc: couldn't allocate a new input stream\n");
972: ctxt->errNo = XML_ERR_NO_MEMORY;
973: }
1.96 daniel 974: return(NULL);
975: }
1.165 daniel 976: memset(input, 0, sizeof(xmlParserInput));
1.96 daniel 977: input->line = 1;
978: input->col = 1;
1.167 daniel 979: input->standalone = -1;
1.96 daniel 980: return(input);
981: }
982:
983: /**
1.190 daniel 984: * xmlNewIOInputStream:
985: * @ctxt: an XML parser context
986: * @input: an I/O Input
987: * @enc: the charset encoding if known
988: *
989: * Create a new input stream structure encapsulating the @input into
990: * a stream suitable for the parser.
991: *
992: * Returns the new input stream or NULL
993: */
994: xmlParserInputPtr
995: xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
996: xmlCharEncoding enc) {
997: xmlParserInputPtr inputStream;
998:
999: inputStream = xmlNewInputStream(ctxt);
1000: if (inputStream == NULL) {
1001: return(NULL);
1002: }
1003: inputStream->filename = NULL;
1004: inputStream->buf = input;
1005: inputStream->base = inputStream->buf->buffer->content;
1006: inputStream->cur = inputStream->buf->buffer->content;
1007: if (enc != XML_CHAR_ENCODING_NONE) {
1008: xmlSwitchEncoding(ctxt, enc);
1009: }
1010:
1011: return(inputStream);
1012: }
1013:
1014: /**
1.50 daniel 1015: * xmlNewEntityInputStream:
1016: * @ctxt: an XML parser context
1017: * @entity: an Entity pointer
1018: *
1.82 daniel 1019: * Create a new input stream based on an xmlEntityPtr
1.113 daniel 1020: *
1021: * Returns the new input stream or NULL
1.45 daniel 1022: */
1.50 daniel 1023: xmlParserInputPtr
1024: xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 1025: xmlParserInputPtr input;
1026:
1027: if (entity == NULL) {
1.123 daniel 1028: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 1029: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1030: ctxt->sax->error(ctxt->userData,
1.45 daniel 1031: "internal: xmlNewEntityInputStream entity = NULL\n");
1.123 daniel 1032: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.50 daniel 1033: return(NULL);
1.45 daniel 1034: }
1035: if (entity->content == NULL) {
1.159 daniel 1036: switch (entity->etype) {
1.113 daniel 1037: case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1.123 daniel 1038: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.113 daniel 1039: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1040: ctxt->sax->error(ctxt->userData,
1041: "xmlNewEntityInputStream unparsed entity !\n");
1042: break;
1043: case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1044: case XML_EXTERNAL_PARAMETER_ENTITY:
1.116 daniel 1045: return(xmlLoadExternalEntity((char *) entity->SystemID,
1.142 daniel 1046: (char *) entity->ExternalID, ctxt));
1.113 daniel 1047: case XML_INTERNAL_GENERAL_ENTITY:
1048: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1049: ctxt->sax->error(ctxt->userData,
1050: "Internal entity %s without content !\n", entity->name);
1051: break;
1052: case XML_INTERNAL_PARAMETER_ENTITY:
1.123 daniel 1053: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.113 daniel 1054: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1055: ctxt->sax->error(ctxt->userData,
1056: "Internal parameter entity %s without content !\n", entity->name);
1057: break;
1058: case XML_INTERNAL_PREDEFINED_ENTITY:
1.123 daniel 1059: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.113 daniel 1060: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1061: ctxt->sax->error(ctxt->userData,
1062: "Predefined entity %s without content !\n", entity->name);
1063: break;
1064: }
1.50 daniel 1065: return(NULL);
1.45 daniel 1066: }
1.96 daniel 1067: input = xmlNewInputStream(ctxt);
1.45 daniel 1068: if (input == NULL) {
1.50 daniel 1069: return(NULL);
1.45 daniel 1070: }
1.156 daniel 1071: input->filename = (char *) entity->SystemID;
1.45 daniel 1072: input->base = entity->content;
1073: input->cur = entity->content;
1.140 daniel 1074: input->length = entity->length;
1.50 daniel 1075: return(input);
1.45 daniel 1076: }
1077:
1.59 daniel 1078: /**
1079: * xmlNewStringInputStream:
1080: * @ctxt: an XML parser context
1.96 daniel 1081: * @buffer: an memory buffer
1.59 daniel 1082: *
1083: * Create a new input stream based on a memory buffer.
1.68 daniel 1084: * Returns the new input stream
1.59 daniel 1085: */
1086: xmlParserInputPtr
1.123 daniel 1087: xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1.59 daniel 1088: xmlParserInputPtr input;
1089:
1.96 daniel 1090: if (buffer == NULL) {
1.123 daniel 1091: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 1092: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1093: ctxt->sax->error(ctxt->userData,
1.59 daniel 1094: "internal: xmlNewStringInputStream string = NULL\n");
1095: return(NULL);
1096: }
1.96 daniel 1097: input = xmlNewInputStream(ctxt);
1.59 daniel 1098: if (input == NULL) {
1099: return(NULL);
1100: }
1.96 daniel 1101: input->base = buffer;
1102: input->cur = buffer;
1.140 daniel 1103: input->length = xmlStrlen(buffer);
1.59 daniel 1104: return(input);
1105: }
1106:
1.76 daniel 1107: /**
1108: * xmlNewInputFromFile:
1109: * @ctxt: an XML parser context
1110: * @filename: the filename to use as entity
1111: *
1112: * Create a new input stream based on a file.
1113: *
1114: * Returns the new input stream or NULL in case of error
1115: */
1116: xmlParserInputPtr
1.79 daniel 1117: xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1.91 daniel 1118: xmlParserInputBufferPtr buf;
1.76 daniel 1119: xmlParserInputPtr inputStream;
1.111 daniel 1120: char *directory = NULL;
1.76 daniel 1121:
1.96 daniel 1122: if (ctxt == NULL) return(NULL);
1.91 daniel 1123: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1.94 daniel 1124: if (buf == NULL) {
1.140 daniel 1125: char name[XML_PARSER_BIG_BUFFER_SIZE];
1.106 daniel 1126:
1.94 daniel 1127: if ((ctxt->input != NULL) && (ctxt->input->directory != NULL)) {
1128: #ifdef WIN32
1129: sprintf(name, "%s\\%s", ctxt->input->directory, filename);
1130: #else
1131: sprintf(name, "%s/%s", ctxt->input->directory, filename);
1132: #endif
1133: buf = xmlParserInputBufferCreateFilename(name,
1134: XML_CHAR_ENCODING_NONE);
1.106 daniel 1135: if (buf != NULL)
1.142 daniel 1136: directory = xmlParserGetDirectory(name);
1.106 daniel 1137: }
1138: if ((buf == NULL) && (ctxt->directory != NULL)) {
1139: #ifdef WIN32
1140: sprintf(name, "%s\\%s", ctxt->directory, filename);
1141: #else
1142: sprintf(name, "%s/%s", ctxt->directory, filename);
1143: #endif
1144: buf = xmlParserInputBufferCreateFilename(name,
1145: XML_CHAR_ENCODING_NONE);
1146: if (buf != NULL)
1.142 daniel 1147: directory = xmlParserGetDirectory(name);
1.106 daniel 1148: }
1149: if (buf == NULL)
1.94 daniel 1150: return(NULL);
1151: }
1152: if (directory == NULL)
1153: directory = xmlParserGetDirectory(filename);
1.76 daniel 1154:
1.96 daniel 1155: inputStream = xmlNewInputStream(ctxt);
1.76 daniel 1156: if (inputStream == NULL) {
1.119 daniel 1157: if (directory != NULL) xmlFree((char *) directory);
1.76 daniel 1158: return(NULL);
1159: }
1160:
1.119 daniel 1161: inputStream->filename = xmlMemStrdup(filename);
1.94 daniel 1162: inputStream->directory = directory;
1.91 daniel 1163: inputStream->buf = buf;
1.76 daniel 1164:
1.91 daniel 1165: inputStream->base = inputStream->buf->buffer->content;
1166: inputStream->cur = inputStream->buf->buffer->content;
1.106 daniel 1167: if ((ctxt->directory == NULL) && (directory != NULL))
1.134 daniel 1168: ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
1.76 daniel 1169: return(inputStream);
1170: }
1171:
1.77 daniel 1172: /************************************************************************
1173: * *
1.97 daniel 1174: * Commodity functions to handle parser contexts *
1175: * *
1176: ************************************************************************/
1177:
1178: /**
1179: * xmlInitParserCtxt:
1180: * @ctxt: an XML parser context
1181: *
1182: * Initialize a parser context
1183: */
1184:
1185: void
1186: xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1187: {
1188: xmlSAXHandler *sax;
1189:
1.168 daniel 1190: xmlDefaultSAXHandlerInit();
1191:
1.119 daniel 1192: sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
1.97 daniel 1193: if (sax == NULL) {
1194: fprintf(stderr, "xmlInitParserCtxt: out of memory\n");
1195: }
1.180 daniel 1196: memset(sax, 0, sizeof(xmlSAXHandler));
1.97 daniel 1197:
1198: /* Allocate the Input stack */
1.119 daniel 1199: ctxt->inputTab = (xmlParserInputPtr *) xmlMalloc(5 * sizeof(xmlParserInputPtr));
1.97 daniel 1200: ctxt->inputNr = 0;
1201: ctxt->inputMax = 5;
1202: ctxt->input = NULL;
1.165 daniel 1203:
1.97 daniel 1204: ctxt->version = NULL;
1205: ctxt->encoding = NULL;
1206: ctxt->standalone = -1;
1.98 daniel 1207: ctxt->hasExternalSubset = 0;
1208: ctxt->hasPErefs = 0;
1.97 daniel 1209: ctxt->html = 0;
1.98 daniel 1210: ctxt->external = 0;
1.140 daniel 1211: ctxt->instate = XML_PARSER_START;
1.97 daniel 1212: ctxt->token = 0;
1.106 daniel 1213: ctxt->directory = NULL;
1.97 daniel 1214:
1215: /* Allocate the Node stack */
1.119 daniel 1216: ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
1.97 daniel 1217: ctxt->nodeNr = 0;
1218: ctxt->nodeMax = 10;
1219: ctxt->node = NULL;
1220:
1.140 daniel 1221: /* Allocate the Name stack */
1222: ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
1223: ctxt->nameNr = 0;
1224: ctxt->nameMax = 10;
1225: ctxt->name = NULL;
1226:
1.176 daniel 1227: /* Allocate the space stack */
1228: ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
1229: ctxt->spaceNr = 1;
1230: ctxt->spaceMax = 10;
1231: ctxt->spaceTab[0] = -1;
1232: ctxt->space = &ctxt->spaceTab[0];
1233:
1.160 daniel 1234: if (sax == NULL) {
1235: ctxt->sax = &xmlDefaultSAXHandler;
1236: } else {
1.97 daniel 1237: ctxt->sax = sax;
1238: memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
1239: }
1240: ctxt->userData = ctxt;
1241: ctxt->myDoc = NULL;
1242: ctxt->wellFormed = 1;
1.99 daniel 1243: ctxt->valid = 1;
1.100 daniel 1244: ctxt->validate = xmlDoValidityCheckingDefaultValue;
1.179 daniel 1245: ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
1.100 daniel 1246: ctxt->vctxt.userData = ctxt;
1.149 daniel 1247: if (ctxt->validate) {
1248: ctxt->vctxt.error = xmlParserValidityError;
1.160 daniel 1249: if (xmlGetWarningsDefaultValue == 0)
1250: ctxt->vctxt.warning = NULL;
1251: else
1252: ctxt->vctxt.warning = xmlParserValidityWarning;
1.180 daniel 1253: /* Allocate the Node stack */
1254: ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
1255: ctxt->vctxt.nodeNr = 0;
1256: ctxt->vctxt.nodeMax = 4;
1257: ctxt->vctxt.node = NULL;
1.149 daniel 1258: } else {
1259: ctxt->vctxt.error = NULL;
1260: ctxt->vctxt.warning = NULL;
1261: }
1.97 daniel 1262: ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1263: ctxt->record_info = 0;
1.135 daniel 1264: ctxt->nbChars = 0;
1.140 daniel 1265: ctxt->checkIndex = 0;
1.180 daniel 1266: ctxt->inSubset = 0;
1.140 daniel 1267: ctxt->errNo = XML_ERR_OK;
1.185 daniel 1268: ctxt->depth = 0;
1.198 daniel 1269: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.97 daniel 1270: xmlInitNodeInfoSeq(&ctxt->node_seq);
1271: }
1272:
1273: /**
1274: * xmlFreeParserCtxt:
1275: * @ctxt: an XML parser context
1276: *
1277: * Free all the memory used by a parser context. However the parsed
1278: * document in ctxt->myDoc is not freed.
1279: */
1280:
1281: void
1282: xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
1283: {
1284: xmlParserInputPtr input;
1.140 daniel 1285: xmlChar *oldname;
1.97 daniel 1286:
1287: if (ctxt == NULL) return;
1288:
1289: while ((input = inputPop(ctxt)) != NULL) {
1290: xmlFreeInputStream(input);
1291: }
1.140 daniel 1292: while ((oldname = namePop(ctxt)) != NULL) {
1293: xmlFree(oldname);
1294: }
1.176 daniel 1295: if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
1.140 daniel 1296: if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
1.119 daniel 1297: if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
1298: if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
1299: if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
1300: if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
1.165 daniel 1301: if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
1302: if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
1303: if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
1.180 daniel 1304: if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
1.97 daniel 1305: if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
1.119 daniel 1306: xmlFree(ctxt->sax);
1307: if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
1308: xmlFree(ctxt);
1.97 daniel 1309: }
1310:
1311: /**
1312: * xmlNewParserCtxt:
1313: *
1314: * Allocate and initialize a new parser context.
1315: *
1316: * Returns the xmlParserCtxtPtr or NULL
1317: */
1318:
1319: xmlParserCtxtPtr
1320: xmlNewParserCtxt()
1321: {
1322: xmlParserCtxtPtr ctxt;
1323:
1.119 daniel 1324: ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
1.97 daniel 1325: if (ctxt == NULL) {
1326: fprintf(stderr, "xmlNewParserCtxt : cannot allocate context\n");
1327: perror("malloc");
1328: return(NULL);
1329: }
1.165 daniel 1330: memset(ctxt, 0, sizeof(xmlParserCtxt));
1.97 daniel 1331: xmlInitParserCtxt(ctxt);
1332: return(ctxt);
1333: }
1334:
1335: /**
1336: * xmlClearParserCtxt:
1337: * @ctxt: an XML parser context
1338: *
1339: * Clear (release owned resources) and reinitialize a parser context
1340: */
1341:
1342: void
1343: xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1344: {
1345: xmlClearNodeInfoSeq(&ctxt->node_seq);
1346: xmlInitParserCtxt(ctxt);
1347: }
1348:
1349: /************************************************************************
1350: * *
1.77 daniel 1351: * Commodity functions to handle entities *
1352: * *
1353: ************************************************************************/
1354:
1.174 daniel 1355: /**
1356: * xmlCheckEntity:
1357: * @ctxt: an XML parser context
1358: * @content: the entity content string
1359: *
1360: * Parse an entity content and checks the WF constraints
1361: *
1362: */
1363:
1364: void
1365: xmlCheckEntity(xmlParserCtxtPtr ctxt, const xmlChar *content) {
1366: }
1.97 daniel 1367:
1368: /**
1369: * xmlParseCharRef:
1370: * @ctxt: an XML parser context
1371: *
1372: * parse Reference declarations
1373: *
1374: * [66] CharRef ::= '&#' [0-9]+ ';' |
1375: * '&#x' [0-9a-fA-F]+ ';'
1376: *
1.98 daniel 1377: * [ WFC: Legal Character ]
1378: * Characters referred to using character references must match the
1379: * production for Char.
1380: *
1.135 daniel 1381: * Returns the value parsed (as an int), 0 in case of error
1.77 daniel 1382: */
1.97 daniel 1383: int
1384: xmlParseCharRef(xmlParserCtxtPtr ctxt) {
1385: int val = 0;
1386:
1.111 daniel 1387: if (ctxt->token != 0) {
1388: val = ctxt->token;
1389: ctxt->token = 0;
1390: return(val);
1391: }
1.152 daniel 1392: if ((RAW == '&') && (NXT(1) == '#') &&
1.97 daniel 1393: (NXT(2) == 'x')) {
1394: SKIP(3);
1.152 daniel 1395: while (RAW != ';') {
1396: if ((RAW >= '0') && (RAW <= '9'))
1.97 daniel 1397: val = val * 16 + (CUR - '0');
1.152 daniel 1398: else if ((RAW >= 'a') && (RAW <= 'f'))
1.97 daniel 1399: val = val * 16 + (CUR - 'a') + 10;
1.152 daniel 1400: else if ((RAW >= 'A') && (RAW <= 'F'))
1.97 daniel 1401: val = val * 16 + (CUR - 'A') + 10;
1402: else {
1.123 daniel 1403: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1.97 daniel 1404: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1405: ctxt->sax->error(ctxt->userData,
1406: "xmlParseCharRef: invalid hexadecimal value\n");
1407: ctxt->wellFormed = 0;
1.180 daniel 1408: ctxt->disableSAX = 1;
1.97 daniel 1409: val = 0;
1410: break;
1411: }
1412: NEXT;
1413: }
1.164 daniel 1414: if (RAW == ';') {
1415: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1416: ctxt->nbChars ++;
1417: ctxt->input->cur++;
1418: }
1.152 daniel 1419: } else if ((RAW == '&') && (NXT(1) == '#')) {
1.97 daniel 1420: SKIP(2);
1.152 daniel 1421: while (RAW != ';') {
1422: if ((RAW >= '0') && (RAW <= '9'))
1.97 daniel 1423: val = val * 10 + (CUR - '0');
1424: else {
1.123 daniel 1425: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1.97 daniel 1426: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1427: ctxt->sax->error(ctxt->userData,
1428: "xmlParseCharRef: invalid decimal value\n");
1429: ctxt->wellFormed = 0;
1.180 daniel 1430: ctxt->disableSAX = 1;
1.97 daniel 1431: val = 0;
1432: break;
1433: }
1434: NEXT;
1435: }
1.164 daniel 1436: if (RAW == ';') {
1437: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1438: ctxt->nbChars ++;
1439: ctxt->input->cur++;
1440: }
1.97 daniel 1441: } else {
1.123 daniel 1442: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1.97 daniel 1443: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 1444: ctxt->sax->error(ctxt->userData,
1445: "xmlParseCharRef: invalid value\n");
1.97 daniel 1446: ctxt->wellFormed = 0;
1.180 daniel 1447: ctxt->disableSAX = 1;
1.97 daniel 1448: }
1.98 daniel 1449:
1.97 daniel 1450: /*
1.98 daniel 1451: * [ WFC: Legal Character ]
1452: * Characters referred to using character references must match the
1453: * production for Char.
1.97 daniel 1454: */
1455: if (IS_CHAR(val)) {
1456: return(val);
1457: } else {
1.123 daniel 1458: ctxt->errNo = XML_ERR_INVALID_CHAR;
1.97 daniel 1459: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.123 daniel 1460: ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
1.97 daniel 1461: val);
1462: ctxt->wellFormed = 0;
1.180 daniel 1463: ctxt->disableSAX = 1;
1.97 daniel 1464: }
1465: return(0);
1.77 daniel 1466: }
1467:
1.96 daniel 1468: /**
1.135 daniel 1469: * xmlParseStringCharRef:
1470: * @ctxt: an XML parser context
1471: * @str: a pointer to an index in the string
1472: *
1473: * parse Reference declarations, variant parsing from a string rather
1474: * than an an input flow.
1475: *
1476: * [66] CharRef ::= '&#' [0-9]+ ';' |
1477: * '&#x' [0-9a-fA-F]+ ';'
1478: *
1479: * [ WFC: Legal Character ]
1480: * Characters referred to using character references must match the
1481: * production for Char.
1482: *
1483: * Returns the value parsed (as an int), 0 in case of error, str will be
1484: * updated to the current value of the index
1485: */
1486: int
1487: xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1488: const xmlChar *ptr;
1489: xmlChar cur;
1490: int val = 0;
1491:
1492: if ((str == NULL) || (*str == NULL)) return(0);
1493: ptr = *str;
1494: cur = *ptr;
1.137 daniel 1495: if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1.135 daniel 1496: ptr += 3;
1497: cur = *ptr;
1498: while (cur != ';') {
1499: if ((cur >= '0') && (cur <= '9'))
1500: val = val * 16 + (cur - '0');
1501: else if ((cur >= 'a') && (cur <= 'f'))
1502: val = val * 16 + (cur - 'a') + 10;
1503: else if ((cur >= 'A') && (cur <= 'F'))
1504: val = val * 16 + (cur - 'A') + 10;
1505: else {
1506: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1507: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1508: ctxt->sax->error(ctxt->userData,
1.198 daniel 1509: "xmlParseStringCharRef: invalid hexadecimal value\n");
1.135 daniel 1510: ctxt->wellFormed = 0;
1.180 daniel 1511: ctxt->disableSAX = 1;
1.135 daniel 1512: val = 0;
1513: break;
1514: }
1515: ptr++;
1516: cur = *ptr;
1517: }
1518: if (cur == ';')
1519: ptr++;
1.145 daniel 1520: } else if ((cur == '&') && (ptr[1] == '#')){
1.135 daniel 1521: ptr += 2;
1522: cur = *ptr;
1523: while (cur != ';') {
1524: if ((cur >= '0') && (cur <= '9'))
1525: val = val * 10 + (cur - '0');
1526: else {
1527: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1528: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1529: ctxt->sax->error(ctxt->userData,
1.198 daniel 1530: "xmlParseStringCharRef: invalid decimal value\n");
1.135 daniel 1531: ctxt->wellFormed = 0;
1.180 daniel 1532: ctxt->disableSAX = 1;
1.135 daniel 1533: val = 0;
1534: break;
1535: }
1536: ptr++;
1537: cur = *ptr;
1538: }
1539: if (cur == ';')
1540: ptr++;
1541: } else {
1542: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1543: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1544: ctxt->sax->error(ctxt->userData,
1545: "xmlParseCharRef: invalid value\n");
1546: ctxt->wellFormed = 0;
1.180 daniel 1547: ctxt->disableSAX = 1;
1.135 daniel 1548: return(0);
1549: }
1550: *str = ptr;
1551:
1552: /*
1553: * [ WFC: Legal Character ]
1554: * Characters referred to using character references must match the
1555: * production for Char.
1556: */
1557: if (IS_CHAR(val)) {
1558: return(val);
1559: } else {
1560: ctxt->errNo = XML_ERR_INVALID_CHAR;
1561: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1562: ctxt->sax->error(ctxt->userData,
1563: "CharRef: invalid xmlChar value %d\n", val);
1564: ctxt->wellFormed = 0;
1.180 daniel 1565: ctxt->disableSAX = 1;
1.135 daniel 1566: }
1567: return(0);
1568: }
1569:
1570: /**
1.96 daniel 1571: * xmlParserHandleReference:
1572: * @ctxt: the parser context
1573: *
1.97 daniel 1574: * [67] Reference ::= EntityRef | CharRef
1575: *
1.96 daniel 1576: * [68] EntityRef ::= '&' Name ';'
1577: *
1.98 daniel 1578: * [ WFC: Entity Declared ]
1579: * the Name given in the entity reference must match that in an entity
1580: * declaration, except that well-formed documents need not declare any
1581: * of the following entities: amp, lt, gt, apos, quot.
1582: *
1583: * [ WFC: Parsed Entity ]
1584: * An entity reference must not contain the name of an unparsed entity
1585: *
1.97 daniel 1586: * [66] CharRef ::= '&#' [0-9]+ ';' |
1587: * '&#x' [0-9a-fA-F]+ ';'
1588: *
1.96 daniel 1589: * A PEReference may have been detectect in the current input stream
1590: * the handling is done accordingly to
1591: * http://www.w3.org/TR/REC-xml#entproc
1592: */
1593: void
1594: xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
1.97 daniel 1595: xmlParserInputPtr input;
1.123 daniel 1596: xmlChar *name;
1.97 daniel 1597: xmlEntityPtr ent = NULL;
1598:
1.126 daniel 1599: if (ctxt->token != 0) {
1600: return;
1601: }
1.152 daniel 1602: if (RAW != '&') return;
1.97 daniel 1603: GROW;
1.152 daniel 1604: if ((RAW == '&') && (NXT(1) == '#')) {
1.97 daniel 1605: switch(ctxt->instate) {
1.140 daniel 1606: case XML_PARSER_ENTITY_DECL:
1607: case XML_PARSER_PI:
1.109 daniel 1608: case XML_PARSER_CDATA_SECTION:
1.140 daniel 1609: case XML_PARSER_COMMENT:
1.168 daniel 1610: case XML_PARSER_SYSTEM_LITERAL:
1.140 daniel 1611: /* we just ignore it there */
1612: return;
1613: case XML_PARSER_START_TAG:
1.109 daniel 1614: return;
1.140 daniel 1615: case XML_PARSER_END_TAG:
1.97 daniel 1616: return;
1617: case XML_PARSER_EOF:
1.123 daniel 1618: ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
1.97 daniel 1619: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1620: ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
1621: ctxt->wellFormed = 0;
1.180 daniel 1622: ctxt->disableSAX = 1;
1.97 daniel 1623: return;
1624: case XML_PARSER_PROLOG:
1.140 daniel 1625: case XML_PARSER_START:
1626: case XML_PARSER_MISC:
1.123 daniel 1627: ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
1.97 daniel 1628: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1629: ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
1630: ctxt->wellFormed = 0;
1.180 daniel 1631: ctxt->disableSAX = 1;
1.97 daniel 1632: return;
1633: case XML_PARSER_EPILOG:
1.123 daniel 1634: ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
1.97 daniel 1635: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1636: ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
1637: ctxt->wellFormed = 0;
1.180 daniel 1638: ctxt->disableSAX = 1;
1.97 daniel 1639: return;
1640: case XML_PARSER_DTD:
1.123 daniel 1641: ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
1.97 daniel 1642: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1643: ctxt->sax->error(ctxt->userData,
1644: "CharRef are forbiden in DTDs!\n");
1645: ctxt->wellFormed = 0;
1.180 daniel 1646: ctxt->disableSAX = 1;
1.97 daniel 1647: return;
1648: case XML_PARSER_ENTITY_VALUE:
1649: /*
1650: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1651: * substitution here since we need the literal
1.97 daniel 1652: * entity value to be able to save the internal
1653: * subset of the document.
1654: * This will be handled by xmlDecodeEntities
1655: */
1656: return;
1657: case XML_PARSER_CONTENT:
1658: case XML_PARSER_ATTRIBUTE_VALUE:
1659: ctxt->token = xmlParseCharRef(ctxt);
1660: return;
1661: }
1662: return;
1663: }
1664:
1665: switch(ctxt->instate) {
1.109 daniel 1666: case XML_PARSER_CDATA_SECTION:
1667: return;
1.140 daniel 1668: case XML_PARSER_PI:
1.97 daniel 1669: case XML_PARSER_COMMENT:
1.168 daniel 1670: case XML_PARSER_SYSTEM_LITERAL:
1671: case XML_PARSER_CONTENT:
1.97 daniel 1672: return;
1.140 daniel 1673: case XML_PARSER_START_TAG:
1674: return;
1675: case XML_PARSER_END_TAG:
1676: return;
1.97 daniel 1677: case XML_PARSER_EOF:
1.123 daniel 1678: ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
1.97 daniel 1679: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1680: ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
1681: ctxt->wellFormed = 0;
1.180 daniel 1682: ctxt->disableSAX = 1;
1.97 daniel 1683: return;
1684: case XML_PARSER_PROLOG:
1.140 daniel 1685: case XML_PARSER_START:
1686: case XML_PARSER_MISC:
1.123 daniel 1687: ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
1.97 daniel 1688: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1689: ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
1690: ctxt->wellFormed = 0;
1.180 daniel 1691: ctxt->disableSAX = 1;
1.97 daniel 1692: return;
1693: case XML_PARSER_EPILOG:
1.123 daniel 1694: ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
1.97 daniel 1695: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1696: ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
1697: ctxt->wellFormed = 0;
1.180 daniel 1698: ctxt->disableSAX = 1;
1.97 daniel 1699: return;
1700: case XML_PARSER_ENTITY_VALUE:
1701: /*
1702: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1703: * substitution here since we need the literal
1.97 daniel 1704: * entity value to be able to save the internal
1705: * subset of the document.
1706: * This will be handled by xmlDecodeEntities
1707: */
1708: return;
1709: case XML_PARSER_ATTRIBUTE_VALUE:
1710: /*
1711: * NOTE: in the case of attributes values, we don't do the
1712: * substitution here unless we are in a mode where
1713: * the parser is explicitely asked to substitute
1714: * entities. The SAX callback is called with values
1715: * without entity substitution.
1716: * This will then be handled by xmlDecodeEntities
1717: */
1.113 daniel 1718: return;
1.97 daniel 1719: case XML_PARSER_ENTITY_DECL:
1720: /*
1721: * we just ignore it there
1722: * the substitution will be done once the entity is referenced
1723: */
1724: return;
1725: case XML_PARSER_DTD:
1.123 daniel 1726: ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
1.97 daniel 1727: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1728: ctxt->sax->error(ctxt->userData,
1729: "Entity references are forbiden in DTDs!\n");
1730: ctxt->wellFormed = 0;
1.180 daniel 1731: ctxt->disableSAX = 1;
1.97 daniel 1732: return;
1733: }
1734:
1735: NEXT;
1736: name = xmlScanName(ctxt);
1737: if (name == NULL) {
1.123 daniel 1738: ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
1.97 daniel 1739: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1740: ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
1741: ctxt->wellFormed = 0;
1.180 daniel 1742: ctxt->disableSAX = 1;
1.97 daniel 1743: ctxt->token = '&';
1744: return;
1745: }
1746: if (NXT(xmlStrlen(name)) != ';') {
1.123 daniel 1747: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.97 daniel 1748: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1749: ctxt->sax->error(ctxt->userData,
1750: "Entity reference: ';' expected\n");
1751: ctxt->wellFormed = 0;
1.180 daniel 1752: ctxt->disableSAX = 1;
1.97 daniel 1753: ctxt->token = '&';
1.119 daniel 1754: xmlFree(name);
1.97 daniel 1755: return;
1756: }
1757: SKIP(xmlStrlen(name) + 1);
1758: if (ctxt->sax != NULL) {
1759: if (ctxt->sax->getEntity != NULL)
1760: ent = ctxt->sax->getEntity(ctxt->userData, name);
1761: }
1.98 daniel 1762:
1763: /*
1764: * [ WFC: Entity Declared ]
1765: * the Name given in the entity reference must match that in an entity
1766: * declaration, except that well-formed documents need not declare any
1767: * of the following entities: amp, lt, gt, apos, quot.
1768: */
1.97 daniel 1769: if (ent == NULL)
1770: ent = xmlGetPredefinedEntity(name);
1771: if (ent == NULL) {
1.123 daniel 1772: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.97 daniel 1773: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1774: ctxt->sax->error(ctxt->userData,
1.98 daniel 1775: "Entity reference: entity %s not declared\n",
1776: name);
1.97 daniel 1777: ctxt->wellFormed = 0;
1.180 daniel 1778: ctxt->disableSAX = 1;
1.119 daniel 1779: xmlFree(name);
1.97 daniel 1780: return;
1781: }
1.98 daniel 1782:
1783: /*
1784: * [ WFC: Parsed Entity ]
1785: * An entity reference must not contain the name of an unparsed entity
1786: */
1.159 daniel 1787: if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.123 daniel 1788: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 1789: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1790: ctxt->sax->error(ctxt->userData,
1791: "Entity reference to unparsed entity %s\n", name);
1792: ctxt->wellFormed = 0;
1.180 daniel 1793: ctxt->disableSAX = 1;
1.98 daniel 1794: }
1795:
1.159 daniel 1796: if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
1.97 daniel 1797: ctxt->token = ent->content[0];
1.119 daniel 1798: xmlFree(name);
1.97 daniel 1799: return;
1800: }
1801: input = xmlNewEntityInputStream(ctxt, ent);
1802: xmlPushInput(ctxt, input);
1.119 daniel 1803: xmlFree(name);
1.96 daniel 1804: return;
1805: }
1806:
1807: /**
1808: * xmlParserHandlePEReference:
1809: * @ctxt: the parser context
1810: *
1811: * [69] PEReference ::= '%' Name ';'
1812: *
1.98 daniel 1813: * [ WFC: No Recursion ]
1814: * TODO A parsed entity must not contain a recursive
1815: * reference to itself, either directly or indirectly.
1816: *
1817: * [ WFC: Entity Declared ]
1818: * In a document without any DTD, a document with only an internal DTD
1819: * subset which contains no parameter entity references, or a document
1820: * with "standalone='yes'", ... ... The declaration of a parameter
1821: * entity must precede any reference to it...
1822: *
1823: * [ VC: Entity Declared ]
1824: * In a document with an external subset or external parameter entities
1825: * with "standalone='no'", ... ... The declaration of a parameter entity
1826: * must precede any reference to it...
1827: *
1828: * [ WFC: In DTD ]
1829: * Parameter-entity references may only appear in the DTD.
1830: * NOTE: misleading but this is handled.
1831: *
1832: * A PEReference may have been detected in the current input stream
1.96 daniel 1833: * the handling is done accordingly to
1834: * http://www.w3.org/TR/REC-xml#entproc
1835: * i.e.
1836: * - Included in literal in entity values
1837: * - Included as Parameter Entity reference within DTDs
1838: */
1839: void
1840: xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 1841: xmlChar *name;
1.96 daniel 1842: xmlEntityPtr entity = NULL;
1843: xmlParserInputPtr input;
1844:
1.126 daniel 1845: if (ctxt->token != 0) {
1846: return;
1847: }
1.152 daniel 1848: if (RAW != '%') return;
1.96 daniel 1849: switch(ctxt->instate) {
1.109 daniel 1850: case XML_PARSER_CDATA_SECTION:
1851: return;
1.97 daniel 1852: case XML_PARSER_COMMENT:
1853: return;
1.140 daniel 1854: case XML_PARSER_START_TAG:
1855: return;
1856: case XML_PARSER_END_TAG:
1857: return;
1.96 daniel 1858: case XML_PARSER_EOF:
1.123 daniel 1859: ctxt->errNo = XML_ERR_PEREF_AT_EOF;
1.96 daniel 1860: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1861: ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
1862: ctxt->wellFormed = 0;
1.180 daniel 1863: ctxt->disableSAX = 1;
1.96 daniel 1864: return;
1865: case XML_PARSER_PROLOG:
1.140 daniel 1866: case XML_PARSER_START:
1867: case XML_PARSER_MISC:
1.123 daniel 1868: ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
1.96 daniel 1869: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1870: ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
1871: ctxt->wellFormed = 0;
1.180 daniel 1872: ctxt->disableSAX = 1;
1.96 daniel 1873: return;
1.97 daniel 1874: case XML_PARSER_ENTITY_DECL:
1.96 daniel 1875: case XML_PARSER_CONTENT:
1876: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 1877: case XML_PARSER_PI:
1.168 daniel 1878: case XML_PARSER_SYSTEM_LITERAL:
1.96 daniel 1879: /* we just ignore it there */
1880: return;
1881: case XML_PARSER_EPILOG:
1.123 daniel 1882: ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
1.96 daniel 1883: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.97 daniel 1884: ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
1.96 daniel 1885: ctxt->wellFormed = 0;
1.180 daniel 1886: ctxt->disableSAX = 1;
1.96 daniel 1887: return;
1.97 daniel 1888: case XML_PARSER_ENTITY_VALUE:
1889: /*
1890: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1891: * substitution here since we need the literal
1.97 daniel 1892: * entity value to be able to save the internal
1893: * subset of the document.
1894: * This will be handled by xmlDecodeEntities
1895: */
1896: return;
1.96 daniel 1897: case XML_PARSER_DTD:
1.98 daniel 1898: /*
1899: * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1900: * In the internal DTD subset, parameter-entity references
1901: * can occur only where markup declarations can occur, not
1902: * within markup declarations.
1903: * In that case this is handled in xmlParseMarkupDecl
1904: */
1905: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1906: return;
1.96 daniel 1907: }
1908:
1909: NEXT;
1910: name = xmlParseName(ctxt);
1911: if (name == NULL) {
1.123 daniel 1912: ctxt->errNo = XML_ERR_PEREF_NO_NAME;
1.96 daniel 1913: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1914: ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
1915: ctxt->wellFormed = 0;
1.180 daniel 1916: ctxt->disableSAX = 1;
1.96 daniel 1917: } else {
1.152 daniel 1918: if (RAW == ';') {
1.96 daniel 1919: NEXT;
1.98 daniel 1920: if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1921: entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.96 daniel 1922: if (entity == NULL) {
1.98 daniel 1923:
1924: /*
1925: * [ WFC: Entity Declared ]
1926: * In a document without any DTD, a document with only an
1927: * internal DTD subset which contains no parameter entity
1928: * references, or a document with "standalone='yes'", ...
1929: * ... The declaration of a parameter entity must precede
1930: * any reference to it...
1931: */
1932: if ((ctxt->standalone == 1) ||
1933: ((ctxt->hasExternalSubset == 0) &&
1934: (ctxt->hasPErefs == 0))) {
1935: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1936: ctxt->sax->error(ctxt->userData,
1937: "PEReference: %%%s; not found\n", name);
1938: ctxt->wellFormed = 0;
1.180 daniel 1939: ctxt->disableSAX = 1;
1.98 daniel 1940: } else {
1941: /*
1942: * [ VC: Entity Declared ]
1943: * In a document with an external subset or external
1944: * parameter entities with "standalone='no'", ...
1945: * ... The declaration of a parameter entity must precede
1946: * any reference to it...
1947: */
1948: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1949: ctxt->sax->warning(ctxt->userData,
1950: "PEReference: %%%s; not found\n", name);
1951: ctxt->valid = 0;
1952: }
1.96 daniel 1953: } else {
1.159 daniel 1954: if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1955: (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
1.96 daniel 1956: /*
1.156 daniel 1957: * TODO !!! handle the extra spaces added before and after
1.96 daniel 1958: * c.f. http://www.w3.org/TR/REC-xml#as-PE
1959: */
1960: input = xmlNewEntityInputStream(ctxt, entity);
1961: xmlPushInput(ctxt, input);
1.164 daniel 1962: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
1963: (RAW == '<') && (NXT(1) == '?') &&
1964: (NXT(2) == 'x') && (NXT(3) == 'm') &&
1965: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 1966: xmlParseTextDecl(ctxt);
1.164 daniel 1967: }
1968: if (ctxt->token == 0)
1969: ctxt->token = ' ';
1.96 daniel 1970: } else {
1971: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1972: ctxt->sax->error(ctxt->userData,
1973: "xmlHandlePEReference: %s is not a parameter entity\n",
1974: name);
1975: ctxt->wellFormed = 0;
1.180 daniel 1976: ctxt->disableSAX = 1;
1.96 daniel 1977: }
1978: }
1979: } else {
1.123 daniel 1980: ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
1.96 daniel 1981: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1982: ctxt->sax->error(ctxt->userData,
1983: "xmlHandlePEReference: expecting ';'\n");
1984: ctxt->wellFormed = 0;
1.180 daniel 1985: ctxt->disableSAX = 1;
1.96 daniel 1986: }
1.119 daniel 1987: xmlFree(name);
1.97 daniel 1988: }
1989: }
1990:
/*
 * Macro used to grow the current buffer.
 *
 * Expects two variables in the enclosing scope: the xmlChar pointer
 * `buffer` and an integer `buffer_size` holding its capacity in xmlChars.
 * The capacity is doubled. When the reallocation fails, the original block
 * is released (the enclosing function returns at once, so it would leak
 * otherwise) and the enclosing function returns NULL.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *grow_tmp;                                                  \
    buffer##_size *= 2;                                                 \
    grow_tmp = (xmlChar *)                                              \
        xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));            \
    if (grow_tmp == NULL) {                                             \
        perror("realloc failed");                                       \
        xmlFree(buffer);                                                \
        buffer = NULL;                                                  \
        return(NULL);                                                   \
    }                                                                   \
    buffer = grow_tmp;                                                  \
}
1.77 daniel 2003:
2004: /**
2005: * xmlDecodeEntities:
2006: * @ctxt: the parser context
2007: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2008: * @len: the len to decode (in bytes !), -1 for no size limit
1.123 daniel 2009: * @end: an end marker xmlChar, 0 if none
2010: * @end2: an end marker xmlChar, 0 if none
2011: * @end3: an end marker xmlChar, 0 if none
1.77 daniel 2012: *
2013: * [67] Reference ::= EntityRef | CharRef
2014: *
2015: * [69] PEReference ::= '%' Name ';'
2016: *
2017: * Returns A newly allocated string with the substitution done. The caller
2018: * must deallocate it !
2019: */
1.123 daniel 2020: xmlChar *
1.77 daniel 2021: xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
1.123 daniel 2022: xmlChar end, xmlChar end2, xmlChar end3) {
2023: xmlChar *buffer = NULL;
1.78 daniel 2024: int buffer_size = 0;
1.161 daniel 2025: int nbchars = 0;
1.78 daniel 2026:
1.123 daniel 2027: xmlChar *current = NULL;
1.77 daniel 2028: xmlEntityPtr ent;
2029: unsigned int max = (unsigned int) len;
1.161 daniel 2030: int c,l;
1.77 daniel 2031:
1.185 daniel 2032: if (ctxt->depth > 40) {
2033: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2034: ctxt->sax->error(ctxt->userData,
2035: "Detected entity reference loop\n");
2036: ctxt->wellFormed = 0;
2037: ctxt->disableSAX = 1;
2038: ctxt->errNo = XML_ERR_ENTITY_LOOP;
2039: return(NULL);
2040: }
2041:
1.77 daniel 2042: /*
2043: * allocate a translation buffer.
2044: */
1.140 daniel 2045: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.123 daniel 2046: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
1.77 daniel 2047: if (buffer == NULL) {
2048: perror("xmlDecodeEntities: malloc failed");
2049: return(NULL);
2050: }
2051:
1.78 daniel 2052: /*
2053: * Ok loop until we reach one of the ending char or a size limit.
2054: */
1.161 daniel 2055: c = CUR_CHAR(l);
2056: while ((nbchars < max) && (c != end) &&
2057: (c != end2) && (c != end3)) {
1.77 daniel 2058:
1.161 daniel 2059: if (c == 0) break;
2060: if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
1.98 daniel 2061: int val = xmlParseCharRef(ctxt);
1.161 daniel 2062: COPY_BUF(0,buffer,nbchars,val);
2063: NEXTL(l);
2064: } else if ((c == '&') && (ctxt->token != '&') &&
2065: (what & XML_SUBSTITUTE_REF)) {
1.98 daniel 2066: ent = xmlParseEntityRef(ctxt);
2067: if ((ent != NULL) &&
2068: (ctxt->replaceEntities != 0)) {
2069: current = ent->content;
2070: while (*current != 0) {
1.161 daniel 2071: buffer[nbchars++] = *current++;
2072: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.98 daniel 2073: growBuffer(buffer);
1.77 daniel 2074: }
2075: }
1.98 daniel 2076: } else if (ent != NULL) {
1.123 daniel 2077: const xmlChar *cur = ent->name;
1.98 daniel 2078:
1.161 daniel 2079: buffer[nbchars++] = '&';
2080: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.98 daniel 2081: growBuffer(buffer);
2082: }
1.161 daniel 2083: while (*cur != 0) {
2084: buffer[nbchars++] = *cur++;
2085: }
2086: buffer[nbchars++] = ';';
1.77 daniel 2087: }
1.161 daniel 2088: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1.97 daniel 2089: /*
1.77 daniel 2090: * a PEReference induce to switch the entity flow,
2091: * we break here to flush the current set of chars
2092: * parsed if any. We will be called back later.
1.97 daniel 2093: */
1.91 daniel 2094: if (nbchars != 0) break;
1.77 daniel 2095:
2096: xmlParsePEReference(ctxt);
1.79 daniel 2097:
1.97 daniel 2098: /*
1.79 daniel 2099: * Pop-up of finished entities.
1.97 daniel 2100: */
1.152 daniel 2101: while ((RAW == 0) && (ctxt->inputNr > 1))
1.79 daniel 2102: xmlPopInput(ctxt);
2103:
1.98 daniel 2104: break;
1.77 daniel 2105: } else {
1.161 daniel 2106: COPY_BUF(l,buffer,nbchars,c);
2107: NEXTL(l);
2108: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.86 daniel 2109: growBuffer(buffer);
2110: }
1.77 daniel 2111: }
1.161 daniel 2112: c = CUR_CHAR(l);
1.77 daniel 2113: }
1.161 daniel 2114: buffer[nbchars++] = 0;
1.77 daniel 2115: return(buffer);
2116: }
2117:
1.135 daniel 2118: /**
2119: * xmlStringDecodeEntities:
2120: * @ctxt: the parser context
2121: * @str: the input string
2122: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2123: * @end: an end marker xmlChar, 0 if none
2124: * @end2: an end marker xmlChar, 0 if none
2125: * @end3: an end marker xmlChar, 0 if none
2126: *
2127: * [67] Reference ::= EntityRef | CharRef
2128: *
2129: * [69] PEReference ::= '%' Name ';'
2130: *
2131: * Returns A newly allocated string with the substitution done. The caller
2132: * must deallocate it !
2133: */
2134: xmlChar *
2135: xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2136: xmlChar end, xmlChar end2, xmlChar end3) {
2137: xmlChar *buffer = NULL;
2138: int buffer_size = 0;
2139:
2140: xmlChar *current = NULL;
2141: xmlEntityPtr ent;
1.176 daniel 2142: int c,l;
2143: int nbchars = 0;
1.135 daniel 2144:
1.185 daniel 2145: if (ctxt->depth > 40) {
2146: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2147: ctxt->sax->error(ctxt->userData,
2148: "Detected entity reference loop\n");
2149: ctxt->wellFormed = 0;
2150: ctxt->disableSAX = 1;
2151: ctxt->errNo = XML_ERR_ENTITY_LOOP;
2152: return(NULL);
2153: }
2154:
1.135 daniel 2155: /*
2156: * allocate a translation buffer.
2157: */
1.140 daniel 2158: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.135 daniel 2159: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2160: if (buffer == NULL) {
2161: perror("xmlDecodeEntities: malloc failed");
2162: return(NULL);
2163: }
2164:
2165: /*
2166: * Ok loop until we reach one of the ending char or a size limit.
2167: */
1.176 daniel 2168: c = CUR_SCHAR(str, l);
2169: while ((c != 0) && (c != end) && (c != end2) && (c != end3)) {
1.135 daniel 2170:
1.176 daniel 2171: if (c == 0) break;
2172: if ((c == '&') && (str[1] == '#')) {
1.135 daniel 2173: int val = xmlParseStringCharRef(ctxt, &str);
1.176 daniel 2174: if (val != 0) {
2175: COPY_BUF(0,buffer,nbchars,val);
2176: }
2177: } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1.135 daniel 2178: ent = xmlParseStringEntityRef(ctxt, &str);
1.185 daniel 2179: if ((ent != NULL) && (ent->content != NULL)) {
2180: xmlChar *rep;
2181:
2182: ctxt->depth++;
2183: rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2184: 0, 0, 0);
2185: ctxt->depth--;
2186: if (rep != NULL) {
2187: current = rep;
2188: while (*current != 0) {
2189: buffer[nbchars++] = *current++;
2190: if (nbchars >
2191: buffer_size - XML_PARSER_BUFFER_SIZE) {
2192: growBuffer(buffer);
2193: }
1.135 daniel 2194: }
1.185 daniel 2195: xmlFree(rep);
1.135 daniel 2196: }
2197: } else if (ent != NULL) {
2198: int i = xmlStrlen(ent->name);
2199: const xmlChar *cur = ent->name;
2200:
1.176 daniel 2201: buffer[nbchars++] = '&';
2202: if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 2203: growBuffer(buffer);
2204: }
2205: for (;i > 0;i--)
1.176 daniel 2206: buffer[nbchars++] = *cur++;
2207: buffer[nbchars++] = ';';
1.135 daniel 2208: }
1.176 daniel 2209: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1.135 daniel 2210: ent = xmlParseStringPEReference(ctxt, &str);
2211: if (ent != NULL) {
1.185 daniel 2212: xmlChar *rep;
2213:
2214: ctxt->depth++;
2215: rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2216: 0, 0, 0);
2217: ctxt->depth--;
2218: if (rep != NULL) {
2219: current = rep;
2220: while (*current != 0) {
2221: buffer[nbchars++] = *current++;
2222: if (nbchars >
2223: buffer_size - XML_PARSER_BUFFER_SIZE) {
2224: growBuffer(buffer);
2225: }
1.135 daniel 2226: }
1.185 daniel 2227: xmlFree(rep);
1.135 daniel 2228: }
2229: }
2230: } else {
1.176 daniel 2231: COPY_BUF(l,buffer,nbchars,c);
2232: str += l;
2233: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 2234: growBuffer(buffer);
2235: }
2236: }
1.176 daniel 2237: c = CUR_SCHAR(str, l);
1.135 daniel 2238: }
1.176 daniel 2239: buffer[nbchars++] = 0;
1.135 daniel 2240: return(buffer);
2241: }
2242:
1.1 veillard 2243:
1.28 daniel 2244: /************************************************************************
2245: * *
1.75 daniel 2246: * Commodity functions to handle encodings *
2247: * *
2248: ************************************************************************/
2249:
1.172 daniel 2250: /*
2251: * xmlCheckLanguageID
2252: * @lang: pointer to the string value
2253: *
2254: * Checks that the value conforms to the LanguageID production:
2255: *
2256: * [33] LanguageID ::= Langcode ('-' Subcode)*
2257: * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2258: * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2259: * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2260: * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2261: * [38] Subcode ::= ([a-z] | [A-Z])+
2262: *
2263: * Returns 1 if correct 0 otherwise
2264: **/
2265: int
2266: xmlCheckLanguageID(const xmlChar *lang) {
2267: const xmlChar *cur = lang;
2268:
2269: if (cur == NULL)
2270: return(0);
2271: if (((cur[0] == 'i') && (cur[1] == '-')) ||
2272: ((cur[0] == 'I') && (cur[1] == '-'))) {
2273: /*
2274: * IANA code
2275: */
2276: cur += 2;
2277: while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2278: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2279: cur++;
2280: } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2281: ((cur[0] == 'X') && (cur[1] == '-'))) {
2282: /*
2283: * User code
2284: */
2285: cur += 2;
2286: while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2287: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2288: cur++;
2289: } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2290: ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2291: /*
2292: * ISO639
2293: */
2294: cur++;
2295: if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2296: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2297: cur++;
2298: else
2299: return(0);
2300: } else
2301: return(0);
2302: while (cur[0] != 0) {
2303: if (cur[0] != '-')
2304: return(0);
2305: cur++;
2306: if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2307: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2308: cur++;
2309: else
2310: return(0);
2311: while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2312: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2313: cur++;
2314: }
2315: return(1);
2316: }
2317:
1.75 daniel 2318: /**
2319: * xmlSwitchEncoding:
2320: * @ctxt: the parser context
1.124 daniel 2321: * @enc: the encoding value (number)
1.75 daniel 2322: *
2323: * change the input functions when discovering the character encoding
2324: * of a given entity.
1.193 daniel 2325: *
2326: * Returns 0 in case of success, -1 otherwise
1.75 daniel 2327: */
1.193 daniel 2328: int
1.75 daniel 2329: xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
2330: {
1.156 daniel 2331: xmlCharEncodingHandlerPtr handler;
2332:
1.193 daniel 2333: switch (enc) {
2334: case XML_CHAR_ENCODING_ERROR:
2335: ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
2336: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2337: ctxt->sax->error(ctxt->userData, "encoding unknown\n");
2338: ctxt->wellFormed = 0;
2339: ctxt->disableSAX = 1;
2340: break;
2341: case XML_CHAR_ENCODING_NONE:
2342: /* let's assume it's UTF-8 without the XML decl */
1.198 daniel 2343: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2344: return(0);
2345: case XML_CHAR_ENCODING_UTF8:
2346: /* default encoding, no conversion should be needed */
1.198 daniel 2347: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2348: return(0);
2349: default:
2350: break;
2351: }
1.156 daniel 2352: handler = xmlGetCharEncodingHandler(enc);
1.193 daniel 2353: if (handler == NULL) {
2354: /*
2355: * Default handlers.
2356: */
2357: switch (enc) {
2358: case XML_CHAR_ENCODING_ERROR:
2359: ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
2360: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2361: ctxt->sax->error(ctxt->userData, "encoding unknown\n");
2362: ctxt->wellFormed = 0;
2363: ctxt->disableSAX = 1;
1.198 daniel 2364: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2365: break;
2366: case XML_CHAR_ENCODING_NONE:
2367: /* let's assume it's UTF-8 without the XML decl */
1.198 daniel 2368: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2369: return(0);
2370: case XML_CHAR_ENCODING_UTF8:
2371: /* default encoding, no conversion should be needed */
1.198 daniel 2372: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2373: return(0);
2374: case XML_CHAR_ENCODING_UTF16LE:
2375: break;
2376: case XML_CHAR_ENCODING_UTF16BE:
2377: break;
2378: case XML_CHAR_ENCODING_UCS4LE:
2379: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2380: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2381: ctxt->sax->error(ctxt->userData,
2382: "char encoding USC4 little endian not supported\n");
2383: break;
2384: case XML_CHAR_ENCODING_UCS4BE:
2385: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2386: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2387: ctxt->sax->error(ctxt->userData,
2388: "char encoding USC4 big endian not supported\n");
2389: break;
2390: case XML_CHAR_ENCODING_EBCDIC:
2391: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2392: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2393: ctxt->sax->error(ctxt->userData,
2394: "char encoding EBCDIC not supported\n");
2395: break;
2396: case XML_CHAR_ENCODING_UCS4_2143:
2397: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2398: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2399: ctxt->sax->error(ctxt->userData,
2400: "char encoding UCS4 2143 not supported\n");
2401: break;
2402: case XML_CHAR_ENCODING_UCS4_3412:
2403: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2404: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2405: ctxt->sax->error(ctxt->userData,
2406: "char encoding UCS4 3412 not supported\n");
2407: break;
2408: case XML_CHAR_ENCODING_UCS2:
2409: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2410: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2411: ctxt->sax->error(ctxt->userData,
2412: "char encoding UCS2 not supported\n");
2413: break;
2414: case XML_CHAR_ENCODING_8859_1:
2415: case XML_CHAR_ENCODING_8859_2:
2416: case XML_CHAR_ENCODING_8859_3:
2417: case XML_CHAR_ENCODING_8859_4:
2418: case XML_CHAR_ENCODING_8859_5:
2419: case XML_CHAR_ENCODING_8859_6:
2420: case XML_CHAR_ENCODING_8859_7:
2421: case XML_CHAR_ENCODING_8859_8:
2422: case XML_CHAR_ENCODING_8859_9:
1.195 daniel 2423: /*
2424: * Keep the internal content in the document encoding
2425: */
2426: if ((ctxt->inputNr == 1) &&
2427: (ctxt->encoding == NULL) &&
2428: (ctxt->input->encoding != NULL)) {
2429: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
2430: }
1.198 daniel 2431: ctxt->charset = enc;
1.195 daniel 2432: return(0);
1.193 daniel 2433: case XML_CHAR_ENCODING_2022_JP:
2434: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2435: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2436: ctxt->sax->error(ctxt->userData,
2437: "char encoding ISO-2022-JPnot supported\n");
2438: break;
2439: case XML_CHAR_ENCODING_SHIFT_JIS:
2440: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2441: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2442: ctxt->sax->error(ctxt->userData,
2443: "char encoding Shift_JIS not supported\n");
2444: break;
2445: case XML_CHAR_ENCODING_EUC_JP:
2446: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2447: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2448: ctxt->sax->error(ctxt->userData,
2449: "char encoding EUC-JPnot supported\n");
2450: break;
2451: }
2452: }
2453: if (handler == NULL)
2454: return(-1);
1.198 daniel 2455: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2456: return(xmlSwitchToEncoding(ctxt, handler));
2457: }
2458:
2459: /**
2460: * xmlSwitchToEncoding:
2461: * @ctxt: the parser context
2462: * @handler: the encoding handler
2463: *
2464: * change the input functions when discovering the character encoding
2465: * of a given entity.
2466: *
2467: * Returns 0 in case of success, -1 otherwise
2468: */
2469: int
2470: xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
2471: {
1.194 daniel 2472: int nbchars;
2473:
1.156 daniel 2474: if (handler != NULL) {
2475: if (ctxt->input != NULL) {
2476: if (ctxt->input->buf != NULL) {
2477: if (ctxt->input->buf->encoder != NULL) {
1.193 daniel 2478: if (ctxt->input->buf->encoder == handler)
2479: return(0);
1.197 daniel 2480: /*
2481: * Note: this is a bit dangerous, but that's what it
2482: * takes to use nearly compatible signature for different
2483: * encodings.
2484: */
2485: xmlCharEncCloseFunc(ctxt->input->buf->encoder);
2486: ctxt->input->buf->encoder = handler;
2487: return(0);
1.156 daniel 2488: }
2489: ctxt->input->buf->encoder = handler;
2490:
2491: /*
1.194 daniel 2492: * Is there already some content down the pipe to convert ?
1.156 daniel 2493: */
2494: if ((ctxt->input->buf->buffer != NULL) &&
2495: (ctxt->input->buf->buffer->use > 0)) {
2496: int processed;
2497:
2498: /*
2499: * Specific handling of the Byte Order Mark for
2500: * UTF-16
2501: */
1.195 daniel 2502: if ((handler->name != NULL) &&
2503: (!strcmp(handler->name, "UTF-16LE")) &&
1.156 daniel 2504: (ctxt->input->cur[0] == 0xFF) &&
2505: (ctxt->input->cur[1] == 0xFE)) {
1.194 daniel 2506: ctxt->input->cur += 2;
1.156 daniel 2507: }
1.195 daniel 2508: if ((handler->name != NULL) &&
2509: (!strcmp(handler->name, "UTF-16BE")) &&
1.156 daniel 2510: (ctxt->input->cur[0] == 0xFE) &&
2511: (ctxt->input->cur[1] == 0xFF)) {
1.194 daniel 2512: ctxt->input->cur += 2;
1.156 daniel 2513: }
2514:
2515: /*
1.194 daniel 2516: * Shring the current input buffer.
2517: * Move it as the raw buffer and create a new input buffer
1.156 daniel 2518: */
2519: processed = ctxt->input->cur - ctxt->input->base;
1.194 daniel 2520: xmlBufferShrink(ctxt->input->buf->buffer, processed);
2521: ctxt->input->buf->raw = ctxt->input->buf->buffer;
2522: ctxt->input->buf->buffer = xmlBufferCreate();
2523:
2524: /*
1.197 daniel 2525: * convert just enough to get
2526: * '<?xml version="1.0" encoding="xxx"?>'
2527: * parsed with the autodetected encoding
2528: * into the parser reading buffer.
1.194 daniel 2529: */
1.197 daniel 2530: nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
2531: ctxt->input->buf->buffer,
2532: ctxt->input->buf->raw);
1.194 daniel 2533: if (nbchars < 0) {
2534: fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
2535: return(-1);
1.156 daniel 2536: }
1.194 daniel 2537: ctxt->input->base =
2538: ctxt->input->cur = ctxt->input->buf->buffer->content;
1.156 daniel 2539: }
1.193 daniel 2540: return(0);
1.156 daniel 2541: } else {
2542: if (ctxt->input->length == 0) {
2543: /*
2544: * When parsing a static memory array one must know the
2545: * size to be able to convert the buffer.
2546: */
2547: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2548: ctxt->sax->error(ctxt->userData,
2549: "xmlSwitchEncoding : no input\n");
1.193 daniel 2550: return(-1);
1.156 daniel 2551: } else {
1.194 daniel 2552: int processed;
2553:
2554: /*
2555: * Shring the current input buffer.
2556: * Move it as the raw buffer and create a new input buffer
2557: */
2558: processed = ctxt->input->cur - ctxt->input->base;
2559: ctxt->input->buf->raw = xmlBufferCreate();
2560: xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
2561: ctxt->input->length - processed);
2562: ctxt->input->buf->buffer = xmlBufferCreate();
1.156 daniel 2563:
2564: /*
1.194 daniel 2565: * convert as much as possible of the raw input
2566: * to the parser reading buffer.
2567: */
2568: nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
2569: ctxt->input->buf->buffer,
2570: ctxt->input->buf->raw);
2571: if (nbchars < 0) {
2572: fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
2573: return(-1);
1.156 daniel 2574: }
1.194 daniel 2575:
1.156 daniel 2576: /*
2577: * Conversion succeeded, get rid of the old buffer
2578: */
2579: if ((ctxt->input->free != NULL) &&
2580: (ctxt->input->base != NULL))
2581: ctxt->input->free((xmlChar *) ctxt->input->base);
1.194 daniel 2582: ctxt->input->base =
2583: ctxt->input->cur = ctxt->input->buf->buffer->content;
1.156 daniel 2584: }
2585: }
2586: } else {
2587: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2588: ctxt->sax->error(ctxt->userData,
2589: "xmlSwitchEncoding : no input\n");
1.193 daniel 2590: return(-1);
1.156 daniel 2591: }
1.195 daniel 2592: /*
2593: * The parsing is now done in UTF8 natively
2594: */
1.198 daniel 2595: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2596: } else
2597: return(-1);
2598: return(0);
1.156 daniel 2599:
1.75 daniel 2600: }
2601:
2602: /************************************************************************
2603: * *
1.123 daniel 2604: * Commodity functions to handle xmlChars *
1.28 daniel 2605: * *
2606: ************************************************************************/
2607:
1.50 daniel 2608: /**
2609: * xmlStrndup:
1.123 daniel 2610: * @cur: the input xmlChar *
1.50 daniel 2611: * @len: the len of @cur
2612: *
1.123 daniel 2613: * a strndup for array of xmlChar's
1.68 daniel 2614: *
1.123 daniel 2615: * Returns a new xmlChar * or NULL
1.1 veillard 2616: */
1.123 daniel 2617: xmlChar *
2618: xmlStrndup(const xmlChar *cur, int len) {
1.135 daniel 2619: xmlChar *ret;
2620:
2621: if ((cur == NULL) || (len < 0)) return(NULL);
2622: ret = xmlMalloc((len + 1) * sizeof(xmlChar));
1.1 veillard 2623: if (ret == NULL) {
1.86 daniel 2624: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 2625: (len + 1) * (long)sizeof(xmlChar));
1.1 veillard 2626: return(NULL);
2627: }
1.123 daniel 2628: memcpy(ret, cur, len * sizeof(xmlChar));
1.1 veillard 2629: ret[len] = 0;
2630: return(ret);
2631: }
2632:
1.50 daniel 2633: /**
2634: * xmlStrdup:
1.123 daniel 2635: * @cur: the input xmlChar *
1.50 daniel 2636: *
1.152 daniel 2637: * a strdup for array of xmlChar's. Since they are supposed to be
2638: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2639: * a termination mark of '0'.
1.68 daniel 2640: *
1.123 daniel 2641: * Returns a new xmlChar * or NULL
1.1 veillard 2642: */
1.123 daniel 2643: xmlChar *
2644: xmlStrdup(const xmlChar *cur) {
2645: const xmlChar *p = cur;
1.1 veillard 2646:
1.135 daniel 2647: if (cur == NULL) return(NULL);
1.152 daniel 2648: while (*p != 0) p++;
1.1 veillard 2649: return(xmlStrndup(cur, p - cur));
2650: }
2651:
1.50 daniel 2652: /**
2653: * xmlCharStrndup:
2654: * @cur: the input char *
2655: * @len: the len of @cur
2656: *
1.123 daniel 2657: * a strndup for char's to xmlChar's
1.68 daniel 2658: *
1.123 daniel 2659: * Returns a new xmlChar * or NULL
1.45 daniel 2660: */
2661:
1.123 daniel 2662: xmlChar *
1.55 daniel 2663: xmlCharStrndup(const char *cur, int len) {
1.45 daniel 2664: int i;
1.135 daniel 2665: xmlChar *ret;
2666:
2667: if ((cur == NULL) || (len < 0)) return(NULL);
2668: ret = xmlMalloc((len + 1) * sizeof(xmlChar));
1.45 daniel 2669: if (ret == NULL) {
1.86 daniel 2670: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 2671: (len + 1) * (long)sizeof(xmlChar));
1.45 daniel 2672: return(NULL);
2673: }
2674: for (i = 0;i < len;i++)
1.123 daniel 2675: ret[i] = (xmlChar) cur[i];
1.45 daniel 2676: ret[len] = 0;
2677: return(ret);
2678: }
2679:
1.50 daniel 2680: /**
2681: * xmlCharStrdup:
2682: * @cur: the input char *
2683: * @len: the len of @cur
2684: *
1.123 daniel 2685: * a strdup for char's to xmlChar's
1.68 daniel 2686: *
1.123 daniel 2687: * Returns a new xmlChar * or NULL
1.45 daniel 2688: */
2689:
1.123 daniel 2690: xmlChar *
1.55 daniel 2691: xmlCharStrdup(const char *cur) {
1.45 daniel 2692: const char *p = cur;
2693:
1.135 daniel 2694: if (cur == NULL) return(NULL);
1.45 daniel 2695: while (*p != '\0') p++;
2696: return(xmlCharStrndup(cur, p - cur));
2697: }
2698:
1.50 daniel 2699: /**
2700: * xmlStrcmp:
1.123 daniel 2701: * @str1: the first xmlChar *
2702: * @str2: the second xmlChar *
1.50 daniel 2703: *
1.123 daniel 2704: * a strcmp for xmlChar's
1.68 daniel 2705: *
2706: * Returns the integer result of the comparison
1.14 veillard 2707: */
2708:
1.55 daniel 2709: int
1.123 daniel 2710: xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1.14 veillard 2711: register int tmp;
2712:
1.135 daniel 2713: if ((str1 == NULL) && (str2 == NULL)) return(0);
2714: if (str1 == NULL) return(-1);
2715: if (str2 == NULL) return(1);
1.14 veillard 2716: do {
2717: tmp = *str1++ - *str2++;
2718: if (tmp != 0) return(tmp);
2719: } while ((*str1 != 0) && (*str2 != 0));
2720: return (*str1 - *str2);
2721: }
2722:
1.50 daniel 2723: /**
2724: * xmlStrncmp:
1.123 daniel 2725: * @str1: the first xmlChar *
2726: * @str2: the second xmlChar *
1.50 daniel 2727: * @len: the max comparison length
2728: *
1.123 daniel 2729: * a strncmp for xmlChar's
1.68 daniel 2730: *
2731: * Returns the integer result of the comparison
1.14 veillard 2732: */
2733:
1.55 daniel 2734: int
1.123 daniel 2735: xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1.14 veillard 2736: register int tmp;
2737:
2738: if (len <= 0) return(0);
1.135 daniel 2739: if ((str1 == NULL) && (str2 == NULL)) return(0);
2740: if (str1 == NULL) return(-1);
2741: if (str2 == NULL) return(1);
1.14 veillard 2742: do {
2743: tmp = *str1++ - *str2++;
2744: if (tmp != 0) return(tmp);
2745: len--;
2746: if (len <= 0) return(0);
2747: } while ((*str1 != 0) && (*str2 != 0));
2748: return (*str1 - *str2);
2749: }
2750:
1.50 daniel 2751: /**
2752: * xmlStrchr:
1.123 daniel 2753: * @str: the xmlChar * array
2754: * @val: the xmlChar to search
1.50 daniel 2755: *
1.123 daniel 2756: * a strchr for xmlChar's
1.68 daniel 2757: *
1.123 daniel 2758: * Returns the xmlChar * for the first occurence or NULL.
1.14 veillard 2759: */
2760:
1.123 daniel 2761: const xmlChar *
2762: xmlStrchr(const xmlChar *str, xmlChar val) {
1.135 daniel 2763: if (str == NULL) return(NULL);
1.14 veillard 2764: while (*str != 0) {
1.123 daniel 2765: if (*str == val) return((xmlChar *) str);
1.14 veillard 2766: str++;
2767: }
2768: return(NULL);
1.89 daniel 2769: }
2770:
2771: /**
2772: * xmlStrstr:
1.123 daniel 2773: * @str: the xmlChar * array (haystack)
2774: * @val: the xmlChar to search (needle)
1.89 daniel 2775: *
1.123 daniel 2776: * a strstr for xmlChar's
1.89 daniel 2777: *
1.123 daniel 2778: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 2779: */
2780:
1.123 daniel 2781: const xmlChar *
2782: xmlStrstr(const xmlChar *str, xmlChar *val) {
1.89 daniel 2783: int n;
2784:
2785: if (str == NULL) return(NULL);
2786: if (val == NULL) return(NULL);
2787: n = xmlStrlen(val);
2788:
2789: if (n == 0) return(str);
2790: while (*str != 0) {
2791: if (*str == *val) {
1.123 daniel 2792: if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1.89 daniel 2793: }
2794: str++;
2795: }
2796: return(NULL);
2797: }
2798:
2799: /**
2800: * xmlStrsub:
1.123 daniel 2801: * @str: the xmlChar * array (haystack)
1.89 daniel 2802: * @start: the index of the first char (zero based)
2803: * @len: the length of the substring
2804: *
2805: * Extract a substring of a given string
2806: *
1.123 daniel 2807: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 2808: */
2809:
1.123 daniel 2810: xmlChar *
2811: xmlStrsub(const xmlChar *str, int start, int len) {
1.89 daniel 2812: int i;
2813:
2814: if (str == NULL) return(NULL);
2815: if (start < 0) return(NULL);
1.90 daniel 2816: if (len < 0) return(NULL);
1.89 daniel 2817:
2818: for (i = 0;i < start;i++) {
2819: if (*str == 0) return(NULL);
2820: str++;
2821: }
2822: if (*str == 0) return(NULL);
2823: return(xmlStrndup(str, len));
1.14 veillard 2824: }
1.28 daniel 2825:
1.50 daniel 2826: /**
2827: * xmlStrlen:
1.123 daniel 2828: * @str: the xmlChar * array
1.50 daniel 2829: *
1.127 daniel 2830: * length of a xmlChar's string
1.68 daniel 2831: *
1.123 daniel 2832: * Returns the number of xmlChar contained in the ARRAY.
1.45 daniel 2833: */
2834:
1.55 daniel 2835: int
1.123 daniel 2836: xmlStrlen(const xmlChar *str) {
1.45 daniel 2837: int len = 0;
2838:
2839: if (str == NULL) return(0);
2840: while (*str != 0) {
2841: str++;
2842: len++;
2843: }
2844: return(len);
2845: }
2846:
1.50 daniel 2847: /**
2848: * xmlStrncat:
1.123 daniel 2849: * @cur: the original xmlChar * array
2850: * @add: the xmlChar * array added
1.50 daniel 2851: * @len: the length of @add
2852: *
1.123 daniel 2853: * a strncat for array of xmlChar's
1.68 daniel 2854: *
1.123 daniel 2855: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 2856: */
2857:
1.123 daniel 2858: xmlChar *
2859: xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1.45 daniel 2860: int size;
1.123 daniel 2861: xmlChar *ret;
1.45 daniel 2862:
2863: if ((add == NULL) || (len == 0))
2864: return(cur);
2865: if (cur == NULL)
2866: return(xmlStrndup(add, len));
2867:
2868: size = xmlStrlen(cur);
1.123 daniel 2869: ret = xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1.45 daniel 2870: if (ret == NULL) {
1.86 daniel 2871: fprintf(stderr, "xmlStrncat: realloc of %ld byte failed\n",
1.123 daniel 2872: (size + len + 1) * (long)sizeof(xmlChar));
1.45 daniel 2873: return(cur);
2874: }
1.123 daniel 2875: memcpy(&ret[size], add, len * sizeof(xmlChar));
1.45 daniel 2876: ret[size + len] = 0;
2877: return(ret);
2878: }
2879:
1.50 daniel 2880: /**
2881: * xmlStrcat:
1.123 daniel 2882: * @cur: the original xmlChar * array
2883: * @add: the xmlChar * array added
1.50 daniel 2884: *
1.152 daniel 2885: * a strcat for array of xmlChar's. Since they are supposed to be
2886: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2887: * a termination mark of '0'.
1.68 daniel 2888: *
1.123 daniel 2889: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 2890: */
1.123 daniel 2891: xmlChar *
2892: xmlStrcat(xmlChar *cur, const xmlChar *add) {
2893: const xmlChar *p = add;
1.45 daniel 2894:
2895: if (add == NULL) return(cur);
2896: if (cur == NULL)
2897: return(xmlStrdup(add));
2898:
1.152 daniel 2899: while (*p != 0) p++;
1.45 daniel 2900: return(xmlStrncat(cur, add, p - add));
2901: }
2902:
2903: /************************************************************************
2904: * *
2905: * Commodity functions, cleanup needed ? *
2906: * *
2907: ************************************************************************/
2908:
1.50 daniel 2909: /**
2910: * areBlanks:
2911: * @ctxt: an XML parser context
1.123 daniel 2912: * @str: a xmlChar *
1.50 daniel 2913: * @len: the size of @str
2914: *
1.45 daniel 2915: * Is this a sequence of blank chars that one can ignore ?
1.50 daniel 2916: *
1.68 daniel 2917: * Returns 1 if ignorable 0 otherwise.
1.45 daniel 2918: */
2919:
1.123 daniel 2920: static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1.104 daniel 2921: int i, ret;
1.45 daniel 2922: xmlNodePtr lastChild;
2923:
1.176 daniel 2924: /*
2925: * Check for xml:space value.
2926: */
2927: if (*(ctxt->space) == 1)
2928: return(0);
2929:
2930: /*
2931: * Check that the string is made of blanks
2932: */
1.45 daniel 2933: for (i = 0;i < len;i++)
2934: if (!(IS_BLANK(str[i]))) return(0);
2935:
1.176 daniel 2936: /*
2937: * Look if the element is mixed content in the Dtd if available
2938: */
1.104 daniel 2939: if (ctxt->myDoc != NULL) {
2940: ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2941: if (ret == 0) return(1);
2942: if (ret == 1) return(0);
2943: }
1.176 daniel 2944:
1.104 daniel 2945: /*
1.176 daniel 2946: * Otherwise, heuristic :-\
1.104 daniel 2947: */
1.179 daniel 2948: if (ctxt->keepBlanks)
2949: return(0);
2950: if (RAW != '<') return(0);
2951: if (ctxt->node == NULL) return(0);
2952: if ((ctxt->node->children == NULL) &&
2953: (RAW == '<') && (NXT(1) == '/')) return(0);
2954:
1.45 daniel 2955: lastChild = xmlGetLastChild(ctxt->node);
2956: if (lastChild == NULL) {
2957: if (ctxt->node->content != NULL) return(0);
2958: } else if (xmlNodeIsText(lastChild))
2959: return(0);
1.157 daniel 2960: else if ((ctxt->node->children != NULL) &&
2961: (xmlNodeIsText(ctxt->node->children)))
1.104 daniel 2962: return(0);
1.45 daniel 2963: return(1);
2964: }
2965:
1.50 daniel 2966: /**
2967: * xmlHandleEntity:
2968: * @ctxt: an XML parser context
2969: * @entity: an XML entity pointer.
2970: *
2971: * Default handling of defined entities, when should we define a new input
1.45 daniel 2972: * stream ? When do we just handle that as a set of chars ?
1.99 daniel 2973: *
2974: * OBSOLETE: to be removed at some point.
1.45 daniel 2975: */
2976:
1.55 daniel 2977: void
2978: xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 2979: int len;
1.50 daniel 2980: xmlParserInputPtr input;
1.45 daniel 2981:
2982: if (entity->content == NULL) {
1.123 daniel 2983: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 2984: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2985: ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
1.45 daniel 2986: entity->name);
1.59 daniel 2987: ctxt->wellFormed = 0;
1.180 daniel 2988: ctxt->disableSAX = 1;
1.45 daniel 2989: return;
2990: }
2991: len = xmlStrlen(entity->content);
2992: if (len <= 2) goto handle_as_char;
2993:
2994: /*
2995: * Redefine its content as an input stream.
2996: */
1.50 daniel 2997: input = xmlNewEntityInputStream(ctxt, entity);
2998: xmlPushInput(ctxt, input);
1.45 daniel 2999: return;
3000:
3001: handle_as_char:
3002: /*
3003: * Just handle the content as a set of chars.
3004: */
1.171 daniel 3005: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3006: (ctxt->sax->characters != NULL))
1.74 daniel 3007: ctxt->sax->characters(ctxt->userData, entity->content, len);
1.45 daniel 3008:
3009: }
3010:
3011: /*
3012: * Forward declarations for recursive behaviour.
3013: */
1.77 daniel 3014: void xmlParsePEReference(xmlParserCtxtPtr ctxt);
3015: void xmlParseReference(xmlParserCtxtPtr ctxt);
1.45 daniel 3016:
1.28 daniel 3017: /************************************************************************
3018: * *
3019: * Extra stuff for namespace support *
3020: * Relates to http://www.w3.org/TR/WD-xml-names *
3021: * *
3022: ************************************************************************/
3023:
1.50 daniel 3024: /**
3025: * xmlNamespaceParseNCName:
3026: * @ctxt: an XML parser context
3027: *
3028: * parse an XML namespace name.
1.28 daniel 3029: *
3030: * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
3031: *
3032: * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3033: * CombiningChar | Extender
1.68 daniel 3034: *
3035: * Returns the namespace name or NULL
1.28 daniel 3036: */
3037:
1.123 daniel 3038: xmlChar *
1.55 daniel 3039: xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
1.152 daniel 3040: xmlChar buf[XML_MAX_NAMELEN + 5];
3041: int len = 0, l;
3042: int cur = CUR_CHAR(l);
1.28 daniel 3043:
1.156 daniel 3044: /* load first the value of the char !!! */
1.152 daniel 3045: if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
1.28 daniel 3046:
1.152 daniel 3047: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
3048: (cur == '.') || (cur == '-') ||
3049: (cur == '_') ||
3050: (IS_COMBINING(cur)) ||
3051: (IS_EXTENDER(cur))) {
3052: COPY_BUF(l,buf,len,cur);
3053: NEXTL(l);
3054: cur = CUR_CHAR(l);
1.91 daniel 3055: if (len >= XML_MAX_NAMELEN) {
3056: fprintf(stderr,
3057: "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
1.152 daniel 3058: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
3059: (cur == '.') || (cur == '-') ||
3060: (cur == '_') ||
3061: (IS_COMBINING(cur)) ||
3062: (IS_EXTENDER(cur))) {
3063: NEXTL(l);
3064: cur = CUR_CHAR(l);
3065: }
1.91 daniel 3066: break;
3067: }
3068: }
3069: return(xmlStrndup(buf, len));
1.28 daniel 3070: }
3071:
1.50 daniel 3072: /**
3073: * xmlNamespaceParseQName:
3074: * @ctxt: an XML parser context
1.123 daniel 3075: * @prefix: a xmlChar **
1.50 daniel 3076: *
3077: * parse an XML qualified name
1.28 daniel 3078: *
3079: * [NS 5] QName ::= (Prefix ':')? LocalPart
3080: *
3081: * [NS 6] Prefix ::= NCName
3082: *
3083: * [NS 7] LocalPart ::= NCName
1.68 daniel 3084: *
1.127 daniel 3085: * Returns the local part, and prefix is updated
1.50 daniel 3086: * to get the Prefix if any.
1.28 daniel 3087: */
3088:
1.123 daniel 3089: xmlChar *
3090: xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, xmlChar **prefix) {
3091: xmlChar *ret = NULL;
1.28 daniel 3092:
3093: *prefix = NULL;
3094: ret = xmlNamespaceParseNCName(ctxt);
1.152 daniel 3095: if (RAW == ':') {
1.28 daniel 3096: *prefix = ret;
1.40 daniel 3097: NEXT;
1.28 daniel 3098: ret = xmlNamespaceParseNCName(ctxt);
3099: }
3100:
3101: return(ret);
3102: }
3103:
1.50 daniel 3104: /**
1.72 daniel 3105: * xmlSplitQName:
1.162 daniel 3106: * @ctxt: an XML parser context
1.72 daniel 3107: * @name: an XML parser context
1.123 daniel 3108: * @prefix: a xmlChar **
1.72 daniel 3109: *
3110: * parse an XML qualified name string
3111: *
3112: * [NS 5] QName ::= (Prefix ':')? LocalPart
3113: *
3114: * [NS 6] Prefix ::= NCName
3115: *
3116: * [NS 7] LocalPart ::= NCName
3117: *
1.127 daniel 3118: * Returns the local part, and prefix is updated
1.72 daniel 3119: * to get the Prefix if any.
3120: */
3121:
1.123 daniel 3122: xmlChar *
1.162 daniel 3123: xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
3124: xmlChar buf[XML_MAX_NAMELEN + 5];
3125: int len = 0;
1.123 daniel 3126: xmlChar *ret = NULL;
3127: const xmlChar *cur = name;
1.162 daniel 3128: int c,l;
1.72 daniel 3129:
3130: *prefix = NULL;
1.113 daniel 3131:
3132: /* xml: prefix is not really a namespace */
3133: if ((cur[0] == 'x') && (cur[1] == 'm') &&
3134: (cur[2] == 'l') && (cur[3] == ':'))
3135: return(xmlStrdup(name));
3136:
1.162 daniel 3137: /* nasty but valid */
3138: if (cur[0] == ':')
3139: return(xmlStrdup(name));
3140:
3141: c = CUR_SCHAR(cur, l);
3142: if (!IS_LETTER(c) && (c != '_')) return(NULL);
1.72 daniel 3143:
1.162 daniel 3144: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3145: (c == '.') || (c == '-') ||
3146: (c == '_') ||
3147: (IS_COMBINING(c)) ||
3148: (IS_EXTENDER(c))) {
3149: COPY_BUF(l,buf,len,c);
3150: cur += l;
3151: c = CUR_SCHAR(cur, l);
3152: }
1.72 daniel 3153:
1.162 daniel 3154: ret = xmlStrndup(buf, len);
1.72 daniel 3155:
1.162 daniel 3156: if (c == ':') {
3157: cur += l;
1.163 daniel 3158: c = CUR_SCHAR(cur, l);
1.162 daniel 3159: if (!IS_LETTER(c) && (c != '_')) return(ret);
1.72 daniel 3160: *prefix = ret;
1.162 daniel 3161: len = 0;
1.72 daniel 3162:
1.162 daniel 3163: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3164: (c == '.') || (c == '-') ||
3165: (c == '_') ||
3166: (IS_COMBINING(c)) ||
3167: (IS_EXTENDER(c))) {
3168: COPY_BUF(l,buf,len,c);
3169: cur += l;
3170: c = CUR_SCHAR(cur, l);
3171: }
1.72 daniel 3172:
1.162 daniel 3173: ret = xmlStrndup(buf, len);
1.72 daniel 3174: }
3175:
3176: return(ret);
3177: }
3178: /**
1.50 daniel 3179: * xmlNamespaceParseNSDef:
3180: * @ctxt: an XML parser context
3181: *
3182: * parse a namespace prefix declaration
1.28 daniel 3183: *
3184: * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
3185: *
3186: * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
1.68 daniel 3187: *
3188: * Returns the namespace name
1.28 daniel 3189: */
3190:
1.123 daniel 3191: xmlChar *
1.55 daniel 3192: xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
1.123 daniel 3193: xmlChar *name = NULL;
1.28 daniel 3194:
1.152 daniel 3195: if ((RAW == 'x') && (NXT(1) == 'm') &&
1.40 daniel 3196: (NXT(2) == 'l') && (NXT(3) == 'n') &&
3197: (NXT(4) == 's')) {
3198: SKIP(5);
1.152 daniel 3199: if (RAW == ':') {
1.40 daniel 3200: NEXT;
1.28 daniel 3201: name = xmlNamespaceParseNCName(ctxt);
3202: }
3203: }
1.39 daniel 3204: return(name);
1.28 daniel 3205: }
3206:
1.50 daniel 3207: /**
3208: * xmlParseQuotedString:
3209: * @ctxt: an XML parser context
3210: *
1.45 daniel 3211: * [OLD] Parse and return a string between quotes or doublequotes
1.110 daniel 3212: * To be removed at next drop of binary compatibility
1.68 daniel 3213: *
3214: * Returns the string parser or NULL.
1.45 daniel 3215: */
1.123 daniel 3216: xmlChar *
1.55 daniel 3217: xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
1.135 daniel 3218: xmlChar *buf = NULL;
1.152 daniel 3219: int len = 0,l;
1.140 daniel 3220: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3221: int c;
1.45 daniel 3222:
1.135 daniel 3223: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3224: if (buf == NULL) {
3225: fprintf(stderr, "malloc of %d byte failed\n", size);
3226: return(NULL);
3227: }
1.152 daniel 3228: if (RAW == '"') {
1.45 daniel 3229: NEXT;
1.152 daniel 3230: c = CUR_CHAR(l);
1.135 daniel 3231: while (IS_CHAR(c) && (c != '"')) {
1.152 daniel 3232: if (len + 5 >= size) {
1.135 daniel 3233: size *= 2;
3234: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3235: if (buf == NULL) {
3236: fprintf(stderr, "realloc of %d byte failed\n", size);
3237: return(NULL);
3238: }
3239: }
1.152 daniel 3240: COPY_BUF(l,buf,len,c);
3241: NEXTL(l);
3242: c = CUR_CHAR(l);
1.135 daniel 3243: }
3244: if (c != '"') {
1.123 daniel 3245: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 3246: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3247: ctxt->sax->error(ctxt->userData,
3248: "String not closed \"%.50s\"\n", buf);
1.59 daniel 3249: ctxt->wellFormed = 0;
1.180 daniel 3250: ctxt->disableSAX = 1;
1.55 daniel 3251: } else {
1.45 daniel 3252: NEXT;
3253: }
1.152 daniel 3254: } else if (RAW == '\''){
1.45 daniel 3255: NEXT;
1.135 daniel 3256: c = CUR;
3257: while (IS_CHAR(c) && (c != '\'')) {
3258: if (len + 1 >= size) {
3259: size *= 2;
3260: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3261: if (buf == NULL) {
3262: fprintf(stderr, "realloc of %d byte failed\n", size);
3263: return(NULL);
3264: }
3265: }
3266: buf[len++] = c;
3267: NEXT;
3268: c = CUR;
3269: }
1.152 daniel 3270: if (RAW != '\'') {
1.123 daniel 3271: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 3272: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3273: ctxt->sax->error(ctxt->userData,
3274: "String not closed \"%.50s\"\n", buf);
1.59 daniel 3275: ctxt->wellFormed = 0;
1.180 daniel 3276: ctxt->disableSAX = 1;
1.55 daniel 3277: } else {
1.45 daniel 3278: NEXT;
3279: }
3280: }
1.135 daniel 3281: return(buf);
1.45 daniel 3282: }
3283:
1.50 daniel 3284: /**
3285: * xmlParseNamespace:
3286: * @ctxt: an XML parser context
3287: *
1.45 daniel 3288: * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
3289: *
3290: * This is what the older xml-name Working Draft specified, a bunch of
3291: * other stuff may still rely on it, so support is still here as
1.127 daniel 3292: * if it was declared on the root of the Tree:-(
1.110 daniel 3293: *
3294: * To be removed at next drop of binary compatibility
1.45 daniel 3295: */
3296:
1.55 daniel 3297: void
3298: xmlParseNamespace(xmlParserCtxtPtr ctxt) {
1.123 daniel 3299: xmlChar *href = NULL;
3300: xmlChar *prefix = NULL;
1.45 daniel 3301: int garbage = 0;
3302:
3303: /*
3304: * We just skipped "namespace" or "xml:namespace"
3305: */
3306: SKIP_BLANKS;
3307:
1.153 daniel 3308: while (IS_CHAR(RAW) && (RAW != '>')) {
1.45 daniel 3309: /*
3310: * We can have "ns" or "prefix" attributes
3311: * Old encoding as 'href' or 'AS' attributes is still supported
3312: */
1.152 daniel 3313: if ((RAW == 'n') && (NXT(1) == 's')) {
1.45 daniel 3314: garbage = 0;
3315: SKIP(2);
3316: SKIP_BLANKS;
3317:
1.152 daniel 3318: if (RAW != '=') continue;
1.45 daniel 3319: NEXT;
3320: SKIP_BLANKS;
3321:
3322: href = xmlParseQuotedString(ctxt);
3323: SKIP_BLANKS;
1.152 daniel 3324: } else if ((RAW == 'h') && (NXT(1) == 'r') &&
1.45 daniel 3325: (NXT(2) == 'e') && (NXT(3) == 'f')) {
3326: garbage = 0;
3327: SKIP(4);
3328: SKIP_BLANKS;
3329:
1.152 daniel 3330: if (RAW != '=') continue;
1.45 daniel 3331: NEXT;
3332: SKIP_BLANKS;
3333:
3334: href = xmlParseQuotedString(ctxt);
3335: SKIP_BLANKS;
1.152 daniel 3336: } else if ((RAW == 'p') && (NXT(1) == 'r') &&
1.45 daniel 3337: (NXT(2) == 'e') && (NXT(3) == 'f') &&
3338: (NXT(4) == 'i') && (NXT(5) == 'x')) {
3339: garbage = 0;
3340: SKIP(6);
3341: SKIP_BLANKS;
3342:
1.152 daniel 3343: if (RAW != '=') continue;
1.45 daniel 3344: NEXT;
3345: SKIP_BLANKS;
3346:
3347: prefix = xmlParseQuotedString(ctxt);
3348: SKIP_BLANKS;
1.152 daniel 3349: } else if ((RAW == 'A') && (NXT(1) == 'S')) {
1.45 daniel 3350: garbage = 0;
3351: SKIP(2);
3352: SKIP_BLANKS;
3353:
1.152 daniel 3354: if (RAW != '=') continue;
1.45 daniel 3355: NEXT;
3356: SKIP_BLANKS;
3357:
3358: prefix = xmlParseQuotedString(ctxt);
3359: SKIP_BLANKS;
1.152 daniel 3360: } else if ((RAW == '?') && (NXT(1) == '>')) {
1.45 daniel 3361: garbage = 0;
1.91 daniel 3362: NEXT;
1.45 daniel 3363: } else {
3364: /*
3365: * Found garbage when parsing the namespace
3366: */
1.122 daniel 3367: if (!garbage) {
1.55 daniel 3368: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3369: ctxt->sax->error(ctxt->userData,
3370: "xmlParseNamespace found garbage\n");
3371: }
1.123 daniel 3372: ctxt->errNo = XML_ERR_NS_DECL_ERROR;
1.59 daniel 3373: ctxt->wellFormed = 0;
1.180 daniel 3374: ctxt->disableSAX = 1;
1.45 daniel 3375: NEXT;
3376: }
3377: }
3378:
3379: MOVETO_ENDTAG(CUR_PTR);
3380: NEXT;
3381:
3382: /*
3383: * Register the DTD.
1.72 daniel 3384: if (href != NULL)
3385: if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
1.74 daniel 3386: ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
1.45 daniel 3387: */
3388:
1.119 daniel 3389: if (prefix != NULL) xmlFree(prefix);
3390: if (href != NULL) xmlFree(href);
1.45 daniel 3391: }
3392:
1.28 daniel 3393: /************************************************************************
3394: * *
3395: * The parser itself *
3396: * Relates to http://www.w3.org/TR/REC-xml *
3397: * *
3398: ************************************************************************/
1.14 veillard 3399:
1.50 daniel 3400: /**
1.97 daniel 3401: * xmlScanName:
3402: * @ctxt: an XML parser context
3403: *
3404: * Trickery: parse an XML name but without consuming the input flow
3405: * Needed for rollback cases.
3406: *
3407: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3408: * CombiningChar | Extender
3409: *
3410: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3411: *
3412: * [6] Names ::= Name (S Name)*
3413: *
3414: * Returns the Name parsed or NULL
3415: */
3416:
1.123 daniel 3417: xmlChar *
1.97 daniel 3418: xmlScanName(xmlParserCtxtPtr ctxt) {
1.123 daniel 3419: xmlChar buf[XML_MAX_NAMELEN];
1.97 daniel 3420: int len = 0;
3421:
3422: GROW;
1.152 daniel 3423: if (!IS_LETTER(RAW) && (RAW != '_') &&
3424: (RAW != ':')) {
1.97 daniel 3425: return(NULL);
3426: }
3427:
3428: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
3429: (NXT(len) == '.') || (NXT(len) == '-') ||
3430: (NXT(len) == '_') || (NXT(len) == ':') ||
3431: (IS_COMBINING(NXT(len))) ||
3432: (IS_EXTENDER(NXT(len)))) {
3433: buf[len] = NXT(len);
3434: len++;
3435: if (len >= XML_MAX_NAMELEN) {
3436: fprintf(stderr,
3437: "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3438: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
3439: (NXT(len) == '.') || (NXT(len) == '-') ||
3440: (NXT(len) == '_') || (NXT(len) == ':') ||
3441: (IS_COMBINING(NXT(len))) ||
3442: (IS_EXTENDER(NXT(len))))
3443: len++;
3444: break;
3445: }
3446: }
3447: return(xmlStrndup(buf, len));
3448: }
3449:
3450: /**
1.50 daniel 3451: * xmlParseName:
3452: * @ctxt: an XML parser context
3453: *
3454: * parse an XML name.
1.22 daniel 3455: *
3456: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3457: * CombiningChar | Extender
3458: *
3459: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3460: *
3461: * [6] Names ::= Name (S Name)*
1.68 daniel 3462: *
3463: * Returns the Name parsed or NULL
1.1 veillard 3464: */
3465:
1.123 daniel 3466: xmlChar *
1.55 daniel 3467: xmlParseName(xmlParserCtxtPtr ctxt) {
1.160 daniel 3468: xmlChar buf[XML_MAX_NAMELEN + 5];
3469: int len = 0, l;
3470: int c;
1.1 veillard 3471:
1.91 daniel 3472: GROW;
1.160 daniel 3473: c = CUR_CHAR(l);
1.190 daniel 3474: if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3475: (!IS_LETTER(c) && (c != '_') &&
3476: (c != ':'))) {
1.91 daniel 3477: return(NULL);
3478: }
1.40 daniel 3479:
1.190 daniel 3480: while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3481: ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3482: (c == '.') || (c == '-') ||
3483: (c == '_') || (c == ':') ||
3484: (IS_COMBINING(c)) ||
3485: (IS_EXTENDER(c)))) {
1.160 daniel 3486: COPY_BUF(l,buf,len,c);
3487: NEXTL(l);
3488: c = CUR_CHAR(l);
1.91 daniel 3489: if (len >= XML_MAX_NAMELEN) {
3490: fprintf(stderr,
3491: "xmlParseName: reached XML_MAX_NAMELEN limit\n");
1.160 daniel 3492: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3493: (c == '.') || (c == '-') ||
3494: (c == '_') || (c == ':') ||
3495: (IS_COMBINING(c)) ||
3496: (IS_EXTENDER(c))) {
3497: NEXTL(l);
3498: c = CUR_CHAR(l);
1.97 daniel 3499: }
1.91 daniel 3500: break;
3501: }
3502: }
3503: return(xmlStrndup(buf, len));
1.22 daniel 3504: }
3505:
1.50 daniel 3506: /**
1.135 daniel 3507: * xmlParseStringName:
3508: * @ctxt: an XML parser context
3509: * @str: a pointer to an index in the string
3510: *
3511: * parse an XML name.
3512: *
3513: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3514: * CombiningChar | Extender
3515: *
3516: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3517: *
3518: * [6] Names ::= Name (S Name)*
3519: *
3520: * Returns the Name parsed or NULL. The str pointer
3521: * is updated to the current location in the string.
3522: */
3523:
3524: xmlChar *
3525: xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1.176 daniel 3526: xmlChar buf[XML_MAX_NAMELEN + 5];
3527: const xmlChar *cur = *str;
3528: int len = 0, l;
3529: int c;
1.135 daniel 3530:
1.176 daniel 3531: c = CUR_SCHAR(cur, l);
3532: if (!IS_LETTER(c) && (c != '_') &&
3533: (c != ':')) {
1.135 daniel 3534: return(NULL);
3535: }
3536:
1.176 daniel 3537: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3538: (c == '.') || (c == '-') ||
3539: (c == '_') || (c == ':') ||
3540: (IS_COMBINING(c)) ||
3541: (IS_EXTENDER(c))) {
3542: COPY_BUF(l,buf,len,c);
3543: cur += l;
3544: c = CUR_SCHAR(cur, l);
3545: if (len >= XML_MAX_NAMELEN) {
3546: fprintf(stderr,
3547: "xmlParseName: reached XML_MAX_NAMELEN limit\n");
3548: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3549: (c == '.') || (c == '-') ||
3550: (c == '_') || (c == ':') ||
3551: (IS_COMBINING(c)) ||
3552: (IS_EXTENDER(c))) {
3553: cur += l;
3554: c = CUR_SCHAR(cur, l);
3555: }
3556: break;
3557: }
1.135 daniel 3558: }
1.176 daniel 3559: *str = cur;
3560: return(xmlStrndup(buf, len));
1.135 daniel 3561: }
3562:
3563: /**
1.50 daniel 3564: * xmlParseNmtoken:
3565: * @ctxt: an XML parser context
3566: *
3567: * parse an XML Nmtoken.
1.22 daniel 3568: *
3569: * [7] Nmtoken ::= (NameChar)+
3570: *
3571: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1.68 daniel 3572: *
3573: * Returns the Nmtoken parsed or NULL
1.22 daniel 3574: */
3575:
1.123 daniel 3576: xmlChar *
1.55 daniel 3577: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1.123 daniel 3578: xmlChar buf[XML_MAX_NAMELEN];
1.91 daniel 3579: int len = 0;
1.160 daniel 3580: int c,l;
1.22 daniel 3581:
1.91 daniel 3582: GROW;
1.160 daniel 3583: c = CUR_CHAR(l);
3584: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3585: (c == '.') || (c == '-') ||
3586: (c == '_') || (c == ':') ||
3587: (IS_COMBINING(c)) ||
3588: (IS_EXTENDER(c))) {
3589: COPY_BUF(l,buf,len,c);
3590: NEXTL(l);
3591: c = CUR_CHAR(l);
1.91 daniel 3592: if (len >= XML_MAX_NAMELEN) {
3593: fprintf(stderr,
3594: "xmlParseNmtoken: reached XML_MAX_NAMELEN limit\n");
1.160 daniel 3595: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3596: (c == '.') || (c == '-') ||
3597: (c == '_') || (c == ':') ||
3598: (IS_COMBINING(c)) ||
3599: (IS_EXTENDER(c))) {
3600: NEXTL(l);
3601: c = CUR_CHAR(l);
3602: }
1.91 daniel 3603: break;
3604: }
3605: }
1.168 daniel 3606: if (len == 0)
3607: return(NULL);
1.91 daniel 3608: return(xmlStrndup(buf, len));
1.1 veillard 3609: }
3610:
1.50 daniel 3611: /**
3612: * xmlParseEntityValue:
3613: * @ctxt: an XML parser context
1.78 daniel 3614: * @orig: if non-NULL store a copy of the original entity value
1.50 daniel 3615: *
3616: * parse a value for ENTITY decl.
1.24 daniel 3617: *
3618: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3619: * "'" ([^%&'] | PEReference | Reference)* "'"
1.68 daniel 3620: *
1.78 daniel 3621: * Returns the EntityValue parsed with reference substitued or NULL
1.24 daniel 3622: */
3623:
1.123 daniel 3624: xmlChar *
3625: xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1.135 daniel 3626: xmlChar *buf = NULL;
3627: int len = 0;
1.140 daniel 3628: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3629: int c, l;
1.135 daniel 3630: xmlChar stop;
1.123 daniel 3631: xmlChar *ret = NULL;
1.176 daniel 3632: const xmlChar *cur = NULL;
1.98 daniel 3633: xmlParserInputPtr input;
1.24 daniel 3634:
1.152 daniel 3635: if (RAW == '"') stop = '"';
3636: else if (RAW == '\'') stop = '\'';
1.135 daniel 3637: else {
3638: ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
3639: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3640: ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
3641: ctxt->wellFormed = 0;
1.180 daniel 3642: ctxt->disableSAX = 1;
1.135 daniel 3643: return(NULL);
3644: }
3645: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3646: if (buf == NULL) {
3647: fprintf(stderr, "malloc of %d byte failed\n", size);
3648: return(NULL);
3649: }
1.94 daniel 3650:
1.135 daniel 3651: /*
3652: * The content of the entity definition is copied in a buffer.
3653: */
1.94 daniel 3654:
1.135 daniel 3655: ctxt->instate = XML_PARSER_ENTITY_VALUE;
3656: input = ctxt->input;
3657: GROW;
3658: NEXT;
1.152 daniel 3659: c = CUR_CHAR(l);
1.135 daniel 3660: /*
3661: * NOTE: 4.4.5 Included in Literal
3662: * When a parameter entity reference appears in a literal entity
3663: * value, ... a single or double quote character in the replacement
3664: * text is always treated as a normal data character and will not
3665: * terminate the literal.
3666: * In practice it means we stop the loop only when back at parsing
3667: * the initial entity and the quote is found
3668: */
3669: while (IS_CHAR(c) && ((c != stop) || (ctxt->input != input))) {
1.152 daniel 3670: if (len + 5 >= size) {
1.135 daniel 3671: size *= 2;
3672: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3673: if (buf == NULL) {
3674: fprintf(stderr, "realloc of %d byte failed\n", size);
3675: return(NULL);
1.94 daniel 3676: }
1.79 daniel 3677: }
1.152 daniel 3678: COPY_BUF(l,buf,len,c);
3679: NEXTL(l);
1.98 daniel 3680: /*
1.135 daniel 3681: * Pop-up of finished entities.
1.98 daniel 3682: */
1.152 daniel 3683: while ((RAW == 0) && (ctxt->inputNr > 1))
1.135 daniel 3684: xmlPopInput(ctxt);
1.152 daniel 3685:
3686: c = CUR_CHAR(l);
1.135 daniel 3687: if (c == 0) {
1.94 daniel 3688: GROW;
1.152 daniel 3689: c = CUR_CHAR(l);
1.79 daniel 3690: }
1.135 daniel 3691: }
3692: buf[len] = 0;
3693:
3694: /*
1.176 daniel 3695: * Raise problem w.r.t. '&' and '%' being used in non-entities
3696: * reference constructs. Note Charref will be handled in
3697: * xmlStringDecodeEntities()
3698: */
3699: cur = buf;
3700: while (*cur != 0) {
3701: if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3702: xmlChar *name;
3703: xmlChar tmp = *cur;
3704:
3705: cur++;
3706: name = xmlParseStringName(ctxt, &cur);
3707: if ((name == NULL) || (*cur != ';')) {
3708: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3709: ctxt->sax->error(ctxt->userData,
3710: "EntityValue: '%c' forbidden except for entities references\n",
3711: tmp);
3712: ctxt->wellFormed = 0;
1.180 daniel 3713: ctxt->disableSAX = 1;
1.176 daniel 3714: ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
3715: }
3716: if ((ctxt->inSubset == 1) && (tmp == '%')) {
3717: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3718: ctxt->sax->error(ctxt->userData,
3719: "EntityValue: PEReferences forbidden in internal subset\n",
3720: tmp);
3721: ctxt->wellFormed = 0;
1.180 daniel 3722: ctxt->disableSAX = 1;
1.176 daniel 3723: ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
3724: }
3725: if (name != NULL)
3726: xmlFree(name);
3727: }
3728: cur++;
3729: }
3730:
3731: /*
1.135 daniel 3732: * Then PEReference entities are substituted.
3733: */
3734: if (c != stop) {
3735: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.55 daniel 3736: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3737: ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
1.59 daniel 3738: ctxt->wellFormed = 0;
1.180 daniel 3739: ctxt->disableSAX = 1;
1.170 daniel 3740: xmlFree(buf);
1.135 daniel 3741: } else {
3742: NEXT;
3743: /*
3744: * NOTE: 4.4.7 Bypassed
3745: * When a general entity reference appears in the EntityValue in
3746: * an entity declaration, it is bypassed and left as is.
1.176 daniel 3747: * so XML_SUBSTITUTE_REF is not set here.
1.135 daniel 3748: */
3749: ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3750: 0, 0, 0);
3751: if (orig != NULL)
3752: *orig = buf;
3753: else
3754: xmlFree(buf);
1.24 daniel 3755: }
3756:
3757: return(ret);
3758: }
3759:
1.50 daniel 3760: /**
3761: * xmlParseAttValue:
3762: * @ctxt: an XML parser context
3763: *
3764: * parse a value for an attribute
1.78 daniel 3765: * Note: the parser won't do substitution of entities here, this
1.113 daniel 3766: * will be handled later in xmlStringGetNodeList
1.29 daniel 3767: *
3768: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3769: * "'" ([^<&'] | Reference)* "'"
1.68 daniel 3770: *
1.129 daniel 3771: * 3.3.3 Attribute-Value Normalization:
3772: * Before the value of an attribute is passed to the application or
3773: * checked for validity, the XML processor must normalize it as follows:
3774: * - a character reference is processed by appending the referenced
3775: * character to the attribute value
3776: * - an entity reference is processed by recursively processing the
3777: * replacement text of the entity
3778: * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3779: * appending #x20 to the normalized value, except that only a single
3780: * #x20 is appended for a "#xD#xA" sequence that is part of an external
3781: * parsed entity or the literal entity value of an internal parsed entity
3782: * - other characters are processed by appending them to the normalized value
1.130 daniel 3783: * If the declared value is not CDATA, then the XML processor must further
3784: * process the normalized attribute value by discarding any leading and
3785: * trailing space (#x20) characters, and by replacing sequences of space
3786: * (#x20) characters by a single space (#x20) character.
3787: * All attributes for which no declaration has been read should be treated
3788: * by a non-validating parser as if declared CDATA.
1.129 daniel 3789: *
3790: * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
1.29 daniel 3791: */
3792:
1.123 daniel 3793: xmlChar *
1.55 daniel 3794: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.129 daniel 3795: xmlChar limit = 0;
1.198 daniel 3796: xmlChar *buf = NULL;
3797: int len = 0;
3798: int buf_size = 0;
3799: int c, l;
1.129 daniel 3800: xmlChar *current = NULL;
3801: xmlEntityPtr ent;
3802:
1.29 daniel 3803:
1.91 daniel 3804: SHRINK;
1.151 daniel 3805: if (NXT(0) == '"') {
1.96 daniel 3806: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.129 daniel 3807: limit = '"';
1.40 daniel 3808: NEXT;
1.151 daniel 3809: } else if (NXT(0) == '\'') {
1.129 daniel 3810: limit = '\'';
1.96 daniel 3811: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.40 daniel 3812: NEXT;
1.29 daniel 3813: } else {
1.123 daniel 3814: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
1.55 daniel 3815: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3816: ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
1.59 daniel 3817: ctxt->wellFormed = 0;
1.180 daniel 3818: ctxt->disableSAX = 1;
1.129 daniel 3819: return(NULL);
1.29 daniel 3820: }
3821:
1.129 daniel 3822: /*
3823: * allocate a translation buffer.
3824: */
1.198 daniel 3825: buf_size = XML_PARSER_BUFFER_SIZE;
3826: buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
3827: if (buf == NULL) {
1.129 daniel 3828: perror("xmlParseAttValue: malloc failed");
3829: return(NULL);
3830: }
3831:
3832: /*
3833: * Ok loop until we reach one of the ending char or a size limit.
3834: */
1.198 daniel 3835: c = CUR_CHAR(l);
3836: while (((NXT(0) != limit) && (c != '<')) || (ctxt->token != 0)) {
3837: if (c == 0) break;
3838: if ((c == '&') && (NXT(1) == '#')) {
1.129 daniel 3839: int val = xmlParseCharRef(ctxt);
1.198 daniel 3840: COPY_BUF(l,buf,len,val);
3841: NEXTL(l);
3842: } else if (c == '&') {
1.129 daniel 3843: ent = xmlParseEntityRef(ctxt);
3844: if ((ent != NULL) &&
3845: (ctxt->replaceEntities != 0)) {
1.185 daniel 3846: xmlChar *rep;
3847:
1.186 daniel 3848: if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3849: rep = xmlStringDecodeEntities(ctxt, ent->content,
1.185 daniel 3850: XML_SUBSTITUTE_REF, 0, 0, 0);
1.186 daniel 3851: if (rep != NULL) {
3852: current = rep;
3853: while (*current != 0) {
1.198 daniel 3854: buf[len++] = *current++;
3855: if (len > buf_size - 10) {
3856: growBuffer(buf);
1.186 daniel 3857: }
1.185 daniel 3858: }
1.186 daniel 3859: xmlFree(rep);
1.129 daniel 3860: }
1.186 daniel 3861: } else {
3862: if (ent->content != NULL)
1.198 daniel 3863: buf[len++] = ent->content[0];
1.129 daniel 3864: }
3865: } else if (ent != NULL) {
3866: int i = xmlStrlen(ent->name);
3867: const xmlChar *cur = ent->name;
3868:
1.186 daniel 3869: /*
3870: * This may look absurd but is needed to detect
3871: * entities problems
3872: */
3873: if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3874: xmlChar *rep;
3875: rep = xmlStringDecodeEntities(ctxt, ent->content,
3876: XML_SUBSTITUTE_REF, 0, 0, 0);
3877: if (rep != NULL)
3878: xmlFree(rep);
3879: }
3880:
3881: /*
3882: * Just output the reference
3883: */
1.198 daniel 3884: buf[len++] = '&';
3885: if (len > buf_size - i - 10) {
3886: growBuffer(buf);
1.129 daniel 3887: }
3888: for (;i > 0;i--)
1.198 daniel 3889: buf[len++] = *cur++;
3890: buf[len++] = ';';
1.129 daniel 3891: }
3892: } else {
1.198 daniel 3893: if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
3894: COPY_BUF(l,buf,len,0x20);
3895: if (len > buf_size - 10) {
3896: growBuffer(buf);
1.129 daniel 3897: }
3898: } else {
1.198 daniel 3899: COPY_BUF(l,buf,len,c);
3900: if (len > buf_size - 10) {
3901: growBuffer(buf);
1.129 daniel 3902: }
3903: }
1.198 daniel 3904: NEXTL(l);
1.129 daniel 3905: }
1.198 daniel 3906: GROW;
3907: c = CUR_CHAR(l);
1.129 daniel 3908: }
1.198 daniel 3909: buf[len++] = 0;
1.152 daniel 3910: if (RAW == '<') {
1.129 daniel 3911: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3912: ctxt->sax->error(ctxt->userData,
3913: "Unescaped '<' not allowed in attributes values\n");
3914: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
3915: ctxt->wellFormed = 0;
1.180 daniel 3916: ctxt->disableSAX = 1;
1.152 daniel 3917: } else if (RAW != limit) {
1.129 daniel 3918: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3919: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
3920: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
3921: ctxt->wellFormed = 0;
1.180 daniel 3922: ctxt->disableSAX = 1;
1.129 daniel 3923: } else
3924: NEXT;
1.198 daniel 3925: return(buf);
1.29 daniel 3926: }
3927:
1.50 daniel 3928: /**
3929: * xmlParseSystemLiteral:
3930: * @ctxt: an XML parser context
3931: *
3932: * parse an XML Literal
1.21 daniel 3933: *
1.22 daniel 3934: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.68 daniel 3935: *
3936: * Returns the SystemLiteral parsed or NULL
1.21 daniel 3937: */
3938:
1.123 daniel 3939: xmlChar *
1.55 daniel 3940: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 3941: xmlChar *buf = NULL;
3942: int len = 0;
1.140 daniel 3943: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3944: int cur, l;
1.135 daniel 3945: xmlChar stop;
1.168 daniel 3946: int state = ctxt->instate;
1.21 daniel 3947:
1.91 daniel 3948: SHRINK;
1.152 daniel 3949: if (RAW == '"') {
1.40 daniel 3950: NEXT;
1.135 daniel 3951: stop = '"';
1.152 daniel 3952: } else if (RAW == '\'') {
1.40 daniel 3953: NEXT;
1.135 daniel 3954: stop = '\'';
1.21 daniel 3955: } else {
1.55 daniel 3956: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3957: ctxt->sax->error(ctxt->userData,
3958: "SystemLiteral \" or ' expected\n");
1.123 daniel 3959: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.59 daniel 3960: ctxt->wellFormed = 0;
1.180 daniel 3961: ctxt->disableSAX = 1;
1.135 daniel 3962: return(NULL);
1.21 daniel 3963: }
3964:
1.135 daniel 3965: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3966: if (buf == NULL) {
3967: fprintf(stderr, "malloc of %d byte failed\n", size);
3968: return(NULL);
3969: }
1.168 daniel 3970: ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
1.152 daniel 3971: cur = CUR_CHAR(l);
1.135 daniel 3972: while ((IS_CHAR(cur)) && (cur != stop)) {
1.152 daniel 3973: if (len + 5 >= size) {
1.135 daniel 3974: size *= 2;
3975: buf = xmlRealloc(buf, size * sizeof(xmlChar));
3976: if (buf == NULL) {
3977: fprintf(stderr, "realloc of %d byte failed\n", size);
1.168 daniel 3978: ctxt->instate = state;
1.135 daniel 3979: return(NULL);
3980: }
3981: }
1.152 daniel 3982: COPY_BUF(l,buf,len,cur);
3983: NEXTL(l);
3984: cur = CUR_CHAR(l);
1.135 daniel 3985: if (cur == 0) {
3986: GROW;
3987: SHRINK;
1.152 daniel 3988: cur = CUR_CHAR(l);
1.135 daniel 3989: }
3990: }
3991: buf[len] = 0;
1.168 daniel 3992: ctxt->instate = state;
1.135 daniel 3993: if (!IS_CHAR(cur)) {
3994: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3995: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
3996: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
3997: ctxt->wellFormed = 0;
1.180 daniel 3998: ctxt->disableSAX = 1;
1.135 daniel 3999: } else {
4000: NEXT;
4001: }
4002: return(buf);
1.21 daniel 4003: }
4004:
1.50 daniel 4005: /**
4006: * xmlParsePubidLiteral:
4007: * @ctxt: an XML parser context
1.21 daniel 4008: *
1.50 daniel 4009: * parse an XML public literal
1.68 daniel 4010: *
4011: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4012: *
4013: * Returns the PubidLiteral parsed or NULL.
1.21 daniel 4014: */
4015:
1.123 daniel 4016: xmlChar *
1.55 daniel 4017: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 4018: xmlChar *buf = NULL;
4019: int len = 0;
1.140 daniel 4020: int size = XML_PARSER_BUFFER_SIZE;
1.135 daniel 4021: xmlChar cur;
4022: xmlChar stop;
1.125 daniel 4023:
1.91 daniel 4024: SHRINK;
1.152 daniel 4025: if (RAW == '"') {
1.40 daniel 4026: NEXT;
1.135 daniel 4027: stop = '"';
1.152 daniel 4028: } else if (RAW == '\'') {
1.40 daniel 4029: NEXT;
1.135 daniel 4030: stop = '\'';
1.21 daniel 4031: } else {
1.55 daniel 4032: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4033: ctxt->sax->error(ctxt->userData,
4034: "SystemLiteral \" or ' expected\n");
1.123 daniel 4035: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.59 daniel 4036: ctxt->wellFormed = 0;
1.180 daniel 4037: ctxt->disableSAX = 1;
1.135 daniel 4038: return(NULL);
4039: }
4040: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4041: if (buf == NULL) {
4042: fprintf(stderr, "malloc of %d byte failed\n", size);
4043: return(NULL);
4044: }
4045: cur = CUR;
4046: while ((IS_PUBIDCHAR(cur)) && (cur != stop)) {
4047: if (len + 1 >= size) {
4048: size *= 2;
4049: buf = xmlRealloc(buf, size * sizeof(xmlChar));
4050: if (buf == NULL) {
4051: fprintf(stderr, "realloc of %d byte failed\n", size);
4052: return(NULL);
4053: }
4054: }
4055: buf[len++] = cur;
4056: NEXT;
4057: cur = CUR;
4058: if (cur == 0) {
4059: GROW;
4060: SHRINK;
4061: cur = CUR;
4062: }
4063: }
4064: buf[len] = 0;
4065: if (cur != stop) {
4066: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4067: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
4068: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
4069: ctxt->wellFormed = 0;
1.180 daniel 4070: ctxt->disableSAX = 1;
1.135 daniel 4071: } else {
4072: NEXT;
1.21 daniel 4073: }
1.135 daniel 4074: return(buf);
1.21 daniel 4075: }
4076:
1.50 daniel 4077: /**
4078: * xmlParseCharData:
4079: * @ctxt: an XML parser context
4080: * @cdata: int indicating whether we are within a CDATA section
4081: *
4082: * parse a CharData section.
4083: * if we are within a CDATA section ']]>' marks an end of section.
1.27 daniel 4084: *
1.151 daniel 4085: * The right angle bracket (>) may be represented using the string ">",
4086: * and must, for compatibility, be escaped using ">" or a character
4087: * reference when it appears in the string "]]>" in content, when that
4088: * string is not marking the end of a CDATA section.
4089: *
1.27 daniel 4090: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4091: */
4092:
1.55 daniel 4093: void
4094: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1.152 daniel 4095: xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
1.91 daniel 4096: int nbchar = 0;
1.152 daniel 4097: int cur, l;
1.27 daniel 4098:
1.91 daniel 4099: SHRINK;
1.152 daniel 4100: cur = CUR_CHAR(l);
1.190 daniel 4101: while (((cur != '<') || (ctxt->token == '<')) &&
4102: ((cur != '&') || (ctxt->token == '&')) &&
4103: (IS_CHAR(cur))) {
1.97 daniel 4104: if ((cur == ']') && (NXT(1) == ']') &&
1.59 daniel 4105: (NXT(2) == '>')) {
4106: if (cdata) break;
4107: else {
4108: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.151 daniel 4109: ctxt->sax->error(ctxt->userData,
1.59 daniel 4110: "Sequence ']]>' not allowed in content\n");
1.123 daniel 4111: ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
1.151 daniel 4112: /* Should this be relaxed ??? I see a "must here */
4113: ctxt->wellFormed = 0;
1.180 daniel 4114: ctxt->disableSAX = 1;
1.59 daniel 4115: }
4116: }
1.152 daniel 4117: COPY_BUF(l,buf,nbchar,cur);
4118: if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
1.91 daniel 4119: /*
4120: * Ok the segment is to be consumed as chars.
4121: */
1.171 daniel 4122: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 4123: if (areBlanks(ctxt, buf, nbchar)) {
4124: if (ctxt->sax->ignorableWhitespace != NULL)
4125: ctxt->sax->ignorableWhitespace(ctxt->userData,
4126: buf, nbchar);
4127: } else {
4128: if (ctxt->sax->characters != NULL)
4129: ctxt->sax->characters(ctxt->userData, buf, nbchar);
4130: }
4131: }
4132: nbchar = 0;
4133: }
1.152 daniel 4134: NEXTL(l);
4135: cur = CUR_CHAR(l);
1.27 daniel 4136: }
1.91 daniel 4137: if (nbchar != 0) {
4138: /*
4139: * Ok the segment is to be consumed as chars.
4140: */
1.171 daniel 4141: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 4142: if (areBlanks(ctxt, buf, nbchar)) {
4143: if (ctxt->sax->ignorableWhitespace != NULL)
4144: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4145: } else {
4146: if (ctxt->sax->characters != NULL)
4147: ctxt->sax->characters(ctxt->userData, buf, nbchar);
4148: }
4149: }
1.45 daniel 4150: }
1.27 daniel 4151: }
4152:
1.50 daniel 4153: /**
4154: * xmlParseExternalID:
4155: * @ctxt: an XML parser context
1.123 daniel 4156: * @publicID: a xmlChar** receiving PubidLiteral
1.67 daniel 4157: * @strict: indicate whether we should restrict parsing to only
4158: * production [75], see NOTE below
1.50 daniel 4159: *
1.67 daniel 4160: * Parse an External ID or a Public ID
4161: *
4162: * NOTE: Productions [75] and [83] interract badly since [75] can generate
4163: * 'PUBLIC' S PubidLiteral S SystemLiteral
1.22 daniel 4164: *
4165: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4166: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1.67 daniel 4167: *
4168: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4169: *
1.68 daniel 4170: * Returns the function returns SystemLiteral and in the second
1.67 daniel 4171: * case publicID receives PubidLiteral, is strict is off
4172: * it is possible to return NULL and have publicID set.
1.22 daniel 4173: */
4174:
1.123 daniel 4175: xmlChar *
4176: xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4177: xmlChar *URI = NULL;
1.22 daniel 4178:
1.91 daniel 4179: SHRINK;
1.152 daniel 4180: if ((RAW == 'S') && (NXT(1) == 'Y') &&
1.40 daniel 4181: (NXT(2) == 'S') && (NXT(3) == 'T') &&
4182: (NXT(4) == 'E') && (NXT(5) == 'M')) {
4183: SKIP(6);
1.59 daniel 4184: if (!IS_BLANK(CUR)) {
4185: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4186: ctxt->sax->error(ctxt->userData,
1.59 daniel 4187: "Space required after 'SYSTEM'\n");
1.123 daniel 4188: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4189: ctxt->wellFormed = 0;
1.180 daniel 4190: ctxt->disableSAX = 1;
1.59 daniel 4191: }
1.42 daniel 4192: SKIP_BLANKS;
1.39 daniel 4193: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 4194: if (URI == NULL) {
1.55 daniel 4195: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4196: ctxt->sax->error(ctxt->userData,
1.39 daniel 4197: "xmlParseExternalID: SYSTEM, no URI\n");
1.123 daniel 4198: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.59 daniel 4199: ctxt->wellFormed = 0;
1.180 daniel 4200: ctxt->disableSAX = 1;
1.59 daniel 4201: }
1.152 daniel 4202: } else if ((RAW == 'P') && (NXT(1) == 'U') &&
1.40 daniel 4203: (NXT(2) == 'B') && (NXT(3) == 'L') &&
4204: (NXT(4) == 'I') && (NXT(5) == 'C')) {
4205: SKIP(6);
1.59 daniel 4206: if (!IS_BLANK(CUR)) {
4207: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4208: ctxt->sax->error(ctxt->userData,
1.59 daniel 4209: "Space required after 'PUBLIC'\n");
1.123 daniel 4210: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4211: ctxt->wellFormed = 0;
1.180 daniel 4212: ctxt->disableSAX = 1;
1.59 daniel 4213: }
1.42 daniel 4214: SKIP_BLANKS;
1.39 daniel 4215: *publicID = xmlParsePubidLiteral(ctxt);
1.59 daniel 4216: if (*publicID == NULL) {
1.55 daniel 4217: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4218: ctxt->sax->error(ctxt->userData,
1.39 daniel 4219: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.123 daniel 4220: ctxt->errNo = XML_ERR_PUBID_REQUIRED;
1.59 daniel 4221: ctxt->wellFormed = 0;
1.180 daniel 4222: ctxt->disableSAX = 1;
1.59 daniel 4223: }
1.67 daniel 4224: if (strict) {
4225: /*
4226: * We don't handle [83] so "S SystemLiteral" is required.
4227: */
4228: if (!IS_BLANK(CUR)) {
4229: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4230: ctxt->sax->error(ctxt->userData,
1.67 daniel 4231: "Space required after the Public Identifier\n");
1.123 daniel 4232: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 4233: ctxt->wellFormed = 0;
1.180 daniel 4234: ctxt->disableSAX = 1;
1.67 daniel 4235: }
4236: } else {
4237: /*
4238: * We handle [83] so we return immediately, if
4239: * "S SystemLiteral" is not detected. From a purely parsing
4240: * point of view that's a nice mess.
4241: */
1.135 daniel 4242: const xmlChar *ptr;
4243: GROW;
4244:
4245: ptr = CUR_PTR;
1.67 daniel 4246: if (!IS_BLANK(*ptr)) return(NULL);
4247:
4248: while (IS_BLANK(*ptr)) ptr++;
1.173 daniel 4249: if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
1.59 daniel 4250: }
1.42 daniel 4251: SKIP_BLANKS;
1.39 daniel 4252: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 4253: if (URI == NULL) {
1.55 daniel 4254: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4255: ctxt->sax->error(ctxt->userData,
1.39 daniel 4256: "xmlParseExternalID: PUBLIC, no URI\n");
1.123 daniel 4257: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.59 daniel 4258: ctxt->wellFormed = 0;
1.180 daniel 4259: ctxt->disableSAX = 1;
1.59 daniel 4260: }
1.22 daniel 4261: }
1.39 daniel 4262: return(URI);
1.22 daniel 4263: }
4264:
1.50 daniel 4265: /**
4266: * xmlParseComment:
1.69 daniel 4267: * @ctxt: an XML parser context
1.50 daniel 4268: *
1.3 veillard 4269: * Skip an XML (SGML) comment <!-- .... -->
1.38 daniel 4270: * The spec says that "For compatibility, the string "--" (double-hyphen)
4271: * must not occur within comments. "
1.22 daniel 4272: *
4273: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 4274: */
1.72 daniel 4275: void
1.114 daniel 4276: xmlParseComment(xmlParserCtxtPtr ctxt) {
1.135 daniel 4277: xmlChar *buf = NULL;
1.195 daniel 4278: int len;
1.140 daniel 4279: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 4280: int q, ql;
4281: int r, rl;
4282: int cur, l;
1.140 daniel 4283: xmlParserInputState state;
1.187 daniel 4284: xmlParserInputPtr input = ctxt->input;
1.3 veillard 4285:
4286: /*
1.22 daniel 4287: * Check that there is a comment right here.
1.3 veillard 4288: */
1.152 daniel 4289: if ((RAW != '<') || (NXT(1) != '!') ||
1.72 daniel 4290: (NXT(2) != '-') || (NXT(3) != '-')) return;
1.3 veillard 4291:
1.140 daniel 4292: state = ctxt->instate;
1.97 daniel 4293: ctxt->instate = XML_PARSER_COMMENT;
1.91 daniel 4294: SHRINK;
1.40 daniel 4295: SKIP(4);
1.135 daniel 4296: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4297: if (buf == NULL) {
4298: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 4299: ctxt->instate = state;
1.135 daniel 4300: return;
4301: }
1.152 daniel 4302: q = CUR_CHAR(ql);
4303: NEXTL(ql);
4304: r = CUR_CHAR(rl);
4305: NEXTL(rl);
4306: cur = CUR_CHAR(l);
1.195 daniel 4307: len = 0;
1.135 daniel 4308: while (IS_CHAR(cur) &&
4309: ((cur != '>') ||
4310: (r != '-') || (q != '-'))) {
1.195 daniel 4311: if ((r == '-') && (q == '-') && (len > 1)) {
1.55 daniel 4312: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4313: ctxt->sax->error(ctxt->userData,
1.38 daniel 4314: "Comment must not contain '--' (double-hyphen)`\n");
1.123 daniel 4315: ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
1.59 daniel 4316: ctxt->wellFormed = 0;
1.180 daniel 4317: ctxt->disableSAX = 1;
1.59 daniel 4318: }
1.152 daniel 4319: if (len + 5 >= size) {
1.135 daniel 4320: size *= 2;
4321: buf = xmlRealloc(buf, size * sizeof(xmlChar));
4322: if (buf == NULL) {
4323: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 4324: ctxt->instate = state;
1.135 daniel 4325: return;
4326: }
4327: }
1.152 daniel 4328: COPY_BUF(ql,buf,len,q);
1.135 daniel 4329: q = r;
1.152 daniel 4330: ql = rl;
1.135 daniel 4331: r = cur;
1.152 daniel 4332: rl = l;
4333: NEXTL(l);
4334: cur = CUR_CHAR(l);
1.135 daniel 4335: if (cur == 0) {
4336: SHRINK;
4337: GROW;
1.152 daniel 4338: cur = CUR_CHAR(l);
1.135 daniel 4339: }
1.3 veillard 4340: }
1.135 daniel 4341: buf[len] = 0;
4342: if (!IS_CHAR(cur)) {
1.55 daniel 4343: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4344: ctxt->sax->error(ctxt->userData,
1.135 daniel 4345: "Comment not terminated \n<!--%.50s\n", buf);
1.123 daniel 4346: ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
1.59 daniel 4347: ctxt->wellFormed = 0;
1.180 daniel 4348: ctxt->disableSAX = 1;
1.178 daniel 4349: xmlFree(buf);
1.3 veillard 4350: } else {
1.187 daniel 4351: if (input != ctxt->input) {
4352: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4353: ctxt->sax->error(ctxt->userData,
4354: "Comment doesn't start and stop in the same entity\n");
4355: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4356: ctxt->wellFormed = 0;
4357: ctxt->disableSAX = 1;
4358: }
1.40 daniel 4359: NEXT;
1.171 daniel 4360: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4361: (!ctxt->disableSAX))
1.135 daniel 4362: ctxt->sax->comment(ctxt->userData, buf);
4363: xmlFree(buf);
1.3 veillard 4364: }
1.140 daniel 4365: ctxt->instate = state;
1.3 veillard 4366: }
4367:
1.50 daniel 4368: /**
4369: * xmlParsePITarget:
4370: * @ctxt: an XML parser context
4371: *
4372: * parse the name of a PI
1.22 daniel 4373: *
4374: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1.68 daniel 4375: *
4376: * Returns the PITarget name or NULL
1.22 daniel 4377: */
4378:
1.123 daniel 4379: xmlChar *
1.55 daniel 4380: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1.123 daniel 4381: xmlChar *name;
1.22 daniel 4382:
4383: name = xmlParseName(ctxt);
1.139 daniel 4384: if ((name != NULL) &&
1.22 daniel 4385: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 4386: ((name[1] == 'm') || (name[1] == 'M')) &&
4387: ((name[2] == 'l') || (name[2] == 'L'))) {
1.139 daniel 4388: int i;
1.177 daniel 4389: if ((name[0] == 'x') && (name[1] == 'm') &&
4390: (name[2] == 'l') && (name[3] == 0)) {
1.151 daniel 4391: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4392: ctxt->sax->error(ctxt->userData,
4393: "XML declaration allowed only at the start of the document\n");
4394: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
4395: ctxt->wellFormed = 0;
1.180 daniel 4396: ctxt->disableSAX = 1;
1.151 daniel 4397: return(name);
4398: } else if (name[3] == 0) {
4399: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4400: ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
4401: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
4402: ctxt->wellFormed = 0;
1.180 daniel 4403: ctxt->disableSAX = 1;
1.151 daniel 4404: return(name);
4405: }
1.139 daniel 4406: for (i = 0;;i++) {
4407: if (xmlW3CPIs[i] == NULL) break;
4408: if (!xmlStrcmp(name, (const xmlChar *)xmlW3CPIs[i]))
4409: return(name);
4410: }
4411: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
4412: ctxt->sax->warning(ctxt->userData,
1.122 daniel 4413: "xmlParsePItarget: invalid name prefix 'xml'\n");
1.123 daniel 4414: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
1.122 daniel 4415: }
1.22 daniel 4416: }
4417: return(name);
4418: }
4419:
1.50 daniel 4420: /**
4421: * xmlParsePI:
4422: * @ctxt: an XML parser context
4423: *
4424: * parse an XML Processing Instruction.
1.22 daniel 4425: *
4426: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.68 daniel 4427: *
1.69 daniel 4428: * The processing is transfered to SAX once parsed.
1.3 veillard 4429: */
4430:
1.55 daniel 4431: void
4432: xmlParsePI(xmlParserCtxtPtr ctxt) {
1.135 daniel 4433: xmlChar *buf = NULL;
4434: int len = 0;
1.140 daniel 4435: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 4436: int cur, l;
1.123 daniel 4437: xmlChar *target;
1.140 daniel 4438: xmlParserInputState state;
1.22 daniel 4439:
1.152 daniel 4440: if ((RAW == '<') && (NXT(1) == '?')) {
1.187 daniel 4441: xmlParserInputPtr input = ctxt->input;
1.140 daniel 4442: state = ctxt->instate;
4443: ctxt->instate = XML_PARSER_PI;
1.3 veillard 4444: /*
4445: * this is a Processing Instruction.
4446: */
1.40 daniel 4447: SKIP(2);
1.91 daniel 4448: SHRINK;
1.3 veillard 4449:
4450: /*
1.22 daniel 4451: * Parse the target name and check for special support like
4452: * namespace.
1.3 veillard 4453: */
1.22 daniel 4454: target = xmlParsePITarget(ctxt);
4455: if (target != NULL) {
1.156 daniel 4456: if ((RAW == '?') && (NXT(1) == '>')) {
1.187 daniel 4457: if (input != ctxt->input) {
4458: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4459: ctxt->sax->error(ctxt->userData,
4460: "PI declaration doesn't start and stop in the same entity\n");
4461: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4462: ctxt->wellFormed = 0;
4463: ctxt->disableSAX = 1;
4464: }
1.156 daniel 4465: SKIP(2);
4466:
4467: /*
4468: * SAX: PI detected.
4469: */
1.171 daniel 4470: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.156 daniel 4471: (ctxt->sax->processingInstruction != NULL))
4472: ctxt->sax->processingInstruction(ctxt->userData,
4473: target, NULL);
4474: ctxt->instate = state;
1.170 daniel 4475: xmlFree(target);
1.156 daniel 4476: return;
4477: }
1.135 daniel 4478: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4479: if (buf == NULL) {
4480: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 4481: ctxt->instate = state;
1.135 daniel 4482: return;
4483: }
4484: cur = CUR;
4485: if (!IS_BLANK(cur)) {
1.114 daniel 4486: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4487: ctxt->sax->error(ctxt->userData,
4488: "xmlParsePI: PI %s space expected\n", target);
1.123 daniel 4489: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.114 daniel 4490: ctxt->wellFormed = 0;
1.180 daniel 4491: ctxt->disableSAX = 1;
1.114 daniel 4492: }
4493: SKIP_BLANKS;
1.152 daniel 4494: cur = CUR_CHAR(l);
1.135 daniel 4495: while (IS_CHAR(cur) &&
4496: ((cur != '?') || (NXT(1) != '>'))) {
1.152 daniel 4497: if (len + 5 >= size) {
1.135 daniel 4498: size *= 2;
4499: buf = xmlRealloc(buf, size * sizeof(xmlChar));
4500: if (buf == NULL) {
4501: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 4502: ctxt->instate = state;
1.135 daniel 4503: return;
4504: }
4505: }
1.152 daniel 4506: COPY_BUF(l,buf,len,cur);
4507: NEXTL(l);
4508: cur = CUR_CHAR(l);
1.135 daniel 4509: if (cur == 0) {
4510: SHRINK;
4511: GROW;
1.152 daniel 4512: cur = CUR_CHAR(l);
1.135 daniel 4513: }
4514: }
4515: buf[len] = 0;
1.152 daniel 4516: if (cur != '?') {
1.72 daniel 4517: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4518: ctxt->sax->error(ctxt->userData,
1.72 daniel 4519: "xmlParsePI: PI %s never end ...\n", target);
1.123 daniel 4520: ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
1.72 daniel 4521: ctxt->wellFormed = 0;
1.180 daniel 4522: ctxt->disableSAX = 1;
1.22 daniel 4523: } else {
1.187 daniel 4524: if (input != ctxt->input) {
4525: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4526: ctxt->sax->error(ctxt->userData,
4527: "PI declaration doesn't start and stop in the same entity\n");
4528: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4529: ctxt->wellFormed = 0;
4530: ctxt->disableSAX = 1;
4531: }
1.72 daniel 4532: SKIP(2);
1.44 daniel 4533:
1.72 daniel 4534: /*
4535: * SAX: PI detected.
4536: */
1.171 daniel 4537: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.72 daniel 4538: (ctxt->sax->processingInstruction != NULL))
1.99 daniel 4539: ctxt->sax->processingInstruction(ctxt->userData,
1.135 daniel 4540: target, buf);
1.22 daniel 4541: }
1.135 daniel 4542: xmlFree(buf);
1.119 daniel 4543: xmlFree(target);
1.3 veillard 4544: } else {
1.55 daniel 4545: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.99 daniel 4546: ctxt->sax->error(ctxt->userData,
4547: "xmlParsePI : no target name\n");
1.123 daniel 4548: ctxt->errNo = XML_ERR_PI_NOT_STARTED;
1.59 daniel 4549: ctxt->wellFormed = 0;
1.180 daniel 4550: ctxt->disableSAX = 1;
1.22 daniel 4551: }
1.140 daniel 4552: ctxt->instate = state;
1.22 daniel 4553: }
4554: }
4555:
1.50 daniel 4556: /**
4557: * xmlParseNotationDecl:
4558: * @ctxt: an XML parser context
4559: *
4560: * parse a notation declaration
1.22 daniel 4561: *
4562: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4563: *
4564: * Hence there is actually 3 choices:
4565: * 'PUBLIC' S PubidLiteral
4566: * 'PUBLIC' S PubidLiteral S SystemLiteral
4567: * and 'SYSTEM' S SystemLiteral
1.50 daniel 4568: *
1.67 daniel 4569: * See the NOTE on xmlParseExternalID().
1.22 daniel 4570: */
4571:
1.55 daniel 4572: void
4573: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4574: xmlChar *name;
4575: xmlChar *Pubid;
4576: xmlChar *Systemid;
1.22 daniel 4577:
1.152 daniel 4578: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4579: (NXT(2) == 'N') && (NXT(3) == 'O') &&
4580: (NXT(4) == 'T') && (NXT(5) == 'A') &&
4581: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1.67 daniel 4582: (NXT(8) == 'O') && (NXT(9) == 'N')) {
1.187 daniel 4583: xmlParserInputPtr input = ctxt->input;
1.91 daniel 4584: SHRINK;
1.40 daniel 4585: SKIP(10);
1.67 daniel 4586: if (!IS_BLANK(CUR)) {
4587: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4588: ctxt->sax->error(ctxt->userData,
4589: "Space required after '<!NOTATION'\n");
1.123 daniel 4590: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 4591: ctxt->wellFormed = 0;
1.180 daniel 4592: ctxt->disableSAX = 1;
1.67 daniel 4593: return;
4594: }
4595: SKIP_BLANKS;
1.22 daniel 4596:
4597: name = xmlParseName(ctxt);
4598: if (name == NULL) {
1.55 daniel 4599: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4600: ctxt->sax->error(ctxt->userData,
4601: "NOTATION: Name expected here\n");
1.123 daniel 4602: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.67 daniel 4603: ctxt->wellFormed = 0;
1.180 daniel 4604: ctxt->disableSAX = 1;
1.67 daniel 4605: return;
4606: }
4607: if (!IS_BLANK(CUR)) {
4608: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4609: ctxt->sax->error(ctxt->userData,
1.67 daniel 4610: "Space required after the NOTATION name'\n");
1.123 daniel 4611: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4612: ctxt->wellFormed = 0;
1.180 daniel 4613: ctxt->disableSAX = 1;
1.22 daniel 4614: return;
4615: }
1.42 daniel 4616: SKIP_BLANKS;
1.67 daniel 4617:
1.22 daniel 4618: /*
1.67 daniel 4619: * Parse the IDs.
1.22 daniel 4620: */
1.160 daniel 4621: Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
1.67 daniel 4622: SKIP_BLANKS;
4623:
1.152 daniel 4624: if (RAW == '>') {
1.187 daniel 4625: if (input != ctxt->input) {
4626: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4627: ctxt->sax->error(ctxt->userData,
4628: "Notation declaration doesn't start and stop in the same entity\n");
4629: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4630: ctxt->wellFormed = 0;
4631: ctxt->disableSAX = 1;
4632: }
1.40 daniel 4633: NEXT;
1.171 daniel 4634: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4635: (ctxt->sax->notationDecl != NULL))
1.74 daniel 4636: ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
1.67 daniel 4637: } else {
4638: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4639: ctxt->sax->error(ctxt->userData,
1.67 daniel 4640: "'>' required to close NOTATION declaration\n");
1.123 daniel 4641: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.67 daniel 4642: ctxt->wellFormed = 0;
1.180 daniel 4643: ctxt->disableSAX = 1;
1.67 daniel 4644: }
1.119 daniel 4645: xmlFree(name);
4646: if (Systemid != NULL) xmlFree(Systemid);
4647: if (Pubid != NULL) xmlFree(Pubid);
1.22 daniel 4648: }
4649: }
4650:
1.50 daniel 4651: /**
4652: * xmlParseEntityDecl:
4653: * @ctxt: an XML parser context
4654: *
4655: * parse <!ENTITY declarations
1.22 daniel 4656: *
4657: * [70] EntityDecl ::= GEDecl | PEDecl
4658: *
4659: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4660: *
4661: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4662: *
4663: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4664: *
4665: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 4666: *
4667: * [76] NDataDecl ::= S 'NDATA' S Name
1.99 daniel 4668: *
4669: * [ VC: Notation Declared ]
1.116 daniel 4670: * The Name must match the declared name of a notation.
1.22 daniel 4671: */
4672:
1.55 daniel 4673: void
4674: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4675: xmlChar *name = NULL;
4676: xmlChar *value = NULL;
4677: xmlChar *URI = NULL, *literal = NULL;
4678: xmlChar *ndata = NULL;
1.39 daniel 4679: int isParameter = 0;
1.123 daniel 4680: xmlChar *orig = NULL;
1.22 daniel 4681:
1.94 daniel 4682: GROW;
1.152 daniel 4683: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4684: (NXT(2) == 'E') && (NXT(3) == 'N') &&
4685: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1.59 daniel 4686: (NXT(6) == 'T') && (NXT(7) == 'Y')) {
1.187 daniel 4687: xmlParserInputPtr input = ctxt->input;
1.96 daniel 4688: ctxt->instate = XML_PARSER_ENTITY_DECL;
1.91 daniel 4689: SHRINK;
1.40 daniel 4690: SKIP(8);
1.59 daniel 4691: if (!IS_BLANK(CUR)) {
4692: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4693: ctxt->sax->error(ctxt->userData,
4694: "Space required after '<!ENTITY'\n");
1.123 daniel 4695: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4696: ctxt->wellFormed = 0;
1.180 daniel 4697: ctxt->disableSAX = 1;
1.59 daniel 4698: }
4699: SKIP_BLANKS;
1.40 daniel 4700:
1.152 daniel 4701: if (RAW == '%') {
1.40 daniel 4702: NEXT;
1.59 daniel 4703: if (!IS_BLANK(CUR)) {
4704: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4705: ctxt->sax->error(ctxt->userData,
4706: "Space required after '%'\n");
1.123 daniel 4707: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4708: ctxt->wellFormed = 0;
1.180 daniel 4709: ctxt->disableSAX = 1;
1.59 daniel 4710: }
1.42 daniel 4711: SKIP_BLANKS;
1.39 daniel 4712: isParameter = 1;
1.22 daniel 4713: }
4714:
4715: name = xmlParseName(ctxt);
1.24 daniel 4716: if (name == NULL) {
1.55 daniel 4717: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4718: ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
1.123 daniel 4719: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 4720: ctxt->wellFormed = 0;
1.180 daniel 4721: ctxt->disableSAX = 1;
1.24 daniel 4722: return;
4723: }
1.59 daniel 4724: if (!IS_BLANK(CUR)) {
4725: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4726: ctxt->sax->error(ctxt->userData,
1.59 daniel 4727: "Space required after the entity name\n");
1.123 daniel 4728: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4729: ctxt->wellFormed = 0;
1.180 daniel 4730: ctxt->disableSAX = 1;
1.59 daniel 4731: }
1.42 daniel 4732: SKIP_BLANKS;
1.24 daniel 4733:
1.22 daniel 4734: /*
1.68 daniel 4735: * handle the various case of definitions...
1.22 daniel 4736: */
1.39 daniel 4737: if (isParameter) {
1.152 daniel 4738: if ((RAW == '"') || (RAW == '\''))
1.78 daniel 4739: value = xmlParseEntityValue(ctxt, &orig);
1.39 daniel 4740: if (value) {
1.171 daniel 4741: if ((ctxt->sax != NULL) &&
4742: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4743: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4744: XML_INTERNAL_PARAMETER_ENTITY,
4745: NULL, NULL, value);
4746: }
1.24 daniel 4747: else {
1.67 daniel 4748: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 4749: if ((URI == NULL) && (literal == NULL)) {
4750: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4751: ctxt->sax->error(ctxt->userData,
4752: "Entity value required\n");
4753: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
4754: ctxt->wellFormed = 0;
1.180 daniel 4755: ctxt->disableSAX = 1;
1.169 daniel 4756: }
1.39 daniel 4757: if (URI) {
1.193 daniel 4758: xmlURIPtr uri;
4759:
4760: uri = xmlParseURI((const char *) URI);
4761: if (uri == NULL) {
4762: if ((ctxt->sax != NULL) &&
4763: (!ctxt->disableSAX) &&
4764: (ctxt->sax->error != NULL))
4765: ctxt->sax->error(ctxt->userData,
4766: "Invalid URI: %s\n", URI);
4767: ctxt->wellFormed = 0;
4768: ctxt->errNo = XML_ERR_INVALID_URI;
4769: } else {
4770: if (uri->fragment != NULL) {
4771: if ((ctxt->sax != NULL) &&
4772: (!ctxt->disableSAX) &&
4773: (ctxt->sax->error != NULL))
4774: ctxt->sax->error(ctxt->userData,
4775: "Fragment not allowed: %s\n", URI);
4776: ctxt->wellFormed = 0;
4777: ctxt->errNo = XML_ERR_URI_FRAGMENT;
4778: } else {
4779: if ((ctxt->sax != NULL) &&
4780: (!ctxt->disableSAX) &&
4781: (ctxt->sax->entityDecl != NULL))
4782: ctxt->sax->entityDecl(ctxt->userData, name,
4783: XML_EXTERNAL_PARAMETER_ENTITY,
4784: literal, URI, NULL);
4785: }
4786: xmlFreeURI(uri);
4787: }
1.39 daniel 4788: }
1.24 daniel 4789: }
4790: } else {
1.152 daniel 4791: if ((RAW == '"') || (RAW == '\'')) {
1.78 daniel 4792: value = xmlParseEntityValue(ctxt, &orig);
1.171 daniel 4793: if ((ctxt->sax != NULL) &&
4794: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4795: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4796: XML_INTERNAL_GENERAL_ENTITY,
4797: NULL, NULL, value);
4798: } else {
1.67 daniel 4799: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 4800: if ((URI == NULL) && (literal == NULL)) {
4801: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4802: ctxt->sax->error(ctxt->userData,
4803: "Entity value required\n");
4804: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
4805: ctxt->wellFormed = 0;
1.180 daniel 4806: ctxt->disableSAX = 1;
1.169 daniel 4807: }
1.193 daniel 4808: if (URI) {
4809: xmlURIPtr uri;
4810:
4811: uri = xmlParseURI((const char *)URI);
4812: if (uri == NULL) {
4813: if ((ctxt->sax != NULL) &&
4814: (!ctxt->disableSAX) &&
4815: (ctxt->sax->error != NULL))
4816: ctxt->sax->error(ctxt->userData,
4817: "Invalid URI: %s\n", URI);
4818: ctxt->wellFormed = 0;
4819: ctxt->errNo = XML_ERR_INVALID_URI;
4820: } else {
4821: if (uri->fragment != NULL) {
4822: if ((ctxt->sax != NULL) &&
4823: (!ctxt->disableSAX) &&
4824: (ctxt->sax->error != NULL))
4825: ctxt->sax->error(ctxt->userData,
4826: "Fragment not allowed: %s\n", URI);
4827: ctxt->wellFormed = 0;
4828: ctxt->errNo = XML_ERR_URI_FRAGMENT;
4829: }
4830: xmlFreeURI(uri);
4831: }
4832: }
1.152 daniel 4833: if ((RAW != '>') && (!IS_BLANK(CUR))) {
1.59 daniel 4834: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4835: ctxt->sax->error(ctxt->userData,
1.59 daniel 4836: "Space required before 'NDATA'\n");
1.123 daniel 4837: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4838: ctxt->wellFormed = 0;
1.180 daniel 4839: ctxt->disableSAX = 1;
1.59 daniel 4840: }
1.42 daniel 4841: SKIP_BLANKS;
1.152 daniel 4842: if ((RAW == 'N') && (NXT(1) == 'D') &&
1.40 daniel 4843: (NXT(2) == 'A') && (NXT(3) == 'T') &&
4844: (NXT(4) == 'A')) {
4845: SKIP(5);
1.59 daniel 4846: if (!IS_BLANK(CUR)) {
4847: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4848: ctxt->sax->error(ctxt->userData,
1.59 daniel 4849: "Space required after 'NDATA'\n");
1.123 daniel 4850: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4851: ctxt->wellFormed = 0;
1.180 daniel 4852: ctxt->disableSAX = 1;
1.59 daniel 4853: }
1.42 daniel 4854: SKIP_BLANKS;
1.24 daniel 4855: ndata = xmlParseName(ctxt);
1.171 daniel 4856: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.116 daniel 4857: (ctxt->sax->unparsedEntityDecl != NULL))
4858: ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
1.39 daniel 4859: literal, URI, ndata);
4860: } else {
1.171 daniel 4861: if ((ctxt->sax != NULL) &&
4862: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4863: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4864: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4865: literal, URI, NULL);
1.24 daniel 4866: }
4867: }
4868: }
1.42 daniel 4869: SKIP_BLANKS;
1.152 daniel 4870: if (RAW != '>') {
1.55 daniel 4871: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4872: ctxt->sax->error(ctxt->userData,
1.31 daniel 4873: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.123 daniel 4874: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.59 daniel 4875: ctxt->wellFormed = 0;
1.180 daniel 4876: ctxt->disableSAX = 1;
1.187 daniel 4877: } else {
4878: if (input != ctxt->input) {
4879: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4880: ctxt->sax->error(ctxt->userData,
4881: "Entity declaration doesn't start and stop in the same entity\n");
4882: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4883: ctxt->wellFormed = 0;
4884: ctxt->disableSAX = 1;
4885: }
1.40 daniel 4886: NEXT;
1.187 daniel 4887: }
1.78 daniel 4888: if (orig != NULL) {
4889: /*
1.98 daniel 4890: * Ugly mechanism to save the raw entity value.
1.78 daniel 4891: */
4892: xmlEntityPtr cur = NULL;
4893:
1.98 daniel 4894: if (isParameter) {
4895: if ((ctxt->sax != NULL) &&
4896: (ctxt->sax->getParameterEntity != NULL))
1.120 daniel 4897: cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.98 daniel 4898: } else {
4899: if ((ctxt->sax != NULL) &&
4900: (ctxt->sax->getEntity != NULL))
1.120 daniel 4901: cur = ctxt->sax->getEntity(ctxt->userData, name);
1.98 daniel 4902: }
4903: if (cur != NULL) {
4904: if (cur->orig != NULL)
1.119 daniel 4905: xmlFree(orig);
1.98 daniel 4906: else
4907: cur->orig = orig;
4908: } else
1.119 daniel 4909: xmlFree(orig);
1.78 daniel 4910: }
1.119 daniel 4911: if (name != NULL) xmlFree(name);
4912: if (value != NULL) xmlFree(value);
4913: if (URI != NULL) xmlFree(URI);
4914: if (literal != NULL) xmlFree(literal);
4915: if (ndata != NULL) xmlFree(ndata);
1.22 daniel 4916: }
4917: }
4918:
1.50 daniel 4919: /**
1.59 daniel 4920: * xmlParseDefaultDecl:
4921: * @ctxt: an XML parser context
4922: * @value: Receive a possible fixed default value for the attribute
4923: *
4924: * Parse an attribute default declaration
4925: *
4926: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4927: *
1.99 daniel 4928: * [ VC: Required Attribute ]
1.117 daniel 4929: * if the default declaration is the keyword #REQUIRED, then the
4930: * attribute must be specified for all elements of the type in the
4931: * attribute-list declaration.
1.99 daniel 4932: *
4933: * [ VC: Attribute Default Legal ]
1.102 daniel 4934: * The declared default value must meet the lexical constraints of
4935: * the declared attribute type c.f. xmlValidateAttributeDecl()
1.99 daniel 4936: *
4937: * [ VC: Fixed Attribute Default ]
1.117 daniel 4938: * if an attribute has a default value declared with the #FIXED
4939: * keyword, instances of that attribute must match the default value.
1.99 daniel 4940: *
4941: * [ WFC: No < in Attribute Values ]
4942: * handled in xmlParseAttValue()
4943: *
1.59 daniel 4944: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4945: * or XML_ATTRIBUTE_FIXED.
4946: */
4947:
4948: int
1.123 daniel 4949: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
1.59 daniel 4950: int val;
1.123 daniel 4951: xmlChar *ret;
1.59 daniel 4952:
4953: *value = NULL;
1.152 daniel 4954: if ((RAW == '#') && (NXT(1) == 'R') &&
1.59 daniel 4955: (NXT(2) == 'E') && (NXT(3) == 'Q') &&
4956: (NXT(4) == 'U') && (NXT(5) == 'I') &&
4957: (NXT(6) == 'R') && (NXT(7) == 'E') &&
4958: (NXT(8) == 'D')) {
4959: SKIP(9);
4960: return(XML_ATTRIBUTE_REQUIRED);
4961: }
1.152 daniel 4962: if ((RAW == '#') && (NXT(1) == 'I') &&
1.59 daniel 4963: (NXT(2) == 'M') && (NXT(3) == 'P') &&
4964: (NXT(4) == 'L') && (NXT(5) == 'I') &&
4965: (NXT(6) == 'E') && (NXT(7) == 'D')) {
4966: SKIP(8);
4967: return(XML_ATTRIBUTE_IMPLIED);
4968: }
4969: val = XML_ATTRIBUTE_NONE;
1.152 daniel 4970: if ((RAW == '#') && (NXT(1) == 'F') &&
1.59 daniel 4971: (NXT(2) == 'I') && (NXT(3) == 'X') &&
4972: (NXT(4) == 'E') && (NXT(5) == 'D')) {
4973: SKIP(6);
4974: val = XML_ATTRIBUTE_FIXED;
4975: if (!IS_BLANK(CUR)) {
4976: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4977: ctxt->sax->error(ctxt->userData,
4978: "Space required after '#FIXED'\n");
1.123 daniel 4979: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4980: ctxt->wellFormed = 0;
1.180 daniel 4981: ctxt->disableSAX = 1;
1.59 daniel 4982: }
4983: SKIP_BLANKS;
4984: }
4985: ret = xmlParseAttValue(ctxt);
1.96 daniel 4986: ctxt->instate = XML_PARSER_DTD;
1.59 daniel 4987: if (ret == NULL) {
4988: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4989: ctxt->sax->error(ctxt->userData,
1.59 daniel 4990: "Attribute default value declaration error\n");
4991: ctxt->wellFormed = 0;
1.180 daniel 4992: ctxt->disableSAX = 1;
1.59 daniel 4993: } else
4994: *value = ret;
4995: return(val);
4996: }
4997:
4998: /**
1.66 daniel 4999: * xmlParseNotationType:
5000: * @ctxt: an XML parser context
5001: *
5002: * parse an Notation attribute type.
5003: *
1.99 daniel 5004: * Note: the leading 'NOTATION' S part has already being parsed...
5005: *
1.66 daniel 5006: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5007: *
1.99 daniel 5008: * [ VC: Notation Attributes ]
1.117 daniel 5009: * Values of this type must match one of the notation names included
1.99 daniel 5010: * in the declaration; all notation names in the declaration must be declared.
1.66 daniel 5011: *
5012: * Returns: the notation attribute tree built while parsing
5013: */
5014:
5015: xmlEnumerationPtr
5016: xmlParseNotationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 5017: xmlChar *name;
1.66 daniel 5018: xmlEnumerationPtr ret = NULL, last = NULL, cur;
5019:
1.152 daniel 5020: if (RAW != '(') {
1.66 daniel 5021: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5022: ctxt->sax->error(ctxt->userData,
5023: "'(' required to start 'NOTATION'\n");
1.123 daniel 5024: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.66 daniel 5025: ctxt->wellFormed = 0;
1.180 daniel 5026: ctxt->disableSAX = 1;
1.66 daniel 5027: return(NULL);
5028: }
1.91 daniel 5029: SHRINK;
1.66 daniel 5030: do {
5031: NEXT;
5032: SKIP_BLANKS;
5033: name = xmlParseName(ctxt);
5034: if (name == NULL) {
5035: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5036: ctxt->sax->error(ctxt->userData,
1.66 daniel 5037: "Name expected in NOTATION declaration\n");
1.123 daniel 5038: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.66 daniel 5039: ctxt->wellFormed = 0;
1.180 daniel 5040: ctxt->disableSAX = 1;
1.66 daniel 5041: return(ret);
5042: }
5043: cur = xmlCreateEnumeration(name);
1.119 daniel 5044: xmlFree(name);
1.66 daniel 5045: if (cur == NULL) return(ret);
5046: if (last == NULL) ret = last = cur;
5047: else {
5048: last->next = cur;
5049: last = cur;
5050: }
5051: SKIP_BLANKS;
1.152 daniel 5052: } while (RAW == '|');
5053: if (RAW != ')') {
1.66 daniel 5054: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5055: ctxt->sax->error(ctxt->userData,
1.66 daniel 5056: "')' required to finish NOTATION declaration\n");
1.123 daniel 5057: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.66 daniel 5058: ctxt->wellFormed = 0;
1.180 daniel 5059: ctxt->disableSAX = 1;
1.170 daniel 5060: if ((last != NULL) && (last != ret))
5061: xmlFreeEnumeration(last);
1.66 daniel 5062: return(ret);
5063: }
5064: NEXT;
5065: return(ret);
5066: }
5067:
5068: /**
5069: * xmlParseEnumerationType:
5070: * @ctxt: an XML parser context
5071: *
5072: * parse an Enumeration attribute type.
5073: *
5074: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5075: *
1.99 daniel 5076: * [ VC: Enumeration ]
1.117 daniel 5077: * Values of this type must match one of the Nmtoken tokens in
1.99 daniel 5078: * the declaration
5079: *
1.66 daniel 5080: * Returns: the enumeration attribute tree built while parsing
5081: */
5082:
5083: xmlEnumerationPtr
5084: xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 5085: xmlChar *name;
1.66 daniel 5086: xmlEnumerationPtr ret = NULL, last = NULL, cur;
5087:
1.152 daniel 5088: if (RAW != '(') {
1.66 daniel 5089: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5090: ctxt->sax->error(ctxt->userData,
1.66 daniel 5091: "'(' required to start ATTLIST enumeration\n");
1.123 daniel 5092: ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
1.66 daniel 5093: ctxt->wellFormed = 0;
1.180 daniel 5094: ctxt->disableSAX = 1;
1.66 daniel 5095: return(NULL);
5096: }
1.91 daniel 5097: SHRINK;
1.66 daniel 5098: do {
5099: NEXT;
5100: SKIP_BLANKS;
5101: name = xmlParseNmtoken(ctxt);
5102: if (name == NULL) {
5103: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5104: ctxt->sax->error(ctxt->userData,
1.66 daniel 5105: "NmToken expected in ATTLIST enumeration\n");
1.123 daniel 5106: ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
1.66 daniel 5107: ctxt->wellFormed = 0;
1.180 daniel 5108: ctxt->disableSAX = 1;
1.66 daniel 5109: return(ret);
5110: }
5111: cur = xmlCreateEnumeration(name);
1.119 daniel 5112: xmlFree(name);
1.66 daniel 5113: if (cur == NULL) return(ret);
5114: if (last == NULL) ret = last = cur;
5115: else {
5116: last->next = cur;
5117: last = cur;
5118: }
5119: SKIP_BLANKS;
1.152 daniel 5120: } while (RAW == '|');
5121: if (RAW != ')') {
1.66 daniel 5122: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5123: ctxt->sax->error(ctxt->userData,
1.66 daniel 5124: "')' required to finish ATTLIST enumeration\n");
1.123 daniel 5125: ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
1.66 daniel 5126: ctxt->wellFormed = 0;
1.180 daniel 5127: ctxt->disableSAX = 1;
1.66 daniel 5128: return(ret);
5129: }
5130: NEXT;
5131: return(ret);
5132: }
5133:
5134: /**
1.50 daniel 5135: * xmlParseEnumeratedType:
5136: * @ctxt: an XML parser context
1.66 daniel 5137: * @tree: the enumeration tree built while parsing
1.50 daniel 5138: *
1.66 daniel 5139: * parse an Enumerated attribute type.
1.22 daniel 5140: *
5141: * [57] EnumeratedType ::= NotationType | Enumeration
5142: *
5143: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5144: *
1.50 daniel 5145: *
1.66 daniel 5146: * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
1.22 daniel 5147: */
5148:
1.66 daniel 5149: int
5150: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.152 daniel 5151: if ((RAW == 'N') && (NXT(1) == 'O') &&
1.66 daniel 5152: (NXT(2) == 'T') && (NXT(3) == 'A') &&
5153: (NXT(4) == 'T') && (NXT(5) == 'I') &&
5154: (NXT(6) == 'O') && (NXT(7) == 'N')) {
5155: SKIP(8);
5156: if (!IS_BLANK(CUR)) {
5157: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5158: ctxt->sax->error(ctxt->userData,
5159: "Space required after 'NOTATION'\n");
1.123 daniel 5160: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.66 daniel 5161: ctxt->wellFormed = 0;
1.180 daniel 5162: ctxt->disableSAX = 1;
1.66 daniel 5163: return(0);
5164: }
5165: SKIP_BLANKS;
5166: *tree = xmlParseNotationType(ctxt);
5167: if (*tree == NULL) return(0);
5168: return(XML_ATTRIBUTE_NOTATION);
5169: }
5170: *tree = xmlParseEnumerationType(ctxt);
5171: if (*tree == NULL) return(0);
5172: return(XML_ATTRIBUTE_ENUMERATION);
1.22 daniel 5173: }
5174:
1.50 daniel 5175: /**
5176: * xmlParseAttributeType:
5177: * @ctxt: an XML parser context
1.66 daniel 5178: * @tree: the enumeration tree built while parsing
1.50 daniel 5179: *
1.59 daniel 5180: * parse the Attribute list def for an element
1.22 daniel 5181: *
5182: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5183: *
5184: * [55] StringType ::= 'CDATA'
5185: *
5186: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5187: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1.50 daniel 5188: *
1.102 daniel 5189: * Validity constraints for attribute values syntax are checked in
5190: * xmlValidateAttributeValue()
5191: *
1.99 daniel 5192: * [ VC: ID ]
1.117 daniel 5193: * Values of type ID must match the Name production. A name must not
1.99 daniel 5194: * appear more than once in an XML document as a value of this type;
5195: * i.e., ID values must uniquely identify the elements which bear them.
5196: *
5197: * [ VC: One ID per Element Type ]
1.117 daniel 5198: * No element type may have more than one ID attribute specified.
1.99 daniel 5199: *
5200: * [ VC: ID Attribute Default ]
1.117 daniel 5201: * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
1.99 daniel 5202: *
5203: * [ VC: IDREF ]
1.102 daniel 5204: * Values of type IDREF must match the Name production, and values
1.140 daniel 5205: * of type IDREFS must match Names; each IDREF Name must match the value
1.117 daniel 5206: * of an ID attribute on some element in the XML document; i.e. IDREF
1.99 daniel 5207: * values must match the value of some ID attribute.
5208: *
5209: * [ VC: Entity Name ]
1.102 daniel 5210: * Values of type ENTITY must match the Name production, values
1.140 daniel 5211: * of type ENTITIES must match Names; each Entity Name must match the
1.117 daniel 5212: * name of an unparsed entity declared in the DTD.
1.99 daniel 5213: *
5214: * [ VC: Name Token ]
1.102 daniel 5215: * Values of type NMTOKEN must match the Nmtoken production; values
1.99 daniel 5216: * of type NMTOKENS must match Nmtokens.
5217: *
1.69 daniel 5218: * Returns the attribute type
1.22 daniel 5219: */
1.59 daniel 5220: int
1.66 daniel 5221: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.91 daniel 5222: SHRINK;
1.152 daniel 5223: if ((RAW == 'C') && (NXT(1) == 'D') &&
1.40 daniel 5224: (NXT(2) == 'A') && (NXT(3) == 'T') &&
5225: (NXT(4) == 'A')) {
5226: SKIP(5);
1.66 daniel 5227: return(XML_ATTRIBUTE_CDATA);
1.152 daniel 5228: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.40 daniel 5229: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.97 daniel 5230: (NXT(4) == 'F') && (NXT(5) == 'S')) {
5231: SKIP(6);
5232: return(XML_ATTRIBUTE_IDREFS);
1.152 daniel 5233: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.97 daniel 5234: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.40 daniel 5235: (NXT(4) == 'F')) {
5236: SKIP(5);
1.59 daniel 5237: return(XML_ATTRIBUTE_IDREF);
1.152 daniel 5238: } else if ((RAW == 'I') && (NXT(1) == 'D')) {
1.66 daniel 5239: SKIP(2);
5240: return(XML_ATTRIBUTE_ID);
1.152 daniel 5241: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 5242: (NXT(2) == 'T') && (NXT(3) == 'I') &&
5243: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
5244: SKIP(6);
1.59 daniel 5245: return(XML_ATTRIBUTE_ENTITY);
1.152 daniel 5246: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 5247: (NXT(2) == 'T') && (NXT(3) == 'I') &&
5248: (NXT(4) == 'T') && (NXT(5) == 'I') &&
5249: (NXT(6) == 'E') && (NXT(7) == 'S')) {
5250: SKIP(8);
1.59 daniel 5251: return(XML_ATTRIBUTE_ENTITIES);
1.152 daniel 5252: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.40 daniel 5253: (NXT(2) == 'T') && (NXT(3) == 'O') &&
5254: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.66 daniel 5255: (NXT(6) == 'N') && (NXT(7) == 'S')) {
5256: SKIP(8);
5257: return(XML_ATTRIBUTE_NMTOKENS);
1.152 daniel 5258: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.66 daniel 5259: (NXT(2) == 'T') && (NXT(3) == 'O') &&
5260: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.40 daniel 5261: (NXT(6) == 'N')) {
5262: SKIP(7);
1.59 daniel 5263: return(XML_ATTRIBUTE_NMTOKEN);
1.22 daniel 5264: }
1.66 daniel 5265: return(xmlParseEnumeratedType(ctxt, tree));
1.22 daniel 5266: }
5267:
1.50 daniel 5268: /**
5269: * xmlParseAttributeListDecl:
5270: * @ctxt: an XML parser context
5271: *
5272: * : parse the Attribute list def for an element
1.22 daniel 5273: *
5274: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5275: *
5276: * [53] AttDef ::= S Name S AttType S DefaultDecl
1.50 daniel 5277: *
1.22 daniel 5278: */
1.55 daniel 5279: void
5280: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 5281: xmlChar *elemName;
5282: xmlChar *attrName;
1.103 daniel 5283: xmlEnumerationPtr tree;
1.22 daniel 5284:
1.152 daniel 5285: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 5286: (NXT(2) == 'A') && (NXT(3) == 'T') &&
5287: (NXT(4) == 'T') && (NXT(5) == 'L') &&
5288: (NXT(6) == 'I') && (NXT(7) == 'S') &&
1.59 daniel 5289: (NXT(8) == 'T')) {
1.187 daniel 5290: xmlParserInputPtr input = ctxt->input;
5291:
1.40 daniel 5292: SKIP(9);
1.59 daniel 5293: if (!IS_BLANK(CUR)) {
5294: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5295: ctxt->sax->error(ctxt->userData,
5296: "Space required after '<!ATTLIST'\n");
1.123 daniel 5297: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5298: ctxt->wellFormed = 0;
1.180 daniel 5299: ctxt->disableSAX = 1;
1.59 daniel 5300: }
1.42 daniel 5301: SKIP_BLANKS;
1.59 daniel 5302: elemName = xmlParseName(ctxt);
5303: if (elemName == NULL) {
1.55 daniel 5304: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5305: ctxt->sax->error(ctxt->userData,
5306: "ATTLIST: no name for Element\n");
1.123 daniel 5307: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5308: ctxt->wellFormed = 0;
1.180 daniel 5309: ctxt->disableSAX = 1;
1.22 daniel 5310: return;
5311: }
1.42 daniel 5312: SKIP_BLANKS;
1.152 daniel 5313: while (RAW != '>') {
1.123 daniel 5314: const xmlChar *check = CUR_PTR;
1.59 daniel 5315: int type;
5316: int def;
1.123 daniel 5317: xmlChar *defaultValue = NULL;
1.59 daniel 5318:
1.103 daniel 5319: tree = NULL;
1.59 daniel 5320: attrName = xmlParseName(ctxt);
5321: if (attrName == NULL) {
5322: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5323: ctxt->sax->error(ctxt->userData,
5324: "ATTLIST: no name for Attribute\n");
1.123 daniel 5325: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5326: ctxt->wellFormed = 0;
1.180 daniel 5327: ctxt->disableSAX = 1;
1.59 daniel 5328: break;
5329: }
1.97 daniel 5330: GROW;
1.59 daniel 5331: if (!IS_BLANK(CUR)) {
5332: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5333: ctxt->sax->error(ctxt->userData,
1.59 daniel 5334: "Space required after the attribute name\n");
1.123 daniel 5335: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5336: ctxt->wellFormed = 0;
1.180 daniel 5337: ctxt->disableSAX = 1;
1.170 daniel 5338: if (attrName != NULL)
5339: xmlFree(attrName);
5340: if (defaultValue != NULL)
5341: xmlFree(defaultValue);
1.59 daniel 5342: break;
5343: }
5344: SKIP_BLANKS;
5345:
1.66 daniel 5346: type = xmlParseAttributeType(ctxt, &tree);
1.170 daniel 5347: if (type <= 0) {
5348: if (attrName != NULL)
5349: xmlFree(attrName);
5350: if (defaultValue != NULL)
5351: xmlFree(defaultValue);
5352: break;
5353: }
1.22 daniel 5354:
1.97 daniel 5355: GROW;
1.59 daniel 5356: if (!IS_BLANK(CUR)) {
5357: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5358: ctxt->sax->error(ctxt->userData,
1.59 daniel 5359: "Space required after the attribute type\n");
1.123 daniel 5360: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5361: ctxt->wellFormed = 0;
1.180 daniel 5362: ctxt->disableSAX = 1;
1.170 daniel 5363: if (attrName != NULL)
5364: xmlFree(attrName);
5365: if (defaultValue != NULL)
5366: xmlFree(defaultValue);
5367: if (tree != NULL)
5368: xmlFreeEnumeration(tree);
1.59 daniel 5369: break;
5370: }
1.42 daniel 5371: SKIP_BLANKS;
1.59 daniel 5372:
5373: def = xmlParseDefaultDecl(ctxt, &defaultValue);
1.170 daniel 5374: if (def <= 0) {
5375: if (attrName != NULL)
5376: xmlFree(attrName);
5377: if (defaultValue != NULL)
5378: xmlFree(defaultValue);
5379: if (tree != NULL)
5380: xmlFreeEnumeration(tree);
5381: break;
5382: }
1.59 daniel 5383:
1.97 daniel 5384: GROW;
1.152 daniel 5385: if (RAW != '>') {
1.59 daniel 5386: if (!IS_BLANK(CUR)) {
5387: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5388: ctxt->sax->error(ctxt->userData,
1.59 daniel 5389: "Space required after the attribute default value\n");
1.123 daniel 5390: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5391: ctxt->wellFormed = 0;
1.180 daniel 5392: ctxt->disableSAX = 1;
1.170 daniel 5393: if (attrName != NULL)
5394: xmlFree(attrName);
5395: if (defaultValue != NULL)
5396: xmlFree(defaultValue);
5397: if (tree != NULL)
5398: xmlFreeEnumeration(tree);
1.59 daniel 5399: break;
5400: }
5401: SKIP_BLANKS;
5402: }
1.40 daniel 5403: if (check == CUR_PTR) {
1.55 daniel 5404: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5405: ctxt->sax->error(ctxt->userData,
1.59 daniel 5406: "xmlParseAttributeListDecl: detected internal error\n");
1.123 daniel 5407: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.170 daniel 5408: if (attrName != NULL)
5409: xmlFree(attrName);
5410: if (defaultValue != NULL)
5411: xmlFree(defaultValue);
5412: if (tree != NULL)
5413: xmlFreeEnumeration(tree);
1.22 daniel 5414: break;
5415: }
1.171 daniel 5416: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5417: (ctxt->sax->attributeDecl != NULL))
1.74 daniel 5418: ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
1.66 daniel 5419: type, def, defaultValue, tree);
1.59 daniel 5420: if (attrName != NULL)
1.119 daniel 5421: xmlFree(attrName);
1.59 daniel 5422: if (defaultValue != NULL)
1.119 daniel 5423: xmlFree(defaultValue);
1.97 daniel 5424: GROW;
1.22 daniel 5425: }
1.187 daniel 5426: if (RAW == '>') {
5427: if (input != ctxt->input) {
5428: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5429: ctxt->sax->error(ctxt->userData,
5430: "Attribute list declaration doesn't start and stop in the same entity\n");
5431: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5432: ctxt->wellFormed = 0;
5433: ctxt->disableSAX = 1;
5434: }
1.40 daniel 5435: NEXT;
1.187 daniel 5436: }
1.22 daniel 5437:
1.119 daniel 5438: xmlFree(elemName);
1.22 daniel 5439: }
5440: }
5441:
1.50 daniel 5442: /**
1.61 daniel 5443: * xmlParseElementMixedContentDecl:
5444: * @ctxt: an XML parser context
5445: *
5446: * parse the declaration for a Mixed Element content
5447: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5448: *
5449: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5450: * '(' S? '#PCDATA' S? ')'
5451: *
1.99 daniel 5452: * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5453: *
5454: * [ VC: No Duplicate Types ]
1.117 daniel 5455: * The same name must not appear more than once in a single
5456: * mixed-content declaration.
1.99 daniel 5457: *
1.61 daniel 5458: * returns: the list of the xmlElementContentPtr describing the element choices
5459: */
5460: xmlElementContentPtr
1.62 daniel 5461: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
1.64 daniel 5462: xmlElementContentPtr ret = NULL, cur = NULL, n;
1.123 daniel 5463: xmlChar *elem = NULL;
1.61 daniel 5464:
1.97 daniel 5465: GROW;
1.152 daniel 5466: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 5467: (NXT(2) == 'C') && (NXT(3) == 'D') &&
5468: (NXT(4) == 'A') && (NXT(5) == 'T') &&
5469: (NXT(6) == 'A')) {
5470: SKIP(7);
5471: SKIP_BLANKS;
1.91 daniel 5472: SHRINK;
1.152 daniel 5473: if (RAW == ')') {
1.187 daniel 5474: ctxt->entity = ctxt->input;
1.63 daniel 5475: NEXT;
5476: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
1.152 daniel 5477: if (RAW == '*') {
1.136 daniel 5478: ret->ocur = XML_ELEMENT_CONTENT_MULT;
5479: NEXT;
5480: }
1.63 daniel 5481: return(ret);
5482: }
1.152 daniel 5483: if ((RAW == '(') || (RAW == '|')) {
1.61 daniel 5484: ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
5485: if (ret == NULL) return(NULL);
1.99 daniel 5486: }
1.152 daniel 5487: while (RAW == '|') {
1.64 daniel 5488: NEXT;
1.61 daniel 5489: if (elem == NULL) {
5490: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5491: if (ret == NULL) return(NULL);
5492: ret->c1 = cur;
1.64 daniel 5493: cur = ret;
1.61 daniel 5494: } else {
1.64 daniel 5495: n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5496: if (n == NULL) return(NULL);
5497: n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
5498: cur->c2 = n;
5499: cur = n;
1.119 daniel 5500: xmlFree(elem);
1.61 daniel 5501: }
5502: SKIP_BLANKS;
5503: elem = xmlParseName(ctxt);
5504: if (elem == NULL) {
5505: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5506: ctxt->sax->error(ctxt->userData,
1.61 daniel 5507: "xmlParseElementMixedContentDecl : Name expected\n");
1.123 daniel 5508: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.61 daniel 5509: ctxt->wellFormed = 0;
1.180 daniel 5510: ctxt->disableSAX = 1;
1.61 daniel 5511: xmlFreeElementContent(cur);
5512: return(NULL);
5513: }
5514: SKIP_BLANKS;
1.97 daniel 5515: GROW;
1.61 daniel 5516: }
1.152 daniel 5517: if ((RAW == ')') && (NXT(1) == '*')) {
1.66 daniel 5518: if (elem != NULL) {
1.61 daniel 5519: cur->c2 = xmlNewElementContent(elem,
5520: XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 5521: xmlFree(elem);
1.66 daniel 5522: }
1.65 daniel 5523: ret->ocur = XML_ELEMENT_CONTENT_MULT;
1.187 daniel 5524: ctxt->entity = ctxt->input;
1.64 daniel 5525: SKIP(2);
1.61 daniel 5526: } else {
1.119 daniel 5527: if (elem != NULL) xmlFree(elem);
1.61 daniel 5528: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5529: ctxt->sax->error(ctxt->userData,
1.63 daniel 5530: "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
1.123 daniel 5531: ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
1.61 daniel 5532: ctxt->wellFormed = 0;
1.180 daniel 5533: ctxt->disableSAX = 1;
1.61 daniel 5534: xmlFreeElementContent(ret);
5535: return(NULL);
5536: }
5537:
5538: } else {
5539: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5540: ctxt->sax->error(ctxt->userData,
1.61 daniel 5541: "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
1.123 daniel 5542: ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
1.61 daniel 5543: ctxt->wellFormed = 0;
1.180 daniel 5544: ctxt->disableSAX = 1;
1.61 daniel 5545: }
5546: return(ret);
5547: }
5548:
5549: /**
5550: * xmlParseElementChildrenContentDecl:
1.50 daniel 5551: * @ctxt: an XML parser context
5552: *
1.61 daniel 5553: * parse the declaration for a Mixed Element content
5554: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
1.22 daniel 5555: *
1.61 daniel 5556: *
1.22 daniel 5557: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5558: *
5559: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5560: *
5561: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5562: *
5563: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5564: *
1.99 daniel 5565: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5566: * TODO Parameter-entity replacement text must be properly nested
5567: * with parenthetized groups. That is to say, if either of the
5568: * opening or closing parentheses in a choice, seq, or Mixed
5569: * construct is contained in the replacement text for a parameter
5570: * entity, both must be contained in the same replacement text. For
5571: * interoperability, if a parameter-entity reference appears in a
5572: * choice, seq, or Mixed construct, its replacement text should not
5573: * be empty, and neither the first nor last non-blank character of
5574: * the replacement text should be a connector (| or ,).
5575: *
1.62 daniel 5576: * returns: the tree of xmlElementContentPtr describing the element
1.61 daniel 5577: * hierarchy.
5578: */
5579: xmlElementContentPtr
1.62 daniel 5580: xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
1.63 daniel 5581: xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
1.123 daniel 5582: xmlChar *elem;
5583: xmlChar type = 0;
1.62 daniel 5584:
5585: SKIP_BLANKS;
1.94 daniel 5586: GROW;
1.152 daniel 5587: if (RAW == '(') {
1.63 daniel 5588: /* Recurse on first child */
1.62 daniel 5589: NEXT;
5590: SKIP_BLANKS;
5591: cur = ret = xmlParseElementChildrenContentDecl(ctxt);
5592: SKIP_BLANKS;
1.101 daniel 5593: GROW;
1.62 daniel 5594: } else {
5595: elem = xmlParseName(ctxt);
5596: if (elem == NULL) {
5597: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5598: ctxt->sax->error(ctxt->userData,
1.62 daniel 5599: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.123 daniel 5600: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 5601: ctxt->wellFormed = 0;
1.180 daniel 5602: ctxt->disableSAX = 1;
1.62 daniel 5603: return(NULL);
5604: }
5605: cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.101 daniel 5606: GROW;
1.152 daniel 5607: if (RAW == '?') {
1.104 daniel 5608: cur->ocur = XML_ELEMENT_CONTENT_OPT;
1.62 daniel 5609: NEXT;
1.152 daniel 5610: } else if (RAW == '*') {
1.104 daniel 5611: cur->ocur = XML_ELEMENT_CONTENT_MULT;
1.62 daniel 5612: NEXT;
1.152 daniel 5613: } else if (RAW == '+') {
1.104 daniel 5614: cur->ocur = XML_ELEMENT_CONTENT_PLUS;
1.62 daniel 5615: NEXT;
5616: } else {
1.104 daniel 5617: cur->ocur = XML_ELEMENT_CONTENT_ONCE;
1.62 daniel 5618: }
1.119 daniel 5619: xmlFree(elem);
1.101 daniel 5620: GROW;
1.62 daniel 5621: }
5622: SKIP_BLANKS;
1.91 daniel 5623: SHRINK;
1.152 daniel 5624: while (RAW != ')') {
1.63 daniel 5625: /*
5626: * Each loop we parse one separator and one element.
5627: */
1.152 daniel 5628: if (RAW == ',') {
1.62 daniel 5629: if (type == 0) type = CUR;
5630:
5631: /*
5632: * Detect "Name | Name , Name" error
5633: */
5634: else if (type != CUR) {
5635: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5636: ctxt->sax->error(ctxt->userData,
1.62 daniel 5637: "xmlParseElementChildrenContentDecl : '%c' expected\n",
5638: type);
1.123 daniel 5639: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 5640: ctxt->wellFormed = 0;
1.180 daniel 5641: ctxt->disableSAX = 1;
1.170 daniel 5642: if ((op != NULL) && (op != ret))
5643: xmlFreeElementContent(op);
5644: if ((last != NULL) && (last != ret))
5645: xmlFreeElementContent(last);
5646: if (ret != NULL)
5647: xmlFreeElementContent(ret);
1.62 daniel 5648: return(NULL);
5649: }
1.64 daniel 5650: NEXT;
1.62 daniel 5651:
1.63 daniel 5652: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
5653: if (op == NULL) {
5654: xmlFreeElementContent(ret);
5655: return(NULL);
5656: }
5657: if (last == NULL) {
5658: op->c1 = ret;
1.65 daniel 5659: ret = cur = op;
1.63 daniel 5660: } else {
5661: cur->c2 = op;
5662: op->c1 = last;
5663: cur =op;
1.65 daniel 5664: last = NULL;
1.63 daniel 5665: }
1.152 daniel 5666: } else if (RAW == '|') {
1.62 daniel 5667: if (type == 0) type = CUR;
5668:
5669: /*
1.63 daniel 5670: * Detect "Name , Name | Name" error
1.62 daniel 5671: */
5672: else if (type != CUR) {
5673: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5674: ctxt->sax->error(ctxt->userData,
1.62 daniel 5675: "xmlParseElementChildrenContentDecl : '%c' expected\n",
5676: type);
1.123 daniel 5677: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 5678: ctxt->wellFormed = 0;
1.180 daniel 5679: ctxt->disableSAX = 1;
1.170 daniel 5680: if ((op != NULL) && (op != ret))
5681: xmlFreeElementContent(op);
5682: if ((last != NULL) && (last != ret))
5683: xmlFreeElementContent(last);
5684: if (ret != NULL)
5685: xmlFreeElementContent(ret);
1.62 daniel 5686: return(NULL);
5687: }
1.64 daniel 5688: NEXT;
1.62 daniel 5689:
1.63 daniel 5690: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5691: if (op == NULL) {
1.170 daniel 5692: if ((op != NULL) && (op != ret))
5693: xmlFreeElementContent(op);
5694: if ((last != NULL) && (last != ret))
5695: xmlFreeElementContent(last);
5696: if (ret != NULL)
5697: xmlFreeElementContent(ret);
1.63 daniel 5698: return(NULL);
5699: }
5700: if (last == NULL) {
5701: op->c1 = ret;
1.65 daniel 5702: ret = cur = op;
1.63 daniel 5703: } else {
5704: cur->c2 = op;
5705: op->c1 = last;
5706: cur =op;
1.65 daniel 5707: last = NULL;
1.63 daniel 5708: }
1.62 daniel 5709: } else {
5710: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5711: ctxt->sax->error(ctxt->userData,
1.62 daniel 5712: "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
5713: ctxt->wellFormed = 0;
1.180 daniel 5714: ctxt->disableSAX = 1;
1.123 daniel 5715: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
1.170 daniel 5716: if ((op != NULL) && (op != ret))
5717: xmlFreeElementContent(op);
5718: if ((last != NULL) && (last != ret))
5719: xmlFreeElementContent(last);
5720: if (ret != NULL)
5721: xmlFreeElementContent(ret);
1.62 daniel 5722: return(NULL);
5723: }
1.101 daniel 5724: GROW;
1.62 daniel 5725: SKIP_BLANKS;
1.101 daniel 5726: GROW;
1.152 daniel 5727: if (RAW == '(') {
1.63 daniel 5728: /* Recurse on second child */
1.62 daniel 5729: NEXT;
5730: SKIP_BLANKS;
1.65 daniel 5731: last = xmlParseElementChildrenContentDecl(ctxt);
1.62 daniel 5732: SKIP_BLANKS;
5733: } else {
5734: elem = xmlParseName(ctxt);
5735: if (elem == NULL) {
5736: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5737: ctxt->sax->error(ctxt->userData,
1.122 daniel 5738: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.123 daniel 5739: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 5740: ctxt->wellFormed = 0;
1.180 daniel 5741: ctxt->disableSAX = 1;
1.170 daniel 5742: if ((op != NULL) && (op != ret))
5743: xmlFreeElementContent(op);
5744: if ((last != NULL) && (last != ret))
5745: xmlFreeElementContent(last);
5746: if (ret != NULL)
5747: xmlFreeElementContent(ret);
1.62 daniel 5748: return(NULL);
5749: }
1.65 daniel 5750: last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 5751: xmlFree(elem);
1.152 daniel 5752: if (RAW == '?') {
1.105 daniel 5753: last->ocur = XML_ELEMENT_CONTENT_OPT;
5754: NEXT;
1.152 daniel 5755: } else if (RAW == '*') {
1.105 daniel 5756: last->ocur = XML_ELEMENT_CONTENT_MULT;
5757: NEXT;
1.152 daniel 5758: } else if (RAW == '+') {
1.105 daniel 5759: last->ocur = XML_ELEMENT_CONTENT_PLUS;
5760: NEXT;
5761: } else {
5762: last->ocur = XML_ELEMENT_CONTENT_ONCE;
5763: }
1.63 daniel 5764: }
5765: SKIP_BLANKS;
1.97 daniel 5766: GROW;
1.64 daniel 5767: }
1.65 daniel 5768: if ((cur != NULL) && (last != NULL)) {
5769: cur->c2 = last;
1.62 daniel 5770: }
1.187 daniel 5771: ctxt->entity = ctxt->input;
1.62 daniel 5772: NEXT;
1.152 daniel 5773: if (RAW == '?') {
1.62 daniel 5774: ret->ocur = XML_ELEMENT_CONTENT_OPT;
5775: NEXT;
1.152 daniel 5776: } else if (RAW == '*') {
1.62 daniel 5777: ret->ocur = XML_ELEMENT_CONTENT_MULT;
5778: NEXT;
1.152 daniel 5779: } else if (RAW == '+') {
1.62 daniel 5780: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
5781: NEXT;
5782: }
5783: return(ret);
1.61 daniel 5784: }
5785:
5786: /**
5787: * xmlParseElementContentDecl:
5788: * @ctxt: an XML parser context
5789: * @name: the name of the element being defined.
5790: * @result: the Element Content pointer will be stored here if any
1.22 daniel 5791: *
1.61 daniel 5792: * parse the declaration for an Element content either Mixed or Children,
5793: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5794: *
5795: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1.50 daniel 5796: *
1.61 daniel 5797: * returns: the type of element content XML_ELEMENT_TYPE_xxx
1.22 daniel 5798: */
5799:
1.61 daniel 5800: int
1.123 daniel 5801: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
1.61 daniel 5802: xmlElementContentPtr *result) {
5803:
5804: xmlElementContentPtr tree = NULL;
1.187 daniel 5805: xmlParserInputPtr input = ctxt->input;
1.61 daniel 5806: int res;
5807:
5808: *result = NULL;
5809:
1.152 daniel 5810: if (RAW != '(') {
1.61 daniel 5811: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5812: ctxt->sax->error(ctxt->userData,
1.61 daniel 5813: "xmlParseElementContentDecl : '(' expected\n");
1.123 daniel 5814: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.61 daniel 5815: ctxt->wellFormed = 0;
1.180 daniel 5816: ctxt->disableSAX = 1;
1.61 daniel 5817: return(-1);
5818: }
5819: NEXT;
1.97 daniel 5820: GROW;
1.61 daniel 5821: SKIP_BLANKS;
1.152 daniel 5822: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 5823: (NXT(2) == 'C') && (NXT(3) == 'D') &&
5824: (NXT(4) == 'A') && (NXT(5) == 'T') &&
5825: (NXT(6) == 'A')) {
1.62 daniel 5826: tree = xmlParseElementMixedContentDecl(ctxt);
1.61 daniel 5827: res = XML_ELEMENT_TYPE_MIXED;
5828: } else {
1.62 daniel 5829: tree = xmlParseElementChildrenContentDecl(ctxt);
1.61 daniel 5830: res = XML_ELEMENT_TYPE_ELEMENT;
5831: }
1.187 daniel 5832: if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
5833: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5834: ctxt->sax->error(ctxt->userData,
5835: "Element content declaration doesn't start and stop in the same entity\n");
5836: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5837: ctxt->wellFormed = 0;
5838: ctxt->disableSAX = 1;
5839: }
1.61 daniel 5840: SKIP_BLANKS;
1.63 daniel 5841: /****************************
1.152 daniel 5842: if (RAW != ')') {
1.61 daniel 5843: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5844: ctxt->sax->error(ctxt->userData,
1.61 daniel 5845: "xmlParseElementContentDecl : ')' expected\n");
5846: ctxt->wellFormed = 0;
1.180 daniel 5847: ctxt->disableSAX = 1;
1.61 daniel 5848: return(-1);
5849: }
1.63 daniel 5850: ****************************/
5851: *result = tree;
1.61 daniel 5852: return(res);
1.22 daniel 5853: }
5854:
1.50 daniel 5855: /**
5856: * xmlParseElementDecl:
5857: * @ctxt: an XML parser context
5858: *
5859: * parse an Element declaration.
1.22 daniel 5860: *
5861: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5862: *
1.99 daniel 5863: * [ VC: Unique Element Type Declaration ]
1.117 daniel 5864: * No element type may be declared more than once
1.69 daniel 5865: *
5866: * Returns the type of the element, or -1 in case of error
1.22 daniel 5867: */
1.59 daniel 5868: int
1.55 daniel 5869: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 5870: xmlChar *name;
1.59 daniel 5871: int ret = -1;
1.61 daniel 5872: xmlElementContentPtr content = NULL;
1.22 daniel 5873:
1.97 daniel 5874: GROW;
1.152 daniel 5875: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 5876: (NXT(2) == 'E') && (NXT(3) == 'L') &&
5877: (NXT(4) == 'E') && (NXT(5) == 'M') &&
5878: (NXT(6) == 'E') && (NXT(7) == 'N') &&
1.59 daniel 5879: (NXT(8) == 'T')) {
1.187 daniel 5880: xmlParserInputPtr input = ctxt->input;
5881:
1.40 daniel 5882: SKIP(9);
1.59 daniel 5883: if (!IS_BLANK(CUR)) {
5884: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5885: ctxt->sax->error(ctxt->userData,
1.59 daniel 5886: "Space required after 'ELEMENT'\n");
1.123 daniel 5887: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5888: ctxt->wellFormed = 0;
1.180 daniel 5889: ctxt->disableSAX = 1;
1.59 daniel 5890: }
1.42 daniel 5891: SKIP_BLANKS;
1.22 daniel 5892: name = xmlParseName(ctxt);
5893: if (name == NULL) {
1.55 daniel 5894: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5895: ctxt->sax->error(ctxt->userData,
1.59 daniel 5896: "xmlParseElementDecl: no name for Element\n");
1.123 daniel 5897: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5898: ctxt->wellFormed = 0;
1.180 daniel 5899: ctxt->disableSAX = 1;
1.59 daniel 5900: return(-1);
5901: }
5902: if (!IS_BLANK(CUR)) {
5903: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5904: ctxt->sax->error(ctxt->userData,
1.59 daniel 5905: "Space required after the element name\n");
1.123 daniel 5906: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5907: ctxt->wellFormed = 0;
1.180 daniel 5908: ctxt->disableSAX = 1;
1.22 daniel 5909: }
1.42 daniel 5910: SKIP_BLANKS;
1.152 daniel 5911: if ((RAW == 'E') && (NXT(1) == 'M') &&
1.40 daniel 5912: (NXT(2) == 'P') && (NXT(3) == 'T') &&
5913: (NXT(4) == 'Y')) {
5914: SKIP(5);
1.22 daniel 5915: /*
5916: * Element must always be empty.
5917: */
1.59 daniel 5918: ret = XML_ELEMENT_TYPE_EMPTY;
1.152 daniel 5919: } else if ((RAW == 'A') && (NXT(1) == 'N') &&
1.40 daniel 5920: (NXT(2) == 'Y')) {
5921: SKIP(3);
1.22 daniel 5922: /*
5923: * Element is a generic container.
5924: */
1.59 daniel 5925: ret = XML_ELEMENT_TYPE_ANY;
1.152 daniel 5926: } else if (RAW == '(') {
1.61 daniel 5927: ret = xmlParseElementContentDecl(ctxt, name, &content);
1.22 daniel 5928: } else {
1.98 daniel 5929: /*
5930: * [ WFC: PEs in Internal Subset ] error handling.
5931: */
1.152 daniel 5932: if ((RAW == '%') && (ctxt->external == 0) &&
1.98 daniel 5933: (ctxt->inputNr == 1)) {
5934: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5935: ctxt->sax->error(ctxt->userData,
5936: "PEReference: forbidden within markup decl in internal subset\n");
1.123 daniel 5937: ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
1.98 daniel 5938: } else {
5939: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5940: ctxt->sax->error(ctxt->userData,
5941: "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
1.123 daniel 5942: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.98 daniel 5943: }
1.61 daniel 5944: ctxt->wellFormed = 0;
1.180 daniel 5945: ctxt->disableSAX = 1;
1.119 daniel 5946: if (name != NULL) xmlFree(name);
1.61 daniel 5947: return(-1);
1.22 daniel 5948: }
1.142 daniel 5949:
5950: SKIP_BLANKS;
5951: /*
5952: * Pop-up of finished entities.
5953: */
1.152 daniel 5954: while ((RAW == 0) && (ctxt->inputNr > 1))
1.142 daniel 5955: xmlPopInput(ctxt);
1.42 daniel 5956: SKIP_BLANKS;
1.142 daniel 5957:
1.152 daniel 5958: if (RAW != '>') {
1.55 daniel 5959: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5960: ctxt->sax->error(ctxt->userData,
1.31 daniel 5961: "xmlParseElementDecl: expected '>' at the end\n");
1.123 daniel 5962: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.59 daniel 5963: ctxt->wellFormed = 0;
1.180 daniel 5964: ctxt->disableSAX = 1;
1.61 daniel 5965: } else {
1.187 daniel 5966: if (input != ctxt->input) {
5967: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5968: ctxt->sax->error(ctxt->userData,
5969: "Element declaration doesn't start and stop in the same entity\n");
5970: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5971: ctxt->wellFormed = 0;
5972: ctxt->disableSAX = 1;
5973: }
5974:
1.40 daniel 5975: NEXT;
1.171 daniel 5976: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5977: (ctxt->sax->elementDecl != NULL))
1.76 daniel 5978: ctxt->sax->elementDecl(ctxt->userData, name, ret,
5979: content);
1.61 daniel 5980: }
1.84 daniel 5981: if (content != NULL) {
5982: xmlFreeElementContent(content);
5983: }
1.61 daniel 5984: if (name != NULL) {
1.119 daniel 5985: xmlFree(name);
1.61 daniel 5986: }
1.22 daniel 5987: }
1.59 daniel 5988: return(ret);
1.22 daniel 5989: }
5990:
1.50 daniel 5991: /**
5992: * xmlParseMarkupDecl:
5993: * @ctxt: an XML parser context
5994: *
5995: * parse Markup declarations
1.22 daniel 5996: *
5997: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5998: * NotationDecl | PI | Comment
5999: *
1.98 daniel 6000: * [ VC: Proper Declaration/PE Nesting ]
6001: * TODO Parameter-entity replacement text must be properly nested with
6002: * markup declarations. That is to say, if either the first character
6003: * or the last character of a markup declaration (markupdecl above) is
6004: * contained in the replacement text for a parameter-entity reference,
6005: * both must be contained in the same replacement text.
6006: *
6007: * [ WFC: PEs in Internal Subset ]
6008: * In the internal DTD subset, parameter-entity references can occur
6009: * only where markup declarations can occur, not within markup declarations.
6010: * (This does not apply to references that occur in external parameter
6011: * entities or to the external subset.)
1.22 daniel 6012: */
1.55 daniel 6013: void
6014: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1.94 daniel 6015: GROW;
1.22 daniel 6016: xmlParseElementDecl(ctxt);
6017: xmlParseAttributeListDecl(ctxt);
6018: xmlParseEntityDecl(ctxt);
6019: xmlParseNotationDecl(ctxt);
6020: xmlParsePI(ctxt);
1.114 daniel 6021: xmlParseComment(ctxt);
1.98 daniel 6022: /*
6023: * This is only for internal subset. On external entities,
6024: * the replacement is done before parsing stage
6025: */
6026: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6027: xmlParsePEReference(ctxt);
1.97 daniel 6028: ctxt->instate = XML_PARSER_DTD;
1.22 daniel 6029: }
6030:
1.50 daniel 6031: /**
1.76 daniel 6032: * xmlParseTextDecl:
6033: * @ctxt: an XML parser context
6034: *
6035: * parse an XML declaration header for external entities
6036: *
6037: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
1.176 daniel 6038: *
6039: * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
1.76 daniel 6040: */
6041:
1.172 daniel 6042: void
1.76 daniel 6043: xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 6044: xmlChar *version;
1.76 daniel 6045:
6046: /*
6047: * We know that '<?xml' is here.
6048: */
1.193 daniel 6049: if ((RAW == '<') && (NXT(1) == '?') &&
6050: (NXT(2) == 'x') && (NXT(3) == 'm') &&
6051: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6052: SKIP(5);
6053: } else {
6054: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6055: ctxt->sax->error(ctxt->userData,
6056: "Text declaration '<?xml' required\n");
6057: ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
6058: ctxt->wellFormed = 0;
6059: ctxt->disableSAX = 1;
6060:
6061: return;
6062: }
1.76 daniel 6063:
6064: if (!IS_BLANK(CUR)) {
6065: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6066: ctxt->sax->error(ctxt->userData,
6067: "Space needed after '<?xml'\n");
1.123 daniel 6068: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 6069: ctxt->wellFormed = 0;
1.180 daniel 6070: ctxt->disableSAX = 1;
1.76 daniel 6071: }
6072: SKIP_BLANKS;
6073:
6074: /*
6075: * We may have the VersionInfo here.
6076: */
6077: version = xmlParseVersionInfo(ctxt);
6078: if (version == NULL)
6079: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.172 daniel 6080: ctxt->input->version = version;
1.76 daniel 6081:
6082: /*
6083: * We must have the encoding declaration
6084: */
6085: if (!IS_BLANK(CUR)) {
6086: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6087: ctxt->sax->error(ctxt->userData, "Space needed here\n");
1.123 daniel 6088: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 6089: ctxt->wellFormed = 0;
1.180 daniel 6090: ctxt->disableSAX = 1;
1.76 daniel 6091: }
1.195 daniel 6092: xmlParseEncodingDecl(ctxt);
1.193 daniel 6093: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6094: /*
6095: * The XML REC instructs us to stop parsing right here
6096: */
6097: return;
6098: }
1.76 daniel 6099:
6100: SKIP_BLANKS;
1.152 daniel 6101: if ((RAW == '?') && (NXT(1) == '>')) {
1.76 daniel 6102: SKIP(2);
1.152 daniel 6103: } else if (RAW == '>') {
1.76 daniel 6104: /* Deprecated old WD ... */
6105: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6106: ctxt->sax->error(ctxt->userData,
6107: "XML declaration must end-up with '?>'\n");
1.123 daniel 6108: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 6109: ctxt->wellFormed = 0;
1.180 daniel 6110: ctxt->disableSAX = 1;
1.76 daniel 6111: NEXT;
6112: } else {
6113: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6114: ctxt->sax->error(ctxt->userData,
6115: "parsing XML declaration: '?>' expected\n");
1.123 daniel 6116: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 6117: ctxt->wellFormed = 0;
1.180 daniel 6118: ctxt->disableSAX = 1;
1.76 daniel 6119: MOVETO_ENDTAG(CUR_PTR);
6120: NEXT;
6121: }
6122: }
6123:
6124: /*
6125: * xmlParseConditionalSections
6126: * @ctxt: an XML parser context
6127: *
6128: * TODO : Conditionnal section are not yet supported !
6129: *
6130: * [61] conditionalSect ::= includeSect | ignoreSect
6131: * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6132: * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6133: * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6134: * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6135: */
6136:
6137: void
6138: xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
1.165 daniel 6139: SKIP(3);
6140: SKIP_BLANKS;
1.168 daniel 6141: if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
6142: (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
6143: (NXT(6) == 'E')) {
1.165 daniel 6144: SKIP(7);
1.168 daniel 6145: SKIP_BLANKS;
6146: if (RAW != '[') {
6147: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6148: ctxt->sax->error(ctxt->userData,
6149: "XML conditional section '[' expected\n");
6150: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
6151: ctxt->wellFormed = 0;
1.180 daniel 6152: ctxt->disableSAX = 1;
1.168 daniel 6153: } else {
6154: NEXT;
6155: }
1.165 daniel 6156: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6157: (NXT(2) != '>'))) {
6158: const xmlChar *check = CUR_PTR;
6159: int cons = ctxt->input->consumed;
6160: int tok = ctxt->token;
6161:
6162: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6163: xmlParseConditionalSections(ctxt);
6164: } else if (IS_BLANK(CUR)) {
6165: NEXT;
6166: } else if (RAW == '%') {
6167: xmlParsePEReference(ctxt);
6168: } else
6169: xmlParseMarkupDecl(ctxt);
6170:
6171: /*
6172: * Pop-up of finished entities.
6173: */
6174: while ((RAW == 0) && (ctxt->inputNr > 1))
6175: xmlPopInput(ctxt);
6176:
6177: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
6178: (tok == ctxt->token)) {
6179: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6180: ctxt->sax->error(ctxt->userData,
6181: "Content error in the external subset\n");
6182: ctxt->wellFormed = 0;
1.180 daniel 6183: ctxt->disableSAX = 1;
1.165 daniel 6184: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
6185: break;
6186: }
6187: }
1.168 daniel 6188: } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
6189: (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
1.171 daniel 6190: int state;
6191:
1.168 daniel 6192: SKIP(6);
6193: SKIP_BLANKS;
6194: if (RAW != '[') {
6195: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6196: ctxt->sax->error(ctxt->userData,
6197: "XML conditional section '[' expected\n");
6198: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
6199: ctxt->wellFormed = 0;
1.180 daniel 6200: ctxt->disableSAX = 1;
1.168 daniel 6201: } else {
6202: NEXT;
6203: }
1.171 daniel 6204:
1.143 daniel 6205: /*
1.171 daniel 6206: * Parse up to the end of the conditionnal section
6207: * But disable SAX event generating DTD building in the meantime
1.143 daniel 6208: */
1.171 daniel 6209: state = ctxt->disableSAX;
1.165 daniel 6210: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6211: (NXT(2) != '>'))) {
1.171 daniel 6212: const xmlChar *check = CUR_PTR;
6213: int cons = ctxt->input->consumed;
6214: int tok = ctxt->token;
6215:
6216: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6217: xmlParseConditionalSections(ctxt);
6218: } else if (IS_BLANK(CUR)) {
6219: NEXT;
6220: } else if (RAW == '%') {
6221: xmlParsePEReference(ctxt);
6222: } else
6223: xmlParseMarkupDecl(ctxt);
6224:
1.165 daniel 6225: /*
6226: * Pop-up of finished entities.
6227: */
6228: while ((RAW == 0) && (ctxt->inputNr > 1))
6229: xmlPopInput(ctxt);
1.143 daniel 6230:
1.171 daniel 6231: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
6232: (tok == ctxt->token)) {
6233: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6234: ctxt->sax->error(ctxt->userData,
6235: "Content error in the external subset\n");
6236: ctxt->wellFormed = 0;
1.180 daniel 6237: ctxt->disableSAX = 1;
1.171 daniel 6238: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
6239: break;
6240: }
1.165 daniel 6241: }
1.171 daniel 6242: ctxt->disableSAX = state;
1.168 daniel 6243: } else {
6244: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6245: ctxt->sax->error(ctxt->userData,
6246: "XML conditional section INCLUDE or IGNORE keyword expected\n");
6247: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
6248: ctxt->wellFormed = 0;
1.180 daniel 6249: ctxt->disableSAX = 1;
1.143 daniel 6250: }
6251:
1.152 daniel 6252: if (RAW == 0)
1.143 daniel 6253: SHRINK;
6254:
1.152 daniel 6255: if (RAW == 0) {
1.76 daniel 6256: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6257: ctxt->sax->error(ctxt->userData,
6258: "XML conditional section not closed\n");
1.123 daniel 6259: ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
1.76 daniel 6260: ctxt->wellFormed = 0;
1.180 daniel 6261: ctxt->disableSAX = 1;
1.143 daniel 6262: } else {
6263: SKIP(3);
1.76 daniel 6264: }
6265: }
6266:
6267: /**
1.124 daniel 6268: * xmlParseExternalSubset:
1.76 daniel 6269: * @ctxt: an XML parser context
1.124 daniel 6270: * @ExternalID: the external identifier
6271: * @SystemID: the system identifier (or URL)
1.76 daniel 6272: *
6273: * parse Markup declarations from an external subset
6274: *
6275: * [30] extSubset ::= textDecl? extSubsetDecl
6276: *
6277: * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6278: */
6279: void
1.123 daniel 6280: xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6281: const xmlChar *SystemID) {
1.132 daniel 6282: GROW;
1.152 daniel 6283: if ((RAW == '<') && (NXT(1) == '?') &&
1.76 daniel 6284: (NXT(2) == 'x') && (NXT(3) == 'm') &&
6285: (NXT(4) == 'l')) {
1.172 daniel 6286: xmlParseTextDecl(ctxt);
1.193 daniel 6287: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6288: /*
6289: * The XML REC instructs us to stop parsing right here
6290: */
6291: ctxt->instate = XML_PARSER_EOF;
6292: return;
6293: }
1.76 daniel 6294: }
1.79 daniel 6295: if (ctxt->myDoc == NULL) {
1.116 daniel 6296: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
1.79 daniel 6297: }
6298: if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6299: xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6300:
1.96 daniel 6301: ctxt->instate = XML_PARSER_DTD;
1.101 daniel 6302: ctxt->external = 1;
1.152 daniel 6303: while (((RAW == '<') && (NXT(1) == '?')) ||
6304: ((RAW == '<') && (NXT(1) == '!')) ||
1.164 daniel 6305: IS_BLANK(CUR)) {
1.123 daniel 6306: const xmlChar *check = CUR_PTR;
1.115 daniel 6307: int cons = ctxt->input->consumed;
1.164 daniel 6308: int tok = ctxt->token;
1.115 daniel 6309:
1.152 daniel 6310: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
1.76 daniel 6311: xmlParseConditionalSections(ctxt);
6312: } else if (IS_BLANK(CUR)) {
6313: NEXT;
1.152 daniel 6314: } else if (RAW == '%') {
1.76 daniel 6315: xmlParsePEReference(ctxt);
6316: } else
6317: xmlParseMarkupDecl(ctxt);
1.77 daniel 6318:
6319: /*
6320: * Pop-up of finished entities.
6321: */
1.166 daniel 6322: while ((RAW == 0) && (ctxt->inputNr > 1))
1.77 daniel 6323: xmlPopInput(ctxt);
6324:
1.164 daniel 6325: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
6326: (tok == ctxt->token)) {
1.115 daniel 6327: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6328: ctxt->sax->error(ctxt->userData,
6329: "Content error in the external subset\n");
6330: ctxt->wellFormed = 0;
1.180 daniel 6331: ctxt->disableSAX = 1;
1.123 daniel 6332: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.115 daniel 6333: break;
6334: }
1.76 daniel 6335: }
6336:
1.152 daniel 6337: if (RAW != 0) {
1.76 daniel 6338: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6339: ctxt->sax->error(ctxt->userData,
6340: "Extra content at the end of the document\n");
1.123 daniel 6341: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.76 daniel 6342: ctxt->wellFormed = 0;
1.180 daniel 6343: ctxt->disableSAX = 1;
1.76 daniel 6344: }
6345:
6346: }
6347:
6348: /**
1.77 daniel 6349: * xmlParseReference:
6350: * @ctxt: an XML parser context
6351: *
6352: * parse and handle entity references in content, depending on the SAX
6353: * interface, this may end-up in a call to character() if this is a
1.79 daniel 6354: * CharRef, a predefined entity, if there is no reference() callback.
6355: * or if the parser was asked to switch to that mode.
1.77 daniel 6356: *
6357: * [67] Reference ::= EntityRef | CharRef
6358: */
6359: void
6360: xmlParseReference(xmlParserCtxtPtr ctxt) {
6361: xmlEntityPtr ent;
1.123 daniel 6362: xmlChar *val;
1.152 daniel 6363: if (RAW != '&') return;
1.77 daniel 6364:
1.113 daniel 6365: if (ctxt->inputNr > 1) {
1.123 daniel 6366: xmlChar cur[2] = { '&' , 0 } ;
1.113 daniel 6367:
1.171 daniel 6368: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6369: (!ctxt->disableSAX))
1.113 daniel 6370: ctxt->sax->characters(ctxt->userData, cur, 1);
6371: if (ctxt->token == '&')
6372: ctxt->token = 0;
6373: else {
6374: SKIP(1);
6375: }
6376: return;
6377: }
1.77 daniel 6378: if (NXT(1) == '#') {
1.152 daniel 6379: int i = 0;
1.153 daniel 6380: xmlChar out[10];
6381: int hex = NXT(2);
1.77 daniel 6382: int val = xmlParseCharRef(ctxt);
1.152 daniel 6383:
1.198 daniel 6384: if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
1.153 daniel 6385: /*
6386: * So we are using non-UTF-8 buffers
6387: * Check that the char fit on 8bits, if not
6388: * generate a CharRef.
6389: */
6390: if (val <= 0xFF) {
6391: out[0] = val;
6392: out[1] = 0;
1.171 daniel 6393: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6394: (!ctxt->disableSAX))
1.153 daniel 6395: ctxt->sax->characters(ctxt->userData, out, 1);
6396: } else {
6397: if ((hex == 'x') || (hex == 'X'))
6398: sprintf((char *)out, "#x%X", val);
6399: else
6400: sprintf((char *)out, "#%d", val);
1.171 daniel 6401: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6402: (!ctxt->disableSAX))
1.153 daniel 6403: ctxt->sax->reference(ctxt->userData, out);
6404: }
6405: } else {
6406: /*
6407: * Just encode the value in UTF-8
6408: */
6409: COPY_BUF(0 ,out, i, val);
6410: out[i] = 0;
1.171 daniel 6411: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6412: (!ctxt->disableSAX))
1.153 daniel 6413: ctxt->sax->characters(ctxt->userData, out, i);
6414: }
1.77 daniel 6415: } else {
6416: ent = xmlParseEntityRef(ctxt);
6417: if (ent == NULL) return;
6418: if ((ent->name != NULL) &&
1.159 daniel 6419: (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
1.180 daniel 6420: xmlNodePtr list = NULL;
6421: int ret;
6422:
6423:
6424: /*
6425: * The first reference to the entity trigger a parsing phase
6426: * where the ent->children is filled with the result from
6427: * the parsing.
6428: */
6429: if (ent->children == NULL) {
6430: xmlChar *value;
6431: value = ent->content;
6432:
6433: /*
6434: * Check that this entity is well formed
6435: */
6436: if ((value != NULL) &&
6437: (value[1] == 0) && (value[0] == '<') &&
6438: (!xmlStrcmp(ent->name, BAD_CAST "lt"))) {
6439: /*
6440: * TODO: get definite answer on this !!!
6441: * Lots of entity decls are used to declare a single
6442: * char
6443: * <!ENTITY lt "<">
6444: * Which seems to be valid since
6445: * 2.4: The ampersand character (&) and the left angle
6446: * bracket (<) may appear in their literal form only
6447: * when used ... They are also legal within the literal
6448: * entity value of an internal entity declaration;i
6449: * see "4.3.2 Well-Formed Parsed Entities".
6450: * IMHO 2.4 and 4.3.2 are directly in contradiction.
6451: * Looking at the OASIS test suite and James Clark
6452: * tests, this is broken. However the XML REC uses
6453: * it. Is the XML REC not well-formed ????
6454: * This is a hack to avoid this problem
6455: */
6456: list = xmlNewDocText(ctxt->myDoc, value);
6457: if (list != NULL) {
6458: if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6459: (ent->children == NULL)) {
6460: ent->children = list;
6461: ent->last = list;
6462: list->parent = (xmlNodePtr) ent;
6463: } else {
6464: xmlFreeNodeList(list);
6465: }
6466: } else if (list != NULL) {
6467: xmlFreeNodeList(list);
6468: }
1.181 daniel 6469: } else {
1.180 daniel 6470: /*
6471: * 4.3.2: An internal general parsed entity is well-formed
6472: * if its replacement text matches the production labeled
6473: * content.
6474: */
1.185 daniel 6475: if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6476: ctxt->depth++;
1.180 daniel 6477: ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
1.185 daniel 6478: ctxt->sax, NULL, ctxt->depth,
6479: value, &list);
6480: ctxt->depth--;
6481: } else if (ent->etype ==
6482: XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6483: ctxt->depth++;
1.180 daniel 6484: ret = xmlParseExternalEntity(ctxt->myDoc,
1.185 daniel 6485: ctxt->sax, NULL, ctxt->depth,
6486: ent->SystemID, ent->ExternalID, &list);
6487: ctxt->depth--;
6488: } else {
1.180 daniel 6489: ret = -1;
6490: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6491: ctxt->sax->error(ctxt->userData,
6492: "Internal: invalid entity type\n");
6493: }
1.185 daniel 6494: if (ret == XML_ERR_ENTITY_LOOP) {
6495: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6496: ctxt->sax->error(ctxt->userData,
6497: "Detected entity reference loop\n");
6498: ctxt->wellFormed = 0;
6499: ctxt->disableSAX = 1;
6500: ctxt->errNo = XML_ERR_ENTITY_LOOP;
6501: } else if ((ret == 0) && (list != NULL)) {
1.180 daniel 6502: if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6503: (ent->children == NULL)) {
6504: ent->children = list;
6505: while (list != NULL) {
6506: list->parent = (xmlNodePtr) ent;
6507: if (list->next == NULL)
6508: ent->last = list;
6509: list = list->next;
6510: }
6511: } else {
6512: xmlFreeNodeList(list);
6513: }
6514: } else if (ret > 0) {
6515: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6516: ctxt->sax->error(ctxt->userData,
6517: "Entity value required\n");
6518: ctxt->errNo = ret;
6519: ctxt->wellFormed = 0;
6520: ctxt->disableSAX = 1;
6521: } else if (list != NULL) {
6522: xmlFreeNodeList(list);
6523: }
6524: }
6525: }
1.113 daniel 6526: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
1.171 daniel 6527: (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
1.113 daniel 6528: /*
6529: * Create a node.
6530: */
6531: ctxt->sax->reference(ctxt->userData, ent->name);
6532: return;
6533: } else if (ctxt->replaceEntities) {
6534: xmlParserInputPtr input;
1.79 daniel 6535:
1.113 daniel 6536: input = xmlNewEntityInputStream(ctxt, ent);
6537: xmlPushInput(ctxt, input);
1.167 daniel 6538: if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
6539: (RAW == '<') && (NXT(1) == '?') &&
6540: (NXT(2) == 'x') && (NXT(3) == 'm') &&
6541: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 6542: xmlParseTextDecl(ctxt);
1.193 daniel 6543: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6544: /*
6545: * The XML REC instructs us to stop parsing right here
6546: */
6547: ctxt->instate = XML_PARSER_EOF;
6548: return;
6549: }
1.199 daniel 6550: if (input->standalone == 1) {
1.167 daniel 6551: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6552: ctxt->sax->error(ctxt->userData,
6553: "external parsed entities cannot be standalone\n");
6554: ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
6555: ctxt->wellFormed = 0;
1.180 daniel 6556: ctxt->disableSAX = 1;
1.167 daniel 6557: }
6558: }
1.179 daniel 6559: /*
6560: * !!! TODO: build the tree under the entity first
6561: * 1234
6562: */
1.113 daniel 6563: return;
6564: }
1.77 daniel 6565: }
6566: val = ent->content;
6567: if (val == NULL) return;
6568: /*
6569: * inline the entity.
6570: */
1.171 daniel 6571: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6572: (!ctxt->disableSAX))
1.77 daniel 6573: ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6574: }
1.24 daniel 6575: }
6576:
1.50 daniel 6577: /**
6578: * xmlParseEntityRef:
6579: * @ctxt: an XML parser context
6580: *
6581: * parse ENTITY references declarations
1.24 daniel 6582: *
6583: * [68] EntityRef ::= '&' Name ';'
1.68 daniel 6584: *
1.98 daniel 6585: * [ WFC: Entity Declared ]
6586: * In a document without any DTD, a document with only an internal DTD
6587: * subset which contains no parameter entity references, or a document
6588: * with "standalone='yes'", the Name given in the entity reference
6589: * must match that in an entity declaration, except that well-formed
6590: * documents need not declare any of the following entities: amp, lt,
6591: * gt, apos, quot. The declaration of a parameter entity must precede
6592: * any reference to it. Similarly, the declaration of a general entity
6593: * must precede any reference to it which appears in a default value in an
6594: * attribute-list declaration. Note that if entities are declared in the
6595: * external subset or in external parameter entities, a non-validating
6596: * processor is not obligated to read and process their declarations;
6597: * for such documents, the rule that an entity must be declared is a
6598: * well-formedness constraint only if standalone='yes'.
6599: *
6600: * [ WFC: Parsed Entity ]
6601: * An entity reference must not contain the name of an unparsed entity
6602: *
1.77 daniel 6603: * Returns the xmlEntityPtr if found, or NULL otherwise.
1.24 daniel 6604: */
1.77 daniel 6605: xmlEntityPtr
1.55 daniel 6606: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1.123 daniel 6607: xmlChar *name;
1.72 daniel 6608: xmlEntityPtr ent = NULL;
1.24 daniel 6609:
1.91 daniel 6610: GROW;
1.111 daniel 6611:
1.152 daniel 6612: if (RAW == '&') {
1.40 daniel 6613: NEXT;
1.24 daniel 6614: name = xmlParseName(ctxt);
6615: if (name == NULL) {
1.55 daniel 6616: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 6617: ctxt->sax->error(ctxt->userData,
6618: "xmlParseEntityRef: no name\n");
1.123 daniel 6619: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 6620: ctxt->wellFormed = 0;
1.180 daniel 6621: ctxt->disableSAX = 1;
1.24 daniel 6622: } else {
1.152 daniel 6623: if (RAW == ';') {
1.40 daniel 6624: NEXT;
1.24 daniel 6625: /*
1.77 daniel 6626: * Ask first SAX for entity resolution, otherwise try the
6627: * predefined set.
6628: */
6629: if (ctxt->sax != NULL) {
6630: if (ctxt->sax->getEntity != NULL)
6631: ent = ctxt->sax->getEntity(ctxt->userData, name);
6632: if (ent == NULL)
6633: ent = xmlGetPredefinedEntity(name);
6634: }
6635: /*
1.98 daniel 6636: * [ WFC: Entity Declared ]
6637: * In a document without any DTD, a document with only an
6638: * internal DTD subset which contains no parameter entity
6639: * references, or a document with "standalone='yes'", the
6640: * Name given in the entity reference must match that in an
6641: * entity declaration, except that well-formed documents
6642: * need not declare any of the following entities: amp, lt,
6643: * gt, apos, quot.
6644: * The declaration of a parameter entity must precede any
6645: * reference to it.
6646: * Similarly, the declaration of a general entity must
6647: * precede any reference to it which appears in a default
6648: * value in an attribute-list declaration. Note that if
6649: * entities are declared in the external subset or in
6650: * external parameter entities, a non-validating processor
6651: * is not obligated to read and process their declarations;
6652: * for such documents, the rule that an entity must be
6653: * declared is a well-formedness constraint only if
6654: * standalone='yes'.
1.59 daniel 6655: */
1.77 daniel 6656: if (ent == NULL) {
1.98 daniel 6657: if ((ctxt->standalone == 1) ||
6658: ((ctxt->hasExternalSubset == 0) &&
6659: (ctxt->hasPErefs == 0))) {
6660: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 6661: ctxt->sax->error(ctxt->userData,
6662: "Entity '%s' not defined\n", name);
1.123 daniel 6663: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.77 daniel 6664: ctxt->wellFormed = 0;
1.180 daniel 6665: ctxt->disableSAX = 1;
1.77 daniel 6666: } else {
1.98 daniel 6667: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6668: ctxt->sax->warning(ctxt->userData,
6669: "Entity '%s' not defined\n", name);
1.123 daniel 6670: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
1.59 daniel 6671: }
1.77 daniel 6672: }
1.59 daniel 6673:
6674: /*
1.98 daniel 6675: * [ WFC: Parsed Entity ]
6676: * An entity reference must not contain the name of an
6677: * unparsed entity
6678: */
1.159 daniel 6679: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.98 daniel 6680: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6681: ctxt->sax->error(ctxt->userData,
6682: "Entity reference to unparsed entity %s\n", name);
1.123 daniel 6683: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 6684: ctxt->wellFormed = 0;
1.180 daniel 6685: ctxt->disableSAX = 1;
1.98 daniel 6686: }
6687:
6688: /*
6689: * [ WFC: No External Entity References ]
6690: * Attribute values cannot contain direct or indirect
6691: * entity references to external entities.
6692: */
6693: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 6694: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.98 daniel 6695: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6696: ctxt->sax->error(ctxt->userData,
6697: "Attribute references external entity '%s'\n", name);
1.123 daniel 6698: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
1.98 daniel 6699: ctxt->wellFormed = 0;
1.180 daniel 6700: ctxt->disableSAX = 1;
1.98 daniel 6701: }
6702: /*
6703: * [ WFC: No < in Attribute Values ]
6704: * The replacement text of any entity referred to directly or
6705: * indirectly in an attribute value (other than "<") must
6706: * not contain a <.
1.59 daniel 6707: */
1.98 daniel 6708: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.116 daniel 6709: (ent != NULL) &&
6710: (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
1.98 daniel 6711: (ent->content != NULL) &&
6712: (xmlStrchr(ent->content, '<'))) {
6713: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6714: ctxt->sax->error(ctxt->userData,
6715: "'<' in entity '%s' is not allowed in attributes values\n", name);
1.123 daniel 6716: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.98 daniel 6717: ctxt->wellFormed = 0;
1.180 daniel 6718: ctxt->disableSAX = 1;
1.98 daniel 6719: }
6720:
6721: /*
6722: * Internal check, no parameter entities here ...
6723: */
6724: else {
1.159 daniel 6725: switch (ent->etype) {
1.59 daniel 6726: case XML_INTERNAL_PARAMETER_ENTITY:
6727: case XML_EXTERNAL_PARAMETER_ENTITY:
6728: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6729: ctxt->sax->error(ctxt->userData,
1.59 daniel 6730: "Attempt to reference the parameter entity '%s'\n", name);
1.123 daniel 6731: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
1.59 daniel 6732: ctxt->wellFormed = 0;
1.180 daniel 6733: ctxt->disableSAX = 1;
6734: break;
6735: default:
1.59 daniel 6736: break;
6737: }
6738: }
6739:
6740: /*
1.98 daniel 6741: * [ WFC: No Recursion ]
1.117 daniel 6742: * TODO A parsed entity must not contain a recursive reference
6743: * to itself, either directly or indirectly.
1.59 daniel 6744: */
1.77 daniel 6745:
1.24 daniel 6746: } else {
1.55 daniel 6747: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6748: ctxt->sax->error(ctxt->userData,
1.59 daniel 6749: "xmlParseEntityRef: expecting ';'\n");
1.123 daniel 6750: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.59 daniel 6751: ctxt->wellFormed = 0;
1.180 daniel 6752: ctxt->disableSAX = 1;
1.24 daniel 6753: }
1.119 daniel 6754: xmlFree(name);
1.24 daniel 6755: }
6756: }
1.77 daniel 6757: return(ent);
1.24 daniel 6758: }
1.135 daniel 6759: /**
6760: * xmlParseStringEntityRef:
6761: * @ctxt: an XML parser context
6762: * @str: a pointer to an index in the string
6763: *
6764: * parse ENTITY references declarations, but this version parses it from
6765: * a string value.
6766: *
6767: * [68] EntityRef ::= '&' Name ';'
6768: *
6769: * [ WFC: Entity Declared ]
6770: * In a document without any DTD, a document with only an internal DTD
6771: * subset which contains no parameter entity references, or a document
6772: * with "standalone='yes'", the Name given in the entity reference
6773: * must match that in an entity declaration, except that well-formed
6774: * documents need not declare any of the following entities: amp, lt,
6775: * gt, apos, quot. The declaration of a parameter entity must precede
6776: * any reference to it. Similarly, the declaration of a general entity
6777: * must precede any reference to it which appears in a default value in an
6778: * attribute-list declaration. Note that if entities are declared in the
6779: * external subset or in external parameter entities, a non-validating
6780: * processor is not obligated to read and process their declarations;
6781: * for such documents, the rule that an entity must be declared is a
6782: * well-formedness constraint only if standalone='yes'.
6783: *
6784: * [ WFC: Parsed Entity ]
6785: * An entity reference must not contain the name of an unparsed entity
6786: *
6787: * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6788: * is updated to the current location in the string.
6789: */
6790: xmlEntityPtr
6791: xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6792: xmlChar *name;
6793: const xmlChar *ptr;
6794: xmlChar cur;
6795: xmlEntityPtr ent = NULL;
6796:
1.156 daniel 6797: if ((str == NULL) || (*str == NULL))
6798: return(NULL);
1.135 daniel 6799: ptr = *str;
6800: cur = *ptr;
6801: if (cur == '&') {
6802: ptr++;
6803: cur = *ptr;
6804: name = xmlParseStringName(ctxt, &ptr);
6805: if (name == NULL) {
6806: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6807: ctxt->sax->error(ctxt->userData,
6808: "xmlParseEntityRef: no name\n");
6809: ctxt->errNo = XML_ERR_NAME_REQUIRED;
6810: ctxt->wellFormed = 0;
1.180 daniel 6811: ctxt->disableSAX = 1;
1.135 daniel 6812: } else {
1.185 daniel 6813: if (*ptr == ';') {
6814: ptr++;
1.135 daniel 6815: /*
6816: * Ask first SAX for entity resolution, otherwise try the
6817: * predefined set.
6818: */
6819: if (ctxt->sax != NULL) {
6820: if (ctxt->sax->getEntity != NULL)
6821: ent = ctxt->sax->getEntity(ctxt->userData, name);
6822: if (ent == NULL)
6823: ent = xmlGetPredefinedEntity(name);
6824: }
6825: /*
6826: * [ WFC: Entity Declared ]
6827: * In a document without any DTD, a document with only an
6828: * internal DTD subset which contains no parameter entity
6829: * references, or a document with "standalone='yes'", the
6830: * Name given in the entity reference must match that in an
6831: * entity declaration, except that well-formed documents
6832: * need not declare any of the following entities: amp, lt,
6833: * gt, apos, quot.
6834: * The declaration of a parameter entity must precede any
6835: * reference to it.
6836: * Similarly, the declaration of a general entity must
6837: * precede any reference to it which appears in a default
6838: * value in an attribute-list declaration. Note that if
6839: * entities are declared in the external subset or in
6840: * external parameter entities, a non-validating processor
6841: * is not obligated to read and process their declarations;
6842: * for such documents, the rule that an entity must be
6843: * declared is a well-formedness constraint only if
6844: * standalone='yes'.
6845: */
6846: if (ent == NULL) {
6847: if ((ctxt->standalone == 1) ||
6848: ((ctxt->hasExternalSubset == 0) &&
6849: (ctxt->hasPErefs == 0))) {
6850: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6851: ctxt->sax->error(ctxt->userData,
6852: "Entity '%s' not defined\n", name);
6853: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6854: ctxt->wellFormed = 0;
1.180 daniel 6855: ctxt->disableSAX = 1;
1.135 daniel 6856: } else {
6857: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6858: ctxt->sax->warning(ctxt->userData,
6859: "Entity '%s' not defined\n", name);
6860: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
6861: }
6862: }
6863:
6864: /*
6865: * [ WFC: Parsed Entity ]
6866: * An entity reference must not contain the name of an
6867: * unparsed entity
6868: */
1.159 daniel 6869: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.135 daniel 6870: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6871: ctxt->sax->error(ctxt->userData,
6872: "Entity reference to unparsed entity %s\n", name);
6873: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
6874: ctxt->wellFormed = 0;
1.180 daniel 6875: ctxt->disableSAX = 1;
1.135 daniel 6876: }
6877:
6878: /*
6879: * [ WFC: No External Entity References ]
6880: * Attribute values cannot contain direct or indirect
6881: * entity references to external entities.
6882: */
6883: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 6884: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.135 daniel 6885: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6886: ctxt->sax->error(ctxt->userData,
6887: "Attribute references external entity '%s'\n", name);
6888: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
6889: ctxt->wellFormed = 0;
1.180 daniel 6890: ctxt->disableSAX = 1;
1.135 daniel 6891: }
6892: /*
6893: * [ WFC: No < in Attribute Values ]
6894: * The replacement text of any entity referred to directly or
6895: * indirectly in an attribute value (other than "<") must
6896: * not contain a <.
6897: */
6898: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6899: (ent != NULL) &&
6900: (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
6901: (ent->content != NULL) &&
6902: (xmlStrchr(ent->content, '<'))) {
6903: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6904: ctxt->sax->error(ctxt->userData,
6905: "'<' in entity '%s' is not allowed in attributes values\n", name);
6906: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
6907: ctxt->wellFormed = 0;
1.180 daniel 6908: ctxt->disableSAX = 1;
1.135 daniel 6909: }
6910:
6911: /*
6912: * Internal check, no parameter entities here ...
6913: */
6914: else {
1.159 daniel 6915: switch (ent->etype) {
1.135 daniel 6916: case XML_INTERNAL_PARAMETER_ENTITY:
6917: case XML_EXTERNAL_PARAMETER_ENTITY:
6918: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6919: ctxt->sax->error(ctxt->userData,
6920: "Attempt to reference the parameter entity '%s'\n", name);
6921: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
6922: ctxt->wellFormed = 0;
1.180 daniel 6923: ctxt->disableSAX = 1;
6924: break;
6925: default:
1.135 daniel 6926: break;
6927: }
6928: }
6929:
6930: /*
6931: * [ WFC: No Recursion ]
6932: * TODO A parsed entity must not contain a recursive reference
6933: * to itself, either directly or indirectly.
6934: */
6935:
6936: } else {
6937: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6938: ctxt->sax->error(ctxt->userData,
6939: "xmlParseEntityRef: expecting ';'\n");
6940: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6941: ctxt->wellFormed = 0;
1.180 daniel 6942: ctxt->disableSAX = 1;
1.135 daniel 6943: }
6944: xmlFree(name);
6945: }
6946: }
1.185 daniel 6947: *str = ptr;
1.135 daniel 6948: return(ent);
6949: }
1.24 daniel 6950:
1.50 daniel 6951: /**
6952: * xmlParsePEReference:
6953: * @ctxt: an XML parser context
6954: *
6955: * parse PEReference declarations
1.77 daniel 6956: * The entity content is handled directly by pushing it's content as
6957: * a new input stream.
1.22 daniel 6958: *
6959: * [69] PEReference ::= '%' Name ';'
1.68 daniel 6960: *
1.98 daniel 6961: * [ WFC: No Recursion ]
6962: * TODO A parsed entity must not contain a recursive
6963: * reference to itself, either directly or indirectly.
6964: *
6965: * [ WFC: Entity Declared ]
6966: * In a document without any DTD, a document with only an internal DTD
6967: * subset which contains no parameter entity references, or a document
6968: * with "standalone='yes'", ... ... The declaration of a parameter
6969: * entity must precede any reference to it...
6970: *
6971: * [ VC: Entity Declared ]
6972: * In a document with an external subset or external parameter entities
6973: * with "standalone='no'", ... ... The declaration of a parameter entity
6974: * must precede any reference to it...
6975: *
6976: * [ WFC: In DTD ]
6977: * Parameter-entity references may only appear in the DTD.
6978: * NOTE: misleading but this is handled.
1.22 daniel 6979: */
1.77 daniel 6980: void
1.55 daniel 6981: xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 6982: xmlChar *name;
1.72 daniel 6983: xmlEntityPtr entity = NULL;
1.50 daniel 6984: xmlParserInputPtr input;
1.22 daniel 6985:
1.152 daniel 6986: if (RAW == '%') {
1.40 daniel 6987: NEXT;
1.22 daniel 6988: name = xmlParseName(ctxt);
6989: if (name == NULL) {
1.55 daniel 6990: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6991: ctxt->sax->error(ctxt->userData,
6992: "xmlParsePEReference: no name\n");
1.123 daniel 6993: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 6994: ctxt->wellFormed = 0;
1.180 daniel 6995: ctxt->disableSAX = 1;
1.22 daniel 6996: } else {
1.152 daniel 6997: if (RAW == ';') {
1.40 daniel 6998: NEXT;
1.98 daniel 6999: if ((ctxt->sax != NULL) &&
7000: (ctxt->sax->getParameterEntity != NULL))
7001: entity = ctxt->sax->getParameterEntity(ctxt->userData,
7002: name);
1.45 daniel 7003: if (entity == NULL) {
1.98 daniel 7004: /*
7005: * [ WFC: Entity Declared ]
7006: * In a document without any DTD, a document with only an
7007: * internal DTD subset which contains no parameter entity
7008: * references, or a document with "standalone='yes'", ...
7009: * ... The declaration of a parameter entity must precede
7010: * any reference to it...
7011: */
7012: if ((ctxt->standalone == 1) ||
7013: ((ctxt->hasExternalSubset == 0) &&
7014: (ctxt->hasPErefs == 0))) {
7015: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7016: ctxt->sax->error(ctxt->userData,
7017: "PEReference: %%%s; not found\n", name);
1.123 daniel 7018: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.98 daniel 7019: ctxt->wellFormed = 0;
1.180 daniel 7020: ctxt->disableSAX = 1;
1.98 daniel 7021: } else {
7022: /*
7023: * [ VC: Entity Declared ]
7024: * In a document with an external subset or external
7025: * parameter entities with "standalone='no'", ...
7026: * ... The declaration of a parameter entity must precede
7027: * any reference to it...
7028: */
7029: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7030: ctxt->sax->warning(ctxt->userData,
7031: "PEReference: %%%s; not found\n", name);
7032: ctxt->valid = 0;
7033: }
1.50 daniel 7034: } else {
1.98 daniel 7035: /*
7036: * Internal checking in case the entity quest barfed
7037: */
1.159 daniel 7038: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7039: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.98 daniel 7040: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7041: ctxt->sax->warning(ctxt->userData,
7042: "Internal: %%%s; is not a parameter entity\n", name);
7043: } else {
1.164 daniel 7044: /*
7045: * TODO !!!
7046: * handle the extra spaces added before and after
7047: * c.f. http://www.w3.org/TR/REC-xml#as-PE
7048: */
1.98 daniel 7049: input = xmlNewEntityInputStream(ctxt, entity);
7050: xmlPushInput(ctxt, input);
1.164 daniel 7051: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7052: (RAW == '<') && (NXT(1) == '?') &&
7053: (NXT(2) == 'x') && (NXT(3) == 'm') &&
7054: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 7055: xmlParseTextDecl(ctxt);
1.193 daniel 7056: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7057: /*
7058: * The XML REC instructs us to stop parsing
7059: * right here
7060: */
7061: ctxt->instate = XML_PARSER_EOF;
7062: xmlFree(name);
7063: return;
7064: }
1.164 daniel 7065: }
7066: if (ctxt->token == 0)
7067: ctxt->token = ' ';
1.98 daniel 7068: }
1.45 daniel 7069: }
1.98 daniel 7070: ctxt->hasPErefs = 1;
1.22 daniel 7071: } else {
1.55 daniel 7072: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7073: ctxt->sax->error(ctxt->userData,
1.59 daniel 7074: "xmlParsePEReference: expecting ';'\n");
1.123 daniel 7075: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.59 daniel 7076: ctxt->wellFormed = 0;
1.180 daniel 7077: ctxt->disableSAX = 1;
1.22 daniel 7078: }
1.119 daniel 7079: xmlFree(name);
1.3 veillard 7080: }
7081: }
7082: }
7083:
1.50 daniel 7084: /**
1.135 daniel 7085: * xmlParseStringPEReference:
7086: * @ctxt: an XML parser context
7087: * @str: a pointer to an index in the string
7088: *
7089: * parse PEReference declarations
7090: *
7091: * [69] PEReference ::= '%' Name ';'
7092: *
7093: * [ WFC: No Recursion ]
7094: * TODO A parsed entity must not contain a recursive
7095: * reference to itself, either directly or indirectly.
7096: *
7097: * [ WFC: Entity Declared ]
7098: * In a document without any DTD, a document with only an internal DTD
7099: * subset which contains no parameter entity references, or a document
7100: * with "standalone='yes'", ... ... The declaration of a parameter
7101: * entity must precede any reference to it...
7102: *
7103: * [ VC: Entity Declared ]
7104: * In a document with an external subset or external parameter entities
7105: * with "standalone='no'", ... ... The declaration of a parameter entity
7106: * must precede any reference to it...
7107: *
7108: * [ WFC: In DTD ]
7109: * Parameter-entity references may only appear in the DTD.
7110: * NOTE: misleading but this is handled.
7111: *
7112: * Returns the string of the entity content.
7113: * str is updated to the current value of the index
7114: */
7115: xmlEntityPtr
7116: xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7117: const xmlChar *ptr;
7118: xmlChar cur;
7119: xmlChar *name;
7120: xmlEntityPtr entity = NULL;
7121:
7122: if ((str == NULL) || (*str == NULL)) return(NULL);
7123: ptr = *str;
7124: cur = *ptr;
7125: if (cur == '%') {
7126: ptr++;
7127: cur = *ptr;
7128: name = xmlParseStringName(ctxt, &ptr);
7129: if (name == NULL) {
7130: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7131: ctxt->sax->error(ctxt->userData,
7132: "xmlParseStringPEReference: no name\n");
7133: ctxt->errNo = XML_ERR_NAME_REQUIRED;
7134: ctxt->wellFormed = 0;
1.180 daniel 7135: ctxt->disableSAX = 1;
1.135 daniel 7136: } else {
7137: cur = *ptr;
7138: if (cur == ';') {
7139: ptr++;
7140: cur = *ptr;
7141: if ((ctxt->sax != NULL) &&
7142: (ctxt->sax->getParameterEntity != NULL))
7143: entity = ctxt->sax->getParameterEntity(ctxt->userData,
7144: name);
7145: if (entity == NULL) {
7146: /*
7147: * [ WFC: Entity Declared ]
7148: * In a document without any DTD, a document with only an
7149: * internal DTD subset which contains no parameter entity
7150: * references, or a document with "standalone='yes'", ...
7151: * ... The declaration of a parameter entity must precede
7152: * any reference to it...
7153: */
7154: if ((ctxt->standalone == 1) ||
7155: ((ctxt->hasExternalSubset == 0) &&
7156: (ctxt->hasPErefs == 0))) {
7157: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7158: ctxt->sax->error(ctxt->userData,
7159: "PEReference: %%%s; not found\n", name);
7160: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
7161: ctxt->wellFormed = 0;
1.180 daniel 7162: ctxt->disableSAX = 1;
1.135 daniel 7163: } else {
7164: /*
7165: * [ VC: Entity Declared ]
7166: * In a document with an external subset or external
7167: * parameter entities with "standalone='no'", ...
7168: * ... The declaration of a parameter entity must
7169: * precede any reference to it...
7170: */
7171: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7172: ctxt->sax->warning(ctxt->userData,
7173: "PEReference: %%%s; not found\n", name);
7174: ctxt->valid = 0;
7175: }
7176: } else {
7177: /*
7178: * Internal checking in case the entity quest barfed
7179: */
1.159 daniel 7180: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7181: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.135 daniel 7182: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7183: ctxt->sax->warning(ctxt->userData,
7184: "Internal: %%%s; is not a parameter entity\n", name);
7185: }
7186: }
7187: ctxt->hasPErefs = 1;
7188: } else {
7189: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7190: ctxt->sax->error(ctxt->userData,
7191: "xmlParseStringPEReference: expecting ';'\n");
7192: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
7193: ctxt->wellFormed = 0;
1.180 daniel 7194: ctxt->disableSAX = 1;
1.135 daniel 7195: }
7196: xmlFree(name);
7197: }
7198: }
7199: *str = ptr;
7200: return(entity);
7201: }
7202:
7203: /**
1.181 daniel 7204: * xmlParseDocTypeDecl:
1.50 daniel 7205: * @ctxt: an XML parser context
7206: *
7207: * parse a DOCTYPE declaration
1.21 daniel 7208: *
1.22 daniel 7209: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7210: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.98 daniel 7211: *
7212: * [ VC: Root Element Type ]
1.99 daniel 7213: * The Name in the document type declaration must match the element
1.98 daniel 7214: * type of the root element.
1.21 daniel 7215: */
7216:
1.55 daniel 7217: void
7218: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.165 daniel 7219: xmlChar *name = NULL;
1.123 daniel 7220: xmlChar *ExternalID = NULL;
7221: xmlChar *URI = NULL;
1.21 daniel 7222:
7223: /*
7224: * We know that '<!DOCTYPE' has been detected.
7225: */
1.40 daniel 7226: SKIP(9);
1.21 daniel 7227:
1.42 daniel 7228: SKIP_BLANKS;
1.21 daniel 7229:
7230: /*
7231: * Parse the DOCTYPE name.
7232: */
7233: name = xmlParseName(ctxt);
7234: if (name == NULL) {
1.55 daniel 7235: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7236: ctxt->sax->error(ctxt->userData,
7237: "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.59 daniel 7238: ctxt->wellFormed = 0;
1.180 daniel 7239: ctxt->disableSAX = 1;
1.123 daniel 7240: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.21 daniel 7241: }
1.165 daniel 7242: ctxt->intSubName = name;
1.21 daniel 7243:
1.42 daniel 7244: SKIP_BLANKS;
1.21 daniel 7245:
7246: /*
1.22 daniel 7247: * Check for SystemID and ExternalID
7248: */
1.67 daniel 7249: URI = xmlParseExternalID(ctxt, &ExternalID, 1);
1.98 daniel 7250:
7251: if ((URI != NULL) || (ExternalID != NULL)) {
7252: ctxt->hasExternalSubset = 1;
7253: }
1.165 daniel 7254: ctxt->extSubURI = URI;
7255: ctxt->extSubSystem = ExternalID;
1.98 daniel 7256:
1.42 daniel 7257: SKIP_BLANKS;
1.36 daniel 7258:
1.76 daniel 7259: /*
1.165 daniel 7260: * Create and update the internal subset.
1.76 daniel 7261: */
1.171 daniel 7262: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7263: (!ctxt->disableSAX))
1.74 daniel 7264: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
1.22 daniel 7265:
7266: /*
1.140 daniel 7267: * Is there any internal subset declarations ?
7268: * they are handled separately in xmlParseInternalSubset()
7269: */
1.152 daniel 7270: if (RAW == '[')
1.140 daniel 7271: return;
7272:
7273: /*
7274: * We should be at the end of the DOCTYPE declaration.
7275: */
1.152 daniel 7276: if (RAW != '>') {
1.140 daniel 7277: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7278: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
7279: ctxt->wellFormed = 0;
1.180 daniel 7280: ctxt->disableSAX = 1;
1.140 daniel 7281: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
7282: }
7283: NEXT;
7284: }
7285:
7286: /**
1.181 daniel 7287: * xmlParseInternalsubset:
1.140 daniel 7288: * @ctxt: an XML parser context
7289: *
7290: * parse the internal subset declaration
7291: *
7292: * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7293: */
7294:
7295: void
7296: xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7297: /*
1.22 daniel 7298: * Is there any DTD definition ?
7299: */
1.152 daniel 7300: if (RAW == '[') {
1.96 daniel 7301: ctxt->instate = XML_PARSER_DTD;
1.40 daniel 7302: NEXT;
1.22 daniel 7303: /*
7304: * Parse the succession of Markup declarations and
7305: * PEReferences.
7306: * Subsequence (markupdecl | PEReference | S)*
7307: */
1.152 daniel 7308: while (RAW != ']') {
1.123 daniel 7309: const xmlChar *check = CUR_PTR;
1.115 daniel 7310: int cons = ctxt->input->consumed;
1.22 daniel 7311:
1.42 daniel 7312: SKIP_BLANKS;
1.22 daniel 7313: xmlParseMarkupDecl(ctxt);
1.50 daniel 7314: xmlParsePEReference(ctxt);
1.22 daniel 7315:
1.115 daniel 7316: /*
7317: * Pop-up of finished entities.
7318: */
1.152 daniel 7319: while ((RAW == 0) && (ctxt->inputNr > 1))
1.115 daniel 7320: xmlPopInput(ctxt);
7321:
1.118 daniel 7322: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
1.55 daniel 7323: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7324: ctxt->sax->error(ctxt->userData,
1.140 daniel 7325: "xmlParseInternalSubset: error detected in Markup declaration\n");
1.59 daniel 7326: ctxt->wellFormed = 0;
1.180 daniel 7327: ctxt->disableSAX = 1;
1.123 daniel 7328: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.22 daniel 7329: break;
7330: }
7331: }
1.152 daniel 7332: if (RAW == ']') NEXT;
1.22 daniel 7333: }
7334:
7335: /*
7336: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 7337: */
1.152 daniel 7338: if (RAW != '>') {
1.55 daniel 7339: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7340: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
1.59 daniel 7341: ctxt->wellFormed = 0;
1.180 daniel 7342: ctxt->disableSAX = 1;
1.123 daniel 7343: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
1.21 daniel 7344: }
1.40 daniel 7345: NEXT;
1.21 daniel 7346: }
7347:
1.50 daniel 7348: /**
7349: * xmlParseAttribute:
7350: * @ctxt: an XML parser context
1.123 daniel 7351: * @value: a xmlChar ** used to store the value of the attribute
1.50 daniel 7352: *
7353: * parse an attribute
1.3 veillard 7354: *
1.22 daniel 7355: * [41] Attribute ::= Name Eq AttValue
7356: *
1.98 daniel 7357: * [ WFC: No External Entity References ]
7358: * Attribute values cannot contain direct or indirect entity references
7359: * to external entities.
7360: *
7361: * [ WFC: No < in Attribute Values ]
7362: * The replacement text of any entity referred to directly or indirectly in
7363: * an attribute value (other than "<") must not contain a <.
7364: *
7365: * [ VC: Attribute Value Type ]
1.117 daniel 7366: * The attribute must have been declared; the value must be of the type
1.99 daniel 7367: * declared for it.
1.98 daniel 7368: *
1.22 daniel 7369: * [25] Eq ::= S? '=' S?
7370: *
1.29 daniel 7371: * With namespace:
7372: *
7373: * [NS 11] Attribute ::= QName Eq AttValue
1.43 daniel 7374: *
7375: * Also the case QName == xmlns:??? is handled independently as a namespace
7376: * definition.
1.69 daniel 7377: *
1.72 daniel 7378: * Returns the attribute name, and the value in *value.
1.3 veillard 7379: */
7380:
1.123 daniel 7381: xmlChar *
7382: xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
7383: xmlChar *name, *val;
1.3 veillard 7384:
1.72 daniel 7385: *value = NULL;
7386: name = xmlParseName(ctxt);
1.22 daniel 7387: if (name == NULL) {
1.55 daniel 7388: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7389: ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
1.59 daniel 7390: ctxt->wellFormed = 0;
1.180 daniel 7391: ctxt->disableSAX = 1;
1.123 daniel 7392: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.52 daniel 7393: return(NULL);
1.3 veillard 7394: }
7395:
7396: /*
1.29 daniel 7397: * read the value
1.3 veillard 7398: */
1.42 daniel 7399: SKIP_BLANKS;
1.152 daniel 7400: if (RAW == '=') {
1.40 daniel 7401: NEXT;
1.42 daniel 7402: SKIP_BLANKS;
1.72 daniel 7403: val = xmlParseAttValue(ctxt);
1.96 daniel 7404: ctxt->instate = XML_PARSER_CONTENT;
1.29 daniel 7405: } else {
1.55 daniel 7406: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7407: ctxt->sax->error(ctxt->userData,
1.59 daniel 7408: "Specification mandate value for attribute %s\n", name);
1.123 daniel 7409: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
1.59 daniel 7410: ctxt->wellFormed = 0;
1.180 daniel 7411: ctxt->disableSAX = 1;
1.170 daniel 7412: xmlFree(name);
1.52 daniel 7413: return(NULL);
1.43 daniel 7414: }
7415:
1.172 daniel 7416: /*
7417: * Check that xml:lang conforms to the specification
7418: */
7419: if (!xmlStrcmp(name, BAD_CAST "xml:lang")) {
7420: if (!xmlCheckLanguageID(val)) {
7421: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7422: ctxt->sax->error(ctxt->userData,
7423: "Invalid value for xml:lang : %s\n", val);
7424: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
7425: ctxt->wellFormed = 0;
1.180 daniel 7426: ctxt->disableSAX = 1;
1.172 daniel 7427: }
7428: }
7429:
1.176 daniel 7430: /*
7431: * Check that xml:space conforms to the specification
7432: */
7433: if (!xmlStrcmp(name, BAD_CAST "xml:space")) {
7434: if (!xmlStrcmp(val, BAD_CAST "default"))
7435: *(ctxt->space) = 0;
7436: else if (!xmlStrcmp(val, BAD_CAST "preserve"))
7437: *(ctxt->space) = 1;
7438: else {
7439: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7440: ctxt->sax->error(ctxt->userData,
7441: "Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
7442: val);
7443: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
7444: ctxt->wellFormed = 0;
1.180 daniel 7445: ctxt->disableSAX = 1;
1.176 daniel 7446: }
7447: }
7448:
1.72 daniel 7449: *value = val;
7450: return(name);
1.3 veillard 7451: }
7452:
1.50 daniel 7453: /**
7454: * xmlParseStartTag:
7455: * @ctxt: an XML parser context
7456: *
7457: * parse a start of tag either for rule element or
7458: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 7459: *
7460: * [40] STag ::= '<' Name (S Attribute)* S? '>'
7461: *
1.98 daniel 7462: * [ WFC: Unique Att Spec ]
7463: * No attribute name may appear more than once in the same start-tag or
7464: * empty-element tag.
7465: *
1.29 daniel 7466: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7467: *
1.98 daniel 7468: * [ WFC: Unique Att Spec ]
7469: * No attribute name may appear more than once in the same start-tag or
7470: * empty-element tag.
7471: *
1.29 daniel 7472: * With namespace:
7473: *
7474: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7475: *
7476: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.83 daniel 7477: *
1.192 daniel 7478: * Returns the element name parsed
1.2 veillard 7479: */
7480:
1.123 daniel 7481: xmlChar *
1.69 daniel 7482: xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 7483: xmlChar *name;
7484: xmlChar *attname;
7485: xmlChar *attvalue;
7486: const xmlChar **atts = NULL;
1.72 daniel 7487: int nbatts = 0;
7488: int maxatts = 0;
7489: int i;
1.2 veillard 7490:
1.152 daniel 7491: if (RAW != '<') return(NULL);
1.40 daniel 7492: NEXT;
1.3 veillard 7493:
1.72 daniel 7494: name = xmlParseName(ctxt);
1.59 daniel 7495: if (name == NULL) {
7496: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7497: ctxt->sax->error(ctxt->userData,
1.59 daniel 7498: "xmlParseStartTag: invalid element name\n");
1.123 daniel 7499: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 7500: ctxt->wellFormed = 0;
1.180 daniel 7501: ctxt->disableSAX = 1;
1.83 daniel 7502: return(NULL);
1.50 daniel 7503: }
7504:
7505: /*
1.3 veillard 7506: * Now parse the attributes, it ends up with the ending
7507: *
7508: * (S Attribute)* S?
7509: */
1.42 daniel 7510: SKIP_BLANKS;
1.91 daniel 7511: GROW;
1.168 daniel 7512:
1.153 daniel 7513: while ((IS_CHAR(RAW)) &&
1.152 daniel 7514: (RAW != '>') &&
7515: ((RAW != '/') || (NXT(1) != '>'))) {
1.123 daniel 7516: const xmlChar *q = CUR_PTR;
1.91 daniel 7517: int cons = ctxt->input->consumed;
1.29 daniel 7518:
1.72 daniel 7519: attname = xmlParseAttribute(ctxt, &attvalue);
7520: if ((attname != NULL) && (attvalue != NULL)) {
7521: /*
1.98 daniel 7522: * [ WFC: Unique Att Spec ]
7523: * No attribute name may appear more than once in the same
7524: * start-tag or empty-element tag.
1.72 daniel 7525: */
7526: for (i = 0; i < nbatts;i += 2) {
7527: if (!xmlStrcmp(atts[i], attname)) {
7528: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 7529: ctxt->sax->error(ctxt->userData,
7530: "Attribute %s redefined\n",
7531: attname);
1.72 daniel 7532: ctxt->wellFormed = 0;
1.180 daniel 7533: ctxt->disableSAX = 1;
1.123 daniel 7534: ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
1.119 daniel 7535: xmlFree(attname);
7536: xmlFree(attvalue);
1.98 daniel 7537: goto failed;
1.72 daniel 7538: }
7539: }
7540:
7541: /*
7542: * Add the pair to atts
7543: */
7544: if (atts == NULL) {
7545: maxatts = 10;
1.123 daniel 7546: atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
1.72 daniel 7547: if (atts == NULL) {
1.86 daniel 7548: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 7549: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 7550: return(NULL);
1.72 daniel 7551: }
1.127 daniel 7552: } else if (nbatts + 4 > maxatts) {
1.72 daniel 7553: maxatts *= 2;
1.123 daniel 7554: atts = (const xmlChar **) xmlRealloc(atts,
7555: maxatts * sizeof(xmlChar *));
1.72 daniel 7556: if (atts == NULL) {
1.86 daniel 7557: fprintf(stderr, "realloc of %ld byte failed\n",
1.123 daniel 7558: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 7559: return(NULL);
1.72 daniel 7560: }
7561: }
7562: atts[nbatts++] = attname;
7563: atts[nbatts++] = attvalue;
7564: atts[nbatts] = NULL;
7565: atts[nbatts + 1] = NULL;
1.176 daniel 7566: } else {
7567: if (attname != NULL)
7568: xmlFree(attname);
7569: if (attvalue != NULL)
7570: xmlFree(attvalue);
1.72 daniel 7571: }
7572:
1.116 daniel 7573: failed:
1.168 daniel 7574:
7575: if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7576: break;
7577: if (!IS_BLANK(RAW)) {
7578: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7579: ctxt->sax->error(ctxt->userData,
7580: "attributes construct error\n");
7581: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7582: ctxt->wellFormed = 0;
1.180 daniel 7583: ctxt->disableSAX = 1;
1.168 daniel 7584: }
1.42 daniel 7585: SKIP_BLANKS;
1.91 daniel 7586: if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
1.55 daniel 7587: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7588: ctxt->sax->error(ctxt->userData,
1.31 daniel 7589: "xmlParseStartTag: problem parsing attributes\n");
1.123 daniel 7590: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 7591: ctxt->wellFormed = 0;
1.180 daniel 7592: ctxt->disableSAX = 1;
1.29 daniel 7593: break;
1.3 veillard 7594: }
1.91 daniel 7595: GROW;
1.3 veillard 7596: }
7597:
1.43 daniel 7598: /*
1.72 daniel 7599: * SAX: Start of Element !
1.43 daniel 7600: */
1.171 daniel 7601: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
7602: (!ctxt->disableSAX))
1.74 daniel 7603: ctxt->sax->startElement(ctxt->userData, name, atts);
1.43 daniel 7604:
1.72 daniel 7605: if (atts != NULL) {
1.123 daniel 7606: for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
1.119 daniel 7607: xmlFree(atts);
1.72 daniel 7608: }
1.83 daniel 7609: return(name);
1.3 veillard 7610: }
7611:
1.50 daniel 7612: /**
7613: * xmlParseEndTag:
7614: * @ctxt: an XML parser context
7615: *
7616: * parse an end of tag
1.27 daniel 7617: *
7618: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 7619: *
7620: * With namespace
7621: *
1.72 daniel 7622: * [NS 9] ETag ::= '</' QName S? '>'
1.7 veillard 7623: */
7624:
1.55 daniel 7625: void
1.140 daniel 7626: xmlParseEndTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 7627: xmlChar *name;
1.140 daniel 7628: xmlChar *oldname;
1.7 veillard 7629:
1.91 daniel 7630: GROW;
1.152 daniel 7631: if ((RAW != '<') || (NXT(1) != '/')) {
1.55 daniel 7632: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7633: ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
1.59 daniel 7634: ctxt->wellFormed = 0;
1.180 daniel 7635: ctxt->disableSAX = 1;
1.123 daniel 7636: ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
1.27 daniel 7637: return;
7638: }
1.40 daniel 7639: SKIP(2);
1.7 veillard 7640:
1.72 daniel 7641: name = xmlParseName(ctxt);
1.7 veillard 7642:
7643: /*
7644: * We should definitely be at the ending "S? '>'" part
7645: */
1.91 daniel 7646: GROW;
1.42 daniel 7647: SKIP_BLANKS;
1.153 daniel 7648: if ((!IS_CHAR(RAW)) || (RAW != '>')) {
1.55 daniel 7649: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7650: ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
1.123 daniel 7651: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.59 daniel 7652: ctxt->wellFormed = 0;
1.180 daniel 7653: ctxt->disableSAX = 1;
1.7 veillard 7654: } else
1.40 daniel 7655: NEXT;
1.7 veillard 7656:
1.72 daniel 7657: /*
1.98 daniel 7658: * [ WFC: Element Type Match ]
7659: * The Name in an element's end-tag must match the element type in the
7660: * start-tag.
7661: *
1.83 daniel 7662: */
1.147 daniel 7663: if ((name == NULL) || (ctxt->name == NULL) ||
7664: (xmlStrcmp(name, ctxt->name))) {
7665: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
7666: if ((name != NULL) && (ctxt->name != NULL)) {
7667: ctxt->sax->error(ctxt->userData,
7668: "Opening and ending tag mismatch: %s and %s\n",
7669: ctxt->name, name);
7670: } else if (ctxt->name != NULL) {
7671: ctxt->sax->error(ctxt->userData,
7672: "Ending tag eror for: %s\n", ctxt->name);
7673: } else {
7674: ctxt->sax->error(ctxt->userData,
7675: "Ending tag error: internal error ???\n");
7676: }
1.122 daniel 7677:
1.147 daniel 7678: }
1.123 daniel 7679: ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
1.83 daniel 7680: ctxt->wellFormed = 0;
1.180 daniel 7681: ctxt->disableSAX = 1;
1.83 daniel 7682: }
7683:
7684: /*
1.72 daniel 7685: * SAX: End of Tag
7686: */
1.171 daniel 7687: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7688: (!ctxt->disableSAX))
1.74 daniel 7689: ctxt->sax->endElement(ctxt->userData, name);
1.72 daniel 7690:
7691: if (name != NULL)
1.119 daniel 7692: xmlFree(name);
1.140 daniel 7693: oldname = namePop(ctxt);
1.176 daniel 7694: spacePop(ctxt);
1.140 daniel 7695: if (oldname != NULL) {
7696: #ifdef DEBUG_STACK
7697: fprintf(stderr,"Close: popped %s\n", oldname);
7698: #endif
7699: xmlFree(oldname);
7700: }
1.7 veillard 7701: return;
7702: }
7703:
1.50 daniel 7704: /**
7705: * xmlParseCDSect:
7706: * @ctxt: an XML parser context
7707: *
7708: * Parse escaped pure raw content.
1.29 daniel 7709: *
7710: * [18] CDSect ::= CDStart CData CDEnd
7711: *
7712: * [19] CDStart ::= '<![CDATA['
7713: *
7714: * [20] Data ::= (Char* - (Char* ']]>' Char*))
7715: *
7716: * [21] CDEnd ::= ']]>'
1.3 veillard 7717: */
1.55 daniel 7718: void
7719: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.135 daniel 7720: xmlChar *buf = NULL;
7721: int len = 0;
1.140 daniel 7722: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 7723: int r, rl;
7724: int s, sl;
7725: int cur, l;
1.3 veillard 7726:
1.106 daniel 7727: if ((NXT(0) == '<') && (NXT(1) == '!') &&
1.40 daniel 7728: (NXT(2) == '[') && (NXT(3) == 'C') &&
7729: (NXT(4) == 'D') && (NXT(5) == 'A') &&
7730: (NXT(6) == 'T') && (NXT(7) == 'A') &&
7731: (NXT(8) == '[')) {
7732: SKIP(9);
1.29 daniel 7733: } else
1.45 daniel 7734: return;
1.109 daniel 7735:
7736: ctxt->instate = XML_PARSER_CDATA_SECTION;
1.152 daniel 7737: r = CUR_CHAR(rl);
7738: if (!IS_CHAR(r)) {
1.55 daniel 7739: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7740: ctxt->sax->error(ctxt->userData,
1.135 daniel 7741: "CData section not finished\n");
1.59 daniel 7742: ctxt->wellFormed = 0;
1.180 daniel 7743: ctxt->disableSAX = 1;
1.123 daniel 7744: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.109 daniel 7745: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 7746: return;
1.3 veillard 7747: }
1.152 daniel 7748: NEXTL(rl);
7749: s = CUR_CHAR(sl);
7750: if (!IS_CHAR(s)) {
1.55 daniel 7751: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7752: ctxt->sax->error(ctxt->userData,
1.135 daniel 7753: "CData section not finished\n");
1.123 daniel 7754: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.59 daniel 7755: ctxt->wellFormed = 0;
1.180 daniel 7756: ctxt->disableSAX = 1;
1.109 daniel 7757: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 7758: return;
1.3 veillard 7759: }
1.152 daniel 7760: NEXTL(sl);
7761: cur = CUR_CHAR(l);
1.135 daniel 7762: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7763: if (buf == NULL) {
7764: fprintf(stderr, "malloc of %d byte failed\n", size);
7765: return;
7766: }
1.108 veillard 7767: while (IS_CHAR(cur) &&
1.110 daniel 7768: ((r != ']') || (s != ']') || (cur != '>'))) {
1.152 daniel 7769: if (len + 5 >= size) {
1.135 daniel 7770: size *= 2;
7771: buf = xmlRealloc(buf, size * sizeof(xmlChar));
7772: if (buf == NULL) {
7773: fprintf(stderr, "realloc of %d byte failed\n", size);
7774: return;
7775: }
7776: }
1.152 daniel 7777: COPY_BUF(rl,buf,len,r);
1.110 daniel 7778: r = s;
1.152 daniel 7779: rl = sl;
1.110 daniel 7780: s = cur;
1.152 daniel 7781: sl = l;
7782: NEXTL(l);
7783: cur = CUR_CHAR(l);
1.3 veillard 7784: }
1.135 daniel 7785: buf[len] = 0;
1.109 daniel 7786: ctxt->instate = XML_PARSER_CONTENT;
1.152 daniel 7787: if (cur != '>') {
1.55 daniel 7788: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7789: ctxt->sax->error(ctxt->userData,
1.135 daniel 7790: "CData section not finished\n%.50s\n", buf);
1.123 daniel 7791: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.59 daniel 7792: ctxt->wellFormed = 0;
1.180 daniel 7793: ctxt->disableSAX = 1;
1.135 daniel 7794: xmlFree(buf);
1.45 daniel 7795: return;
1.3 veillard 7796: }
1.152 daniel 7797: NEXTL(l);
1.16 daniel 7798:
1.45 daniel 7799: /*
1.135 daniel 7800: * Ok the buffer is to be consumed as cdata.
1.45 daniel 7801: */
1.171 daniel 7802: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.107 daniel 7803: if (ctxt->sax->cdataBlock != NULL)
1.135 daniel 7804: ctxt->sax->cdataBlock(ctxt->userData, buf, len);
1.45 daniel 7805: }
1.135 daniel 7806: xmlFree(buf);
1.2 veillard 7807: }
7808:
1.50 daniel 7809: /**
7810: * xmlParseContent:
7811: * @ctxt: an XML parser context
7812: *
7813: * Parse a content:
1.2 veillard 7814: *
1.27 daniel 7815: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 7816: */
7817:
1.55 daniel 7818: void
7819: xmlParseContent(xmlParserCtxtPtr ctxt) {
1.97 daniel 7820: GROW;
1.176 daniel 7821: while (((RAW != 0) || (ctxt->token != 0)) &&
7822: ((RAW != '<') || (NXT(1) != '/'))) {
1.123 daniel 7823: const xmlChar *test = CUR_PTR;
1.91 daniel 7824: int cons = ctxt->input->consumed;
1.123 daniel 7825: xmlChar tok = ctxt->token;
1.27 daniel 7826:
7827: /*
1.152 daniel 7828: * Handle possible processed charrefs.
7829: */
7830: if (ctxt->token != 0) {
7831: xmlParseCharData(ctxt, 0);
7832: }
7833: /*
1.27 daniel 7834: * First case : a Processing Instruction.
7835: */
1.152 daniel 7836: else if ((RAW == '<') && (NXT(1) == '?')) {
1.27 daniel 7837: xmlParsePI(ctxt);
7838: }
1.72 daniel 7839:
1.27 daniel 7840: /*
7841: * Second case : a CDSection
7842: */
1.152 daniel 7843: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7844: (NXT(2) == '[') && (NXT(3) == 'C') &&
7845: (NXT(4) == 'D') && (NXT(5) == 'A') &&
7846: (NXT(6) == 'T') && (NXT(7) == 'A') &&
7847: (NXT(8) == '[')) {
1.45 daniel 7848: xmlParseCDSect(ctxt);
1.27 daniel 7849: }
1.72 daniel 7850:
1.27 daniel 7851: /*
7852: * Third case : a comment
7853: */
1.152 daniel 7854: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7855: (NXT(2) == '-') && (NXT(3) == '-')) {
1.114 daniel 7856: xmlParseComment(ctxt);
1.97 daniel 7857: ctxt->instate = XML_PARSER_CONTENT;
1.27 daniel 7858: }
1.72 daniel 7859:
1.27 daniel 7860: /*
7861: * Fourth case : a sub-element.
7862: */
1.152 daniel 7863: else if (RAW == '<') {
1.72 daniel 7864: xmlParseElement(ctxt);
1.45 daniel 7865: }
1.72 daniel 7866:
1.45 daniel 7867: /*
1.50 daniel 7868: * Fifth case : a reference. If if has not been resolved,
7869: * parsing returns it's Name, create the node
1.45 daniel 7870: */
1.97 daniel 7871:
1.152 daniel 7872: else if (RAW == '&') {
1.77 daniel 7873: xmlParseReference(ctxt);
1.27 daniel 7874: }
1.72 daniel 7875:
1.27 daniel 7876: /*
7877: * Last case, text. Note that References are handled directly.
7878: */
7879: else {
1.45 daniel 7880: xmlParseCharData(ctxt, 0);
1.3 veillard 7881: }
1.14 veillard 7882:
1.91 daniel 7883: GROW;
1.14 veillard 7884: /*
1.45 daniel 7885: * Pop-up of finished entities.
1.14 veillard 7886: */
1.152 daniel 7887: while ((RAW == 0) && (ctxt->inputNr > 1))
1.69 daniel 7888: xmlPopInput(ctxt);
1.135 daniel 7889: SHRINK;
1.45 daniel 7890:
1.113 daniel 7891: if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
7892: (tok == ctxt->token)) {
1.55 daniel 7893: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7894: ctxt->sax->error(ctxt->userData,
1.59 daniel 7895: "detected an error in element content\n");
1.123 daniel 7896: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 7897: ctxt->wellFormed = 0;
1.180 daniel 7898: ctxt->disableSAX = 1;
1.29 daniel 7899: break;
7900: }
1.3 veillard 7901: }
1.2 veillard 7902: }
7903:
1.50 daniel 7904: /**
7905: * xmlParseElement:
7906: * @ctxt: an XML parser context
7907: *
7908: * parse an XML element, this is highly recursive
1.26 daniel 7909: *
7910: * [39] element ::= EmptyElemTag | STag content ETag
7911: *
1.98 daniel 7912: * [ WFC: Element Type Match ]
7913: * The Name in an element's end-tag must match the element type in the
7914: * start-tag.
7915: *
7916: * [ VC: Element Valid ]
1.117 daniel 7917: * An element is valid if there is a declaration matching elementdecl
1.99 daniel 7918: * where the Name matches the element type and one of the following holds:
7919: * - The declaration matches EMPTY and the element has no content.
7920: * - The declaration matches children and the sequence of child elements
7921: * belongs to the language generated by the regular expression in the
7922: * content model, with optional white space (characters matching the
7923: * nonterminal S) between each pair of child elements.
7924: * - The declaration matches Mixed and the content consists of character
7925: * data and child elements whose types match names in the content model.
7926: * - The declaration matches ANY, and the types of any child elements have
7927: * been declared.
1.2 veillard 7928: */
1.26 daniel 7929:
1.72 daniel 7930: void
1.69 daniel 7931: xmlParseElement(xmlParserCtxtPtr ctxt) {
1.123 daniel 7932: const xmlChar *openTag = CUR_PTR;
7933: xmlChar *name;
1.140 daniel 7934: xmlChar *oldname;
1.32 daniel 7935: xmlParserNodeInfo node_info;
1.118 daniel 7936: xmlNodePtr ret;
1.2 veillard 7937:
1.32 daniel 7938: /* Capture start position */
1.118 daniel 7939: if (ctxt->record_info) {
7940: node_info.begin_pos = ctxt->input->consumed +
7941: (CUR_PTR - ctxt->input->base);
7942: node_info.begin_line = ctxt->input->line;
7943: }
1.32 daniel 7944:
1.176 daniel 7945: if (ctxt->spaceNr == 0)
7946: spacePush(ctxt, -1);
7947: else
7948: spacePush(ctxt, *ctxt->space);
7949:
1.83 daniel 7950: name = xmlParseStartTag(ctxt);
7951: if (name == NULL) {
1.176 daniel 7952: spacePop(ctxt);
1.83 daniel 7953: return;
7954: }
1.140 daniel 7955: namePush(ctxt, name);
1.118 daniel 7956: ret = ctxt->node;
1.2 veillard 7957:
7958: /*
1.99 daniel 7959: * [ VC: Root Element Type ]
7960: * The Name in the document type declaration must match the element
7961: * type of the root element.
7962: */
1.105 daniel 7963: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 7964: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.102 daniel 7965: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
1.99 daniel 7966:
7967: /*
1.2 veillard 7968: * Check for an Empty Element.
7969: */
1.152 daniel 7970: if ((RAW == '/') && (NXT(1) == '>')) {
1.40 daniel 7971: SKIP(2);
1.171 daniel 7972: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7973: (!ctxt->disableSAX))
1.83 daniel 7974: ctxt->sax->endElement(ctxt->userData, name);
1.140 daniel 7975: oldname = namePop(ctxt);
1.176 daniel 7976: spacePop(ctxt);
1.140 daniel 7977: if (oldname != NULL) {
7978: #ifdef DEBUG_STACK
7979: fprintf(stderr,"Close: popped %s\n", oldname);
7980: #endif
7981: xmlFree(oldname);
7982: }
1.72 daniel 7983: return;
1.2 veillard 7984: }
1.152 daniel 7985: if (RAW == '>') {
1.91 daniel 7986: NEXT;
7987: } else {
1.55 daniel 7988: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7989: ctxt->sax->error(ctxt->userData,
7990: "Couldn't find end of Start Tag\n%.30s\n",
1.57 daniel 7991: openTag);
1.59 daniel 7992: ctxt->wellFormed = 0;
1.180 daniel 7993: ctxt->disableSAX = 1;
1.123 daniel 7994: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.45 daniel 7995:
7996: /*
7997: * end of parsing of this node.
7998: */
7999: nodePop(ctxt);
1.140 daniel 8000: oldname = namePop(ctxt);
1.176 daniel 8001: spacePop(ctxt);
1.140 daniel 8002: if (oldname != NULL) {
8003: #ifdef DEBUG_STACK
8004: fprintf(stderr,"Close: popped %s\n", oldname);
8005: #endif
8006: xmlFree(oldname);
8007: }
1.118 daniel 8008:
8009: /*
8010: * Capture end position and add node
8011: */
8012: if ( ret != NULL && ctxt->record_info ) {
8013: node_info.end_pos = ctxt->input->consumed +
8014: (CUR_PTR - ctxt->input->base);
8015: node_info.end_line = ctxt->input->line;
8016: node_info.node = ret;
8017: xmlParserAddNodeInfo(ctxt, &node_info);
8018: }
1.72 daniel 8019: return;
1.2 veillard 8020: }
8021:
8022: /*
8023: * Parse the content of the element:
8024: */
1.45 daniel 8025: xmlParseContent(ctxt);
1.153 daniel 8026: if (!IS_CHAR(RAW)) {
1.55 daniel 8027: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8028: ctxt->sax->error(ctxt->userData,
1.57 daniel 8029: "Premature end of data in tag %.30s\n", openTag);
1.59 daniel 8030: ctxt->wellFormed = 0;
1.180 daniel 8031: ctxt->disableSAX = 1;
1.123 daniel 8032: ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
1.45 daniel 8033:
8034: /*
8035: * end of parsing of this node.
8036: */
8037: nodePop(ctxt);
1.140 daniel 8038: oldname = namePop(ctxt);
1.176 daniel 8039: spacePop(ctxt);
1.140 daniel 8040: if (oldname != NULL) {
8041: #ifdef DEBUG_STACK
8042: fprintf(stderr,"Close: popped %s\n", oldname);
8043: #endif
8044: xmlFree(oldname);
8045: }
1.72 daniel 8046: return;
1.2 veillard 8047: }
8048:
8049: /*
1.27 daniel 8050: * parse the end of tag: '</' should be here.
1.2 veillard 8051: */
1.140 daniel 8052: xmlParseEndTag(ctxt);
1.118 daniel 8053:
8054: /*
8055: * Capture end position and add node
8056: */
8057: if ( ret != NULL && ctxt->record_info ) {
8058: node_info.end_pos = ctxt->input->consumed +
8059: (CUR_PTR - ctxt->input->base);
8060: node_info.end_line = ctxt->input->line;
8061: node_info.node = ret;
8062: xmlParserAddNodeInfo(ctxt, &node_info);
8063: }
1.2 veillard 8064: }
8065:
1.50 daniel 8066: /**
8067: * xmlParseVersionNum:
8068: * @ctxt: an XML parser context
8069: *
8070: * parse the XML version value.
1.29 daniel 8071: *
8072: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
1.68 daniel 8073: *
8074: * Returns the string giving the XML version number, or NULL
1.29 daniel 8075: */
1.123 daniel 8076: xmlChar *
1.55 daniel 8077: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.135 daniel 8078: xmlChar *buf = NULL;
8079: int len = 0;
8080: int size = 10;
8081: xmlChar cur;
1.29 daniel 8082:
1.135 daniel 8083: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
8084: if (buf == NULL) {
8085: fprintf(stderr, "malloc of %d byte failed\n", size);
8086: return(NULL);
8087: }
8088: cur = CUR;
1.152 daniel 8089: while (((cur >= 'a') && (cur <= 'z')) ||
8090: ((cur >= 'A') && (cur <= 'Z')) ||
8091: ((cur >= '0') && (cur <= '9')) ||
8092: (cur == '_') || (cur == '.') ||
8093: (cur == ':') || (cur == '-')) {
1.135 daniel 8094: if (len + 1 >= size) {
8095: size *= 2;
8096: buf = xmlRealloc(buf, size * sizeof(xmlChar));
8097: if (buf == NULL) {
8098: fprintf(stderr, "realloc of %d byte failed\n", size);
8099: return(NULL);
8100: }
8101: }
8102: buf[len++] = cur;
8103: NEXT;
8104: cur=CUR;
8105: }
8106: buf[len] = 0;
8107: return(buf);
1.29 daniel 8108: }
8109:
1.50 daniel 8110: /**
8111: * xmlParseVersionInfo:
8112: * @ctxt: an XML parser context
8113: *
8114: * parse the XML version.
1.29 daniel 8115: *
8116: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8117: *
8118: * [25] Eq ::= S? '=' S?
1.50 daniel 8119: *
1.68 daniel 8120: * Returns the version string, e.g. "1.0"
1.29 daniel 8121: */
8122:
1.123 daniel 8123: xmlChar *
1.55 daniel 8124: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
1.123 daniel 8125: xmlChar *version = NULL;
8126: const xmlChar *q;
1.29 daniel 8127:
1.152 daniel 8128: if ((RAW == 'v') && (NXT(1) == 'e') &&
1.40 daniel 8129: (NXT(2) == 'r') && (NXT(3) == 's') &&
8130: (NXT(4) == 'i') && (NXT(5) == 'o') &&
8131: (NXT(6) == 'n')) {
8132: SKIP(7);
1.42 daniel 8133: SKIP_BLANKS;
1.152 daniel 8134: if (RAW != '=') {
1.55 daniel 8135: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8136: ctxt->sax->error(ctxt->userData,
8137: "xmlParseVersionInfo : expected '='\n");
1.59 daniel 8138: ctxt->wellFormed = 0;
1.180 daniel 8139: ctxt->disableSAX = 1;
1.123 daniel 8140: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.31 daniel 8141: return(NULL);
8142: }
1.40 daniel 8143: NEXT;
1.42 daniel 8144: SKIP_BLANKS;
1.152 daniel 8145: if (RAW == '"') {
1.40 daniel 8146: NEXT;
8147: q = CUR_PTR;
1.29 daniel 8148: version = xmlParseVersionNum(ctxt);
1.152 daniel 8149: if (RAW != '"') {
1.55 daniel 8150: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8151: ctxt->sax->error(ctxt->userData,
8152: "String not closed\n%.50s\n", q);
1.59 daniel 8153: ctxt->wellFormed = 0;
1.180 daniel 8154: ctxt->disableSAX = 1;
1.123 daniel 8155: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8156: } else
1.40 daniel 8157: NEXT;
1.152 daniel 8158: } else if (RAW == '\''){
1.40 daniel 8159: NEXT;
8160: q = CUR_PTR;
1.29 daniel 8161: version = xmlParseVersionNum(ctxt);
1.152 daniel 8162: if (RAW != '\'') {
1.55 daniel 8163: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8164: ctxt->sax->error(ctxt->userData,
8165: "String not closed\n%.50s\n", q);
1.123 daniel 8166: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.59 daniel 8167: ctxt->wellFormed = 0;
1.180 daniel 8168: ctxt->disableSAX = 1;
1.55 daniel 8169: } else
1.40 daniel 8170: NEXT;
1.31 daniel 8171: } else {
1.55 daniel 8172: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8173: ctxt->sax->error(ctxt->userData,
1.59 daniel 8174: "xmlParseVersionInfo : expected ' or \"\n");
1.122 daniel 8175: ctxt->wellFormed = 0;
1.180 daniel 8176: ctxt->disableSAX = 1;
1.123 daniel 8177: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.29 daniel 8178: }
8179: }
8180: return(version);
8181: }
8182:
1.50 daniel 8183: /**
8184: * xmlParseEncName:
8185: * @ctxt: an XML parser context
8186: *
8187: * parse the XML encoding name
1.29 daniel 8188: *
8189: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1.50 daniel 8190: *
1.68 daniel 8191: * Returns the encoding name value or NULL
1.29 daniel 8192: */
1.123 daniel 8193: xmlChar *
1.55 daniel 8194: xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.135 daniel 8195: xmlChar *buf = NULL;
8196: int len = 0;
8197: int size = 10;
8198: xmlChar cur;
1.29 daniel 8199:
1.135 daniel 8200: cur = CUR;
8201: if (((cur >= 'a') && (cur <= 'z')) ||
8202: ((cur >= 'A') && (cur <= 'Z'))) {
8203: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
8204: if (buf == NULL) {
8205: fprintf(stderr, "malloc of %d byte failed\n", size);
8206: return(NULL);
8207: }
8208:
8209: buf[len++] = cur;
1.40 daniel 8210: NEXT;
1.135 daniel 8211: cur = CUR;
1.152 daniel 8212: while (((cur >= 'a') && (cur <= 'z')) ||
8213: ((cur >= 'A') && (cur <= 'Z')) ||
8214: ((cur >= '0') && (cur <= '9')) ||
8215: (cur == '.') || (cur == '_') ||
8216: (cur == '-')) {
1.135 daniel 8217: if (len + 1 >= size) {
8218: size *= 2;
8219: buf = xmlRealloc(buf, size * sizeof(xmlChar));
8220: if (buf == NULL) {
8221: fprintf(stderr, "realloc of %d byte failed\n", size);
8222: return(NULL);
8223: }
8224: }
8225: buf[len++] = cur;
8226: NEXT;
8227: cur = CUR;
8228: if (cur == 0) {
8229: SHRINK;
8230: GROW;
8231: cur = CUR;
8232: }
8233: }
8234: buf[len] = 0;
1.29 daniel 8235: } else {
1.55 daniel 8236: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8237: ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
1.59 daniel 8238: ctxt->wellFormed = 0;
1.180 daniel 8239: ctxt->disableSAX = 1;
1.123 daniel 8240: ctxt->errNo = XML_ERR_ENCODING_NAME;
1.29 daniel 8241: }
1.135 daniel 8242: return(buf);
1.29 daniel 8243: }
8244:
1.50 daniel 8245: /**
8246: * xmlParseEncodingDecl:
8247: * @ctxt: an XML parser context
8248: *
8249: * parse the XML encoding declaration
1.29 daniel 8250: *
8251: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
1.50 daniel 8252: *
8253: * TODO: this should setup the conversion filters.
8254: *
1.68 daniel 8255: * Returns the encoding value or NULL
1.29 daniel 8256: */
8257:
1.123 daniel 8258: xmlChar *
1.55 daniel 8259: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 8260: xmlChar *encoding = NULL;
8261: const xmlChar *q;
1.29 daniel 8262:
1.42 daniel 8263: SKIP_BLANKS;
1.152 daniel 8264: if ((RAW == 'e') && (NXT(1) == 'n') &&
1.40 daniel 8265: (NXT(2) == 'c') && (NXT(3) == 'o') &&
8266: (NXT(4) == 'd') && (NXT(5) == 'i') &&
8267: (NXT(6) == 'n') && (NXT(7) == 'g')) {
8268: SKIP(8);
1.42 daniel 8269: SKIP_BLANKS;
1.152 daniel 8270: if (RAW != '=') {
1.55 daniel 8271: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8272: ctxt->sax->error(ctxt->userData,
8273: "xmlParseEncodingDecl : expected '='\n");
1.59 daniel 8274: ctxt->wellFormed = 0;
1.180 daniel 8275: ctxt->disableSAX = 1;
1.123 daniel 8276: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.31 daniel 8277: return(NULL);
8278: }
1.40 daniel 8279: NEXT;
1.42 daniel 8280: SKIP_BLANKS;
1.152 daniel 8281: if (RAW == '"') {
1.40 daniel 8282: NEXT;
8283: q = CUR_PTR;
1.29 daniel 8284: encoding = xmlParseEncName(ctxt);
1.152 daniel 8285: if (RAW != '"') {
1.55 daniel 8286: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8287: ctxt->sax->error(ctxt->userData,
8288: "String not closed\n%.50s\n", q);
1.59 daniel 8289: ctxt->wellFormed = 0;
1.180 daniel 8290: ctxt->disableSAX = 1;
1.123 daniel 8291: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8292: } else
1.40 daniel 8293: NEXT;
1.152 daniel 8294: } else if (RAW == '\''){
1.40 daniel 8295: NEXT;
8296: q = CUR_PTR;
1.29 daniel 8297: encoding = xmlParseEncName(ctxt);
1.152 daniel 8298: if (RAW != '\'') {
1.55 daniel 8299: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8300: ctxt->sax->error(ctxt->userData,
8301: "String not closed\n%.50s\n", q);
1.59 daniel 8302: ctxt->wellFormed = 0;
1.180 daniel 8303: ctxt->disableSAX = 1;
1.123 daniel 8304: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8305: } else
1.40 daniel 8306: NEXT;
1.152 daniel 8307: } else if (RAW == '"'){
1.55 daniel 8308: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8309: ctxt->sax->error(ctxt->userData,
1.59 daniel 8310: "xmlParseEncodingDecl : expected ' or \"\n");
8311: ctxt->wellFormed = 0;
1.180 daniel 8312: ctxt->disableSAX = 1;
1.123 daniel 8313: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.29 daniel 8314: }
1.193 daniel 8315: if (encoding != NULL) {
8316: xmlCharEncoding enc;
8317: xmlCharEncodingHandlerPtr handler;
8318:
1.195 daniel 8319: if (ctxt->input->encoding != NULL)
8320: xmlFree((xmlChar *) ctxt->input->encoding);
8321: ctxt->input->encoding = encoding;
8322:
1.193 daniel 8323: enc = xmlParseCharEncoding((const char *) encoding);
8324: /*
8325: * registered set of known encodings
8326: */
8327: if (enc != XML_CHAR_ENCODING_ERROR) {
8328: xmlSwitchEncoding(ctxt, enc);
8329: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8330: xmlFree(encoding);
8331: return(NULL);
8332: }
8333: } else {
8334: /*
8335: * fallback for unknown encodings
8336: */
8337: handler = xmlFindCharEncodingHandler((const char *) encoding);
8338: if (handler != NULL) {
8339: xmlSwitchToEncoding(ctxt, handler);
8340: } else {
8341: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
8342: xmlFree(encoding);
8343: return(NULL);
8344: }
8345: }
8346: }
1.29 daniel 8347: }
8348: return(encoding);
8349: }
8350:
1.50 daniel 8351: /**
8352: * xmlParseSDDecl:
8353: * @ctxt: an XML parser context
8354: *
8355: * parse the XML standalone declaration
1.29 daniel 8356: *
8357: * [32] SDDecl ::= S 'standalone' Eq
8358: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
1.99 daniel 8359: *
8360: * [ VC: Standalone Document Declaration ]
8361: * TODO The standalone document declaration must have the value "no"
8362: * if any external markup declarations contain declarations of:
8363: * - attributes with default values, if elements to which these
8364: * attributes apply appear in the document without specifications
8365: * of values for these attributes, or
8366: * - entities (other than amp, lt, gt, apos, quot), if references
8367: * to those entities appear in the document, or
8368: * - attributes with values subject to normalization, where the
8369: * attribute appears in the document with a value which will change
8370: * as a result of normalization, or
8371: * - element types with element content, if white space occurs directly
8372: * within any instance of those types.
1.68 daniel 8373: *
8374: * Returns 1 if standalone, 0 otherwise
1.29 daniel 8375: */
8376:
1.55 daniel 8377: int
8378: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 8379: int standalone = -1;
8380:
1.42 daniel 8381: SKIP_BLANKS;
1.152 daniel 8382: if ((RAW == 's') && (NXT(1) == 't') &&
1.40 daniel 8383: (NXT(2) == 'a') && (NXT(3) == 'n') &&
8384: (NXT(4) == 'd') && (NXT(5) == 'a') &&
8385: (NXT(6) == 'l') && (NXT(7) == 'o') &&
8386: (NXT(8) == 'n') && (NXT(9) == 'e')) {
8387: SKIP(10);
1.81 daniel 8388: SKIP_BLANKS;
1.152 daniel 8389: if (RAW != '=') {
1.55 daniel 8390: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8391: ctxt->sax->error(ctxt->userData,
1.59 daniel 8392: "XML standalone declaration : expected '='\n");
1.123 daniel 8393: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.59 daniel 8394: ctxt->wellFormed = 0;
1.180 daniel 8395: ctxt->disableSAX = 1;
1.32 daniel 8396: return(standalone);
8397: }
1.40 daniel 8398: NEXT;
1.42 daniel 8399: SKIP_BLANKS;
1.152 daniel 8400: if (RAW == '\''){
1.40 daniel 8401: NEXT;
1.152 daniel 8402: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 8403: standalone = 0;
1.40 daniel 8404: SKIP(2);
1.152 daniel 8405: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 8406: (NXT(2) == 's')) {
1.29 daniel 8407: standalone = 1;
1.40 daniel 8408: SKIP(3);
1.29 daniel 8409: } else {
1.55 daniel 8410: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8411: ctxt->sax->error(ctxt->userData,
8412: "standalone accepts only 'yes' or 'no'\n");
1.123 daniel 8413: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.59 daniel 8414: ctxt->wellFormed = 0;
1.180 daniel 8415: ctxt->disableSAX = 1;
1.29 daniel 8416: }
1.152 daniel 8417: if (RAW != '\'') {
1.55 daniel 8418: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8419: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.123 daniel 8420: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.59 daniel 8421: ctxt->wellFormed = 0;
1.180 daniel 8422: ctxt->disableSAX = 1;
1.55 daniel 8423: } else
1.40 daniel 8424: NEXT;
1.152 daniel 8425: } else if (RAW == '"'){
1.40 daniel 8426: NEXT;
1.152 daniel 8427: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 8428: standalone = 0;
1.40 daniel 8429: SKIP(2);
1.152 daniel 8430: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 8431: (NXT(2) == 's')) {
1.29 daniel 8432: standalone = 1;
1.40 daniel 8433: SKIP(3);
1.29 daniel 8434: } else {
1.55 daniel 8435: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8436: ctxt->sax->error(ctxt->userData,
1.59 daniel 8437: "standalone accepts only 'yes' or 'no'\n");
1.123 daniel 8438: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.59 daniel 8439: ctxt->wellFormed = 0;
1.180 daniel 8440: ctxt->disableSAX = 1;
1.29 daniel 8441: }
1.152 daniel 8442: if (RAW != '"') {
1.55 daniel 8443: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8444: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 8445: ctxt->wellFormed = 0;
1.180 daniel 8446: ctxt->disableSAX = 1;
1.123 daniel 8447: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8448: } else
1.40 daniel 8449: NEXT;
1.37 daniel 8450: } else {
1.55 daniel 8451: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8452: ctxt->sax->error(ctxt->userData,
8453: "Standalone value not found\n");
1.59 daniel 8454: ctxt->wellFormed = 0;
1.180 daniel 8455: ctxt->disableSAX = 1;
1.123 daniel 8456: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.37 daniel 8457: }
1.29 daniel 8458: }
8459: return(standalone);
8460: }
8461:
1.50 daniel 8462: /**
8463: * xmlParseXMLDecl:
8464: * @ctxt: an XML parser context
8465: *
8466: * parse an XML declaration header
1.29 daniel 8467: *
8468: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 8469: */
8470:
1.55 daniel 8471: void
8472: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 8473: xmlChar *version;
1.1 veillard 8474:
8475: /*
1.19 daniel 8476: * We know that '<?xml' is here.
1.1 veillard 8477: */
1.40 daniel 8478: SKIP(5);
1.1 veillard 8479:
1.153 daniel 8480: if (!IS_BLANK(RAW)) {
1.59 daniel 8481: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8482: ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
1.123 daniel 8483: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 8484: ctxt->wellFormed = 0;
1.180 daniel 8485: ctxt->disableSAX = 1;
1.59 daniel 8486: }
1.42 daniel 8487: SKIP_BLANKS;
1.1 veillard 8488:
8489: /*
1.29 daniel 8490: * We should have the VersionInfo here.
1.1 veillard 8491: */
1.29 daniel 8492: version = xmlParseVersionInfo(ctxt);
8493: if (version == NULL)
1.45 daniel 8494: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.72 daniel 8495: ctxt->version = xmlStrdup(version);
1.119 daniel 8496: xmlFree(version);
1.29 daniel 8497:
8498: /*
8499: * We may have the encoding declaration
8500: */
1.153 daniel 8501: if (!IS_BLANK(RAW)) {
1.152 daniel 8502: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 8503: SKIP(2);
8504: return;
8505: }
8506: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8507: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.123 daniel 8508: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 8509: ctxt->wellFormed = 0;
1.180 daniel 8510: ctxt->disableSAX = 1;
1.59 daniel 8511: }
1.195 daniel 8512: xmlParseEncodingDecl(ctxt);
1.193 daniel 8513: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8514: /*
8515: * The XML REC instructs us to stop parsing right here
8516: */
8517: return;
8518: }
1.1 veillard 8519:
8520: /*
1.29 daniel 8521: * We may have the standalone status.
1.1 veillard 8522: */
1.164 daniel 8523: if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
1.152 daniel 8524: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 8525: SKIP(2);
8526: return;
8527: }
8528: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8529: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 8530: ctxt->wellFormed = 0;
1.180 daniel 8531: ctxt->disableSAX = 1;
1.123 daniel 8532: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 8533: }
8534: SKIP_BLANKS;
1.167 daniel 8535: ctxt->input->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 8536:
1.42 daniel 8537: SKIP_BLANKS;
1.152 daniel 8538: if ((RAW == '?') && (NXT(1) == '>')) {
1.40 daniel 8539: SKIP(2);
1.152 daniel 8540: } else if (RAW == '>') {
1.31 daniel 8541: /* Deprecated old WD ... */
1.55 daniel 8542: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8543: ctxt->sax->error(ctxt->userData,
8544: "XML declaration must end-up with '?>'\n");
1.59 daniel 8545: ctxt->wellFormed = 0;
1.180 daniel 8546: ctxt->disableSAX = 1;
1.123 daniel 8547: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.40 daniel 8548: NEXT;
1.29 daniel 8549: } else {
1.55 daniel 8550: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8551: ctxt->sax->error(ctxt->userData,
8552: "parsing XML declaration: '?>' expected\n");
1.59 daniel 8553: ctxt->wellFormed = 0;
1.180 daniel 8554: ctxt->disableSAX = 1;
1.123 daniel 8555: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.40 daniel 8556: MOVETO_ENDTAG(CUR_PTR);
8557: NEXT;
1.29 daniel 8558: }
1.1 veillard 8559: }
8560:
1.50 daniel 8561: /**
8562: * xmlParseMisc:
8563: * @ctxt: an XML parser context
8564: *
8565: * parse an XML Misc* optionnal field.
1.21 daniel 8566: *
1.22 daniel 8567: * [27] Misc ::= Comment | PI | S
1.1 veillard 8568: */
8569:
1.55 daniel 8570: void
8571: xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.152 daniel 8572: while (((RAW == '<') && (NXT(1) == '?')) ||
8573: ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 8574: (NXT(2) == '-') && (NXT(3) == '-')) ||
8575: IS_BLANK(CUR)) {
1.152 daniel 8576: if ((RAW == '<') && (NXT(1) == '?')) {
1.16 daniel 8577: xmlParsePI(ctxt);
1.40 daniel 8578: } else if (IS_BLANK(CUR)) {
8579: NEXT;
1.1 veillard 8580: } else
1.114 daniel 8581: xmlParseComment(ctxt);
1.1 veillard 8582: }
8583: }
8584:
1.50 daniel 8585: /**
1.181 daniel 8586: * xmlParseDocument:
1.50 daniel 8587: * @ctxt: an XML parser context
8588: *
8589: * parse an XML document (and build a tree if using the standard SAX
8590: * interface).
1.21 daniel 8591: *
1.22 daniel 8592: * [1] document ::= prolog element Misc*
1.29 daniel 8593: *
8594: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.50 daniel 8595: *
1.68 daniel 8596: * Returns 0, -1 in case of error. the parser context is augmented
1.50 daniel 8597: * as a result of the parsing.
1.1 veillard 8598: */
8599:
1.55 daniel 8600: int
8601: xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.156 daniel 8602: xmlChar start[4];
8603: xmlCharEncoding enc;
8604:
1.45 daniel 8605: xmlDefaultSAXHandlerInit();
8606:
1.91 daniel 8607: GROW;
8608:
1.14 veillard 8609: /*
1.44 daniel 8610: * SAX: beginning of the document processing.
8611: */
1.72 daniel 8612: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
1.74 daniel 8613: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
1.44 daniel 8614:
1.156 daniel 8615: /*
8616: * Get the 4 first bytes and decode the charset
8617: * if enc != XML_CHAR_ENCODING_NONE
8618: * plug some encoding conversion routines.
8619: */
8620: start[0] = RAW;
8621: start[1] = NXT(1);
8622: start[2] = NXT(2);
8623: start[3] = NXT(3);
8624: enc = xmlDetectCharEncoding(start, 4);
8625: if (enc != XML_CHAR_ENCODING_NONE) {
8626: xmlSwitchEncoding(ctxt, enc);
8627: }
8628:
1.1 veillard 8629:
1.59 daniel 8630: if (CUR == 0) {
8631: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8632: ctxt->sax->error(ctxt->userData, "Document is empty\n");
1.123 daniel 8633: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 8634: ctxt->wellFormed = 0;
1.180 daniel 8635: ctxt->disableSAX = 1;
1.59 daniel 8636: }
1.1 veillard 8637:
8638: /*
8639: * Check for the XMLDecl in the Prolog.
8640: */
1.91 daniel 8641: GROW;
1.152 daniel 8642: if ((RAW == '<') && (NXT(1) == '?') &&
1.40 daniel 8643: (NXT(2) == 'x') && (NXT(3) == 'm') &&
1.142 daniel 8644: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.196 daniel 8645:
8646: /*
8647: * Note that we will switch encoding on the fly.
8648: */
1.19 daniel 8649: xmlParseXMLDecl(ctxt);
1.193 daniel 8650: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8651: /*
8652: * The XML REC instructs us to stop parsing right here
8653: */
8654: return(-1);
8655: }
1.167 daniel 8656: ctxt->standalone = ctxt->input->standalone;
1.42 daniel 8657: SKIP_BLANKS;
1.1 veillard 8658: } else {
1.72 daniel 8659: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.1 veillard 8660: }
1.171 daniel 8661: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
1.74 daniel 8662: ctxt->sax->startDocument(ctxt->userData);
1.1 veillard 8663:
8664: /*
8665: * The Misc part of the Prolog
8666: */
1.91 daniel 8667: GROW;
1.16 daniel 8668: xmlParseMisc(ctxt);
1.1 veillard 8669:
8670: /*
1.29 daniel 8671: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 8672: * (doctypedecl Misc*)?
8673: */
1.91 daniel 8674: GROW;
1.152 daniel 8675: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 8676: (NXT(2) == 'D') && (NXT(3) == 'O') &&
8677: (NXT(4) == 'C') && (NXT(5) == 'T') &&
8678: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
8679: (NXT(8) == 'E')) {
1.165 daniel 8680:
1.166 daniel 8681: ctxt->inSubset = 1;
1.22 daniel 8682: xmlParseDocTypeDecl(ctxt);
1.152 daniel 8683: if (RAW == '[') {
1.140 daniel 8684: ctxt->instate = XML_PARSER_DTD;
8685: xmlParseInternalSubset(ctxt);
8686: }
1.165 daniel 8687:
8688: /*
8689: * Create and update the external subset.
8690: */
1.166 daniel 8691: ctxt->inSubset = 2;
1.171 daniel 8692: if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8693: (!ctxt->disableSAX))
1.165 daniel 8694: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8695: ctxt->extSubSystem, ctxt->extSubURI);
1.166 daniel 8696: ctxt->inSubset = 0;
1.165 daniel 8697:
8698:
1.96 daniel 8699: ctxt->instate = XML_PARSER_PROLOG;
1.22 daniel 8700: xmlParseMisc(ctxt);
1.21 daniel 8701: }
8702:
8703: /*
8704: * Time to start parsing the tree itself
1.1 veillard 8705: */
1.91 daniel 8706: GROW;
1.152 daniel 8707: if (RAW != '<') {
1.59 daniel 8708: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8709: ctxt->sax->error(ctxt->userData,
1.151 daniel 8710: "Start tag expected, '<' not found\n");
1.140 daniel 8711: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 8712: ctxt->wellFormed = 0;
1.180 daniel 8713: ctxt->disableSAX = 1;
1.140 daniel 8714: ctxt->instate = XML_PARSER_EOF;
8715: } else {
8716: ctxt->instate = XML_PARSER_CONTENT;
8717: xmlParseElement(ctxt);
8718: ctxt->instate = XML_PARSER_EPILOG;
8719:
8720:
8721: /*
8722: * The Misc part at the end
8723: */
8724: xmlParseMisc(ctxt);
8725:
1.152 daniel 8726: if (RAW != 0) {
1.140 daniel 8727: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8728: ctxt->sax->error(ctxt->userData,
8729: "Extra content at the end of the document\n");
8730: ctxt->wellFormed = 0;
1.180 daniel 8731: ctxt->disableSAX = 1;
1.140 daniel 8732: ctxt->errNo = XML_ERR_DOCUMENT_END;
8733: }
8734: ctxt->instate = XML_PARSER_EOF;
1.59 daniel 8735: }
8736:
1.44 daniel 8737: /*
8738: * SAX: end of the document processing.
8739: */
1.171 daniel 8740: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8741: (!ctxt->disableSAX))
1.74 daniel 8742: ctxt->sax->endDocument(ctxt->userData);
1.151 daniel 8743:
1.59 daniel 8744: if (! ctxt->wellFormed) return(-1);
1.16 daniel 8745: return(0);
8746: }
8747:
1.98 daniel 8748: /************************************************************************
8749: * *
1.128 daniel 8750: * Progressive parsing interfaces *
8751: * *
8752: ************************************************************************/
8753:
8754: /**
8755: * xmlParseLookupSequence:
8756: * @ctxt: an XML parser context
8757: * @first: the first char to lookup
1.140 daniel 8758: * @next: the next char to lookup or zero
8759: * @third: the next char to lookup or zero
1.128 daniel 8760: *
1.140 daniel 8761: * Try to find if a sequence (first, next, third) or just (first next) or
8762: * (first) is available in the input stream.
8763: * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8764: * to avoid rescanning sequences of bytes, it DOES change the state of the
8765: * parser, do not use liberally.
1.128 daniel 8766: *
1.140 daniel 8767: * Returns the index to the current parsing point if the full sequence
8768: * is available, -1 otherwise.
1.128 daniel 8769: */
8770: int
1.140 daniel 8771: xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8772: xmlChar next, xmlChar third) {
8773: int base, len;
8774: xmlParserInputPtr in;
8775: const xmlChar *buf;
8776:
8777: in = ctxt->input;
8778: if (in == NULL) return(-1);
8779: base = in->cur - in->base;
8780: if (base < 0) return(-1);
8781: if (ctxt->checkIndex > base)
8782: base = ctxt->checkIndex;
8783: if (in->buf == NULL) {
8784: buf = in->base;
8785: len = in->length;
8786: } else {
8787: buf = in->buf->buffer->content;
8788: len = in->buf->buffer->use;
8789: }
8790: /* take into account the sequence length */
8791: if (third) len -= 2;
8792: else if (next) len --;
8793: for (;base < len;base++) {
8794: if (buf[base] == first) {
8795: if (third != 0) {
8796: if ((buf[base + 1] != next) ||
8797: (buf[base + 2] != third)) continue;
8798: } else if (next != 0) {
8799: if (buf[base + 1] != next) continue;
8800: }
8801: ctxt->checkIndex = 0;
8802: #ifdef DEBUG_PUSH
8803: if (next == 0)
8804: fprintf(stderr, "PP: lookup '%c' found at %d\n",
8805: first, base);
8806: else if (third == 0)
8807: fprintf(stderr, "PP: lookup '%c%c' found at %d\n",
8808: first, next, base);
8809: else
8810: fprintf(stderr, "PP: lookup '%c%c%c' found at %d\n",
8811: first, next, third, base);
8812: #endif
8813: return(base - (in->cur - in->base));
8814: }
8815: }
8816: ctxt->checkIndex = base;
8817: #ifdef DEBUG_PUSH
8818: if (next == 0)
8819: fprintf(stderr, "PP: lookup '%c' failed\n", first);
8820: else if (third == 0)
8821: fprintf(stderr, "PP: lookup '%c%c' failed\n", first, next);
8822: else
8823: fprintf(stderr, "PP: lookup '%c%c%c' failed\n", first, next, third);
8824: #endif
8825: return(-1);
1.128 daniel 8826: }
8827:
8828: /**
1.143 daniel 8829: * xmlParseTryOrFinish:
1.128 daniel 8830: * @ctxt: an XML parser context
1.143 daniel 8831: * @terminate: last chunk indicator
1.128 daniel 8832: *
8833: * Try to progress on parsing
8834: *
8835: * Returns zero if no parsing was possible
8836: */
8837: int
1.143 daniel 8838: xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
1.128 daniel 8839: int ret = 0;
1.140 daniel 8840: int avail;
8841: xmlChar cur, next;
8842:
8843: #ifdef DEBUG_PUSH
8844: switch (ctxt->instate) {
8845: case XML_PARSER_EOF:
8846: fprintf(stderr, "PP: try EOF\n"); break;
8847: case XML_PARSER_START:
8848: fprintf(stderr, "PP: try START\n"); break;
8849: case XML_PARSER_MISC:
8850: fprintf(stderr, "PP: try MISC\n");break;
8851: case XML_PARSER_COMMENT:
8852: fprintf(stderr, "PP: try COMMENT\n");break;
8853: case XML_PARSER_PROLOG:
8854: fprintf(stderr, "PP: try PROLOG\n");break;
8855: case XML_PARSER_START_TAG:
8856: fprintf(stderr, "PP: try START_TAG\n");break;
8857: case XML_PARSER_CONTENT:
8858: fprintf(stderr, "PP: try CONTENT\n");break;
8859: case XML_PARSER_CDATA_SECTION:
8860: fprintf(stderr, "PP: try CDATA_SECTION\n");break;
8861: case XML_PARSER_END_TAG:
8862: fprintf(stderr, "PP: try END_TAG\n");break;
8863: case XML_PARSER_ENTITY_DECL:
8864: fprintf(stderr, "PP: try ENTITY_DECL\n");break;
8865: case XML_PARSER_ENTITY_VALUE:
8866: fprintf(stderr, "PP: try ENTITY_VALUE\n");break;
8867: case XML_PARSER_ATTRIBUTE_VALUE:
8868: fprintf(stderr, "PP: try ATTRIBUTE_VALUE\n");break;
8869: case XML_PARSER_DTD:
8870: fprintf(stderr, "PP: try DTD\n");break;
8871: case XML_PARSER_EPILOG:
8872: fprintf(stderr, "PP: try EPILOG\n");break;
8873: case XML_PARSER_PI:
8874: fprintf(stderr, "PP: try PI\n");break;
8875: }
8876: #endif
1.128 daniel 8877:
8878: while (1) {
1.140 daniel 8879: /*
8880: * Pop-up of finished entities.
8881: */
1.152 daniel 8882: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 8883: xmlPopInput(ctxt);
8884:
1.184 daniel 8885: if (ctxt->input ==NULL) break;
8886: if (ctxt->input->buf == NULL)
8887: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8888: else
1.184 daniel 8889: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8890: if (avail < 1)
8891: goto done;
1.128 daniel 8892: switch (ctxt->instate) {
8893: case XML_PARSER_EOF:
1.140 daniel 8894: /*
8895: * Document parsing is done !
8896: */
8897: goto done;
8898: case XML_PARSER_START:
8899: /*
8900: * Very first chars read from the document flow.
8901: */
1.184 daniel 8902: cur = ctxt->input->cur[0];
1.140 daniel 8903: if (IS_BLANK(cur)) {
8904: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8905: ctxt->sax->setDocumentLocator(ctxt->userData,
8906: &xmlDefaultSAXLocator);
8907: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8908: ctxt->sax->error(ctxt->userData,
8909: "Extra spaces at the beginning of the document are not allowed\n");
8910: ctxt->errNo = XML_ERR_DOCUMENT_START;
8911: ctxt->wellFormed = 0;
1.180 daniel 8912: ctxt->disableSAX = 1;
1.140 daniel 8913: SKIP_BLANKS;
8914: ret++;
1.184 daniel 8915: if (ctxt->input->buf == NULL)
8916: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8917: else
1.184 daniel 8918: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8919: }
8920: if (avail < 2)
8921: goto done;
8922:
1.184 daniel 8923: cur = ctxt->input->cur[0];
8924: next = ctxt->input->cur[1];
1.140 daniel 8925: if (cur == 0) {
8926: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8927: ctxt->sax->setDocumentLocator(ctxt->userData,
8928: &xmlDefaultSAXLocator);
8929: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8930: ctxt->sax->error(ctxt->userData, "Document is empty\n");
8931: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8932: ctxt->wellFormed = 0;
1.180 daniel 8933: ctxt->disableSAX = 1;
1.140 daniel 8934: ctxt->instate = XML_PARSER_EOF;
8935: #ifdef DEBUG_PUSH
8936: fprintf(stderr, "PP: entering EOF\n");
8937: #endif
8938: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8939: ctxt->sax->endDocument(ctxt->userData);
8940: goto done;
8941: }
8942: if ((cur == '<') && (next == '?')) {
8943: /* PI or XML decl */
8944: if (avail < 5) return(ret);
1.143 daniel 8945: if ((!terminate) &&
8946: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8947: return(ret);
8948: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8949: ctxt->sax->setDocumentLocator(ctxt->userData,
8950: &xmlDefaultSAXLocator);
1.184 daniel 8951: if ((ctxt->input->cur[2] == 'x') &&
8952: (ctxt->input->cur[3] == 'm') &&
8953: (ctxt->input->cur[4] == 'l') &&
8954: (IS_BLANK(ctxt->input->cur[5]))) {
1.140 daniel 8955: ret += 5;
8956: #ifdef DEBUG_PUSH
8957: fprintf(stderr, "PP: Parsing XML Decl\n");
8958: #endif
8959: xmlParseXMLDecl(ctxt);
1.193 daniel 8960: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8961: /*
8962: * The XML REC instructs us to stop parsing right
8963: * here
8964: */
8965: ctxt->instate = XML_PARSER_EOF;
8966: return(0);
8967: }
1.167 daniel 8968: ctxt->standalone = ctxt->input->standalone;
1.164 daniel 8969: if ((ctxt->encoding == NULL) &&
8970: (ctxt->input->encoding != NULL))
8971: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1.171 daniel 8972: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8973: (!ctxt->disableSAX))
1.140 daniel 8974: ctxt->sax->startDocument(ctxt->userData);
8975: ctxt->instate = XML_PARSER_MISC;
8976: #ifdef DEBUG_PUSH
8977: fprintf(stderr, "PP: entering MISC\n");
8978: #endif
8979: } else {
8980: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 daniel 8981: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8982: (!ctxt->disableSAX))
1.140 daniel 8983: ctxt->sax->startDocument(ctxt->userData);
8984: ctxt->instate = XML_PARSER_MISC;
8985: #ifdef DEBUG_PUSH
8986: fprintf(stderr, "PP: entering MISC\n");
8987: #endif
8988: }
8989: } else {
8990: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8991: ctxt->sax->setDocumentLocator(ctxt->userData,
8992: &xmlDefaultSAXLocator);
8993: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 daniel 8994: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8995: (!ctxt->disableSAX))
1.140 daniel 8996: ctxt->sax->startDocument(ctxt->userData);
8997: ctxt->instate = XML_PARSER_MISC;
8998: #ifdef DEBUG_PUSH
8999: fprintf(stderr, "PP: entering MISC\n");
9000: #endif
9001: }
9002: break;
9003: case XML_PARSER_MISC:
9004: SKIP_BLANKS;
1.184 daniel 9005: if (ctxt->input->buf == NULL)
9006: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9007: else
1.184 daniel 9008: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9009: if (avail < 2)
9010: goto done;
1.184 daniel 9011: cur = ctxt->input->cur[0];
9012: next = ctxt->input->cur[1];
1.140 daniel 9013: if ((cur == '<') && (next == '?')) {
1.143 daniel 9014: if ((!terminate) &&
9015: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 9016: goto done;
9017: #ifdef DEBUG_PUSH
9018: fprintf(stderr, "PP: Parsing PI\n");
9019: #endif
9020: xmlParsePI(ctxt);
9021: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 9022: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 9023: if ((!terminate) &&
9024: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 9025: goto done;
9026: #ifdef DEBUG_PUSH
9027: fprintf(stderr, "PP: Parsing Comment\n");
9028: #endif
9029: xmlParseComment(ctxt);
9030: ctxt->instate = XML_PARSER_MISC;
9031: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 9032: (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
9033: (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
9034: (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
9035: (ctxt->input->cur[8] == 'E')) {
1.143 daniel 9036: if ((!terminate) &&
9037: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 9038: goto done;
9039: #ifdef DEBUG_PUSH
9040: fprintf(stderr, "PP: Parsing internal subset\n");
9041: #endif
1.166 daniel 9042: ctxt->inSubset = 1;
1.140 daniel 9043: xmlParseDocTypeDecl(ctxt);
1.152 daniel 9044: if (RAW == '[') {
1.140 daniel 9045: ctxt->instate = XML_PARSER_DTD;
9046: #ifdef DEBUG_PUSH
9047: fprintf(stderr, "PP: entering DTD\n");
9048: #endif
9049: } else {
1.166 daniel 9050: /*
9051: * Create and update the external subset.
9052: */
9053: ctxt->inSubset = 2;
1.171 daniel 9054: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 9055: (ctxt->sax->externalSubset != NULL))
9056: ctxt->sax->externalSubset(ctxt->userData,
9057: ctxt->intSubName, ctxt->extSubSystem,
9058: ctxt->extSubURI);
9059: ctxt->inSubset = 0;
1.140 daniel 9060: ctxt->instate = XML_PARSER_PROLOG;
9061: #ifdef DEBUG_PUSH
9062: fprintf(stderr, "PP: entering PROLOG\n");
9063: #endif
9064: }
9065: } else if ((cur == '<') && (next == '!') &&
9066: (avail < 9)) {
9067: goto done;
9068: } else {
9069: ctxt->instate = XML_PARSER_START_TAG;
9070: #ifdef DEBUG_PUSH
9071: fprintf(stderr, "PP: entering START_TAG\n");
9072: #endif
9073: }
9074: break;
1.128 daniel 9075: case XML_PARSER_PROLOG:
1.140 daniel 9076: SKIP_BLANKS;
1.184 daniel 9077: if (ctxt->input->buf == NULL)
9078: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9079: else
1.184 daniel 9080: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9081: if (avail < 2)
9082: goto done;
1.184 daniel 9083: cur = ctxt->input->cur[0];
9084: next = ctxt->input->cur[1];
1.140 daniel 9085: if ((cur == '<') && (next == '?')) {
1.143 daniel 9086: if ((!terminate) &&
9087: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 9088: goto done;
9089: #ifdef DEBUG_PUSH
9090: fprintf(stderr, "PP: Parsing PI\n");
9091: #endif
9092: xmlParsePI(ctxt);
9093: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 9094: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 9095: if ((!terminate) &&
9096: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 9097: goto done;
9098: #ifdef DEBUG_PUSH
9099: fprintf(stderr, "PP: Parsing Comment\n");
9100: #endif
9101: xmlParseComment(ctxt);
9102: ctxt->instate = XML_PARSER_PROLOG;
9103: } else if ((cur == '<') && (next == '!') &&
9104: (avail < 4)) {
9105: goto done;
9106: } else {
9107: ctxt->instate = XML_PARSER_START_TAG;
9108: #ifdef DEBUG_PUSH
9109: fprintf(stderr, "PP: entering START_TAG\n");
9110: #endif
9111: }
9112: break;
9113: case XML_PARSER_EPILOG:
9114: SKIP_BLANKS;
1.184 daniel 9115: if (ctxt->input->buf == NULL)
9116: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9117: else
1.184 daniel 9118: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9119: if (avail < 2)
9120: goto done;
1.184 daniel 9121: cur = ctxt->input->cur[0];
9122: next = ctxt->input->cur[1];
1.140 daniel 9123: if ((cur == '<') && (next == '?')) {
1.143 daniel 9124: if ((!terminate) &&
9125: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 9126: goto done;
9127: #ifdef DEBUG_PUSH
9128: fprintf(stderr, "PP: Parsing PI\n");
9129: #endif
9130: xmlParsePI(ctxt);
9131: ctxt->instate = XML_PARSER_EPILOG;
9132: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 9133: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 9134: if ((!terminate) &&
9135: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 9136: goto done;
9137: #ifdef DEBUG_PUSH
9138: fprintf(stderr, "PP: Parsing Comment\n");
9139: #endif
9140: xmlParseComment(ctxt);
9141: ctxt->instate = XML_PARSER_EPILOG;
9142: } else if ((cur == '<') && (next == '!') &&
9143: (avail < 4)) {
9144: goto done;
9145: } else {
9146: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9147: ctxt->sax->error(ctxt->userData,
9148: "Extra content at the end of the document\n");
9149: ctxt->wellFormed = 0;
1.180 daniel 9150: ctxt->disableSAX = 1;
1.140 daniel 9151: ctxt->errNo = XML_ERR_DOCUMENT_END;
9152: ctxt->instate = XML_PARSER_EOF;
9153: #ifdef DEBUG_PUSH
9154: fprintf(stderr, "PP: entering EOF\n");
9155: #endif
1.171 daniel 9156: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9157: (!ctxt->disableSAX))
1.140 daniel 9158: ctxt->sax->endDocument(ctxt->userData);
9159: goto done;
9160: }
9161: break;
9162: case XML_PARSER_START_TAG: {
9163: xmlChar *name, *oldname;
9164:
1.184 daniel 9165: if ((avail < 2) && (ctxt->inputNr == 1))
1.140 daniel 9166: goto done;
1.184 daniel 9167: cur = ctxt->input->cur[0];
1.140 daniel 9168: if (cur != '<') {
9169: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9170: ctxt->sax->error(ctxt->userData,
9171: "Start tag expect, '<' not found\n");
9172: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
9173: ctxt->wellFormed = 0;
1.180 daniel 9174: ctxt->disableSAX = 1;
1.140 daniel 9175: ctxt->instate = XML_PARSER_EOF;
9176: #ifdef DEBUG_PUSH
9177: fprintf(stderr, "PP: entering EOF\n");
9178: #endif
1.171 daniel 9179: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9180: (!ctxt->disableSAX))
1.140 daniel 9181: ctxt->sax->endDocument(ctxt->userData);
9182: goto done;
9183: }
1.143 daniel 9184: if ((!terminate) &&
9185: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 9186: goto done;
1.176 daniel 9187: if (ctxt->spaceNr == 0)
9188: spacePush(ctxt, -1);
9189: else
9190: spacePush(ctxt, *ctxt->space);
1.140 daniel 9191: name = xmlParseStartTag(ctxt);
9192: if (name == NULL) {
1.176 daniel 9193: spacePop(ctxt);
1.140 daniel 9194: ctxt->instate = XML_PARSER_EOF;
9195: #ifdef DEBUG_PUSH
9196: fprintf(stderr, "PP: entering EOF\n");
9197: #endif
1.171 daniel 9198: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9199: (!ctxt->disableSAX))
1.140 daniel 9200: ctxt->sax->endDocument(ctxt->userData);
9201: goto done;
9202: }
9203: namePush(ctxt, xmlStrdup(name));
9204:
9205: /*
9206: * [ VC: Root Element Type ]
9207: * The Name in the document type declaration must match
9208: * the element type of the root element.
9209: */
9210: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 9211: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.140 daniel 9212: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9213:
9214: /*
9215: * Check for an Empty Element.
9216: */
1.152 daniel 9217: if ((RAW == '/') && (NXT(1) == '>')) {
1.140 daniel 9218: SKIP(2);
1.171 daniel 9219: if ((ctxt->sax != NULL) &&
9220: (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
1.140 daniel 9221: ctxt->sax->endElement(ctxt->userData, name);
9222: xmlFree(name);
9223: oldname = namePop(ctxt);
1.176 daniel 9224: spacePop(ctxt);
1.140 daniel 9225: if (oldname != NULL) {
9226: #ifdef DEBUG_STACK
9227: fprintf(stderr,"Close: popped %s\n", oldname);
9228: #endif
9229: xmlFree(oldname);
9230: }
9231: if (ctxt->name == NULL) {
9232: ctxt->instate = XML_PARSER_EPILOG;
9233: #ifdef DEBUG_PUSH
9234: fprintf(stderr, "PP: entering EPILOG\n");
9235: #endif
9236: } else {
9237: ctxt->instate = XML_PARSER_CONTENT;
9238: #ifdef DEBUG_PUSH
9239: fprintf(stderr, "PP: entering CONTENT\n");
9240: #endif
9241: }
9242: break;
9243: }
1.152 daniel 9244: if (RAW == '>') {
1.140 daniel 9245: NEXT;
9246: } else {
9247: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9248: ctxt->sax->error(ctxt->userData,
9249: "Couldn't find end of Start Tag %s\n",
9250: name);
9251: ctxt->wellFormed = 0;
1.180 daniel 9252: ctxt->disableSAX = 1;
1.140 daniel 9253: ctxt->errNo = XML_ERR_GT_REQUIRED;
9254:
9255: /*
9256: * end of parsing of this node.
9257: */
9258: nodePop(ctxt);
9259: oldname = namePop(ctxt);
1.176 daniel 9260: spacePop(ctxt);
1.140 daniel 9261: if (oldname != NULL) {
9262: #ifdef DEBUG_STACK
9263: fprintf(stderr,"Close: popped %s\n", oldname);
9264: #endif
9265: xmlFree(oldname);
9266: }
9267: }
9268: xmlFree(name);
9269: ctxt->instate = XML_PARSER_CONTENT;
9270: #ifdef DEBUG_PUSH
9271: fprintf(stderr, "PP: entering CONTENT\n");
9272: #endif
9273: break;
9274: }
1.128 daniel 9275: case XML_PARSER_CONTENT:
1.140 daniel 9276: /*
9277: * Handle preparsed entities and charRef
9278: */
9279: if (ctxt->token != 0) {
9280: xmlChar cur[2] = { 0 , 0 } ;
9281:
9282: cur[0] = (xmlChar) ctxt->token;
1.171 daniel 9283: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9284: (ctxt->sax->characters != NULL))
1.140 daniel 9285: ctxt->sax->characters(ctxt->userData, cur, 1);
9286: ctxt->token = 0;
9287: }
1.184 daniel 9288: if ((avail < 2) && (ctxt->inputNr == 1))
1.140 daniel 9289: goto done;
1.184 daniel 9290: cur = ctxt->input->cur[0];
9291: next = ctxt->input->cur[1];
1.140 daniel 9292: if ((cur == '<') && (next == '?')) {
1.143 daniel 9293: if ((!terminate) &&
9294: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 9295: goto done;
9296: #ifdef DEBUG_PUSH
9297: fprintf(stderr, "PP: Parsing PI\n");
9298: #endif
9299: xmlParsePI(ctxt);
9300: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 9301: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 9302: if ((!terminate) &&
9303: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 9304: goto done;
9305: #ifdef DEBUG_PUSH
9306: fprintf(stderr, "PP: Parsing Comment\n");
9307: #endif
9308: xmlParseComment(ctxt);
9309: ctxt->instate = XML_PARSER_CONTENT;
1.184 daniel 9310: } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9311: (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
9312: (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
9313: (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
9314: (ctxt->input->cur[8] == '[')) {
1.140 daniel 9315: SKIP(9);
9316: ctxt->instate = XML_PARSER_CDATA_SECTION;
9317: #ifdef DEBUG_PUSH
9318: fprintf(stderr, "PP: entering CDATA_SECTION\n");
9319: #endif
9320: break;
9321: } else if ((cur == '<') && (next == '!') &&
9322: (avail < 9)) {
9323: goto done;
9324: } else if ((cur == '<') && (next == '/')) {
9325: ctxt->instate = XML_PARSER_END_TAG;
9326: #ifdef DEBUG_PUSH
9327: fprintf(stderr, "PP: entering END_TAG\n");
9328: #endif
9329: break;
9330: } else if (cur == '<') {
9331: ctxt->instate = XML_PARSER_START_TAG;
9332: #ifdef DEBUG_PUSH
9333: fprintf(stderr, "PP: entering START_TAG\n");
9334: #endif
9335: break;
9336: } else if (cur == '&') {
1.143 daniel 9337: if ((!terminate) &&
9338: (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
1.140 daniel 9339: goto done;
9340: #ifdef DEBUG_PUSH
9341: fprintf(stderr, "PP: Parsing Reference\n");
9342: #endif
9343: /* TODO: check generation of subtrees if noent !!! */
9344: xmlParseReference(ctxt);
9345: } else {
1.156 daniel 9346: /* TODO Avoid the extra copy, handle directly !!! */
1.140 daniel 9347: /*
1.181 daniel 9348: * Goal of the following test is:
1.140 daniel 9349: * - minimize calls to the SAX 'character' callback
9350: * when they are mergeable
9351: * - handle a problem for isBlank when we only parse
9352: * a sequence of blank chars and the next one is
9353: * not available to check against '<' presence.
9354: * - tries to homogenize the differences in SAX
9355: * callbacks between the push and pull versions
9356: * of the parser.
9357: */
9358: if ((ctxt->inputNr == 1) &&
9359: (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
1.143 daniel 9360: if ((!terminate) &&
9361: (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
1.140 daniel 9362: goto done;
9363: }
9364: ctxt->checkIndex = 0;
9365: #ifdef DEBUG_PUSH
9366: fprintf(stderr, "PP: Parsing char data\n");
9367: #endif
9368: xmlParseCharData(ctxt, 0);
9369: }
9370: /*
9371: * Pop-up of finished entities.
9372: */
1.152 daniel 9373: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 9374: xmlPopInput(ctxt);
9375: break;
9376: case XML_PARSER_CDATA_SECTION: {
9377: /*
9378: * The Push mode need to have the SAX callback for
9379: * cdataBlock merge back contiguous callbacks.
9380: */
9381: int base;
9382:
9383: base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9384: if (base < 0) {
9385: if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
1.171 daniel 9386: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.140 daniel 9387: if (ctxt->sax->cdataBlock != NULL)
1.184 daniel 9388: ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
1.140 daniel 9389: XML_PARSER_BIG_BUFFER_SIZE);
9390: }
9391: SKIP(XML_PARSER_BIG_BUFFER_SIZE);
9392: ctxt->checkIndex = 0;
9393: }
9394: goto done;
9395: } else {
1.171 daniel 9396: if ((ctxt->sax != NULL) && (base > 0) &&
9397: (!ctxt->disableSAX)) {
1.140 daniel 9398: if (ctxt->sax->cdataBlock != NULL)
9399: ctxt->sax->cdataBlock(ctxt->userData,
1.184 daniel 9400: ctxt->input->cur, base);
1.140 daniel 9401: }
9402: SKIP(base + 3);
9403: ctxt->checkIndex = 0;
9404: ctxt->instate = XML_PARSER_CONTENT;
9405: #ifdef DEBUG_PUSH
9406: fprintf(stderr, "PP: entering CONTENT\n");
9407: #endif
9408: }
9409: break;
9410: }
1.141 daniel 9411: case XML_PARSER_END_TAG:
1.140 daniel 9412: if (avail < 2)
9413: goto done;
1.143 daniel 9414: if ((!terminate) &&
9415: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 9416: goto done;
9417: xmlParseEndTag(ctxt);
9418: if (ctxt->name == NULL) {
9419: ctxt->instate = XML_PARSER_EPILOG;
9420: #ifdef DEBUG_PUSH
9421: fprintf(stderr, "PP: entering EPILOG\n");
9422: #endif
9423: } else {
9424: ctxt->instate = XML_PARSER_CONTENT;
9425: #ifdef DEBUG_PUSH
9426: fprintf(stderr, "PP: entering CONTENT\n");
9427: #endif
9428: }
9429: break;
9430: case XML_PARSER_DTD: {
9431: /*
9432: * Sorry but progressive parsing of the internal subset
9433: * is not expected to be supported. We first check that
9434: * the full content of the internal subset is available and
9435: * the parsing is launched only at that point.
9436: * Internal subset ends up with "']' S? '>'" in an unescaped
9437: * section and not in a ']]>' sequence which are conditional
9438: * sections (whoever argued to keep that crap in XML deserve
9439: * a place in hell !).
9440: */
9441: int base, i;
9442: xmlChar *buf;
9443: xmlChar quote = 0;
9444:
1.184 daniel 9445: base = ctxt->input->cur - ctxt->input->base;
1.140 daniel 9446: if (base < 0) return(0);
9447: if (ctxt->checkIndex > base)
9448: base = ctxt->checkIndex;
1.184 daniel 9449: buf = ctxt->input->buf->buffer->content;
9450: for (;base < ctxt->input->buf->buffer->use;base++) {
1.140 daniel 9451: if (quote != 0) {
9452: if (buf[base] == quote)
9453: quote = 0;
9454: continue;
9455: }
9456: if (buf[base] == '"') {
9457: quote = '"';
9458: continue;
9459: }
9460: if (buf[base] == '\'') {
9461: quote = '\'';
9462: continue;
9463: }
9464: if (buf[base] == ']') {
1.184 daniel 9465: if (base +1 >= ctxt->input->buf->buffer->use)
1.140 daniel 9466: break;
9467: if (buf[base + 1] == ']') {
9468: /* conditional crap, skip both ']' ! */
9469: base++;
9470: continue;
9471: }
1.184 daniel 9472: for (i = 0;base + i < ctxt->input->buf->buffer->use;i++) {
1.140 daniel 9473: if (buf[base + i] == '>')
9474: goto found_end_int_subset;
9475: }
9476: break;
9477: }
9478: }
9479: /*
9480: * We didn't find the end of the Internal subset
9481: */
9482: if (quote == 0)
9483: ctxt->checkIndex = base;
9484: #ifdef DEBUG_PUSH
9485: if (next == 0)
9486: fprintf(stderr, "PP: lookup of int subset end filed\n");
9487: #endif
9488: goto done;
9489:
9490: found_end_int_subset:
9491: xmlParseInternalSubset(ctxt);
1.166 daniel 9492: ctxt->inSubset = 2;
1.171 daniel 9493: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 9494: (ctxt->sax->externalSubset != NULL))
9495: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9496: ctxt->extSubSystem, ctxt->extSubURI);
9497: ctxt->inSubset = 0;
1.140 daniel 9498: ctxt->instate = XML_PARSER_PROLOG;
9499: ctxt->checkIndex = 0;
9500: #ifdef DEBUG_PUSH
9501: fprintf(stderr, "PP: entering PROLOG\n");
9502: #endif
9503: break;
9504: }
9505: case XML_PARSER_COMMENT:
9506: fprintf(stderr, "PP: internal error, state == COMMENT\n");
9507: ctxt->instate = XML_PARSER_CONTENT;
9508: #ifdef DEBUG_PUSH
9509: fprintf(stderr, "PP: entering CONTENT\n");
9510: #endif
9511: break;
9512: case XML_PARSER_PI:
9513: fprintf(stderr, "PP: internal error, state == PI\n");
9514: ctxt->instate = XML_PARSER_CONTENT;
9515: #ifdef DEBUG_PUSH
9516: fprintf(stderr, "PP: entering CONTENT\n");
9517: #endif
9518: break;
1.128 daniel 9519: case XML_PARSER_ENTITY_DECL:
1.140 daniel 9520: fprintf(stderr, "PP: internal error, state == ENTITY_DECL\n");
9521: ctxt->instate = XML_PARSER_DTD;
9522: #ifdef DEBUG_PUSH
9523: fprintf(stderr, "PP: entering DTD\n");
9524: #endif
9525: break;
1.128 daniel 9526: case XML_PARSER_ENTITY_VALUE:
1.140 daniel 9527: fprintf(stderr, "PP: internal error, state == ENTITY_VALUE\n");
9528: ctxt->instate = XML_PARSER_CONTENT;
9529: #ifdef DEBUG_PUSH
9530: fprintf(stderr, "PP: entering DTD\n");
9531: #endif
9532: break;
1.128 daniel 9533: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 9534: fprintf(stderr, "PP: internal error, state == ATTRIBUTE_VALUE\n");
1.168 daniel 9535: ctxt->instate = XML_PARSER_START_TAG;
9536: #ifdef DEBUG_PUSH
9537: fprintf(stderr, "PP: entering START_TAG\n");
9538: #endif
9539: break;
9540: case XML_PARSER_SYSTEM_LITERAL:
9541: fprintf(stderr, "PP: internal error, state == SYSTEM_LITERAL\n");
1.140 daniel 9542: ctxt->instate = XML_PARSER_START_TAG;
9543: #ifdef DEBUG_PUSH
9544: fprintf(stderr, "PP: entering START_TAG\n");
9545: #endif
9546: break;
1.128 daniel 9547: }
9548: }
1.140 daniel 9549: done:
9550: #ifdef DEBUG_PUSH
9551: fprintf(stderr, "PP: done %d\n", ret);
9552: #endif
1.128 daniel 9553: return(ret);
9554: }
9555:
9556: /**
1.143 daniel 9557: * xmlParseTry:
9558: * @ctxt: an XML parser context
9559: *
9560: * Try to progress on parsing
9561: *
9562: * Returns zero if no parsing was possible
9563: */
9564: int
9565: xmlParseTry(xmlParserCtxtPtr ctxt) {
9566: return(xmlParseTryOrFinish(ctxt, 0));
9567: }
9568:
9569: /**
1.128 daniel 9570: * xmlParseChunk:
9571: * @ctxt: an XML parser context
9572: * @chunk: an char array
9573: * @size: the size in byte of the chunk
9574: * @terminate: last chunk indicator
9575: *
9576: * Parse a Chunk of memory
9577: *
9578: * Returns zero if no error, the xmlParserErrors otherwise.
9579: */
1.140 daniel 9580: int
1.128 daniel 9581: xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
9582: int terminate) {
1.132 daniel 9583: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
1.140 daniel 9584: (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
9585: int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9586: int cur = ctxt->input->cur - ctxt->input->base;
9587:
1.132 daniel 9588: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
1.140 daniel 9589: ctxt->input->base = ctxt->input->buf->buffer->content + base;
9590: ctxt->input->cur = ctxt->input->base + cur;
9591: #ifdef DEBUG_PUSH
9592: fprintf(stderr, "PP: pushed %d\n", size);
9593: #endif
9594:
1.150 daniel 9595: if ((terminate) || (ctxt->input->buf->buffer->use > 80))
9596: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 9597: } else if (ctxt->instate != XML_PARSER_EOF)
1.143 daniel 9598: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 9599: if (terminate) {
1.151 daniel 9600: /*
9601: * Check for termination
9602: */
1.140 daniel 9603: if ((ctxt->instate != XML_PARSER_EOF) &&
9604: (ctxt->instate != XML_PARSER_EPILOG)) {
9605: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9606: ctxt->sax->error(ctxt->userData,
9607: "Extra content at the end of the document\n");
9608: ctxt->wellFormed = 0;
1.180 daniel 9609: ctxt->disableSAX = 1;
1.140 daniel 9610: ctxt->errNo = XML_ERR_DOCUMENT_END;
9611: }
9612: if (ctxt->instate != XML_PARSER_EOF) {
1.171 daniel 9613: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9614: (!ctxt->disableSAX))
1.140 daniel 9615: ctxt->sax->endDocument(ctxt->userData);
9616: }
9617: ctxt->instate = XML_PARSER_EOF;
1.128 daniel 9618: }
9619: return((xmlParserErrors) ctxt->errNo);
9620: }
9621:
9622: /************************************************************************
9623: * *
1.98 daniel 9624: * I/O front end functions to the parser *
9625: * *
9626: ************************************************************************/
9627:
1.50 daniel 9628: /**
1.181 daniel 9629: * xmlCreatePushParserCtxt:
1.140 daniel 9630: * @sax: a SAX handler
9631: * @user_data: The user data returned on SAX callbacks
9632: * @chunk: a pointer to an array of chars
9633: * @size: number of chars in the array
9634: * @filename: an optional file name or URI
9635: *
9636: * Create a parser context for using the XML parser in push mode
9637: * To allow content encoding detection, @size should be >= 4
9638: * The value of @filename is used for fetching external entities
9639: * and error/warning reports.
9640: *
9641: * Returns the new parser context or NULL
9642: */
9643: xmlParserCtxtPtr
9644: xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9645: const char *chunk, int size, const char *filename) {
9646: xmlParserCtxtPtr ctxt;
9647: xmlParserInputPtr inputStream;
9648: xmlParserInputBufferPtr buf;
9649: xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9650:
9651: /*
1.156 daniel 9652: * plug some encoding conversion routines
1.140 daniel 9653: */
9654: if ((chunk != NULL) && (size >= 4))
1.156 daniel 9655: enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
1.140 daniel 9656:
9657: buf = xmlAllocParserInputBuffer(enc);
9658: if (buf == NULL) return(NULL);
9659:
9660: ctxt = xmlNewParserCtxt();
9661: if (ctxt == NULL) {
9662: xmlFree(buf);
9663: return(NULL);
9664: }
9665: if (sax != NULL) {
9666: if (ctxt->sax != &xmlDefaultSAXHandler)
9667: xmlFree(ctxt->sax);
9668: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9669: if (ctxt->sax == NULL) {
9670: xmlFree(buf);
9671: xmlFree(ctxt);
9672: return(NULL);
9673: }
9674: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9675: if (user_data != NULL)
9676: ctxt->userData = user_data;
9677: }
9678: if (filename == NULL) {
9679: ctxt->directory = NULL;
9680: } else {
9681: ctxt->directory = xmlParserGetDirectory(filename);
9682: }
9683:
9684: inputStream = xmlNewInputStream(ctxt);
9685: if (inputStream == NULL) {
9686: xmlFreeParserCtxt(ctxt);
9687: return(NULL);
9688: }
9689:
9690: if (filename == NULL)
9691: inputStream->filename = NULL;
9692: else
9693: inputStream->filename = xmlMemStrdup(filename);
9694: inputStream->buf = buf;
9695: inputStream->base = inputStream->buf->buffer->content;
9696: inputStream->cur = inputStream->buf->buffer->content;
1.156 daniel 9697: if (enc != XML_CHAR_ENCODING_NONE) {
9698: xmlSwitchEncoding(ctxt, enc);
9699: }
1.140 daniel 9700:
9701: inputPush(ctxt, inputStream);
9702:
9703: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9704: (ctxt->input->buf != NULL)) {
9705: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9706: #ifdef DEBUG_PUSH
9707: fprintf(stderr, "PP: pushed %d\n", size);
9708: #endif
9709: }
1.190 daniel 9710:
9711: return(ctxt);
9712: }
9713:
9714: /**
9715: * xmlCreateIOParserCtxt:
9716: * @sax: a SAX handler
9717: * @user_data: The user data returned on SAX callbacks
9718: * @ioread: an I/O read function
9719: * @ioclose: an I/O close function
9720: * @ioctx: an I/O handler
9721: * @enc: the charset encoding if known
9722: *
9723: * Create a parser context for using the XML parser with an existing
9724: * I/O stream
9725: *
9726: * Returns the new parser context or NULL
9727: */
9728: xmlParserCtxtPtr
9729: xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9730: xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9731: void *ioctx, xmlCharEncoding enc) {
9732: xmlParserCtxtPtr ctxt;
9733: xmlParserInputPtr inputStream;
9734: xmlParserInputBufferPtr buf;
9735:
9736: buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9737: if (buf == NULL) return(NULL);
9738:
9739: ctxt = xmlNewParserCtxt();
9740: if (ctxt == NULL) {
9741: xmlFree(buf);
9742: return(NULL);
9743: }
9744: if (sax != NULL) {
9745: if (ctxt->sax != &xmlDefaultSAXHandler)
9746: xmlFree(ctxt->sax);
9747: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9748: if (ctxt->sax == NULL) {
9749: xmlFree(buf);
9750: xmlFree(ctxt);
9751: return(NULL);
9752: }
9753: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9754: if (user_data != NULL)
9755: ctxt->userData = user_data;
9756: }
9757:
9758: inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9759: if (inputStream == NULL) {
9760: xmlFreeParserCtxt(ctxt);
9761: return(NULL);
9762: }
9763: inputPush(ctxt, inputStream);
1.140 daniel 9764:
9765: return(ctxt);
9766: }
9767:
9768: /**
1.181 daniel 9769: * xmlCreateDocParserCtxt:
1.123 daniel 9770: * @cur: a pointer to an array of xmlChar
1.50 daniel 9771: *
1.192 daniel 9772: * Creates a parser context for an XML in-memory document.
1.69 daniel 9773: *
9774: * Returns the new parser context or NULL
1.16 daniel 9775: */
1.69 daniel 9776: xmlParserCtxtPtr
1.123 daniel 9777: xmlCreateDocParserCtxt(xmlChar *cur) {
1.16 daniel 9778: xmlParserCtxtPtr ctxt;
1.40 daniel 9779: xmlParserInputPtr input;
1.16 daniel 9780:
1.97 daniel 9781: ctxt = xmlNewParserCtxt();
1.16 daniel 9782: if (ctxt == NULL) {
9783: return(NULL);
9784: }
1.96 daniel 9785: input = xmlNewInputStream(ctxt);
1.40 daniel 9786: if (input == NULL) {
1.97 daniel 9787: xmlFreeParserCtxt(ctxt);
1.40 daniel 9788: return(NULL);
9789: }
9790:
9791: input->base = cur;
9792: input->cur = cur;
9793:
9794: inputPush(ctxt, input);
1.69 daniel 9795: return(ctxt);
9796: }
9797:
9798: /**
1.181 daniel 9799: * xmlSAXParseDoc:
1.69 daniel 9800: * @sax: the SAX handler block
1.123 daniel 9801: * @cur: a pointer to an array of xmlChar
1.69 daniel 9802: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9803: * documents
9804: *
9805: * parse an XML in-memory document and build a tree.
9806: * It use the given SAX function block to handle the parsing callback.
9807: * If sax is NULL, fallback to the default DOM tree building routines.
9808: *
9809: * Returns the resulting document tree
9810: */
9811:
9812: xmlDocPtr
1.123 daniel 9813: xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
1.69 daniel 9814: xmlDocPtr ret;
9815: xmlParserCtxtPtr ctxt;
9816:
9817: if (cur == NULL) return(NULL);
1.16 daniel 9818:
9819:
1.69 daniel 9820: ctxt = xmlCreateDocParserCtxt(cur);
9821: if (ctxt == NULL) return(NULL);
1.74 daniel 9822: if (sax != NULL) {
9823: ctxt->sax = sax;
9824: ctxt->userData = NULL;
9825: }
1.69 daniel 9826:
1.16 daniel 9827: xmlParseDocument(ctxt);
1.72 daniel 9828: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 9829: else {
9830: ret = NULL;
1.72 daniel 9831: xmlFreeDoc(ctxt->myDoc);
9832: ctxt->myDoc = NULL;
1.59 daniel 9833: }
1.86 daniel 9834: if (sax != NULL)
9835: ctxt->sax = NULL;
1.69 daniel 9836: xmlFreeParserCtxt(ctxt);
1.16 daniel 9837:
1.1 veillard 9838: return(ret);
9839: }
9840:
1.50 daniel 9841: /**
1.181 daniel 9842: * xmlParseDoc:
1.123 daniel 9843: * @cur: a pointer to an array of xmlChar
1.55 daniel 9844: *
9845: * parse an XML in-memory document and build a tree.
9846: *
1.68 daniel 9847: * Returns the resulting document tree
1.55 daniel 9848: */
9849:
1.69 daniel 9850: xmlDocPtr
1.123 daniel 9851: xmlParseDoc(xmlChar *cur) {
1.59 daniel 9852: return(xmlSAXParseDoc(NULL, cur, 0));
1.76 daniel 9853: }
9854:
9855: /**
1.181 daniel 9856: * xmlSAXParseDTD:
1.76 daniel 9857: * @sax: the SAX handler block
9858: * @ExternalID: a NAME* containing the External ID of the DTD
9859: * @SystemID: a NAME* containing the URL to the DTD
9860: *
9861: * Load and parse an external subset.
9862: *
9863: * Returns the resulting xmlDtdPtr or NULL in case of error.
9864: */
9865:
9866: xmlDtdPtr
1.123 daniel 9867: xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9868: const xmlChar *SystemID) {
1.76 daniel 9869: xmlDtdPtr ret = NULL;
9870: xmlParserCtxtPtr ctxt;
1.83 daniel 9871: xmlParserInputPtr input = NULL;
1.76 daniel 9872: xmlCharEncoding enc;
9873:
9874: if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9875:
1.97 daniel 9876: ctxt = xmlNewParserCtxt();
1.76 daniel 9877: if (ctxt == NULL) {
9878: return(NULL);
9879: }
9880:
9881: /*
9882: * Set-up the SAX context
9883: */
9884: if (ctxt == NULL) return(NULL);
9885: if (sax != NULL) {
1.93 veillard 9886: if (ctxt->sax != NULL)
1.119 daniel 9887: xmlFree(ctxt->sax);
1.76 daniel 9888: ctxt->sax = sax;
9889: ctxt->userData = NULL;
9890: }
9891:
9892: /*
9893: * Ask the Entity resolver to load the damn thing
9894: */
9895:
9896: if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9897: input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9898: if (input == NULL) {
1.86 daniel 9899: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 9900: xmlFreeParserCtxt(ctxt);
9901: return(NULL);
9902: }
9903:
9904: /*
1.156 daniel 9905: * plug some encoding conversion routines here.
1.76 daniel 9906: */
9907: xmlPushInput(ctxt, input);
1.156 daniel 9908: enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
1.76 daniel 9909: xmlSwitchEncoding(ctxt, enc);
9910:
1.95 veillard 9911: if (input->filename == NULL)
1.156 daniel 9912: input->filename = (char *) xmlStrdup(SystemID);
1.76 daniel 9913: input->line = 1;
9914: input->col = 1;
9915: input->base = ctxt->input->cur;
9916: input->cur = ctxt->input->cur;
9917: input->free = NULL;
9918:
9919: /*
9920: * let's parse that entity knowing it's an external subset.
9921: */
1.191 daniel 9922: ctxt->inSubset = 2;
9923: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9924: ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9925: ExternalID, SystemID);
1.79 daniel 9926: xmlParseExternalSubset(ctxt, ExternalID, SystemID);
1.76 daniel 9927:
9928: if (ctxt->myDoc != NULL) {
9929: if (ctxt->wellFormed) {
1.191 daniel 9930: ret = ctxt->myDoc->extSubset;
9931: ctxt->myDoc->extSubset = NULL;
1.76 daniel 9932: } else {
9933: ret = NULL;
9934: }
9935: xmlFreeDoc(ctxt->myDoc);
9936: ctxt->myDoc = NULL;
9937: }
1.86 daniel 9938: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 9939: xmlFreeParserCtxt(ctxt);
9940:
9941: return(ret);
9942: }
9943:
9944: /**
1.181 daniel 9945: * xmlParseDTD:
1.76 daniel 9946: * @ExternalID: a NAME* containing the External ID of the DTD
9947: * @SystemID: a NAME* containing the URL to the DTD
9948: *
9949: * Load and parse an external subset.
9950: *
9951: * Returns the resulting xmlDtdPtr or NULL in case of error.
9952: */
9953:
9954: xmlDtdPtr
1.123 daniel 9955: xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
1.76 daniel 9956: return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
1.59 daniel 9957: }
9958:
9959: /**
1.181 daniel 9960: * xmlSAXParseBalancedChunk:
1.144 daniel 9961: * @ctx: an XML parser context (possibly NULL)
9962: * @sax: the SAX handler bloc (possibly NULL)
9963: * @user_data: The user data returned on SAX callbacks (possibly NULL)
9964: * @input: a parser input stream
9965: * @enc: the encoding
9966: *
9967: * Parse a well-balanced chunk of an XML document
9968: * The user has to provide SAX callback block whose routines will be
9969: * called by the parser
9970: * The allowed sequence for the Well Balanced Chunk is the one defined by
9971: * the content production in the XML grammar:
9972: *
9973: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9974: *
1.176 daniel 9975: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
1.144 daniel 9976: * the error code otherwise
9977: */
9978:
9979: int
9980: xmlSAXParseBalancedChunk(xmlParserCtxtPtr ctx, xmlSAXHandlerPtr sax,
9981: void *user_data, xmlParserInputPtr input,
9982: xmlCharEncoding enc) {
9983: xmlParserCtxtPtr ctxt;
9984: int ret;
9985:
9986: if (input == NULL) return(-1);
9987:
9988: if (ctx != NULL)
9989: ctxt = ctx;
9990: else {
9991: ctxt = xmlNewParserCtxt();
9992: if (ctxt == NULL)
9993: return(-1);
9994: if (sax == NULL)
9995: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9996: }
9997:
9998: /*
9999: * Set-up the SAX context
10000: */
10001: if (sax != NULL) {
10002: if (ctxt->sax != NULL)
10003: xmlFree(ctxt->sax);
10004: ctxt->sax = sax;
10005: ctxt->userData = user_data;
10006: }
10007:
10008: /*
10009: * plug some encoding conversion routines here.
10010: */
10011: xmlPushInput(ctxt, input);
10012: if (enc != XML_CHAR_ENCODING_NONE)
10013: xmlSwitchEncoding(ctxt, enc);
10014:
10015: /*
10016: * let's parse that entity knowing it's an external subset.
10017: */
10018: xmlParseContent(ctxt);
10019: ret = ctxt->errNo;
10020:
10021: if (ctx == NULL) {
10022: if (sax != NULL)
10023: ctxt->sax = NULL;
10024: else
10025: xmlFreeDoc(ctxt->myDoc);
10026: xmlFreeParserCtxt(ctxt);
10027: }
10028: return(ret);
10029: }
10030:
10031: /**
1.181 daniel 10032: * xmlParseExternalEntity:
10033: * @doc: the document the chunk pertains to
10034: * @sax: the SAX handler bloc (possibly NULL)
10035: * @user_data: The user data returned on SAX callbacks (possibly NULL)
1.185 daniel 10036: * @depth: Used for loop detection, use 0
1.181 daniel 10037: * @URL: the URL for the entity to load
10038: * @ID: the System ID for the entity to load
10039: * @list: the return value for the set of parsed nodes
10040: *
10041: * Parse an external general entity
10042: * An external general parsed entity is well-formed if it matches the
10043: * production labeled extParsedEnt.
10044: *
10045: * [78] extParsedEnt ::= TextDecl? content
10046: *
10047: * Returns 0 if the entity is well formed, -1 in case of args problem and
10048: * the parser error code otherwise
10049: */
10050:
10051: int
10052: xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
1.185 daniel 10053: int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
1.181 daniel 10054: xmlParserCtxtPtr ctxt;
10055: xmlDocPtr newDoc;
10056: xmlSAXHandlerPtr oldsax = NULL;
10057: int ret = 0;
10058:
1.185 daniel 10059: if (depth > 40) {
10060: return(XML_ERR_ENTITY_LOOP);
10061: }
10062:
10063:
1.181 daniel 10064:
10065: if (list != NULL)
10066: *list = NULL;
10067: if ((URL == NULL) && (ID == NULL))
10068: return(-1);
10069:
10070:
10071: ctxt = xmlCreateEntityParserCtxt(URL, ID, doc->URL);
10072: if (ctxt == NULL) return(-1);
10073: ctxt->userData = ctxt;
10074: if (sax != NULL) {
10075: oldsax = ctxt->sax;
10076: ctxt->sax = sax;
10077: if (user_data != NULL)
10078: ctxt->userData = user_data;
10079: }
10080: newDoc = xmlNewDoc(BAD_CAST "1.0");
10081: if (newDoc == NULL) {
10082: xmlFreeParserCtxt(ctxt);
10083: return(-1);
10084: }
10085: if (doc != NULL) {
10086: newDoc->intSubset = doc->intSubset;
10087: newDoc->extSubset = doc->extSubset;
10088: }
10089: if (doc->URL != NULL) {
10090: newDoc->URL = xmlStrdup(doc->URL);
10091: }
10092: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10093: if (newDoc->children == NULL) {
10094: if (sax != NULL)
10095: ctxt->sax = oldsax;
10096: xmlFreeParserCtxt(ctxt);
10097: newDoc->intSubset = NULL;
10098: newDoc->extSubset = NULL;
10099: xmlFreeDoc(newDoc);
10100: return(-1);
10101: }
10102: nodePush(ctxt, newDoc->children);
10103: if (doc == NULL) {
10104: ctxt->myDoc = newDoc;
10105: } else {
10106: ctxt->myDoc = doc;
10107: newDoc->children->doc = doc;
10108: }
10109:
10110: /*
10111: * Parse a possible text declaration first
10112: */
10113: GROW;
10114: if ((RAW == '<') && (NXT(1) == '?') &&
10115: (NXT(2) == 'x') && (NXT(3) == 'm') &&
10116: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
10117: xmlParseTextDecl(ctxt);
10118: }
10119:
10120: /*
10121: * Doing validity checking on chunk doesn't make sense
10122: */
10123: ctxt->instate = XML_PARSER_CONTENT;
10124: ctxt->validate = 0;
1.185 daniel 10125: ctxt->depth = depth;
1.181 daniel 10126:
10127: xmlParseContent(ctxt);
10128:
10129: if ((RAW == '<') && (NXT(1) == '/')) {
10130: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10131: ctxt->sax->error(ctxt->userData,
10132: "chunk is not well balanced\n");
10133: ctxt->wellFormed = 0;
10134: ctxt->disableSAX = 1;
10135: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10136: } else if (RAW != 0) {
10137: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10138: ctxt->sax->error(ctxt->userData,
10139: "extra content at the end of well balanced chunk\n");
10140: ctxt->wellFormed = 0;
10141: ctxt->disableSAX = 1;
10142: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10143: }
10144: if (ctxt->node != newDoc->children) {
10145: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10146: ctxt->sax->error(ctxt->userData,
10147: "chunk is not well balanced\n");
10148: ctxt->wellFormed = 0;
10149: ctxt->disableSAX = 1;
10150: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10151: }
10152:
10153: if (!ctxt->wellFormed) {
10154: if (ctxt->errNo == 0)
10155: ret = 1;
10156: else
10157: ret = ctxt->errNo;
10158: } else {
10159: if (list != NULL) {
10160: xmlNodePtr cur;
10161:
10162: /*
10163: * Return the newly created nodeset after unlinking it from
10164: * they pseudo parent.
10165: */
10166: cur = newDoc->children->children;
10167: *list = cur;
10168: while (cur != NULL) {
10169: cur->parent = NULL;
10170: cur = cur->next;
10171: }
10172: newDoc->children->children = NULL;
10173: }
10174: ret = 0;
10175: }
10176: if (sax != NULL)
10177: ctxt->sax = oldsax;
10178: xmlFreeParserCtxt(ctxt);
10179: newDoc->intSubset = NULL;
10180: newDoc->extSubset = NULL;
10181: xmlFreeDoc(newDoc);
10182:
10183: return(ret);
10184: }
10185:
10186: /**
10187: * xmlParseBalancedChunk:
1.176 daniel 10188: * @doc: the document the chunk pertains to
10189: * @sax: the SAX handler bloc (possibly NULL)
10190: * @user_data: The user data returned on SAX callbacks (possibly NULL)
1.185 daniel 10191: * @depth: Used for loop detection, use 0
1.176 daniel 10192: * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10193: * @list: the return value for the set of parsed nodes
10194: *
10195: * Parse a well-balanced chunk of an XML document
10196: * called by the parser
10197: * The allowed sequence for the Well Balanced Chunk is the one defined by
10198: * the content production in the XML grammar:
1.144 daniel 10199: *
1.175 daniel 10200: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10201: *
1.176 daniel 10202: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10203: * the parser error code otherwise
1.144 daniel 10204: */
10205:
1.175 daniel 10206: int
10207: xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
1.185 daniel 10208: void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
1.176 daniel 10209: xmlParserCtxtPtr ctxt;
1.175 daniel 10210: xmlDocPtr newDoc;
1.181 daniel 10211: xmlSAXHandlerPtr oldsax = NULL;
1.175 daniel 10212: int size;
1.176 daniel 10213: int ret = 0;
1.175 daniel 10214:
1.185 daniel 10215: if (depth > 40) {
10216: return(XML_ERR_ENTITY_LOOP);
10217: }
10218:
1.175 daniel 10219:
1.176 daniel 10220: if (list != NULL)
10221: *list = NULL;
10222: if (string == NULL)
10223: return(-1);
10224:
10225: size = xmlStrlen(string);
10226:
1.183 daniel 10227: ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
1.176 daniel 10228: if (ctxt == NULL) return(-1);
10229: ctxt->userData = ctxt;
1.175 daniel 10230: if (sax != NULL) {
1.176 daniel 10231: oldsax = ctxt->sax;
10232: ctxt->sax = sax;
10233: if (user_data != NULL)
10234: ctxt->userData = user_data;
1.175 daniel 10235: }
10236: newDoc = xmlNewDoc(BAD_CAST "1.0");
1.176 daniel 10237: if (newDoc == NULL) {
10238: xmlFreeParserCtxt(ctxt);
10239: return(-1);
10240: }
1.175 daniel 10241: if (doc != NULL) {
10242: newDoc->intSubset = doc->intSubset;
10243: newDoc->extSubset = doc->extSubset;
10244: }
1.176 daniel 10245: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10246: if (newDoc->children == NULL) {
10247: if (sax != NULL)
10248: ctxt->sax = oldsax;
10249: xmlFreeParserCtxt(ctxt);
10250: newDoc->intSubset = NULL;
10251: newDoc->extSubset = NULL;
10252: xmlFreeDoc(newDoc);
10253: return(-1);
10254: }
10255: nodePush(ctxt, newDoc->children);
10256: if (doc == NULL) {
10257: ctxt->myDoc = newDoc;
10258: } else {
10259: ctxt->myDoc = doc;
10260: newDoc->children->doc = doc;
10261: }
10262: ctxt->instate = XML_PARSER_CONTENT;
1.185 daniel 10263: ctxt->depth = depth;
1.176 daniel 10264:
10265: /*
10266: * Doing validity checking on chunk doesn't make sense
10267: */
10268: ctxt->validate = 0;
10269:
1.175 daniel 10270: xmlParseContent(ctxt);
1.176 daniel 10271:
10272: if ((RAW == '<') && (NXT(1) == '/')) {
10273: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10274: ctxt->sax->error(ctxt->userData,
10275: "chunk is not well balanced\n");
10276: ctxt->wellFormed = 0;
1.180 daniel 10277: ctxt->disableSAX = 1;
1.176 daniel 10278: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10279: } else if (RAW != 0) {
10280: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10281: ctxt->sax->error(ctxt->userData,
10282: "extra content at the end of well balanced chunk\n");
10283: ctxt->wellFormed = 0;
1.180 daniel 10284: ctxt->disableSAX = 1;
1.176 daniel 10285: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10286: }
10287: if (ctxt->node != newDoc->children) {
10288: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10289: ctxt->sax->error(ctxt->userData,
10290: "chunk is not well balanced\n");
10291: ctxt->wellFormed = 0;
1.180 daniel 10292: ctxt->disableSAX = 1;
1.176 daniel 10293: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10294: }
1.175 daniel 10295:
1.176 daniel 10296: if (!ctxt->wellFormed) {
10297: if (ctxt->errNo == 0)
10298: ret = 1;
10299: else
10300: ret = ctxt->errNo;
10301: } else {
10302: if (list != NULL) {
10303: xmlNodePtr cur;
1.175 daniel 10304:
1.176 daniel 10305: /*
10306: * Return the newly created nodeset after unlinking it from
10307: * they pseudo parent.
10308: */
10309: cur = newDoc->children->children;
10310: *list = cur;
10311: while (cur != NULL) {
10312: cur->parent = NULL;
10313: cur = cur->next;
10314: }
10315: newDoc->children->children = NULL;
10316: }
10317: ret = 0;
1.175 daniel 10318: }
1.176 daniel 10319: if (sax != NULL)
10320: ctxt->sax = oldsax;
1.175 daniel 10321: xmlFreeParserCtxt(ctxt);
10322: newDoc->intSubset = NULL;
10323: newDoc->extSubset = NULL;
1.176 daniel 10324: xmlFreeDoc(newDoc);
1.175 daniel 10325:
1.176 daniel 10326: return(ret);
1.144 daniel 10327: }
10328:
10329: /**
1.181 daniel 10330: * xmlParseBalancedChunkFile:
1.144 daniel 10331: * @doc: the document the chunk pertains to
10332: *
10333: * Parse a well-balanced chunk of an XML document contained in a file
10334: *
10335: * Returns the resulting list of nodes resulting from the parsing,
10336: * they are not added to @node
10337: */
10338:
10339: xmlNodePtr
10340: xmlParseBalancedChunkFile(xmlDocPtr doc, xmlNodePtr node) {
1.156 daniel 10341: /* TODO !!! */
10342: return(NULL);
1.144 daniel 10343: }
10344:
10345: /**
1.181 daniel 10346: * xmlRecoverDoc:
1.123 daniel 10347: * @cur: a pointer to an array of xmlChar
1.59 daniel 10348: *
10349: * parse an XML in-memory document and build a tree.
10350: * In the case the document is not Well Formed, a tree is built anyway
10351: *
1.68 daniel 10352: * Returns the resulting document tree
1.59 daniel 10353: */
10354:
1.69 daniel 10355: xmlDocPtr
1.123 daniel 10356: xmlRecoverDoc(xmlChar *cur) {
1.59 daniel 10357: return(xmlSAXParseDoc(NULL, cur, 1));
1.55 daniel 10358: }
10359:
10360: /**
1.181 daniel 10361: * xmlCreateEntityParserCtxt:
10362: * @URL: the entity URL
10363: * @ID: the entity PUBLIC ID
10364: * @base: a posible base for the target URI
10365: *
10366: * Create a parser context for an external entity
10367: * Automatic support for ZLIB/Compress compressed document is provided
10368: * by default if found at compile-time.
10369: *
10370: * Returns the new parser context or NULL
10371: */
10372: xmlParserCtxtPtr
10373: xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
10374: const xmlChar *base) {
10375: xmlParserCtxtPtr ctxt;
10376: xmlParserInputPtr inputStream;
10377: char *directory = NULL;
10378:
10379: ctxt = xmlNewParserCtxt();
10380: if (ctxt == NULL) {
10381: return(NULL);
10382: }
10383:
1.182 daniel 10384: inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
1.181 daniel 10385: if (inputStream == NULL) {
10386: xmlFreeParserCtxt(ctxt);
10387: return(NULL);
10388: }
10389:
10390: inputPush(ctxt, inputStream);
10391:
10392: if ((ctxt->directory == NULL) && (directory == NULL))
1.182 daniel 10393: directory = xmlParserGetDirectory((char *)URL);
1.181 daniel 10394: if ((ctxt->directory == NULL) && (directory != NULL))
10395: ctxt->directory = directory;
10396:
10397: return(ctxt);
10398: }
10399:
10400: /**
10401: * xmlCreateFileParserCtxt:
1.50 daniel 10402: * @filename: the filename
10403: *
1.69 daniel 10404: * Create a parser context for a file content.
10405: * Automatic support for ZLIB/Compress compressed document is provided
10406: * by default if found at compile-time.
1.50 daniel 10407: *
1.69 daniel 10408: * Returns the new parser context or NULL
1.9 httpng 10409: */
1.69 daniel 10410: xmlParserCtxtPtr
10411: xmlCreateFileParserCtxt(const char *filename)
10412: {
10413: xmlParserCtxtPtr ctxt;
1.40 daniel 10414: xmlParserInputPtr inputStream;
1.91 daniel 10415: xmlParserInputBufferPtr buf;
1.111 daniel 10416: char *directory = NULL;
1.9 httpng 10417:
1.91 daniel 10418: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
10419: if (buf == NULL) return(NULL);
1.9 httpng 10420:
1.97 daniel 10421: ctxt = xmlNewParserCtxt();
1.16 daniel 10422: if (ctxt == NULL) {
10423: return(NULL);
10424: }
1.97 daniel 10425:
1.96 daniel 10426: inputStream = xmlNewInputStream(ctxt);
1.40 daniel 10427: if (inputStream == NULL) {
1.97 daniel 10428: xmlFreeParserCtxt(ctxt);
1.40 daniel 10429: return(NULL);
10430: }
10431:
1.119 daniel 10432: inputStream->filename = xmlMemStrdup(filename);
1.91 daniel 10433: inputStream->buf = buf;
10434: inputStream->base = inputStream->buf->buffer->content;
10435: inputStream->cur = inputStream->buf->buffer->content;
1.16 daniel 10436:
1.40 daniel 10437: inputPush(ctxt, inputStream);
1.110 daniel 10438: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 10439: directory = xmlParserGetDirectory(filename);
10440: if ((ctxt->directory == NULL) && (directory != NULL))
1.110 daniel 10441: ctxt->directory = directory;
1.106 daniel 10442:
1.69 daniel 10443: return(ctxt);
10444: }
10445:
10446: /**
1.181 daniel 10447: * xmlSAXParseFile:
1.69 daniel 10448: * @sax: the SAX handler block
10449: * @filename: the filename
10450: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10451: * documents
10452: *
10453: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10454: * compressed document is provided by default if found at compile-time.
10455: * It use the given SAX function block to handle the parsing callback.
10456: * If sax is NULL, fallback to the default DOM tree building routines.
10457: *
10458: * Returns the resulting document tree
10459: */
10460:
1.79 daniel 10461: xmlDocPtr
10462: xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
1.69 daniel 10463: int recovery) {
10464: xmlDocPtr ret;
10465: xmlParserCtxtPtr ctxt;
1.111 daniel 10466: char *directory = NULL;
1.69 daniel 10467:
10468: ctxt = xmlCreateFileParserCtxt(filename);
10469: if (ctxt == NULL) return(NULL);
1.74 daniel 10470: if (sax != NULL) {
1.93 veillard 10471: if (ctxt->sax != NULL)
1.119 daniel 10472: xmlFree(ctxt->sax);
1.74 daniel 10473: ctxt->sax = sax;
10474: ctxt->userData = NULL;
10475: }
1.106 daniel 10476:
1.110 daniel 10477: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 10478: directory = xmlParserGetDirectory(filename);
10479: if ((ctxt->directory == NULL) && (directory != NULL))
1.156 daniel 10480: ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
1.16 daniel 10481:
10482: xmlParseDocument(ctxt);
1.40 daniel 10483:
1.72 daniel 10484: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 10485: else {
10486: ret = NULL;
1.72 daniel 10487: xmlFreeDoc(ctxt->myDoc);
10488: ctxt->myDoc = NULL;
1.59 daniel 10489: }
1.86 daniel 10490: if (sax != NULL)
10491: ctxt->sax = NULL;
1.69 daniel 10492: xmlFreeParserCtxt(ctxt);
1.20 daniel 10493:
10494: return(ret);
10495: }
10496:
1.55 daniel 10497: /**
1.181 daniel 10498: * xmlParseFile:
1.55 daniel 10499: * @filename: the filename
10500: *
10501: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10502: * compressed document is provided by default if found at compile-time.
10503: *
1.68 daniel 10504: * Returns the resulting document tree
1.55 daniel 10505: */
10506:
1.79 daniel 10507: xmlDocPtr
10508: xmlParseFile(const char *filename) {
1.59 daniel 10509: return(xmlSAXParseFile(NULL, filename, 0));
10510: }
10511:
10512: /**
1.181 daniel 10513: * xmlRecoverFile:
1.59 daniel 10514: * @filename: the filename
10515: *
10516: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10517: * compressed document is provided by default if found at compile-time.
10518: * In the case the document is not Well Formed, a tree is built anyway
10519: *
1.68 daniel 10520: * Returns the resulting document tree
1.59 daniel 10521: */
10522:
1.79 daniel 10523: xmlDocPtr
10524: xmlRecoverFile(const char *filename) {
1.59 daniel 10525: return(xmlSAXParseFile(NULL, filename, 1));
1.55 daniel 10526: }
1.32 daniel 10527:
1.50 daniel 10528: /**
1.181 daniel 10529: * xmlCreateMemoryParserCtxt:
10530: * @buffer: a pointer to a zero terminated char array
10531: * @size: the size of the array (without the trailing 0)
1.50 daniel 10532: *
1.69 daniel 10533: * Create a parser context for an XML in-memory document.
1.50 daniel 10534: *
1.69 daniel 10535: * Returns the new parser context or NULL
1.20 daniel 10536: */
1.69 daniel 10537: xmlParserCtxtPtr
10538: xmlCreateMemoryParserCtxt(char *buffer, int size) {
1.20 daniel 10539: xmlParserCtxtPtr ctxt;
1.40 daniel 10540: xmlParserInputPtr input;
10541:
1.179 daniel 10542: if (buffer[size] != 0)
1.181 daniel 10543: return(NULL);
1.40 daniel 10544:
1.97 daniel 10545: ctxt = xmlNewParserCtxt();
1.181 daniel 10546: if (ctxt == NULL)
1.20 daniel 10547: return(NULL);
1.97 daniel 10548:
1.96 daniel 10549: input = xmlNewInputStream(ctxt);
1.40 daniel 10550: if (input == NULL) {
1.97 daniel 10551: xmlFreeParserCtxt(ctxt);
1.40 daniel 10552: return(NULL);
10553: }
1.20 daniel 10554:
1.40 daniel 10555: input->filename = NULL;
10556: input->line = 1;
10557: input->col = 1;
1.96 daniel 10558: input->buf = NULL;
1.91 daniel 10559: input->consumed = 0;
1.75 daniel 10560:
1.116 daniel 10561: input->base = BAD_CAST buffer;
10562: input->cur = BAD_CAST buffer;
1.69 daniel 10563: input->free = NULL;
1.20 daniel 10564:
1.40 daniel 10565: inputPush(ctxt, input);
1.69 daniel 10566: return(ctxt);
10567: }
10568:
10569: /**
1.181 daniel 10570: * xmlSAXParseMemory:
1.69 daniel 10571: * @sax: the SAX handler block
10572: * @buffer: an pointer to a char array
1.127 daniel 10573: * @size: the size of the array
10574: * @recovery: work in recovery mode, i.e. tries to read not Well Formed
1.69 daniel 10575: * documents
10576: *
10577: * parse an XML in-memory block and use the given SAX function block
10578: * to handle the parsing callback. If sax is NULL, fallback to the default
10579: * DOM tree building routines.
10580: *
10581: * Returns the resulting document tree
10582: */
10583: xmlDocPtr
10584: xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
10585: xmlDocPtr ret;
10586: xmlParserCtxtPtr ctxt;
10587:
10588: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10589: if (ctxt == NULL) return(NULL);
1.74 daniel 10590: if (sax != NULL) {
10591: ctxt->sax = sax;
10592: ctxt->userData = NULL;
10593: }
1.20 daniel 10594:
10595: xmlParseDocument(ctxt);
1.40 daniel 10596:
1.72 daniel 10597: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 10598: else {
10599: ret = NULL;
1.72 daniel 10600: xmlFreeDoc(ctxt->myDoc);
10601: ctxt->myDoc = NULL;
1.59 daniel 10602: }
1.86 daniel 10603: if (sax != NULL)
10604: ctxt->sax = NULL;
1.69 daniel 10605: xmlFreeParserCtxt(ctxt);
1.16 daniel 10606:
1.9 httpng 10607: return(ret);
1.17 daniel 10608: }
10609:
1.55 daniel 10610: /**
1.181 daniel 10611: * xmlParseMemory:
1.68 daniel 10612: * @buffer: an pointer to a char array
1.55 daniel 10613: * @size: the size of the array
10614: *
10615: * parse an XML in-memory block and build a tree.
10616: *
1.68 daniel 10617: * Returns the resulting document tree
1.55 daniel 10618: */
10619:
10620: xmlDocPtr xmlParseMemory(char *buffer, int size) {
1.59 daniel 10621: return(xmlSAXParseMemory(NULL, buffer, size, 0));
10622: }
10623:
10624: /**
1.181 daniel 10625: * xmlRecoverMemory:
1.68 daniel 10626: * @buffer: an pointer to a char array
1.59 daniel 10627: * @size: the size of the array
10628: *
10629: * parse an XML in-memory block and build a tree.
10630: * In the case the document is not Well Formed, a tree is built anyway
10631: *
1.68 daniel 10632: * Returns the resulting document tree
1.59 daniel 10633: */
10634:
10635: xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
10636: return(xmlSAXParseMemory(NULL, buffer, size, 1));
1.17 daniel 10637: }
10638:
10639:
1.50 daniel 10640: /**
10641: * xmlSetupParserForBuffer:
10642: * @ctxt: an XML parser context
1.123 daniel 10643: * @buffer: a xmlChar * buffer
1.50 daniel 10644: * @filename: a file name
10645: *
1.19 daniel 10646: * Setup the parser context to parse a new buffer; Clears any prior
10647: * contents from the parser context. The buffer parameter must not be
10648: * NULL, but the filename parameter can be
10649: */
1.55 daniel 10650: void
1.123 daniel 10651: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
1.17 daniel 10652: const char* filename)
10653: {
1.96 daniel 10654: xmlParserInputPtr input;
1.40 daniel 10655:
1.96 daniel 10656: input = xmlNewInputStream(ctxt);
10657: if (input == NULL) {
10658: perror("malloc");
1.119 daniel 10659: xmlFree(ctxt);
1.145 daniel 10660: return;
1.96 daniel 10661: }
10662:
10663: xmlClearParserCtxt(ctxt);
10664: if (filename != NULL)
1.119 daniel 10665: input->filename = xmlMemStrdup(filename);
1.96 daniel 10666: input->base = buffer;
10667: input->cur = buffer;
10668: inputPush(ctxt, input);
1.17 daniel 10669: }
10670:
1.123 daniel 10671: /**
10672: * xmlSAXUserParseFile:
10673: * @sax: a SAX handler
10674: * @user_data: The user data returned on SAX callbacks
10675: * @filename: a file name
10676: *
10677: * parse an XML file and call the given SAX handler routines.
10678: * Automatic support for ZLIB/Compress compressed document is provided
10679: *
10680: * Returns 0 in case of success or a error number otherwise
10681: */
1.131 daniel 10682: int
10683: xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10684: const char *filename) {
1.123 daniel 10685: int ret = 0;
10686: xmlParserCtxtPtr ctxt;
10687:
10688: ctxt = xmlCreateFileParserCtxt(filename);
10689: if (ctxt == NULL) return -1;
1.134 daniel 10690: if (ctxt->sax != &xmlDefaultSAXHandler)
10691: xmlFree(ctxt->sax);
1.123 daniel 10692: ctxt->sax = sax;
1.140 daniel 10693: if (user_data != NULL)
10694: ctxt->userData = user_data;
1.123 daniel 10695:
10696: xmlParseDocument(ctxt);
10697:
10698: if (ctxt->wellFormed)
10699: ret = 0;
10700: else {
10701: if (ctxt->errNo != 0)
10702: ret = ctxt->errNo;
10703: else
10704: ret = -1;
10705: }
10706: if (sax != NULL)
10707: ctxt->sax = NULL;
10708: xmlFreeParserCtxt(ctxt);
10709:
10710: return ret;
10711: }
10712:
10713: /**
10714: * xmlSAXUserParseMemory:
10715: * @sax: a SAX handler
10716: * @user_data: The user data returned on SAX callbacks
10717: * @buffer: an in-memory XML document input
1.127 daniel 10718: * @size: the length of the XML document in bytes
1.123 daniel 10719: *
10720: * A better SAX parsing routine.
10721: * parse an XML in-memory buffer and call the given SAX handler routines.
10722: *
10723: * Returns 0 in case of success or a error number otherwise
10724: */
10725: int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
10726: char *buffer, int size) {
10727: int ret = 0;
10728: xmlParserCtxtPtr ctxt;
10729:
10730: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10731: if (ctxt == NULL) return -1;
10732: ctxt->sax = sax;
10733: ctxt->userData = user_data;
10734:
10735: xmlParseDocument(ctxt);
10736:
10737: if (ctxt->wellFormed)
10738: ret = 0;
10739: else {
10740: if (ctxt->errNo != 0)
10741: ret = ctxt->errNo;
10742: else
10743: ret = -1;
10744: }
10745: if (sax != NULL)
10746: ctxt->sax = NULL;
10747: xmlFreeParserCtxt(ctxt);
10748:
10749: return ret;
10750: }
10751:
1.32 daniel 10752:
1.98 daniel 10753: /************************************************************************
10754: * *
1.127 daniel 10755: * Miscellaneous *
1.98 daniel 10756: * *
10757: ************************************************************************/
10758:
/**
 * xmlCleanupParser:
 *
 * Cleanup function for the XML parser. It tries to reclaim all
 * parsing related global memory allocated for the parser processing:
 * here, the character encoding handlers and the predefined entities.
 * It doesn't deallocate any document related memory. Calling this
 * function should not prevent reusing the parser.
 */

void
xmlCleanupParser(void)
{
    /* Encoding handlers first, then the predefined entities. */
    xmlCleanupCharEncodingHandlers();
    xmlCleanupPredefinedEntities();
}
1.98 daniel 10773:
1.50 daniel 10774: /**
10775: * xmlParserFindNodeInfo:
10776: * @ctxt: an XML parser context
10777: * @node: an XML node within the tree
10778: *
10779: * Find the parser node info struct for a given node
10780: *
1.68 daniel 10781: * Returns an xmlParserNodeInfo block pointer or NULL
1.32 daniel 10782: */
10783: const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
10784: const xmlNode* node)
10785: {
10786: unsigned long pos;
10787:
10788: /* Find position where node should be at */
10789: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
10790: if ( ctx->node_seq.buffer[pos].node == node )
10791: return &ctx->node_seq.buffer[pos];
10792: else
10793: return NULL;
10794: }
10795:
10796:
1.50 daniel 10797: /**
1.181 daniel 10798: * xmlInitNodeInfoSeq:
1.50 daniel 10799: * @seq: a node info sequence pointer
10800: *
10801: * -- Initialize (set to initial state) node info sequence
1.32 daniel 10802: */
1.55 daniel 10803: void
10804: xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 10805: {
10806: seq->length = 0;
10807: seq->maximum = 0;
10808: seq->buffer = NULL;
10809: }
10810:
1.50 daniel 10811: /**
1.181 daniel 10812: * xmlClearNodeInfoSeq:
1.50 daniel 10813: * @seq: a node info sequence pointer
10814: *
10815: * -- Clear (release memory and reinitialize) node
1.32 daniel 10816: * info sequence
10817: */
1.55 daniel 10818: void
10819: xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 10820: {
10821: if ( seq->buffer != NULL )
1.119 daniel 10822: xmlFree(seq->buffer);
1.32 daniel 10823: xmlInitNodeInfoSeq(seq);
10824: }
10825:
10826:
1.50 daniel 10827: /**
10828: * xmlParserFindNodeInfoIndex:
10829: * @seq: a node info sequence pointer
10830: * @node: an XML node pointer
10831: *
10832: *
1.32 daniel 10833: * xmlParserFindNodeInfoIndex : Find the index that the info record for
10834: * the given node is or should be at in a sorted sequence
1.68 daniel 10835: *
10836: * Returns a long indicating the position of the record
1.32 daniel 10837: */
10838: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
10839: const xmlNode* node)
10840: {
10841: unsigned long upper, lower, middle;
10842: int found = 0;
10843:
10844: /* Do a binary search for the key */
10845: lower = 1;
10846: upper = seq->length;
10847: middle = 0;
10848: while ( lower <= upper && !found) {
10849: middle = lower + (upper - lower) / 2;
10850: if ( node == seq->buffer[middle - 1].node )
10851: found = 1;
10852: else if ( node < seq->buffer[middle - 1].node )
10853: upper = middle - 1;
10854: else
10855: lower = middle + 1;
10856: }
10857:
10858: /* Return position */
10859: if ( middle == 0 || seq->buffer[middle - 1].node < node )
10860: return middle;
10861: else
10862: return middle - 1;
10863: }
10864:
10865:
1.50 daniel 10866: /**
10867: * xmlParserAddNodeInfo:
10868: * @ctxt: an XML parser context
1.68 daniel 10869: * @info: a node info sequence pointer
1.50 daniel 10870: *
10871: * Insert node info record into the sorted sequence
1.32 daniel 10872: */
1.55 daniel 10873: void
10874: xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
1.68 daniel 10875: const xmlParserNodeInfo* info)
1.32 daniel 10876: {
10877: unsigned long pos;
10878: static unsigned int block_size = 5;
10879:
10880: /* Find pos and check to see if node is already in the sequence */
1.55 daniel 10881: pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
10882: if ( pos < ctxt->node_seq.length
10883: && ctxt->node_seq.buffer[pos].node == info->node ) {
10884: ctxt->node_seq.buffer[pos] = *info;
1.32 daniel 10885: }
10886:
10887: /* Otherwise, we need to add new node to buffer */
10888: else {
10889: /* Expand buffer by 5 if needed */
1.55 daniel 10890: if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
1.32 daniel 10891: xmlParserNodeInfo* tmp_buffer;
1.55 daniel 10892: unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
10893: *(ctxt->node_seq.maximum + block_size));
1.32 daniel 10894:
1.55 daniel 10895: if ( ctxt->node_seq.buffer == NULL )
1.119 daniel 10896: tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
1.32 daniel 10897: else
1.119 daniel 10898: tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
1.32 daniel 10899:
10900: if ( tmp_buffer == NULL ) {
1.55 daniel 10901: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 10902: ctxt->sax->error(ctxt->userData, "Out of memory\n");
1.123 daniel 10903: ctxt->errNo = XML_ERR_NO_MEMORY;
1.32 daniel 10904: return;
10905: }
1.55 daniel 10906: ctxt->node_seq.buffer = tmp_buffer;
10907: ctxt->node_seq.maximum += block_size;
1.32 daniel 10908: }
10909:
10910: /* If position is not at end, move elements out of the way */
1.55 daniel 10911: if ( pos != ctxt->node_seq.length ) {
1.32 daniel 10912: unsigned long i;
10913:
1.55 daniel 10914: for ( i = ctxt->node_seq.length; i > pos; i-- )
10915: ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
1.32 daniel 10916: }
10917:
10918: /* Copy element and increase length */
1.55 daniel 10919: ctxt->node_seq.buffer[pos] = *info;
10920: ctxt->node_seq.length++;
1.32 daniel 10921: }
10922: }
1.77 daniel 10923:
1.98 daniel 10924:
10925: /**
1.181 daniel 10926: * xmlSubstituteEntitiesDefault:
1.98 daniel 10927: * @val: int 0 or 1
10928: *
10929: * Set and return the previous value for default entity support.
10930: * Initially the parser always keep entity references instead of substituting
10931: * entity values in the output. This function has to be used to change the
10932: * default parser behaviour
10933: * SAX::subtituteEntities() has to be used for changing that on a file by
10934: * file basis.
10935: *
10936: * Returns the last value for 0 for no substitution, 1 for substitution.
10937: */
10938:
10939: int
10940: xmlSubstituteEntitiesDefault(int val) {
10941: int old = xmlSubstituteEntitiesDefaultValue;
10942:
10943: xmlSubstituteEntitiesDefaultValue = val;
1.180 daniel 10944: return(old);
10945: }
10946:
10947: /**
10948: * xmlKeepBlanksDefault:
10949: * @val: int 0 or 1
10950: *
10951: * Set and return the previous value for default blanks text nodes support.
10952: * The 1.x version of the parser used an heuristic to try to detect
10953: * ignorable white spaces. As a result the SAX callback was generating
10954: * ignorableWhitespace() callbacks instead of characters() one, and when
10955: * using the DOM output text nodes containing those blanks were not generated.
10956: * The 2.x and later version will switch to the XML standard way and
10957: * ignorableWhitespace() are only generated when running the parser in
10958: * validating mode and when the current element doesn't allow CDATA or
10959: * mixed content.
10960: * This function is provided as a way to force the standard behaviour
10961: * on 1.X libs and to switch back to the old mode for compatibility when
10962: * running 1.X client code on 2.X . Upgrade of 1.X code should be done
10963: * by using xmlIsBlankNode() commodity function to detect the "empty"
10964: * nodes generated.
10965: * This value also affect autogeneration of indentation when saving code
10966: * if blanks sections are kept, indentation is not generated.
10967: *
10968: * Returns the last value for 0 for no substitution, 1 for substitution.
10969: */
10970:
10971: int
10972: xmlKeepBlanksDefault(int val) {
10973: int old = xmlKeepBlanksDefaultValue;
10974:
10975: xmlKeepBlanksDefaultValue = val;
10976: xmlIndentTreeOutput = !val;
1.98 daniel 10977: return(old);
10978: }
1.77 daniel 10979:
Webmaster