Annotation of XML/parser.c, revision 1.211
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.15 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.60 daniel 6: * Daniel.Veillard@w3.org
1.1 veillard 7: */
8:
1.26 daniel 9: #ifdef WIN32
1.138 daniel 10: #include "win32config.h"
1.26 daniel 11: #else
1.121 daniel 12: #include "config.h"
1.26 daniel 13: #endif
1.121 daniel 14:
1.1 veillard 15: #include <stdio.h>
1.204 veillard 16: #include <string.h>
1.121 daniel 17: #ifdef HAVE_CTYPE_H
1.1 veillard 18: #include <ctype.h>
1.121 daniel 19: #endif
20: #ifdef HAVE_STDLIB_H
1.50 daniel 21: #include <stdlib.h>
1.121 daniel 22: #endif
23: #ifdef HAVE_SYS_STAT_H
1.9 httpng 24: #include <sys/stat.h>
1.121 daniel 25: #endif
1.9 httpng 26: #ifdef HAVE_FCNTL_H
27: #include <fcntl.h>
28: #endif
1.10 httpng 29: #ifdef HAVE_UNISTD_H
30: #include <unistd.h>
31: #endif
1.20 daniel 32: #ifdef HAVE_ZLIB_H
33: #include <zlib.h>
34: #endif
1.1 veillard 35:
1.188 daniel 36: #include <libxml/xmlmemory.h>
37: #include <libxml/tree.h>
38: #include <libxml/parser.h>
39: #include <libxml/entities.h>
40: #include <libxml/encoding.h>
41: #include <libxml/valid.h>
42: #include <libxml/parserInternals.h>
43: #include <libxml/xmlIO.h>
1.193 daniel 44: #include <libxml/uri.h>
1.122 daniel 45: #include "xml-error.h"
1.1 veillard 46:
1.140 daniel 47: #define XML_PARSER_BIG_BUFFER_SIZE 1000
48: #define XML_PARSER_BUFFER_SIZE 100
49:
1.160 daniel 50: int xmlGetWarningsDefaultValue = 1;
1.86 daniel 51:
1.139 daniel 52: /*
53: * List of XML prefixed PI allowed by W3C specs
54: */
55:
56: const char *xmlW3CPIs[] = {
57: "xml-stylesheet",
58: NULL
59: };
1.91 daniel 60:
1.151 daniel 61: void xmlParserHandleReference(xmlParserCtxtPtr ctxt);
62: void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
63: xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
64: const xmlChar **str);
1.200 daniel 65:
66: /*
67: * Version handling
68: */
69: const char *xmlParserVersion = LIBXML_VERSION_STRING;
70:
71: /*
72: * xmlCheckVersion:
73: * @version: the include version number
74: *
75: * check the compiled lib version against the include one.
76: * This can warn or immediately kill the application
77: */
78: void
79: xmlCheckVersion(int version) {
1.202 daniel 80: int myversion = (int) LIBXML_VERSION;
1.200 daniel 81:
82: if ((myversion / 10000) != (version / 10000)) {
83: fprintf(stderr,
84: "Fatal: program compiled against libxml %d using libxml %d\n",
85: (version / 10000), (myversion / 10000));
86: exit(1);
87: }
88: if ((myversion / 100) < (version / 100)) {
89: fprintf(stderr,
90: "Warning: program compiled against libxml %d using older %d\n",
91: (version / 100), (myversion / 100));
92: }
93: }
94:
95:
1.91 daniel 96: /************************************************************************
97: * *
98: * Input handling functions for progressive parsing *
99: * *
100: ************************************************************************/
101:
102: /* #define DEBUG_INPUT */
1.140 daniel 103: /* #define DEBUG_STACK */
104: /* #define DEBUG_PUSH */
105:
1.91 daniel 106:
1.110 daniel 107: #define INPUT_CHUNK 250
108: /* we need to keep enough input to show errors in context */
109: #define LINE_LEN 80
1.91 daniel 110:
111: #ifdef DEBUG_INPUT
112: #define CHECK_BUFFER(in) check_buffer(in)
113:
114: void check_buffer(xmlParserInputPtr in) {
115: if (in->base != in->buf->buffer->content) {
116: fprintf(stderr, "xmlParserInput: base mismatch problem\n");
117: }
118: if (in->cur < in->base) {
119: fprintf(stderr, "xmlParserInput: cur < base problem\n");
120: }
121: if (in->cur > in->base + in->buf->buffer->use) {
122: fprintf(stderr, "xmlParserInput: cur > base + use problem\n");
123: }
124: fprintf(stderr,"buffer %x : content %x, cur %d, use %d, size %d\n",
125: (int) in, (int) in->buf->buffer->content, in->cur - in->base,
126: in->buf->buffer->use, in->buf->buffer->size);
127: }
128:
1.110 daniel 129: #else
130: #define CHECK_BUFFER(in)
131: #endif
132:
1.91 daniel 133:
134: /**
135: * xmlParserInputRead:
136: * @in: an XML parser input
137: * @len: an indicative size for the lookahead
138: *
139: * This function refresh the input for the parser. It doesn't try to
140: * preserve pointers to the input buffer, and discard already read data
141: *
1.123 daniel 142: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
1.91 daniel 143: * end of this entity
144: */
145: int
146: xmlParserInputRead(xmlParserInputPtr in, int len) {
147: int ret;
148: int used;
149: int index;
150:
151: #ifdef DEBUG_INPUT
152: fprintf(stderr, "Read\n");
153: #endif
154: if (in->buf == NULL) return(-1);
155: if (in->base == NULL) return(-1);
156: if (in->cur == NULL) return(-1);
157: if (in->buf->buffer == NULL) return(-1);
158:
159: CHECK_BUFFER(in);
160:
161: used = in->cur - in->buf->buffer->content;
162: ret = xmlBufferShrink(in->buf->buffer, used);
163: if (ret > 0) {
164: in->cur -= ret;
165: in->consumed += ret;
166: }
167: ret = xmlParserInputBufferRead(in->buf, len);
168: if (in->base != in->buf->buffer->content) {
169: /*
170: * the buffer has been realloced
171: */
172: index = in->cur - in->base;
173: in->base = in->buf->buffer->content;
174: in->cur = &in->buf->buffer->content[index];
175: }
176:
177: CHECK_BUFFER(in);
178:
179: return(ret);
180: }
181:
182: /**
183: * xmlParserInputGrow:
184: * @in: an XML parser input
185: * @len: an indicative size for the lookahead
186: *
187: * This function increase the input for the parser. It tries to
188: * preserve pointers to the input buffer, and keep already read data
189: *
1.123 daniel 190: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
1.91 daniel 191: * end of this entity
192: */
193: int
194: xmlParserInputGrow(xmlParserInputPtr in, int len) {
195: int ret;
196: int index;
197:
198: #ifdef DEBUG_INPUT
199: fprintf(stderr, "Grow\n");
200: #endif
201: if (in->buf == NULL) return(-1);
202: if (in->base == NULL) return(-1);
203: if (in->cur == NULL) return(-1);
204: if (in->buf->buffer == NULL) return(-1);
205:
206: CHECK_BUFFER(in);
207:
208: index = in->cur - in->base;
1.202 daniel 209: if (in->buf->buffer->use > (unsigned int) index + INPUT_CHUNK) {
1.91 daniel 210:
211: CHECK_BUFFER(in);
212:
213: return(0);
214: }
1.189 daniel 215: if (in->buf->readcallback != NULL)
1.140 daniel 216: ret = xmlParserInputBufferGrow(in->buf, len);
217: else
218: return(0);
1.135 daniel 219:
220: /*
221: * NOTE : in->base may be a "dandling" i.e. freed pointer in this
222: * block, but we use it really as an integer to do some
223: * pointer arithmetic. Insure will raise it as a bug but in
224: * that specific case, that's not !
225: */
1.91 daniel 226: if (in->base != in->buf->buffer->content) {
227: /*
228: * the buffer has been realloced
229: */
230: index = in->cur - in->base;
231: in->base = in->buf->buffer->content;
232: in->cur = &in->buf->buffer->content[index];
233: }
234:
235: CHECK_BUFFER(in);
236:
237: return(ret);
238: }
239:
240: /**
241: * xmlParserInputShrink:
242: * @in: an XML parser input
243: *
244: * This function removes used input for the parser.
245: */
246: void
247: xmlParserInputShrink(xmlParserInputPtr in) {
248: int used;
249: int ret;
250: int index;
251:
252: #ifdef DEBUG_INPUT
253: fprintf(stderr, "Shrink\n");
254: #endif
255: if (in->buf == NULL) return;
256: if (in->base == NULL) return;
257: if (in->cur == NULL) return;
258: if (in->buf->buffer == NULL) return;
259:
260: CHECK_BUFFER(in);
261:
262: used = in->cur - in->buf->buffer->content;
263: if (used > INPUT_CHUNK) {
1.110 daniel 264: ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1.91 daniel 265: if (ret > 0) {
266: in->cur -= ret;
267: in->consumed += ret;
268: }
269: }
270:
271: CHECK_BUFFER(in);
272:
273: if (in->buf->buffer->use > INPUT_CHUNK) {
274: return;
275: }
276: xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
277: if (in->base != in->buf->buffer->content) {
278: /*
279: * the buffer has been realloced
280: */
281: index = in->cur - in->base;
282: in->base = in->buf->buffer->content;
283: in->cur = &in->buf->buffer->content[index];
284: }
285:
286: CHECK_BUFFER(in);
287: }
288:
1.45 daniel 289: /************************************************************************
290: * *
291: * Parser stacks related functions and macros *
292: * *
293: ************************************************************************/
1.79 daniel 294:
295: int xmlSubstituteEntitiesDefaultValue = 0;
1.100 daniel 296: int xmlDoValidityCheckingDefaultValue = 0;
1.180 daniel 297: int xmlKeepBlanksDefaultValue = 1;
1.135 daniel 298: xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
299: const xmlChar ** str);
1.79 daniel 300:
1.1 veillard 301: /*
1.40 daniel 302: * Generic function for accessing stacks in the Parser Context
1.1 veillard 303: */
304:
1.140 daniel 305: #define PUSH_AND_POP(scope, type, name) \
306: scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
1.31 daniel 307: if (ctxt->name##Nr >= ctxt->name##Max) { \
308: ctxt->name##Max *= 2; \
1.204 veillard 309: ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
1.40 daniel 310: ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
311: if (ctxt->name##Tab == NULL) { \
1.31 daniel 312: fprintf(stderr, "realloc failed !\n"); \
1.145 daniel 313: return(0); \
1.31 daniel 314: } \
315: } \
1.40 daniel 316: ctxt->name##Tab[ctxt->name##Nr] = value; \
317: ctxt->name = value; \
318: return(ctxt->name##Nr++); \
1.31 daniel 319: } \
1.140 daniel 320: scope type name##Pop(xmlParserCtxtPtr ctxt) { \
1.69 daniel 321: type ret; \
1.40 daniel 322: if (ctxt->name##Nr <= 0) return(0); \
323: ctxt->name##Nr--; \
1.50 daniel 324: if (ctxt->name##Nr > 0) \
325: ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
326: else \
327: ctxt->name = NULL; \
1.69 daniel 328: ret = ctxt->name##Tab[ctxt->name##Nr]; \
329: ctxt->name##Tab[ctxt->name##Nr] = 0; \
330: return(ret); \
1.31 daniel 331: } \
332:
1.140 daniel 333: PUSH_AND_POP(extern, xmlParserInputPtr, input)
334: PUSH_AND_POP(extern, xmlNodePtr, node)
335: PUSH_AND_POP(extern, xmlChar*, name)
1.40 daniel 336:
1.176 daniel 337: int spacePush(xmlParserCtxtPtr ctxt, int val) {
338: if (ctxt->spaceNr >= ctxt->spaceMax) {
339: ctxt->spaceMax *= 2;
1.204 veillard 340: ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1.176 daniel 341: ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
342: if (ctxt->spaceTab == NULL) {
343: fprintf(stderr, "realloc failed !\n");
344: return(0);
345: }
346: }
347: ctxt->spaceTab[ctxt->spaceNr] = val;
348: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
349: return(ctxt->spaceNr++);
350: }
351:
352: int spacePop(xmlParserCtxtPtr ctxt) {
353: int ret;
354: if (ctxt->spaceNr <= 0) return(0);
355: ctxt->spaceNr--;
356: if (ctxt->spaceNr > 0)
357: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
358: else
359: ctxt->space = NULL;
360: ret = ctxt->spaceTab[ctxt->spaceNr];
361: ctxt->spaceTab[ctxt->spaceNr] = -1;
362: return(ret);
363: }
364:
1.55 daniel 365: /*
366: * Macros for accessing the content. Those should be used only by the parser,
367: * and not exported.
368: *
369: * Dirty macros, i.e. one need to make assumption on the context to use them
370: *
1.123 daniel 371: * CUR_PTR return the current pointer to the xmlChar to be parsed.
1.151 daniel 372: * To be used with extreme caution since operations consuming
373: * characters may move the input buffer to a different location !
1.123 daniel 374: * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1.152 daniel 375: * in ISO-Latin or UTF-8.
1.151 daniel 376: * This should be used internally by the parser
1.55 daniel 377: * only to compare to ASCII values otherwise it would break when
378: * running with UTF-8 encoding.
1.123 daniel 379: * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only
1.55 daniel 380: * to compare on ASCII based substring.
1.123 daniel 381: * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1.55 daniel 382: * strings within the parser.
383: *
1.77 daniel 384: * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1.55 daniel 385: *
386: * NEXT Skip to the next character, this does the proper decoding
387: * in UTF-8 mode. It also pop-up unfinished entities on the fly.
1.77 daniel 388: * COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
1.155 daniel 389: * CUR_CHAR Return the current char as an int as well as its length.
1.55 daniel 390: */
1.45 daniel 391:
/* RAW: -1 while ctxt->token is non-zero, otherwise the byte at the current input position. */
1.152 daniel 392: #define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
/* CUR: the pending ctxt->token if non-zero, otherwise the byte at the current input position. */
1.97 daniel 393: #define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
/* NXT(val): the byte located val positions after the current one; no bounds check is done here. */
1.55 daniel 394: #define NXT(val) ctxt->input->cur[(val)]
/* CUR_PTR: the current input pointer itself. */
395: #define CUR_PTR ctxt->input->cur
1.154 daniel 396:
/*
 * SKIP(val): advance nbChars and the input pointer by val, then hand a
 * leading '%' or '&' to the reference handlers and pop the input when it
 * is exhausted and cannot be grown.
 * NOTE(review): expands to several statements without a trailing ';' --
 * used as the body of an unbraced if/else only the first statement is
 * conditional.
 */
1.164 daniel 397: #define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val); \
398: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
1.168 daniel 399: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); \
400: if ((*ctxt->input->cur == 0) && \
401: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
402: xmlPopInput(ctxt)
1.164 daniel 403:
/*
 * SHRINK: call xmlParserInputShrink() on the current input, then pop the
 * input if it is exhausted and cannot be grown.  Multi-statement, same
 * caveat as SKIP.
 */
1.97 daniel 404: #define SHRINK xmlParserInputShrink(ctxt->input); \
405: if ((*ctxt->input->cur == 0) && \
406: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
407: xmlPopInput(ctxt)
408:
/*
 * GROW: call xmlParserInputGrow() on the current input, then pop the
 * input if it is still exhausted after a second grow attempt.
 * Multi-statement, same caveat as SKIP.
 */
409: #define GROW xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
410: if ((*ctxt->input->cur == 0) && \
411: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
412: xmlPopInput(ctxt)
1.55 daniel 413:
/* SKIP_BLANKS: statement form of xmlSkipBlankChars(); the expansion already ends with ';'. */
1.155 daniel 414: #define SKIP_BLANKS xmlSkipBlankChars(ctxt);
1.154 daniel 415:
/* NEXT: statement form of xmlNextChar(); the expansion already ends with ';'. */
1.151 daniel 416: #define NEXT xmlNextChar(ctxt);
1.154 daniel 417:
/*
 * NEXTL(l): update line/col for the current byte, clear any pending
 * token, advance the input pointer by l bytes and hand a leading '%' or
 * '&' to the reference handlers.  Multi-statement; l is not
 * parenthesized in the expansion.
 */
1.153 daniel 418: #define NEXTL(l) \
419: if (*(ctxt->input->cur) == '\n') { \
420: ctxt->input->line++; ctxt->input->col = 1; \
421: } else ctxt->input->col++; \
1.154 daniel 422: ctxt->token = 0; ctxt->input->cur += l; \
423: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
424: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
425:
/*
 * CUR_CHAR(l) / CUR_SCHAR(s, l): call xmlCurrentChar() /
 * xmlStringCurrentChar(), storing the char length in l.  Both
 * expansions already end with ';'.
 */
1.152 daniel 426: #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l);
1.162 daniel 427: #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l);
1.154 daniel 428:
/*
 * COPY_BUF(l,b,i,v): append char v to buffer b at index i and advance i:
 * a plain byte store when l == 1, otherwise through xmlCopyChar().
 * Expands to a bare if/else statement; no buffer size check is done here.
 */
1.152 daniel 429: #define COPY_BUF(l,b,i,v) \
430: if (l == 1) b[i++] = (xmlChar) v; \
431: else i += xmlCopyChar(l,&b[i],v);
1.151 daniel 432:
433: /**
434: * xmlNextChar:
435: * @ctxt: the XML parser context
436: *
437: * Skip to the next char input char.
438: */
1.55 daniel 439:
1.151 daniel 440: void
441: xmlNextChar(xmlParserCtxtPtr ctxt) {
1.201 daniel 442: if (ctxt->instate == XML_PARSER_EOF)
443: return;
444:
1.176 daniel 445: /*
446: * TODO: 2.11 End-of-Line Handling
447: * the literal two-character sequence "#xD#xA" or a standalone
448: * literal #xD, an XML processor must pass to the application
449: * the single character #xA.
450: */
1.151 daniel 451: if (ctxt->token != 0) ctxt->token = 0;
1.208 veillard 452: else if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1.151 daniel 453: if ((*ctxt->input->cur == 0) &&
454: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
455: (ctxt->instate != XML_PARSER_COMMENT)) {
456: /*
457: * If we are at the end of the current entity and
458: * the context allows it, we pop consumed entities
459: * automatically.
460: * TODO: the auto closing should be blocked in other cases
461: */
462: xmlPopInput(ctxt);
463: } else {
464: if (*(ctxt->input->cur) == '\n') {
465: ctxt->input->line++; ctxt->input->col = 1;
466: } else ctxt->input->col++;
1.198 daniel 467: if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1.151 daniel 468: /*
469: * We are supposed to handle UTF8, check it's valid
470: * From rfc2044: encoding of the Unicode values on UTF-8:
471: *
472: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
473: * 0000 0000-0000 007F 0xxxxxxx
474: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
475: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
476: *
1.160 daniel 477: * Check for the 0x110000 limit too
1.151 daniel 478: */
479: const unsigned char *cur = ctxt->input->cur;
480: unsigned char c;
1.91 daniel 481:
1.151 daniel 482: c = *cur;
483: if (c & 0x80) {
484: if (cur[1] == 0)
485: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
486: if ((cur[1] & 0xc0) != 0x80)
487: goto encoding_error;
488: if ((c & 0xe0) == 0xe0) {
489: unsigned int val;
490:
491: if (cur[2] == 0)
492: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
493: if ((cur[2] & 0xc0) != 0x80)
494: goto encoding_error;
495: if ((c & 0xf0) == 0xf0) {
496: if (cur[3] == 0)
497: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
498: if (((c & 0xf8) != 0xf0) ||
499: ((cur[3] & 0xc0) != 0x80))
500: goto encoding_error;
501: /* 4-byte code */
502: ctxt->input->cur += 4;
503: val = (cur[0] & 0x7) << 18;
504: val |= (cur[1] & 0x3f) << 12;
505: val |= (cur[2] & 0x3f) << 6;
506: val |= cur[3] & 0x3f;
507: } else {
508: /* 3-byte code */
509: ctxt->input->cur += 3;
510: val = (cur[0] & 0xf) << 12;
511: val |= (cur[1] & 0x3f) << 6;
512: val |= cur[2] & 0x3f;
513: }
514: if (((val > 0xd7ff) && (val < 0xe000)) ||
515: ((val > 0xfffd) && (val < 0x10000)) ||
1.160 daniel 516: (val >= 0x110000)) {
1.151 daniel 517: if ((ctxt->sax != NULL) &&
518: (ctxt->sax->error != NULL))
519: ctxt->sax->error(ctxt->userData,
1.196 daniel 520: "Char 0x%X out of allowed range\n", val);
1.151 daniel 521: ctxt->errNo = XML_ERR_INVALID_ENCODING;
522: ctxt->wellFormed = 0;
1.180 daniel 523: ctxt->disableSAX = 1;
1.151 daniel 524: }
525: } else
526: /* 2-byte code */
527: ctxt->input->cur += 2;
528: } else
529: /* 1-byte code */
530: ctxt->input->cur++;
531: } else {
532: /*
533: * Assume it's a fixed lenght encoding (1) with
534: * a compatibke encoding for the ASCII set, since
535: * XML constructs only use < 128 chars
536: */
537: ctxt->input->cur++;
538: }
539: ctxt->nbChars++;
540: if (*ctxt->input->cur == 0)
541: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
542: }
1.208 veillard 543: } else {
544: ctxt->input->cur++;
545: ctxt->nbChars++;
546: if (*ctxt->input->cur == 0)
547: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1.151 daniel 548: }
1.207 veillard 549: if ((*ctxt->input->cur == '%') && (!ctxt->html))
550: xmlParserHandlePEReference(ctxt);
551: if ((*ctxt->input->cur == '&')&& (!ctxt->html))
552: xmlParserHandleReference(ctxt);
1.168 daniel 553: if ((*ctxt->input->cur == 0) &&
554: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
555: xmlPopInput(ctxt);
1.151 daniel 556: return;
557: encoding_error:
558: /*
559: * If we detect an UTF8 error that probably mean that the
560: * input encoding didn't get properly advertized in the
561: * declaration header. Report the error and switch the encoding
562: * to ISO-Latin-1 (if you don't like this policy, just declare the
563: * encoding !)
564: */
1.198 daniel 565: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1.151 daniel 566: ctxt->sax->error(ctxt->userData,
567: "Input is not proper UTF-8, indicate encoding !\n");
1.198 daniel 568: ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
569: ctxt->input->cur[0], ctxt->input->cur[1],
570: ctxt->input->cur[2], ctxt->input->cur[3]);
571: }
1.151 daniel 572: ctxt->errNo = XML_ERR_INVALID_ENCODING;
573:
1.198 daniel 574: ctxt->charset = XML_CHAR_ENCODING_8859_1;
1.151 daniel 575: ctxt->input->cur++;
576: return;
577: }
1.42 daniel 578:
1.152 daniel 579: /**
580: * xmlCurrentChar:
581: * @ctxt: the XML parser context
582: * @len: pointer to the length of the char read
583: *
584: * The current char value, if using UTF-8 this may actaully span multiple
1.180 daniel 585: * bytes in the input buffer. Implement the end of line normalization:
586: * 2.11 End-of-Line Handling
587: * Wherever an external parsed entity or the literal entity value
588: * of an internal parsed entity contains either the literal two-character
589: * sequence "#xD#xA" or a standalone literal #xD, an XML processor
590: * must pass to the application the single character #xA.
591: * This behavior can conveniently be produced by normalizing all
592: * line breaks to #xA on input, before parsing.)
1.152 daniel 593: *
594: * Returns the current char value and its lenght
595: */
596:
597: int
598: xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1.201 daniel 599: if (ctxt->instate == XML_PARSER_EOF)
600: return(0);
601:
1.152 daniel 602: if (ctxt->token != 0) {
603: *len = 0;
604: return(ctxt->token);
605: }
1.198 daniel 606: if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1.152 daniel 607: /*
608: * We are supposed to handle UTF8, check it's valid
609: * From rfc2044: encoding of the Unicode values on UTF-8:
610: *
611: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
612: * 0000 0000-0000 007F 0xxxxxxx
613: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
614: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
615: *
1.160 daniel 616: * Check for the 0x110000 limit too
1.152 daniel 617: */
618: const unsigned char *cur = ctxt->input->cur;
619: unsigned char c;
620: unsigned int val;
621:
622: c = *cur;
623: if (c & 0x80) {
624: if (cur[1] == 0)
625: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
626: if ((cur[1] & 0xc0) != 0x80)
627: goto encoding_error;
628: if ((c & 0xe0) == 0xe0) {
629:
630: if (cur[2] == 0)
631: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
632: if ((cur[2] & 0xc0) != 0x80)
633: goto encoding_error;
634: if ((c & 0xf0) == 0xf0) {
635: if (cur[3] == 0)
636: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
637: if (((c & 0xf8) != 0xf0) ||
638: ((cur[3] & 0xc0) != 0x80))
639: goto encoding_error;
640: /* 4-byte code */
641: *len = 4;
642: val = (cur[0] & 0x7) << 18;
643: val |= (cur[1] & 0x3f) << 12;
644: val |= (cur[2] & 0x3f) << 6;
645: val |= cur[3] & 0x3f;
646: } else {
647: /* 3-byte code */
648: *len = 3;
649: val = (cur[0] & 0xf) << 12;
650: val |= (cur[1] & 0x3f) << 6;
651: val |= cur[2] & 0x3f;
652: }
653: } else {
654: /* 2-byte code */
655: *len = 2;
656: val = (cur[0] & 0x1f) << 6;
1.168 daniel 657: val |= cur[1] & 0x3f;
1.152 daniel 658: }
659: if (!IS_CHAR(val)) {
660: if ((ctxt->sax != NULL) &&
661: (ctxt->sax->error != NULL))
662: ctxt->sax->error(ctxt->userData,
1.196 daniel 663: "Char 0x%X out of allowed range\n", val);
1.152 daniel 664: ctxt->errNo = XML_ERR_INVALID_ENCODING;
665: ctxt->wellFormed = 0;
1.180 daniel 666: ctxt->disableSAX = 1;
1.152 daniel 667: }
668: return(val);
669: } else {
670: /* 1-byte code */
671: *len = 1;
1.180 daniel 672: if (*ctxt->input->cur == 0xD) {
673: if (ctxt->input->cur[1] == 0xA) {
674: ctxt->nbChars++;
675: ctxt->input->cur++;
676: }
677: return(0xA);
678: }
1.152 daniel 679: return((int) *ctxt->input->cur);
680: }
681: }
682: /*
683: * Assume it's a fixed lenght encoding (1) with
684: * a compatibke encoding for the ASCII set, since
685: * XML constructs only use < 128 chars
686: */
687: *len = 1;
1.180 daniel 688: if (*ctxt->input->cur == 0xD) {
689: if (ctxt->input->cur[1] == 0xA) {
690: ctxt->nbChars++;
691: ctxt->input->cur++;
692: }
693: return(0xA);
694: }
1.152 daniel 695: return((int) *ctxt->input->cur);
696: encoding_error:
697: /*
698: * If we detect an UTF8 error that probably mean that the
699: * input encoding didn't get properly advertized in the
700: * declaration header. Report the error and switch the encoding
701: * to ISO-Latin-1 (if you don't like this policy, just declare the
702: * encoding !)
703: */
1.198 daniel 704: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1.152 daniel 705: ctxt->sax->error(ctxt->userData,
706: "Input is not proper UTF-8, indicate encoding !\n");
1.198 daniel 707: ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
708: ctxt->input->cur[0], ctxt->input->cur[1],
709: ctxt->input->cur[2], ctxt->input->cur[3]);
710: }
1.152 daniel 711: ctxt->errNo = XML_ERR_INVALID_ENCODING;
712:
1.198 daniel 713: ctxt->charset = XML_CHAR_ENCODING_8859_1;
1.152 daniel 714: *len = 1;
715: return((int) *ctxt->input->cur);
716: }
717:
718: /**
1.162 daniel 719: * xmlStringCurrentChar:
720: * @ctxt: the XML parser context
721: * @cur: pointer to the beginning of the char
722: * @len: pointer to the length of the char read
723: *
724: * The current char value, if using UTF-8 this may actaully span multiple
725: * bytes in the input buffer.
726: *
727: * Returns the current char value and its lenght
728: */
729:
730: int
731: xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
1.198 daniel 732: if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1.162 daniel 733: /*
734: * We are supposed to handle UTF8, check it's valid
735: * From rfc2044: encoding of the Unicode values on UTF-8:
736: *
737: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
738: * 0000 0000-0000 007F 0xxxxxxx
739: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
740: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
741: *
742: * Check for the 0x110000 limit too
743: */
744: unsigned char c;
745: unsigned int val;
746:
747: c = *cur;
748: if (c & 0x80) {
749: if ((cur[1] & 0xc0) != 0x80)
750: goto encoding_error;
751: if ((c & 0xe0) == 0xe0) {
752:
753: if ((cur[2] & 0xc0) != 0x80)
754: goto encoding_error;
755: if ((c & 0xf0) == 0xf0) {
756: if (((c & 0xf8) != 0xf0) ||
757: ((cur[3] & 0xc0) != 0x80))
758: goto encoding_error;
759: /* 4-byte code */
760: *len = 4;
761: val = (cur[0] & 0x7) << 18;
762: val |= (cur[1] & 0x3f) << 12;
763: val |= (cur[2] & 0x3f) << 6;
764: val |= cur[3] & 0x3f;
765: } else {
766: /* 3-byte code */
767: *len = 3;
768: val = (cur[0] & 0xf) << 12;
769: val |= (cur[1] & 0x3f) << 6;
770: val |= cur[2] & 0x3f;
771: }
772: } else {
773: /* 2-byte code */
774: *len = 2;
775: val = (cur[0] & 0x1f) << 6;
776: val |= cur[2] & 0x3f;
777: }
778: if (!IS_CHAR(val)) {
779: if ((ctxt->sax != NULL) &&
780: (ctxt->sax->error != NULL))
781: ctxt->sax->error(ctxt->userData,
1.196 daniel 782: "Char 0x%X out of allowed range\n", val);
1.162 daniel 783: ctxt->errNo = XML_ERR_INVALID_ENCODING;
784: ctxt->wellFormed = 0;
1.180 daniel 785: ctxt->disableSAX = 1;
1.162 daniel 786: }
787: return(val);
788: } else {
789: /* 1-byte code */
790: *len = 1;
791: return((int) *cur);
792: }
793: }
794: /*
795: * Assume it's a fixed lenght encoding (1) with
796: * a compatibke encoding for the ASCII set, since
797: * XML constructs only use < 128 chars
798: */
799: *len = 1;
800: return((int) *cur);
801: encoding_error:
802: /*
803: * If we detect an UTF8 error that probably mean that the
804: * input encoding didn't get properly advertized in the
805: * declaration header. Report the error and switch the encoding
806: * to ISO-Latin-1 (if you don't like this policy, just declare the
807: * encoding !)
808: */
1.198 daniel 809: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1.162 daniel 810: ctxt->sax->error(ctxt->userData,
811: "Input is not proper UTF-8, indicate encoding !\n");
1.198 daniel 812: ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
813: ctxt->input->cur[0], ctxt->input->cur[1],
814: ctxt->input->cur[2], ctxt->input->cur[3]);
815: }
1.162 daniel 816: ctxt->errNo = XML_ERR_INVALID_ENCODING;
817:
818: *len = 1;
819: return((int) *cur);
820: }
821:
/**
 * xmlCopyChar:
 * @len:  the length in bytes of the char as read from the input, or 0
 *        if unknown (it is then derived from @val)
 * @out:  pointer to an array of xmlChar with room for the encoded char
 * @val:  the char value
 *
 * Append the char value in the array.  When @len is 1 the value is
 * stored as a single byte; otherwise it is stored as the UTF-8 sequence
 * for @val:
 *
 * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
 * 0000 0000-0000 007F   0xxxxxxx
 * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
 * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
 *
 * Returns the number of xmlChar actually written, which may differ from
 *         @len when @len does not match the shortest encoding of @val
 *         (callers advance their index by this value); 0 if @len is 0
 *         and @val is negative or not below 0x110000.
 */

int
xmlCopyChar(int len, xmlChar *out, int val) {
    xmlChar *start = out;

    if (len == 0) {
        if (val < 0) len = 0;
        else if (val < 0x80) len = 1;
        else if (val < 0x800) len = 2;
        else if (val < 0x10000) len = 3;
        else if (val < 0x110000) len = 4;
        if (len == 0) {
            fprintf(stderr, "Internal error, xmlCopyChar 0x%X out of bound\n",
                    val);
            return(0);
        }
    }
    if (len > 1) {
        int bits;

        /* lead byte; bits is the shift of the first continuation byte */
        if      (val <    0x80) {  *out++=  val;                bits= -6; }
        else if (val <   0x800) {  *out++= (val >>  6) | 0xC0;  bits=  0; }
        else if (val < 0x10000) {  *out++= (val >> 12) | 0xE0;  bits=  6; }
        else                    {  *out++= (val >> 18) | 0xF0;  bits= 12; }

        for ( ; bits >= 0; bits-= 6)
            *out++= ((val >> bits) & 0x3F) | 0x80 ;

        /* report what was really stored, not what the caller guessed */
        return((int) (out - start));
    }
    *out = (xmlChar) val;
    return(1);
}
872:
873: /**
874: * xmlSkipBlankChars:
875: * @ctxt: the XML parser context
876: *
877: * skip all blanks character found at that point in the input streams.
878: * It pops up finished entities in the process if allowable at that point.
879: *
880: * Returns the number of space chars skipped
881: */
882:
883: int
884: xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
885: int cur, res = 0;
886:
887: do {
888: cur = CUR;
889: while (IS_BLANK(cur)) {
890: NEXT;
891: cur = CUR;
892: res++;
893: }
894: while ((cur == 0) && (ctxt->inputNr > 1) &&
895: (ctxt->instate != XML_PARSER_COMMENT)) {
896: xmlPopInput(ctxt);
897: cur = CUR;
898: }
899: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
900: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
901: } while (IS_BLANK(cur));
902: return(res);
1.152 daniel 903: }
904:
1.97 daniel 905: /************************************************************************
906: * *
907: * Commodity functions to handle entities processing *
908: * *
909: ************************************************************************/
1.40 daniel 910:
1.50 daniel 911: /**
912: * xmlPopInput:
913: * @ctxt: an XML parser context
914: *
1.40 daniel 915: * xmlPopInput: the current input pointed by ctxt->input came to an end
916: * pop it and return the next char.
1.45 daniel 917: *
1.123 daniel 918: * Returns the current xmlChar in the parser context
1.40 daniel 919: */
1.123 daniel 920: xmlChar
1.55 daniel 921: xmlPopInput(xmlParserCtxtPtr ctxt) {
1.40 daniel 922: if (ctxt->inputNr == 1) return(0); /* End of main Input */
1.69 daniel 923: xmlFreeInputStream(inputPop(ctxt));
1.97 daniel 924: if ((*ctxt->input->cur == 0) &&
925: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
926: return(xmlPopInput(ctxt));
1.40 daniel 927: return(CUR);
928: }
929:
1.50 daniel 930: /**
931: * xmlPushInput:
932: * @ctxt: an XML parser context
933: * @input: an XML parser input fragment (entity, XML fragment ...).
934: *
1.40 daniel 935: * xmlPushInput: switch to a new input stream which is stacked on top
936: * of the previous one(s).
937: */
1.55 daniel 938: void
939: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1.40 daniel 940: if (input == NULL) return;
941: inputPush(ctxt, input);
1.164 daniel 942: GROW;
1.40 daniel 943: }
944:
1.50 daniel 945: /**
1.69 daniel 946: * xmlFreeInputStream:
1.127 daniel 947: * @input: an xmlParserInputPtr
1.69 daniel 948: *
949: * Free up an input stream.
950: */
951: void
952: xmlFreeInputStream(xmlParserInputPtr input) {
953: if (input == NULL) return;
954:
1.119 daniel 955: if (input->filename != NULL) xmlFree((char *) input->filename);
956: if (input->directory != NULL) xmlFree((char *) input->directory);
1.164 daniel 957: if (input->encoding != NULL) xmlFree((char *) input->encoding);
1.165 daniel 958: if (input->version != NULL) xmlFree((char *) input->version);
1.69 daniel 959: if ((input->free != NULL) && (input->base != NULL))
1.123 daniel 960: input->free((xmlChar *) input->base);
1.93 veillard 961: if (input->buf != NULL)
962: xmlFreeParserInputBuffer(input->buf);
1.69 daniel 963: memset(input, -1, sizeof(xmlParserInput));
1.119 daniel 964: xmlFree(input);
1.69 daniel 965: }
966:
967: /**
1.96 daniel 968: * xmlNewInputStream:
969: * @ctxt: an XML parser context
970: *
971: * Create a new input stream structure
972: * Returns the new input stream or NULL
973: */
974: xmlParserInputPtr
975: xmlNewInputStream(xmlParserCtxtPtr ctxt) {
976: xmlParserInputPtr input;
977:
1.119 daniel 978: input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1.96 daniel 979: if (input == NULL) {
1.190 daniel 980: if (ctxt != NULL) {
981: ctxt->errNo = XML_ERR_NO_MEMORY;
982: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
983: ctxt->sax->error(ctxt->userData,
984: "malloc: couldn't allocate a new input stream\n");
985: ctxt->errNo = XML_ERR_NO_MEMORY;
986: }
1.96 daniel 987: return(NULL);
988: }
1.165 daniel 989: memset(input, 0, sizeof(xmlParserInput));
1.96 daniel 990: input->line = 1;
991: input->col = 1;
1.167 daniel 992: input->standalone = -1;
1.96 daniel 993: return(input);
994: }
995:
996: /**
1.190 daniel 997: * xmlNewIOInputStream:
998: * @ctxt: an XML parser context
999: * @input: an I/O Input
1000: * @enc: the charset encoding if known
1001: *
1002: * Create a new input stream structure encapsulating the @input into
1003: * a stream suitable for the parser.
1004: *
1005: * Returns the new input stream or NULL
1006: */
1007: xmlParserInputPtr
1008: xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1009: xmlCharEncoding enc) {
1010: xmlParserInputPtr inputStream;
1011:
1012: inputStream = xmlNewInputStream(ctxt);
1013: if (inputStream == NULL) {
1014: return(NULL);
1015: }
1016: inputStream->filename = NULL;
1017: inputStream->buf = input;
1018: inputStream->base = inputStream->buf->buffer->content;
1019: inputStream->cur = inputStream->buf->buffer->content;
1020: if (enc != XML_CHAR_ENCODING_NONE) {
1021: xmlSwitchEncoding(ctxt, enc);
1022: }
1023:
1024: return(inputStream);
1025: }
1026:
1027: /**
1.50 daniel 1028: * xmlNewEntityInputStream:
1029: * @ctxt: an XML parser context
1030: * @entity: an Entity pointer
1031: *
1.82 daniel 1032: * Create a new input stream based on an xmlEntityPtr
1.113 daniel 1033: *
1034: * Returns the new input stream or NULL
1.45 daniel 1035: */
1.50 daniel 1036: xmlParserInputPtr
1037: xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 1038: xmlParserInputPtr input;
1039:
1040: if (entity == NULL) {
1.123 daniel 1041: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 1042: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1043: ctxt->sax->error(ctxt->userData,
1.45 daniel 1044: "internal: xmlNewEntityInputStream entity = NULL\n");
1.123 daniel 1045: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.50 daniel 1046: return(NULL);
1.45 daniel 1047: }
1048: if (entity->content == NULL) {
1.159 daniel 1049: switch (entity->etype) {
1.113 daniel 1050: case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1.123 daniel 1051: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.113 daniel 1052: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1053: ctxt->sax->error(ctxt->userData,
1054: "xmlNewEntityInputStream unparsed entity !\n");
1055: break;
1056: case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1057: case XML_EXTERNAL_PARAMETER_ENTITY:
1.116 daniel 1058: return(xmlLoadExternalEntity((char *) entity->SystemID,
1.142 daniel 1059: (char *) entity->ExternalID, ctxt));
1.113 daniel 1060: case XML_INTERNAL_GENERAL_ENTITY:
1061: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1062: ctxt->sax->error(ctxt->userData,
1063: "Internal entity %s without content !\n", entity->name);
1064: break;
1065: case XML_INTERNAL_PARAMETER_ENTITY:
1.123 daniel 1066: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.113 daniel 1067: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1068: ctxt->sax->error(ctxt->userData,
1069: "Internal parameter entity %s without content !\n", entity->name);
1070: break;
1071: case XML_INTERNAL_PREDEFINED_ENTITY:
1.123 daniel 1072: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.113 daniel 1073: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1074: ctxt->sax->error(ctxt->userData,
1075: "Predefined entity %s without content !\n", entity->name);
1076: break;
1077: }
1.50 daniel 1078: return(NULL);
1.45 daniel 1079: }
1.96 daniel 1080: input = xmlNewInputStream(ctxt);
1.45 daniel 1081: if (input == NULL) {
1.50 daniel 1082: return(NULL);
1.45 daniel 1083: }
1.156 daniel 1084: input->filename = (char *) entity->SystemID;
1.45 daniel 1085: input->base = entity->content;
1086: input->cur = entity->content;
1.140 daniel 1087: input->length = entity->length;
1.50 daniel 1088: return(input);
1.45 daniel 1089: }
1090:
1.59 daniel 1091: /**
1092: * xmlNewStringInputStream:
1093: * @ctxt: an XML parser context
1.96 daniel 1094: * @buffer: an memory buffer
1.59 daniel 1095: *
1096: * Create a new input stream based on a memory buffer.
1.68 daniel 1097: * Returns the new input stream
1.59 daniel 1098: */
1099: xmlParserInputPtr
1.123 daniel 1100: xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1.59 daniel 1101: xmlParserInputPtr input;
1102:
1.96 daniel 1103: if (buffer == NULL) {
1.123 daniel 1104: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 1105: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1106: ctxt->sax->error(ctxt->userData,
1.59 daniel 1107: "internal: xmlNewStringInputStream string = NULL\n");
1108: return(NULL);
1109: }
1.96 daniel 1110: input = xmlNewInputStream(ctxt);
1.59 daniel 1111: if (input == NULL) {
1112: return(NULL);
1113: }
1.96 daniel 1114: input->base = buffer;
1115: input->cur = buffer;
1.140 daniel 1116: input->length = xmlStrlen(buffer);
1.59 daniel 1117: return(input);
1118: }
1119:
1.76 daniel 1120: /**
1121: * xmlNewInputFromFile:
1122: * @ctxt: an XML parser context
1123: * @filename: the filename to use as entity
1124: *
1125: * Create a new input stream based on a file.
1126: *
1127: * Returns the new input stream or NULL in case of error
1128: */
1129: xmlParserInputPtr
1.79 daniel 1130: xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1.91 daniel 1131: xmlParserInputBufferPtr buf;
1.76 daniel 1132: xmlParserInputPtr inputStream;
1.111 daniel 1133: char *directory = NULL;
1.76 daniel 1134:
1.96 daniel 1135: if (ctxt == NULL) return(NULL);
1.91 daniel 1136: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1.94 daniel 1137: if (buf == NULL) {
1.140 daniel 1138: char name[XML_PARSER_BIG_BUFFER_SIZE];
1.106 daniel 1139:
1.94 daniel 1140: if ((ctxt->input != NULL) && (ctxt->input->directory != NULL)) {
1141: #ifdef WIN32
1142: sprintf(name, "%s\\%s", ctxt->input->directory, filename);
1143: #else
1144: sprintf(name, "%s/%s", ctxt->input->directory, filename);
1145: #endif
1146: buf = xmlParserInputBufferCreateFilename(name,
1147: XML_CHAR_ENCODING_NONE);
1.106 daniel 1148: if (buf != NULL)
1.142 daniel 1149: directory = xmlParserGetDirectory(name);
1.106 daniel 1150: }
1151: if ((buf == NULL) && (ctxt->directory != NULL)) {
1152: #ifdef WIN32
1153: sprintf(name, "%s\\%s", ctxt->directory, filename);
1154: #else
1155: sprintf(name, "%s/%s", ctxt->directory, filename);
1156: #endif
1157: buf = xmlParserInputBufferCreateFilename(name,
1158: XML_CHAR_ENCODING_NONE);
1159: if (buf != NULL)
1.142 daniel 1160: directory = xmlParserGetDirectory(name);
1.106 daniel 1161: }
1162: if (buf == NULL)
1.94 daniel 1163: return(NULL);
1164: }
1165: if (directory == NULL)
1166: directory = xmlParserGetDirectory(filename);
1.76 daniel 1167:
1.96 daniel 1168: inputStream = xmlNewInputStream(ctxt);
1.76 daniel 1169: if (inputStream == NULL) {
1.119 daniel 1170: if (directory != NULL) xmlFree((char *) directory);
1.76 daniel 1171: return(NULL);
1172: }
1173:
1.119 daniel 1174: inputStream->filename = xmlMemStrdup(filename);
1.94 daniel 1175: inputStream->directory = directory;
1.91 daniel 1176: inputStream->buf = buf;
1.76 daniel 1177:
1.91 daniel 1178: inputStream->base = inputStream->buf->buffer->content;
1179: inputStream->cur = inputStream->buf->buffer->content;
1.106 daniel 1180: if ((ctxt->directory == NULL) && (directory != NULL))
1.134 daniel 1181: ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
1.76 daniel 1182: return(inputStream);
1183: }
1184:
1.77 daniel 1185: /************************************************************************
1186: * *
1.97 daniel 1187: * Commodity functions to handle parser contexts *
1188: * *
1189: ************************************************************************/
1190:
1191: /**
1192: * xmlInitParserCtxt:
1193: * @ctxt: an XML parser context
1194: *
1195: * Initialize a parser context
1196: */
1197:
1198: void
1199: xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1200: {
1201: xmlSAXHandler *sax;
1202:
1.168 daniel 1203: xmlDefaultSAXHandlerInit();
1204:
1.119 daniel 1205: sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
1.97 daniel 1206: if (sax == NULL) {
1207: fprintf(stderr, "xmlInitParserCtxt: out of memory\n");
1208: }
1.180 daniel 1209: memset(sax, 0, sizeof(xmlSAXHandler));
1.97 daniel 1210:
1211: /* Allocate the Input stack */
1.119 daniel 1212: ctxt->inputTab = (xmlParserInputPtr *) xmlMalloc(5 * sizeof(xmlParserInputPtr));
1.97 daniel 1213: ctxt->inputNr = 0;
1214: ctxt->inputMax = 5;
1215: ctxt->input = NULL;
1.165 daniel 1216:
1.97 daniel 1217: ctxt->version = NULL;
1218: ctxt->encoding = NULL;
1219: ctxt->standalone = -1;
1.98 daniel 1220: ctxt->hasExternalSubset = 0;
1221: ctxt->hasPErefs = 0;
1.97 daniel 1222: ctxt->html = 0;
1.98 daniel 1223: ctxt->external = 0;
1.140 daniel 1224: ctxt->instate = XML_PARSER_START;
1.97 daniel 1225: ctxt->token = 0;
1.106 daniel 1226: ctxt->directory = NULL;
1.97 daniel 1227:
1228: /* Allocate the Node stack */
1.119 daniel 1229: ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
1.97 daniel 1230: ctxt->nodeNr = 0;
1231: ctxt->nodeMax = 10;
1232: ctxt->node = NULL;
1233:
1.140 daniel 1234: /* Allocate the Name stack */
1235: ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
1236: ctxt->nameNr = 0;
1237: ctxt->nameMax = 10;
1238: ctxt->name = NULL;
1239:
1.176 daniel 1240: /* Allocate the space stack */
1241: ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
1242: ctxt->spaceNr = 1;
1243: ctxt->spaceMax = 10;
1244: ctxt->spaceTab[0] = -1;
1245: ctxt->space = &ctxt->spaceTab[0];
1246:
1.160 daniel 1247: if (sax == NULL) {
1248: ctxt->sax = &xmlDefaultSAXHandler;
1249: } else {
1.97 daniel 1250: ctxt->sax = sax;
1251: memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
1252: }
1253: ctxt->userData = ctxt;
1254: ctxt->myDoc = NULL;
1255: ctxt->wellFormed = 1;
1.99 daniel 1256: ctxt->valid = 1;
1.100 daniel 1257: ctxt->validate = xmlDoValidityCheckingDefaultValue;
1.179 daniel 1258: ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
1.100 daniel 1259: ctxt->vctxt.userData = ctxt;
1.149 daniel 1260: if (ctxt->validate) {
1261: ctxt->vctxt.error = xmlParserValidityError;
1.160 daniel 1262: if (xmlGetWarningsDefaultValue == 0)
1263: ctxt->vctxt.warning = NULL;
1264: else
1265: ctxt->vctxt.warning = xmlParserValidityWarning;
1.180 daniel 1266: /* Allocate the Node stack */
1267: ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
1268: ctxt->vctxt.nodeNr = 0;
1269: ctxt->vctxt.nodeMax = 4;
1270: ctxt->vctxt.node = NULL;
1.149 daniel 1271: } else {
1272: ctxt->vctxt.error = NULL;
1273: ctxt->vctxt.warning = NULL;
1274: }
1.97 daniel 1275: ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1276: ctxt->record_info = 0;
1.135 daniel 1277: ctxt->nbChars = 0;
1.140 daniel 1278: ctxt->checkIndex = 0;
1.180 daniel 1279: ctxt->inSubset = 0;
1.140 daniel 1280: ctxt->errNo = XML_ERR_OK;
1.185 daniel 1281: ctxt->depth = 0;
1.198 daniel 1282: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.97 daniel 1283: xmlInitNodeInfoSeq(&ctxt->node_seq);
1284: }
1285:
1286: /**
1287: * xmlFreeParserCtxt:
1288: * @ctxt: an XML parser context
1289: *
1290: * Free all the memory used by a parser context. However the parsed
1291: * document in ctxt->myDoc is not freed.
1292: */
1293:
1294: void
1295: xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
1296: {
1297: xmlParserInputPtr input;
1.140 daniel 1298: xmlChar *oldname;
1.97 daniel 1299:
1300: if (ctxt == NULL) return;
1301:
1302: while ((input = inputPop(ctxt)) != NULL) {
1303: xmlFreeInputStream(input);
1304: }
1.140 daniel 1305: while ((oldname = namePop(ctxt)) != NULL) {
1306: xmlFree(oldname);
1307: }
1.176 daniel 1308: if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
1.140 daniel 1309: if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
1.119 daniel 1310: if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
1311: if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
1312: if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
1313: if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
1.165 daniel 1314: if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
1315: if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
1316: if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
1.180 daniel 1317: if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
1.97 daniel 1318: if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
1.119 daniel 1319: xmlFree(ctxt->sax);
1320: if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
1321: xmlFree(ctxt);
1.97 daniel 1322: }
1323:
1324: /**
1325: * xmlNewParserCtxt:
1326: *
1327: * Allocate and initialize a new parser context.
1328: *
1329: * Returns the xmlParserCtxtPtr or NULL
1330: */
1331:
1332: xmlParserCtxtPtr
1333: xmlNewParserCtxt()
1334: {
1335: xmlParserCtxtPtr ctxt;
1336:
1.119 daniel 1337: ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
1.97 daniel 1338: if (ctxt == NULL) {
1339: fprintf(stderr, "xmlNewParserCtxt : cannot allocate context\n");
1340: perror("malloc");
1341: return(NULL);
1342: }
1.165 daniel 1343: memset(ctxt, 0, sizeof(xmlParserCtxt));
1.97 daniel 1344: xmlInitParserCtxt(ctxt);
1345: return(ctxt);
1346: }
1347:
1348: /**
1349: * xmlClearParserCtxt:
1350: * @ctxt: an XML parser context
1351: *
1352: * Clear (release owned resources) and reinitialize a parser context
1353: */
1354:
1355: void
1356: xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1357: {
1358: xmlClearNodeInfoSeq(&ctxt->node_seq);
1359: xmlInitParserCtxt(ctxt);
1360: }
1361:
1362: /************************************************************************
1363: * *
1.77 daniel 1364: * Commodity functions to handle entities *
1365: * *
1366: ************************************************************************/
1367:
1.174 daniel 1368: /**
1369: * xmlCheckEntity:
1370: * @ctxt: an XML parser context
1371: * @content: the entity content string
1372: *
1373: * Parse an entity content and checks the WF constraints
1374: *
1375: */
1376:
1377: void
1378: xmlCheckEntity(xmlParserCtxtPtr ctxt, const xmlChar *content) {
1379: }
1.97 daniel 1380:
1381: /**
1382: * xmlParseCharRef:
1383: * @ctxt: an XML parser context
1384: *
1385: * parse Reference declarations
1386: *
1387: * [66] CharRef ::= '&#' [0-9]+ ';' |
1388: * '&#x' [0-9a-fA-F]+ ';'
1389: *
1.98 daniel 1390: * [ WFC: Legal Character ]
1391: * Characters referred to using character references must match the
1392: * production for Char.
1393: *
1.135 daniel 1394: * Returns the value parsed (as an int), 0 in case of error
1.77 daniel 1395: */
1.97 daniel 1396: int
1397: xmlParseCharRef(xmlParserCtxtPtr ctxt) {
1398: int val = 0;
1399:
1.111 daniel 1400: if (ctxt->token != 0) {
1401: val = ctxt->token;
1402: ctxt->token = 0;
1403: return(val);
1404: }
1.152 daniel 1405: if ((RAW == '&') && (NXT(1) == '#') &&
1.97 daniel 1406: (NXT(2) == 'x')) {
1407: SKIP(3);
1.152 daniel 1408: while (RAW != ';') {
1409: if ((RAW >= '0') && (RAW <= '9'))
1.97 daniel 1410: val = val * 16 + (CUR - '0');
1.152 daniel 1411: else if ((RAW >= 'a') && (RAW <= 'f'))
1.97 daniel 1412: val = val * 16 + (CUR - 'a') + 10;
1.152 daniel 1413: else if ((RAW >= 'A') && (RAW <= 'F'))
1.97 daniel 1414: val = val * 16 + (CUR - 'A') + 10;
1415: else {
1.123 daniel 1416: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1.97 daniel 1417: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1418: ctxt->sax->error(ctxt->userData,
1419: "xmlParseCharRef: invalid hexadecimal value\n");
1420: ctxt->wellFormed = 0;
1.180 daniel 1421: ctxt->disableSAX = 1;
1.97 daniel 1422: val = 0;
1423: break;
1424: }
1425: NEXT;
1426: }
1.164 daniel 1427: if (RAW == ';') {
1428: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1429: ctxt->nbChars ++;
1430: ctxt->input->cur++;
1431: }
1.152 daniel 1432: } else if ((RAW == '&') && (NXT(1) == '#')) {
1.97 daniel 1433: SKIP(2);
1.152 daniel 1434: while (RAW != ';') {
1435: if ((RAW >= '0') && (RAW <= '9'))
1.97 daniel 1436: val = val * 10 + (CUR - '0');
1437: else {
1.123 daniel 1438: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1.97 daniel 1439: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1440: ctxt->sax->error(ctxt->userData,
1441: "xmlParseCharRef: invalid decimal value\n");
1442: ctxt->wellFormed = 0;
1.180 daniel 1443: ctxt->disableSAX = 1;
1.97 daniel 1444: val = 0;
1445: break;
1446: }
1447: NEXT;
1448: }
1.164 daniel 1449: if (RAW == ';') {
1450: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1451: ctxt->nbChars ++;
1452: ctxt->input->cur++;
1453: }
1.97 daniel 1454: } else {
1.123 daniel 1455: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1.97 daniel 1456: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 1457: ctxt->sax->error(ctxt->userData,
1458: "xmlParseCharRef: invalid value\n");
1.97 daniel 1459: ctxt->wellFormed = 0;
1.180 daniel 1460: ctxt->disableSAX = 1;
1.97 daniel 1461: }
1.98 daniel 1462:
1.97 daniel 1463: /*
1.98 daniel 1464: * [ WFC: Legal Character ]
1465: * Characters referred to using character references must match the
1466: * production for Char.
1.97 daniel 1467: */
1468: if (IS_CHAR(val)) {
1469: return(val);
1470: } else {
1.123 daniel 1471: ctxt->errNo = XML_ERR_INVALID_CHAR;
1.97 daniel 1472: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.123 daniel 1473: ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
1.97 daniel 1474: val);
1475: ctxt->wellFormed = 0;
1.180 daniel 1476: ctxt->disableSAX = 1;
1.97 daniel 1477: }
1478: return(0);
1.77 daniel 1479: }
1480:
1.96 daniel 1481: /**
1.135 daniel 1482: * xmlParseStringCharRef:
1483: * @ctxt: an XML parser context
1484: * @str: a pointer to an index in the string
1485: *
1486: * parse Reference declarations, variant parsing from a string rather
1487: * than an an input flow.
1488: *
1489: * [66] CharRef ::= '&#' [0-9]+ ';' |
1490: * '&#x' [0-9a-fA-F]+ ';'
1491: *
1492: * [ WFC: Legal Character ]
1493: * Characters referred to using character references must match the
1494: * production for Char.
1495: *
1496: * Returns the value parsed (as an int), 0 in case of error, str will be
1497: * updated to the current value of the index
1498: */
1499: int
1500: xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1501: const xmlChar *ptr;
1502: xmlChar cur;
1503: int val = 0;
1504:
1505: if ((str == NULL) || (*str == NULL)) return(0);
1506: ptr = *str;
1507: cur = *ptr;
1.137 daniel 1508: if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1.135 daniel 1509: ptr += 3;
1510: cur = *ptr;
1511: while (cur != ';') {
1512: if ((cur >= '0') && (cur <= '9'))
1513: val = val * 16 + (cur - '0');
1514: else if ((cur >= 'a') && (cur <= 'f'))
1515: val = val * 16 + (cur - 'a') + 10;
1516: else if ((cur >= 'A') && (cur <= 'F'))
1517: val = val * 16 + (cur - 'A') + 10;
1518: else {
1519: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1520: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1521: ctxt->sax->error(ctxt->userData,
1.198 daniel 1522: "xmlParseStringCharRef: invalid hexadecimal value\n");
1.135 daniel 1523: ctxt->wellFormed = 0;
1.180 daniel 1524: ctxt->disableSAX = 1;
1.135 daniel 1525: val = 0;
1526: break;
1527: }
1528: ptr++;
1529: cur = *ptr;
1530: }
1531: if (cur == ';')
1532: ptr++;
1.145 daniel 1533: } else if ((cur == '&') && (ptr[1] == '#')){
1.135 daniel 1534: ptr += 2;
1535: cur = *ptr;
1536: while (cur != ';') {
1537: if ((cur >= '0') && (cur <= '9'))
1538: val = val * 10 + (cur - '0');
1539: else {
1540: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1541: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1542: ctxt->sax->error(ctxt->userData,
1.198 daniel 1543: "xmlParseStringCharRef: invalid decimal value\n");
1.135 daniel 1544: ctxt->wellFormed = 0;
1.180 daniel 1545: ctxt->disableSAX = 1;
1.135 daniel 1546: val = 0;
1547: break;
1548: }
1549: ptr++;
1550: cur = *ptr;
1551: }
1552: if (cur == ';')
1553: ptr++;
1554: } else {
1555: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1556: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1557: ctxt->sax->error(ctxt->userData,
1558: "xmlParseCharRef: invalid value\n");
1559: ctxt->wellFormed = 0;
1.180 daniel 1560: ctxt->disableSAX = 1;
1.135 daniel 1561: return(0);
1562: }
1563: *str = ptr;
1564:
1565: /*
1566: * [ WFC: Legal Character ]
1567: * Characters referred to using character references must match the
1568: * production for Char.
1569: */
1570: if (IS_CHAR(val)) {
1571: return(val);
1572: } else {
1573: ctxt->errNo = XML_ERR_INVALID_CHAR;
1574: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1575: ctxt->sax->error(ctxt->userData,
1576: "CharRef: invalid xmlChar value %d\n", val);
1577: ctxt->wellFormed = 0;
1.180 daniel 1578: ctxt->disableSAX = 1;
1.135 daniel 1579: }
1580: return(0);
1581: }
1582:
1583: /**
1.96 daniel 1584: * xmlParserHandleReference:
1585: * @ctxt: the parser context
1586: *
1.97 daniel 1587: * [67] Reference ::= EntityRef | CharRef
1588: *
1.96 daniel 1589: * [68] EntityRef ::= '&' Name ';'
1590: *
1.98 daniel 1591: * [ WFC: Entity Declared ]
1592: * the Name given in the entity reference must match that in an entity
1593: * declaration, except that well-formed documents need not declare any
1594: * of the following entities: amp, lt, gt, apos, quot.
1595: *
1596: * [ WFC: Parsed Entity ]
1597: * An entity reference must not contain the name of an unparsed entity
1598: *
1.97 daniel 1599: * [66] CharRef ::= '&#' [0-9]+ ';' |
1600: * '&#x' [0-9a-fA-F]+ ';'
1601: *
1.96 daniel 1602: * A PEReference may have been detectect in the current input stream
1603: * the handling is done accordingly to
1604: * http://www.w3.org/TR/REC-xml#entproc
1605: */
1606: void
1607: xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
1.97 daniel 1608: xmlParserInputPtr input;
1.123 daniel 1609: xmlChar *name;
1.97 daniel 1610: xmlEntityPtr ent = NULL;
1611:
1.126 daniel 1612: if (ctxt->token != 0) {
1613: return;
1614: }
1.152 daniel 1615: if (RAW != '&') return;
1.97 daniel 1616: GROW;
1.152 daniel 1617: if ((RAW == '&') && (NXT(1) == '#')) {
1.97 daniel 1618: switch(ctxt->instate) {
1.140 daniel 1619: case XML_PARSER_ENTITY_DECL:
1620: case XML_PARSER_PI:
1.109 daniel 1621: case XML_PARSER_CDATA_SECTION:
1.140 daniel 1622: case XML_PARSER_COMMENT:
1.168 daniel 1623: case XML_PARSER_SYSTEM_LITERAL:
1.140 daniel 1624: /* we just ignore it there */
1625: return;
1626: case XML_PARSER_START_TAG:
1.109 daniel 1627: return;
1.140 daniel 1628: case XML_PARSER_END_TAG:
1.97 daniel 1629: return;
1630: case XML_PARSER_EOF:
1.123 daniel 1631: ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
1.97 daniel 1632: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1633: ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
1634: ctxt->wellFormed = 0;
1.180 daniel 1635: ctxt->disableSAX = 1;
1.97 daniel 1636: return;
1637: case XML_PARSER_PROLOG:
1.140 daniel 1638: case XML_PARSER_START:
1639: case XML_PARSER_MISC:
1.123 daniel 1640: ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
1.97 daniel 1641: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1642: ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
1643: ctxt->wellFormed = 0;
1.180 daniel 1644: ctxt->disableSAX = 1;
1.97 daniel 1645: return;
1646: case XML_PARSER_EPILOG:
1.123 daniel 1647: ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
1.97 daniel 1648: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1649: ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
1650: ctxt->wellFormed = 0;
1.180 daniel 1651: ctxt->disableSAX = 1;
1.97 daniel 1652: return;
1653: case XML_PARSER_DTD:
1.123 daniel 1654: ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
1.97 daniel 1655: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1656: ctxt->sax->error(ctxt->userData,
1657: "CharRef are forbiden in DTDs!\n");
1658: ctxt->wellFormed = 0;
1.180 daniel 1659: ctxt->disableSAX = 1;
1.97 daniel 1660: return;
1661: case XML_PARSER_ENTITY_VALUE:
1662: /*
1663: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1664: * substitution here since we need the literal
1.97 daniel 1665: * entity value to be able to save the internal
1666: * subset of the document.
1667: * This will be handled by xmlDecodeEntities
1668: */
1669: return;
1670: case XML_PARSER_CONTENT:
1671: case XML_PARSER_ATTRIBUTE_VALUE:
1672: ctxt->token = xmlParseCharRef(ctxt);
1673: return;
1674: }
1675: return;
1676: }
1677:
1678: switch(ctxt->instate) {
1.109 daniel 1679: case XML_PARSER_CDATA_SECTION:
1680: return;
1.140 daniel 1681: case XML_PARSER_PI:
1.97 daniel 1682: case XML_PARSER_COMMENT:
1.168 daniel 1683: case XML_PARSER_SYSTEM_LITERAL:
1684: case XML_PARSER_CONTENT:
1.97 daniel 1685: return;
1.140 daniel 1686: case XML_PARSER_START_TAG:
1687: return;
1688: case XML_PARSER_END_TAG:
1689: return;
1.97 daniel 1690: case XML_PARSER_EOF:
1.123 daniel 1691: ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
1.97 daniel 1692: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1693: ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
1694: ctxt->wellFormed = 0;
1.180 daniel 1695: ctxt->disableSAX = 1;
1.97 daniel 1696: return;
1697: case XML_PARSER_PROLOG:
1.140 daniel 1698: case XML_PARSER_START:
1699: case XML_PARSER_MISC:
1.123 daniel 1700: ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
1.97 daniel 1701: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1702: ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
1703: ctxt->wellFormed = 0;
1.180 daniel 1704: ctxt->disableSAX = 1;
1.97 daniel 1705: return;
1706: case XML_PARSER_EPILOG:
1.123 daniel 1707: ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
1.97 daniel 1708: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1709: ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
1710: ctxt->wellFormed = 0;
1.180 daniel 1711: ctxt->disableSAX = 1;
1.97 daniel 1712: return;
1713: case XML_PARSER_ENTITY_VALUE:
1714: /*
1715: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1716: * substitution here since we need the literal
1.97 daniel 1717: * entity value to be able to save the internal
1718: * subset of the document.
1719: * This will be handled by xmlDecodeEntities
1720: */
1721: return;
1722: case XML_PARSER_ATTRIBUTE_VALUE:
1723: /*
1724: * NOTE: in the case of attributes values, we don't do the
1725: * substitution here unless we are in a mode where
1726: * the parser is explicitely asked to substitute
1727: * entities. The SAX callback is called with values
1728: * without entity substitution.
1729: * This will then be handled by xmlDecodeEntities
1730: */
1.113 daniel 1731: return;
1.97 daniel 1732: case XML_PARSER_ENTITY_DECL:
1733: /*
1734: * we just ignore it there
1735: * the substitution will be done once the entity is referenced
1736: */
1737: return;
1738: case XML_PARSER_DTD:
1.123 daniel 1739: ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
1.97 daniel 1740: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1741: ctxt->sax->error(ctxt->userData,
1742: "Entity references are forbiden in DTDs!\n");
1743: ctxt->wellFormed = 0;
1.180 daniel 1744: ctxt->disableSAX = 1;
1.97 daniel 1745: return;
1746: }
1747:
1748: NEXT;
1749: name = xmlScanName(ctxt);
1750: if (name == NULL) {
1.123 daniel 1751: ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
1.97 daniel 1752: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1753: ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
1754: ctxt->wellFormed = 0;
1.180 daniel 1755: ctxt->disableSAX = 1;
1.97 daniel 1756: ctxt->token = '&';
1757: return;
1758: }
1759: if (NXT(xmlStrlen(name)) != ';') {
1.123 daniel 1760: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.97 daniel 1761: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1762: ctxt->sax->error(ctxt->userData,
1763: "Entity reference: ';' expected\n");
1764: ctxt->wellFormed = 0;
1.180 daniel 1765: ctxt->disableSAX = 1;
1.97 daniel 1766: ctxt->token = '&';
1.119 daniel 1767: xmlFree(name);
1.97 daniel 1768: return;
1769: }
1770: SKIP(xmlStrlen(name) + 1);
1771: if (ctxt->sax != NULL) {
1772: if (ctxt->sax->getEntity != NULL)
1773: ent = ctxt->sax->getEntity(ctxt->userData, name);
1774: }
1.98 daniel 1775:
1776: /*
1777: * [ WFC: Entity Declared ]
1778: * the Name given in the entity reference must match that in an entity
1779: * declaration, except that well-formed documents need not declare any
1780: * of the following entities: amp, lt, gt, apos, quot.
1781: */
1.97 daniel 1782: if (ent == NULL)
1783: ent = xmlGetPredefinedEntity(name);
1784: if (ent == NULL) {
1.123 daniel 1785: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.97 daniel 1786: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1787: ctxt->sax->error(ctxt->userData,
1.98 daniel 1788: "Entity reference: entity %s not declared\n",
1789: name);
1.97 daniel 1790: ctxt->wellFormed = 0;
1.180 daniel 1791: ctxt->disableSAX = 1;
1.119 daniel 1792: xmlFree(name);
1.97 daniel 1793: return;
1794: }
1.98 daniel 1795:
1796: /*
1797: * [ WFC: Parsed Entity ]
1798: * An entity reference must not contain the name of an unparsed entity
1799: */
1.159 daniel 1800: if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.123 daniel 1801: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 1802: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1803: ctxt->sax->error(ctxt->userData,
1804: "Entity reference to unparsed entity %s\n", name);
1805: ctxt->wellFormed = 0;
1.180 daniel 1806: ctxt->disableSAX = 1;
1.98 daniel 1807: }
1808:
1.159 daniel 1809: if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
1.97 daniel 1810: ctxt->token = ent->content[0];
1.119 daniel 1811: xmlFree(name);
1.97 daniel 1812: return;
1813: }
1814: input = xmlNewEntityInputStream(ctxt, ent);
1815: xmlPushInput(ctxt, input);
1.119 daniel 1816: xmlFree(name);
1.96 daniel 1817: return;
1818: }
1819:
1820: /**
1821: * xmlParserHandlePEReference:
1822: * @ctxt: the parser context
1823: *
1824: * [69] PEReference ::= '%' Name ';'
1825: *
1.98 daniel 1826: * [ WFC: No Recursion ]
1827: * TODO A parsed entity must not contain a recursive
1828: * reference to itself, either directly or indirectly.
1829: *
1830: * [ WFC: Entity Declared ]
1831: * In a document without any DTD, a document with only an internal DTD
1832: * subset which contains no parameter entity references, or a document
1833: * with "standalone='yes'", ... ... The declaration of a parameter
1834: * entity must precede any reference to it...
1835: *
1836: * [ VC: Entity Declared ]
1837: * In a document with an external subset or external parameter entities
1838: * with "standalone='no'", ... ... The declaration of a parameter entity
1839: * must precede any reference to it...
1840: *
1841: * [ WFC: In DTD ]
1842: * Parameter-entity references may only appear in the DTD.
1843: * NOTE: misleading but this is handled.
1844: *
1845: * A PEReference may have been detected in the current input stream
1.96 daniel 1846: * the handling is done accordingly to
1847: * http://www.w3.org/TR/REC-xml#entproc
1848: * i.e.
1849: * - Included in literal in entity values
1850: * - Included as Parameter Entity reference within DTDs
1851: */
1852: void
1853: xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 1854: xmlChar *name;
1.96 daniel 1855: xmlEntityPtr entity = NULL;
1856: xmlParserInputPtr input;
1857:
1.126 daniel 1858: if (ctxt->token != 0) {
1859: return;
1860: }
1.152 daniel 1861: if (RAW != '%') return;
1.96 daniel 1862: switch(ctxt->instate) {
1.109 daniel 1863: case XML_PARSER_CDATA_SECTION:
1864: return;
1.97 daniel 1865: case XML_PARSER_COMMENT:
1866: return;
1.140 daniel 1867: case XML_PARSER_START_TAG:
1868: return;
1869: case XML_PARSER_END_TAG:
1870: return;
1.96 daniel 1871: case XML_PARSER_EOF:
1.123 daniel 1872: ctxt->errNo = XML_ERR_PEREF_AT_EOF;
1.96 daniel 1873: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1874: ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
1875: ctxt->wellFormed = 0;
1.180 daniel 1876: ctxt->disableSAX = 1;
1.96 daniel 1877: return;
1878: case XML_PARSER_PROLOG:
1.140 daniel 1879: case XML_PARSER_START:
1880: case XML_PARSER_MISC:
1.123 daniel 1881: ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
1.96 daniel 1882: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1883: ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
1884: ctxt->wellFormed = 0;
1.180 daniel 1885: ctxt->disableSAX = 1;
1.96 daniel 1886: return;
1.97 daniel 1887: case XML_PARSER_ENTITY_DECL:
1.96 daniel 1888: case XML_PARSER_CONTENT:
1889: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 1890: case XML_PARSER_PI:
1.168 daniel 1891: case XML_PARSER_SYSTEM_LITERAL:
1.96 daniel 1892: /* we just ignore it there */
1893: return;
1894: case XML_PARSER_EPILOG:
1.123 daniel 1895: ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
1.96 daniel 1896: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.97 daniel 1897: ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
1.96 daniel 1898: ctxt->wellFormed = 0;
1.180 daniel 1899: ctxt->disableSAX = 1;
1.96 daniel 1900: return;
1.97 daniel 1901: case XML_PARSER_ENTITY_VALUE:
1902: /*
1903: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1904: * substitution here since we need the literal
1.97 daniel 1905: * entity value to be able to save the internal
1906: * subset of the document.
1907: * This will be handled by xmlDecodeEntities
1908: */
1909: return;
1.96 daniel 1910: case XML_PARSER_DTD:
1.98 daniel 1911: /*
1912: * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1913: * In the internal DTD subset, parameter-entity references
1914: * can occur only where markup declarations can occur, not
1915: * within markup declarations.
1916: * In that case this is handled in xmlParseMarkupDecl
1917: */
1918: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1919: return;
1.96 daniel 1920: }
1921:
1922: NEXT;
1923: name = xmlParseName(ctxt);
1924: if (name == NULL) {
1.123 daniel 1925: ctxt->errNo = XML_ERR_PEREF_NO_NAME;
1.96 daniel 1926: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1927: ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
1928: ctxt->wellFormed = 0;
1.180 daniel 1929: ctxt->disableSAX = 1;
1.96 daniel 1930: } else {
1.152 daniel 1931: if (RAW == ';') {
1.96 daniel 1932: NEXT;
1.98 daniel 1933: if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1934: entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.96 daniel 1935: if (entity == NULL) {
1.98 daniel 1936:
1937: /*
1938: * [ WFC: Entity Declared ]
1939: * In a document without any DTD, a document with only an
1940: * internal DTD subset which contains no parameter entity
1941: * references, or a document with "standalone='yes'", ...
1942: * ... The declaration of a parameter entity must precede
1943: * any reference to it...
1944: */
1945: if ((ctxt->standalone == 1) ||
1946: ((ctxt->hasExternalSubset == 0) &&
1947: (ctxt->hasPErefs == 0))) {
1948: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1949: ctxt->sax->error(ctxt->userData,
1950: "PEReference: %%%s; not found\n", name);
1951: ctxt->wellFormed = 0;
1.180 daniel 1952: ctxt->disableSAX = 1;
1.98 daniel 1953: } else {
1954: /*
1955: * [ VC: Entity Declared ]
1956: * In a document with an external subset or external
1957: * parameter entities with "standalone='no'", ...
1958: * ... The declaration of a parameter entity must precede
1959: * any reference to it...
1960: */
1961: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1962: ctxt->sax->warning(ctxt->userData,
1963: "PEReference: %%%s; not found\n", name);
1964: ctxt->valid = 0;
1965: }
1.96 daniel 1966: } else {
1.159 daniel 1967: if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1968: (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
1.96 daniel 1969: /*
1.156 daniel 1970: * TODO !!! handle the extra spaces added before and after
1.96 daniel 1971: * c.f. http://www.w3.org/TR/REC-xml#as-PE
1972: */
1973: input = xmlNewEntityInputStream(ctxt, entity);
1974: xmlPushInput(ctxt, input);
1.164 daniel 1975: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
1976: (RAW == '<') && (NXT(1) == '?') &&
1977: (NXT(2) == 'x') && (NXT(3) == 'm') &&
1978: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 1979: xmlParseTextDecl(ctxt);
1.164 daniel 1980: }
1981: if (ctxt->token == 0)
1982: ctxt->token = ' ';
1.96 daniel 1983: } else {
1984: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1985: ctxt->sax->error(ctxt->userData,
1986: "xmlHandlePEReference: %s is not a parameter entity\n",
1987: name);
1988: ctxt->wellFormed = 0;
1.180 daniel 1989: ctxt->disableSAX = 1;
1.96 daniel 1990: }
1991: }
1992: } else {
1.123 daniel 1993: ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
1.96 daniel 1994: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1995: ctxt->sax->error(ctxt->userData,
1996: "xmlHandlePEReference: expecting ';'\n");
1997: ctxt->wellFormed = 0;
1.180 daniel 1998: ctxt->disableSAX = 1;
1.96 daniel 1999: }
1.119 daniel 2000: xmlFree(name);
1.97 daniel 2001: }
2002: }
2003:
/*
 * Macro used to grow the current buffer.
 *
 * Expects two variables in the calling scope: the xmlChar pointer
 * `buffer` and its companion size `buffer_size`. The size is doubled and
 * the buffer reallocated. The result of xmlRealloc() goes through a
 * temporary so that, when the reallocation fails, the original block is
 * still reachable and can be released instead of being leaked; the
 * enclosing function then returns NULL.
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_grown;						\
    buffer##_size *= 2;							\
    buffer##_grown = (xmlChar *)					\
		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (buffer##_grown == NULL) {					\
	perror("realloc failed");					\
	xmlFree(buffer);						\
	return(NULL);							\
    }									\
    buffer = buffer##_grown;						\
}
1.77 daniel 2016:
2017: /**
2018: * xmlDecodeEntities:
2019: * @ctxt: the parser context
2020: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2021: * @len: the len to decode (in bytes !), -1 for no size limit
1.123 daniel 2022: * @end: an end marker xmlChar, 0 if none
2023: * @end2: an end marker xmlChar, 0 if none
2024: * @end3: an end marker xmlChar, 0 if none
1.77 daniel 2025: *
2026: * [67] Reference ::= EntityRef | CharRef
2027: *
2028: * [69] PEReference ::= '%' Name ';'
2029: *
2030: * Returns A newly allocated string with the substitution done. The caller
2031: * must deallocate it !
2032: */
1.123 daniel 2033: xmlChar *
1.77 daniel 2034: xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
1.123 daniel 2035: xmlChar end, xmlChar end2, xmlChar end3) {
2036: xmlChar *buffer = NULL;
1.202 daniel 2037: unsigned int buffer_size = 0;
2038: unsigned int nbchars = 0;
1.78 daniel 2039:
1.123 daniel 2040: xmlChar *current = NULL;
1.77 daniel 2041: xmlEntityPtr ent;
2042: unsigned int max = (unsigned int) len;
1.161 daniel 2043: int c,l;
1.77 daniel 2044:
1.185 daniel 2045: if (ctxt->depth > 40) {
2046: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2047: ctxt->sax->error(ctxt->userData,
2048: "Detected entity reference loop\n");
2049: ctxt->wellFormed = 0;
2050: ctxt->disableSAX = 1;
2051: ctxt->errNo = XML_ERR_ENTITY_LOOP;
2052: return(NULL);
2053: }
2054:
1.77 daniel 2055: /*
2056: * allocate a translation buffer.
2057: */
1.140 daniel 2058: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.123 daniel 2059: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
1.77 daniel 2060: if (buffer == NULL) {
2061: perror("xmlDecodeEntities: malloc failed");
2062: return(NULL);
2063: }
2064:
1.78 daniel 2065: /*
2066: * Ok loop until we reach one of the ending char or a size limit.
2067: */
1.161 daniel 2068: c = CUR_CHAR(l);
2069: while ((nbchars < max) && (c != end) &&
2070: (c != end2) && (c != end3)) {
1.77 daniel 2071:
1.161 daniel 2072: if (c == 0) break;
2073: if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
1.98 daniel 2074: int val = xmlParseCharRef(ctxt);
1.161 daniel 2075: COPY_BUF(0,buffer,nbchars,val);
2076: NEXTL(l);
2077: } else if ((c == '&') && (ctxt->token != '&') &&
2078: (what & XML_SUBSTITUTE_REF)) {
1.98 daniel 2079: ent = xmlParseEntityRef(ctxt);
2080: if ((ent != NULL) &&
2081: (ctxt->replaceEntities != 0)) {
2082: current = ent->content;
2083: while (*current != 0) {
1.161 daniel 2084: buffer[nbchars++] = *current++;
2085: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.98 daniel 2086: growBuffer(buffer);
1.77 daniel 2087: }
2088: }
1.98 daniel 2089: } else if (ent != NULL) {
1.123 daniel 2090: const xmlChar *cur = ent->name;
1.98 daniel 2091:
1.161 daniel 2092: buffer[nbchars++] = '&';
2093: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.98 daniel 2094: growBuffer(buffer);
2095: }
1.161 daniel 2096: while (*cur != 0) {
2097: buffer[nbchars++] = *cur++;
2098: }
2099: buffer[nbchars++] = ';';
1.77 daniel 2100: }
1.161 daniel 2101: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1.97 daniel 2102: /*
1.77 daniel 2103: * a PEReference induce to switch the entity flow,
2104: * we break here to flush the current set of chars
2105: * parsed if any. We will be called back later.
1.97 daniel 2106: */
1.91 daniel 2107: if (nbchars != 0) break;
1.77 daniel 2108:
2109: xmlParsePEReference(ctxt);
1.79 daniel 2110:
1.97 daniel 2111: /*
1.79 daniel 2112: * Pop-up of finished entities.
1.97 daniel 2113: */
1.152 daniel 2114: while ((RAW == 0) && (ctxt->inputNr > 1))
1.79 daniel 2115: xmlPopInput(ctxt);
2116:
1.98 daniel 2117: break;
1.77 daniel 2118: } else {
1.161 daniel 2119: COPY_BUF(l,buffer,nbchars,c);
2120: NEXTL(l);
2121: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.86 daniel 2122: growBuffer(buffer);
2123: }
1.77 daniel 2124: }
1.161 daniel 2125: c = CUR_CHAR(l);
1.77 daniel 2126: }
1.161 daniel 2127: buffer[nbchars++] = 0;
1.77 daniel 2128: return(buffer);
2129: }
2130:
1.135 daniel 2131: /**
2132: * xmlStringDecodeEntities:
2133: * @ctxt: the parser context
2134: * @str: the input string
2135: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2136: * @end: an end marker xmlChar, 0 if none
2137: * @end2: an end marker xmlChar, 0 if none
2138: * @end3: an end marker xmlChar, 0 if none
2139: *
2140: * [67] Reference ::= EntityRef | CharRef
2141: *
2142: * [69] PEReference ::= '%' Name ';'
2143: *
2144: * Returns A newly allocated string with the substitution done. The caller
2145: * must deallocate it !
2146: */
2147: xmlChar *
2148: xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2149: xmlChar end, xmlChar end2, xmlChar end3) {
2150: xmlChar *buffer = NULL;
2151: int buffer_size = 0;
2152:
2153: xmlChar *current = NULL;
2154: xmlEntityPtr ent;
1.176 daniel 2155: int c,l;
2156: int nbchars = 0;
1.135 daniel 2157:
1.211 ! veillard 2158: if (str == NULL)
! 2159: return(NULL);
! 2160:
1.185 daniel 2161: if (ctxt->depth > 40) {
2162: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2163: ctxt->sax->error(ctxt->userData,
2164: "Detected entity reference loop\n");
2165: ctxt->wellFormed = 0;
2166: ctxt->disableSAX = 1;
2167: ctxt->errNo = XML_ERR_ENTITY_LOOP;
2168: return(NULL);
2169: }
2170:
1.135 daniel 2171: /*
2172: * allocate a translation buffer.
2173: */
1.140 daniel 2174: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.135 daniel 2175: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2176: if (buffer == NULL) {
2177: perror("xmlDecodeEntities: malloc failed");
2178: return(NULL);
2179: }
2180:
2181: /*
2182: * Ok loop until we reach one of the ending char or a size limit.
2183: */
1.176 daniel 2184: c = CUR_SCHAR(str, l);
2185: while ((c != 0) && (c != end) && (c != end2) && (c != end3)) {
1.135 daniel 2186:
1.176 daniel 2187: if (c == 0) break;
2188: if ((c == '&') && (str[1] == '#')) {
1.135 daniel 2189: int val = xmlParseStringCharRef(ctxt, &str);
1.176 daniel 2190: if (val != 0) {
2191: COPY_BUF(0,buffer,nbchars,val);
2192: }
2193: } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1.135 daniel 2194: ent = xmlParseStringEntityRef(ctxt, &str);
1.185 daniel 2195: if ((ent != NULL) && (ent->content != NULL)) {
2196: xmlChar *rep;
2197:
2198: ctxt->depth++;
2199: rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2200: 0, 0, 0);
2201: ctxt->depth--;
2202: if (rep != NULL) {
2203: current = rep;
2204: while (*current != 0) {
2205: buffer[nbchars++] = *current++;
2206: if (nbchars >
2207: buffer_size - XML_PARSER_BUFFER_SIZE) {
2208: growBuffer(buffer);
2209: }
1.135 daniel 2210: }
1.185 daniel 2211: xmlFree(rep);
1.135 daniel 2212: }
2213: } else if (ent != NULL) {
2214: int i = xmlStrlen(ent->name);
2215: const xmlChar *cur = ent->name;
2216:
1.176 daniel 2217: buffer[nbchars++] = '&';
2218: if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 2219: growBuffer(buffer);
2220: }
2221: for (;i > 0;i--)
1.176 daniel 2222: buffer[nbchars++] = *cur++;
2223: buffer[nbchars++] = ';';
1.135 daniel 2224: }
1.176 daniel 2225: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1.135 daniel 2226: ent = xmlParseStringPEReference(ctxt, &str);
2227: if (ent != NULL) {
1.185 daniel 2228: xmlChar *rep;
2229:
2230: ctxt->depth++;
2231: rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2232: 0, 0, 0);
2233: ctxt->depth--;
2234: if (rep != NULL) {
2235: current = rep;
2236: while (*current != 0) {
2237: buffer[nbchars++] = *current++;
2238: if (nbchars >
2239: buffer_size - XML_PARSER_BUFFER_SIZE) {
2240: growBuffer(buffer);
2241: }
1.135 daniel 2242: }
1.185 daniel 2243: xmlFree(rep);
1.135 daniel 2244: }
2245: }
2246: } else {
1.176 daniel 2247: COPY_BUF(l,buffer,nbchars,c);
2248: str += l;
2249: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 2250: growBuffer(buffer);
2251: }
2252: }
1.176 daniel 2253: c = CUR_SCHAR(str, l);
1.135 daniel 2254: }
1.176 daniel 2255: buffer[nbchars++] = 0;
1.135 daniel 2256: return(buffer);
2257: }
2258:
1.1 veillard 2259:
1.28 daniel 2260: /************************************************************************
2261: * *
1.75 daniel 2262: * Commodity functions to handle encodings *
2263: * *
2264: ************************************************************************/
2265:
1.172 daniel 2266: /*
2267: * xmlCheckLanguageID
2268: * @lang: pointer to the string value
2269: *
2270: * Checks that the value conforms to the LanguageID production:
2271: *
2272: * [33] LanguageID ::= Langcode ('-' Subcode)*
2273: * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2274: * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2275: * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2276: * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2277: * [38] Subcode ::= ([a-z] | [A-Z])+
2278: *
2279: * Returns 1 if correct 0 otherwise
2280: **/
2281: int
2282: xmlCheckLanguageID(const xmlChar *lang) {
2283: const xmlChar *cur = lang;
2284:
2285: if (cur == NULL)
2286: return(0);
2287: if (((cur[0] == 'i') && (cur[1] == '-')) ||
2288: ((cur[0] == 'I') && (cur[1] == '-'))) {
2289: /*
2290: * IANA code
2291: */
2292: cur += 2;
2293: while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2294: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2295: cur++;
2296: } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2297: ((cur[0] == 'X') && (cur[1] == '-'))) {
2298: /*
2299: * User code
2300: */
2301: cur += 2;
2302: while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2303: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2304: cur++;
2305: } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2306: ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2307: /*
2308: * ISO639
2309: */
2310: cur++;
2311: if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2312: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2313: cur++;
2314: else
2315: return(0);
2316: } else
2317: return(0);
2318: while (cur[0] != 0) {
2319: if (cur[0] != '-')
2320: return(0);
2321: cur++;
2322: if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2323: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2324: cur++;
2325: else
2326: return(0);
2327: while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2328: ((cur[0] >= 'a') && (cur[0] <= 'z')))
2329: cur++;
2330: }
2331: return(1);
2332: }
2333:
1.75 daniel 2334: /**
2335: * xmlSwitchEncoding:
2336: * @ctxt: the parser context
1.124 daniel 2337: * @enc: the encoding value (number)
1.75 daniel 2338: *
2339: * change the input functions when discovering the character encoding
2340: * of a given entity.
1.193 daniel 2341: *
2342: * Returns 0 in case of success, -1 otherwise
1.75 daniel 2343: */
1.193 daniel 2344: int
1.75 daniel 2345: xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
2346: {
1.156 daniel 2347: xmlCharEncodingHandlerPtr handler;
2348:
1.193 daniel 2349: switch (enc) {
2350: case XML_CHAR_ENCODING_ERROR:
2351: ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
2352: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2353: ctxt->sax->error(ctxt->userData, "encoding unknown\n");
2354: ctxt->wellFormed = 0;
2355: ctxt->disableSAX = 1;
2356: break;
2357: case XML_CHAR_ENCODING_NONE:
2358: /* let's assume it's UTF-8 without the XML decl */
1.198 daniel 2359: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2360: return(0);
2361: case XML_CHAR_ENCODING_UTF8:
2362: /* default encoding, no conversion should be needed */
1.198 daniel 2363: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2364: return(0);
2365: default:
2366: break;
2367: }
1.156 daniel 2368: handler = xmlGetCharEncodingHandler(enc);
1.193 daniel 2369: if (handler == NULL) {
2370: /*
2371: * Default handlers.
2372: */
2373: switch (enc) {
2374: case XML_CHAR_ENCODING_ERROR:
2375: ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
2376: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2377: ctxt->sax->error(ctxt->userData, "encoding unknown\n");
2378: ctxt->wellFormed = 0;
2379: ctxt->disableSAX = 1;
1.198 daniel 2380: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2381: break;
2382: case XML_CHAR_ENCODING_NONE:
2383: /* let's assume it's UTF-8 without the XML decl */
1.198 daniel 2384: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2385: return(0);
2386: case XML_CHAR_ENCODING_UTF8:
1.211 ! veillard 2387: case XML_CHAR_ENCODING_ASCII:
1.193 daniel 2388: /* default encoding, no conversion should be needed */
1.198 daniel 2389: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2390: return(0);
2391: case XML_CHAR_ENCODING_UTF16LE:
2392: break;
2393: case XML_CHAR_ENCODING_UTF16BE:
2394: break;
2395: case XML_CHAR_ENCODING_UCS4LE:
2396: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2397: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2398: ctxt->sax->error(ctxt->userData,
2399: "char encoding USC4 little endian not supported\n");
2400: break;
2401: case XML_CHAR_ENCODING_UCS4BE:
2402: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2403: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2404: ctxt->sax->error(ctxt->userData,
2405: "char encoding USC4 big endian not supported\n");
2406: break;
2407: case XML_CHAR_ENCODING_EBCDIC:
2408: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2409: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2410: ctxt->sax->error(ctxt->userData,
2411: "char encoding EBCDIC not supported\n");
2412: break;
2413: case XML_CHAR_ENCODING_UCS4_2143:
2414: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2415: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2416: ctxt->sax->error(ctxt->userData,
2417: "char encoding UCS4 2143 not supported\n");
2418: break;
2419: case XML_CHAR_ENCODING_UCS4_3412:
2420: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2421: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2422: ctxt->sax->error(ctxt->userData,
2423: "char encoding UCS4 3412 not supported\n");
2424: break;
2425: case XML_CHAR_ENCODING_UCS2:
2426: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2427: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2428: ctxt->sax->error(ctxt->userData,
2429: "char encoding UCS2 not supported\n");
2430: break;
2431: case XML_CHAR_ENCODING_8859_1:
2432: case XML_CHAR_ENCODING_8859_2:
2433: case XML_CHAR_ENCODING_8859_3:
2434: case XML_CHAR_ENCODING_8859_4:
2435: case XML_CHAR_ENCODING_8859_5:
2436: case XML_CHAR_ENCODING_8859_6:
2437: case XML_CHAR_ENCODING_8859_7:
2438: case XML_CHAR_ENCODING_8859_8:
2439: case XML_CHAR_ENCODING_8859_9:
1.195 daniel 2440: /*
1.203 veillard 2441: * We used to keep the internal content in the
2442: * document encoding however this turns being unmaintainable
2443: * So xmlGetCharEncodingHandler() will return non-null
2444: * values for this now.
1.195 daniel 2445: */
2446: if ((ctxt->inputNr == 1) &&
2447: (ctxt->encoding == NULL) &&
2448: (ctxt->input->encoding != NULL)) {
2449: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
2450: }
1.198 daniel 2451: ctxt->charset = enc;
1.195 daniel 2452: return(0);
1.193 daniel 2453: case XML_CHAR_ENCODING_2022_JP:
2454: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2455: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2456: ctxt->sax->error(ctxt->userData,
2457: "char encoding ISO-2022-JPnot supported\n");
2458: break;
2459: case XML_CHAR_ENCODING_SHIFT_JIS:
2460: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2461: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2462: ctxt->sax->error(ctxt->userData,
2463: "char encoding Shift_JIS not supported\n");
2464: break;
2465: case XML_CHAR_ENCODING_EUC_JP:
2466: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2467: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2468: ctxt->sax->error(ctxt->userData,
2469: "char encoding EUC-JPnot supported\n");
2470: break;
2471: }
2472: }
2473: if (handler == NULL)
2474: return(-1);
1.198 daniel 2475: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2476: return(xmlSwitchToEncoding(ctxt, handler));
2477: }
2478:
2479: /**
2480: * xmlSwitchToEncoding:
2481: * @ctxt: the parser context
2482: * @handler: the encoding handler
2483: *
2484: * change the input functions when discovering the character encoding
2485: * of a given entity.
2486: *
2487: * Returns 0 in case of success, -1 otherwise
2488: */
2489: int
2490: xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
2491: {
1.194 daniel 2492: int nbchars;
2493:
1.156 daniel 2494: if (handler != NULL) {
2495: if (ctxt->input != NULL) {
2496: if (ctxt->input->buf != NULL) {
2497: if (ctxt->input->buf->encoder != NULL) {
1.193 daniel 2498: if (ctxt->input->buf->encoder == handler)
2499: return(0);
1.197 daniel 2500: /*
2501: * Note: this is a bit dangerous, but that's what it
2502: * takes to use nearly compatible signature for different
2503: * encodings.
2504: */
2505: xmlCharEncCloseFunc(ctxt->input->buf->encoder);
2506: ctxt->input->buf->encoder = handler;
2507: return(0);
1.156 daniel 2508: }
2509: ctxt->input->buf->encoder = handler;
2510:
2511: /*
1.194 daniel 2512: * Is there already some content down the pipe to convert ?
1.156 daniel 2513: */
2514: if ((ctxt->input->buf->buffer != NULL) &&
2515: (ctxt->input->buf->buffer->use > 0)) {
2516: int processed;
2517:
2518: /*
2519: * Specific handling of the Byte Order Mark for
2520: * UTF-16
2521: */
1.195 daniel 2522: if ((handler->name != NULL) &&
2523: (!strcmp(handler->name, "UTF-16LE")) &&
1.156 daniel 2524: (ctxt->input->cur[0] == 0xFF) &&
2525: (ctxt->input->cur[1] == 0xFE)) {
1.194 daniel 2526: ctxt->input->cur += 2;
1.156 daniel 2527: }
1.195 daniel 2528: if ((handler->name != NULL) &&
2529: (!strcmp(handler->name, "UTF-16BE")) &&
1.156 daniel 2530: (ctxt->input->cur[0] == 0xFE) &&
2531: (ctxt->input->cur[1] == 0xFF)) {
1.194 daniel 2532: ctxt->input->cur += 2;
1.156 daniel 2533: }
2534:
2535: /*
1.194 daniel 2536: * Shring the current input buffer.
2537: * Move it as the raw buffer and create a new input buffer
1.156 daniel 2538: */
2539: processed = ctxt->input->cur - ctxt->input->base;
1.194 daniel 2540: xmlBufferShrink(ctxt->input->buf->buffer, processed);
2541: ctxt->input->buf->raw = ctxt->input->buf->buffer;
2542: ctxt->input->buf->buffer = xmlBufferCreate();
2543:
2544: /*
1.197 daniel 2545: * convert just enough to get
2546: * '<?xml version="1.0" encoding="xxx"?>'
2547: * parsed with the autodetected encoding
2548: * into the parser reading buffer.
1.194 daniel 2549: */
1.197 daniel 2550: nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
2551: ctxt->input->buf->buffer,
2552: ctxt->input->buf->raw);
1.194 daniel 2553: if (nbchars < 0) {
2554: fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
2555: return(-1);
1.156 daniel 2556: }
1.194 daniel 2557: ctxt->input->base =
2558: ctxt->input->cur = ctxt->input->buf->buffer->content;
1.156 daniel 2559: }
1.193 daniel 2560: return(0);
1.156 daniel 2561: } else {
1.209 veillard 2562: if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
1.156 daniel 2563: /*
2564: * When parsing a static memory array one must know the
2565: * size to be able to convert the buffer.
2566: */
2567: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2568: ctxt->sax->error(ctxt->userData,
2569: "xmlSwitchEncoding : no input\n");
1.193 daniel 2570: return(-1);
1.156 daniel 2571: } else {
1.194 daniel 2572: int processed;
2573:
2574: /*
2575: * Shring the current input buffer.
2576: * Move it as the raw buffer and create a new input buffer
2577: */
2578: processed = ctxt->input->cur - ctxt->input->base;
1.209 veillard 2579:
1.194 daniel 2580: ctxt->input->buf->raw = xmlBufferCreate();
2581: xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
1.209 veillard 2582: ctxt->input->length - processed);
1.194 daniel 2583: ctxt->input->buf->buffer = xmlBufferCreate();
1.156 daniel 2584:
2585: /*
1.194 daniel 2586: * convert as much as possible of the raw input
2587: * to the parser reading buffer.
2588: */
2589: nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
2590: ctxt->input->buf->buffer,
2591: ctxt->input->buf->raw);
2592: if (nbchars < 0) {
2593: fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
2594: return(-1);
1.156 daniel 2595: }
1.194 daniel 2596:
1.156 daniel 2597: /*
2598: * Conversion succeeded, get rid of the old buffer
2599: */
2600: if ((ctxt->input->free != NULL) &&
2601: (ctxt->input->base != NULL))
2602: ctxt->input->free((xmlChar *) ctxt->input->base);
1.194 daniel 2603: ctxt->input->base =
2604: ctxt->input->cur = ctxt->input->buf->buffer->content;
1.156 daniel 2605: }
2606: }
2607: } else {
2608: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2609: ctxt->sax->error(ctxt->userData,
2610: "xmlSwitchEncoding : no input\n");
1.193 daniel 2611: return(-1);
1.156 daniel 2612: }
1.195 daniel 2613: /*
2614: * The parsing is now done in UTF8 natively
2615: */
1.198 daniel 2616: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2617: } else
2618: return(-1);
2619: return(0);
1.156 daniel 2620:
1.75 daniel 2621: }
2622:
2623: /************************************************************************
2624: * *
1.123 daniel 2625: * Commodity functions to handle xmlChars *
1.28 daniel 2626: * *
2627: ************************************************************************/
2628:
1.50 daniel 2629: /**
2630: * xmlStrndup:
1.123 daniel 2631: * @cur: the input xmlChar *
1.50 daniel 2632: * @len: the len of @cur
2633: *
1.123 daniel 2634: * a strndup for array of xmlChar's
1.68 daniel 2635: *
1.123 daniel 2636: * Returns a new xmlChar * or NULL
1.1 veillard 2637: */
1.123 daniel 2638: xmlChar *
2639: xmlStrndup(const xmlChar *cur, int len) {
1.135 daniel 2640: xmlChar *ret;
2641:
2642: if ((cur == NULL) || (len < 0)) return(NULL);
1.204 veillard 2643: ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1.1 veillard 2644: if (ret == NULL) {
1.86 daniel 2645: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 2646: (len + 1) * (long)sizeof(xmlChar));
1.1 veillard 2647: return(NULL);
2648: }
1.123 daniel 2649: memcpy(ret, cur, len * sizeof(xmlChar));
1.1 veillard 2650: ret[len] = 0;
2651: return(ret);
2652: }
2653:
1.50 daniel 2654: /**
2655: * xmlStrdup:
1.123 daniel 2656: * @cur: the input xmlChar *
1.50 daniel 2657: *
1.152 daniel 2658: * a strdup for array of xmlChar's. Since they are supposed to be
2659: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2660: * a termination mark of '0'.
1.68 daniel 2661: *
1.123 daniel 2662: * Returns a new xmlChar * or NULL
1.1 veillard 2663: */
1.123 daniel 2664: xmlChar *
2665: xmlStrdup(const xmlChar *cur) {
2666: const xmlChar *p = cur;
1.1 veillard 2667:
1.135 daniel 2668: if (cur == NULL) return(NULL);
1.152 daniel 2669: while (*p != 0) p++;
1.1 veillard 2670: return(xmlStrndup(cur, p - cur));
2671: }
2672:
1.50 daniel 2673: /**
2674: * xmlCharStrndup:
2675: * @cur: the input char *
2676: * @len: the len of @cur
2677: *
1.123 daniel 2678: * a strndup for char's to xmlChar's
1.68 daniel 2679: *
1.123 daniel 2680: * Returns a new xmlChar * or NULL
1.45 daniel 2681: */
2682:
1.123 daniel 2683: xmlChar *
1.55 daniel 2684: xmlCharStrndup(const char *cur, int len) {
1.45 daniel 2685: int i;
1.135 daniel 2686: xmlChar *ret;
2687:
2688: if ((cur == NULL) || (len < 0)) return(NULL);
1.204 veillard 2689: ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1.45 daniel 2690: if (ret == NULL) {
1.86 daniel 2691: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 2692: (len + 1) * (long)sizeof(xmlChar));
1.45 daniel 2693: return(NULL);
2694: }
2695: for (i = 0;i < len;i++)
1.123 daniel 2696: ret[i] = (xmlChar) cur[i];
1.45 daniel 2697: ret[len] = 0;
2698: return(ret);
2699: }
2700:
1.50 daniel 2701: /**
2702: * xmlCharStrdup:
2703: * @cur: the input char *
2704: * @len: the len of @cur
2705: *
1.123 daniel 2706: * a strdup for char's to xmlChar's
1.68 daniel 2707: *
1.123 daniel 2708: * Returns a new xmlChar * or NULL
1.45 daniel 2709: */
2710:
1.123 daniel 2711: xmlChar *
1.55 daniel 2712: xmlCharStrdup(const char *cur) {
1.45 daniel 2713: const char *p = cur;
2714:
1.135 daniel 2715: if (cur == NULL) return(NULL);
1.45 daniel 2716: while (*p != '\0') p++;
2717: return(xmlCharStrndup(cur, p - cur));
2718: }
2719:
1.50 daniel 2720: /**
2721: * xmlStrcmp:
1.123 daniel 2722: * @str1: the first xmlChar *
2723: * @str2: the second xmlChar *
1.50 daniel 2724: *
1.123 daniel 2725: * a strcmp for xmlChar's
1.68 daniel 2726: *
2727: * Returns the integer result of the comparison
1.14 veillard 2728: */
2729:
1.55 daniel 2730: int
1.123 daniel 2731: xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1.14 veillard 2732: register int tmp;
2733:
1.135 daniel 2734: if ((str1 == NULL) && (str2 == NULL)) return(0);
2735: if (str1 == NULL) return(-1);
2736: if (str2 == NULL) return(1);
1.14 veillard 2737: do {
2738: tmp = *str1++ - *str2++;
2739: if (tmp != 0) return(tmp);
2740: } while ((*str1 != 0) && (*str2 != 0));
2741: return (*str1 - *str2);
2742: }
2743:
1.50 daniel 2744: /**
2745: * xmlStrncmp:
1.123 daniel 2746: * @str1: the first xmlChar *
2747: * @str2: the second xmlChar *
1.50 daniel 2748: * @len: the max comparison length
2749: *
1.123 daniel 2750: * a strncmp for xmlChar's
1.68 daniel 2751: *
2752: * Returns the integer result of the comparison
1.14 veillard 2753: */
2754:
1.55 daniel 2755: int
1.123 daniel 2756: xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1.14 veillard 2757: register int tmp;
2758:
2759: if (len <= 0) return(0);
1.135 daniel 2760: if ((str1 == NULL) && (str2 == NULL)) return(0);
2761: if (str1 == NULL) return(-1);
2762: if (str2 == NULL) return(1);
1.14 veillard 2763: do {
2764: tmp = *str1++ - *str2++;
2765: if (tmp != 0) return(tmp);
2766: len--;
2767: if (len <= 0) return(0);
2768: } while ((*str1 != 0) && (*str2 != 0));
2769: return (*str1 - *str2);
2770: }
2771:
1.50 daniel 2772: /**
2773: * xmlStrchr:
1.123 daniel 2774: * @str: the xmlChar * array
2775: * @val: the xmlChar to search
1.50 daniel 2776: *
1.123 daniel 2777: * a strchr for xmlChar's
1.68 daniel 2778: *
1.123 daniel 2779: * Returns the xmlChar * for the first occurence or NULL.
1.14 veillard 2780: */
2781:
1.123 daniel 2782: const xmlChar *
2783: xmlStrchr(const xmlChar *str, xmlChar val) {
1.135 daniel 2784: if (str == NULL) return(NULL);
1.14 veillard 2785: while (*str != 0) {
1.123 daniel 2786: if (*str == val) return((xmlChar *) str);
1.14 veillard 2787: str++;
2788: }
2789: return(NULL);
1.89 daniel 2790: }
2791:
2792: /**
2793: * xmlStrstr:
1.123 daniel 2794: * @str: the xmlChar * array (haystack)
2795: * @val: the xmlChar to search (needle)
1.89 daniel 2796: *
1.123 daniel 2797: * a strstr for xmlChar's
1.89 daniel 2798: *
1.123 daniel 2799: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 2800: */
2801:
1.123 daniel 2802: const xmlChar *
2803: xmlStrstr(const xmlChar *str, xmlChar *val) {
1.89 daniel 2804: int n;
2805:
2806: if (str == NULL) return(NULL);
2807: if (val == NULL) return(NULL);
2808: n = xmlStrlen(val);
2809:
2810: if (n == 0) return(str);
2811: while (*str != 0) {
2812: if (*str == *val) {
1.123 daniel 2813: if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1.89 daniel 2814: }
2815: str++;
2816: }
2817: return(NULL);
2818: }
2819:
2820: /**
2821: * xmlStrsub:
1.123 daniel 2822: * @str: the xmlChar * array (haystack)
1.89 daniel 2823: * @start: the index of the first char (zero based)
2824: * @len: the length of the substring
2825: *
2826: * Extract a substring of a given string
2827: *
1.123 daniel 2828: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 2829: */
2830:
1.123 daniel 2831: xmlChar *
2832: xmlStrsub(const xmlChar *str, int start, int len) {
1.89 daniel 2833: int i;
2834:
2835: if (str == NULL) return(NULL);
2836: if (start < 0) return(NULL);
1.90 daniel 2837: if (len < 0) return(NULL);
1.89 daniel 2838:
2839: for (i = 0;i < start;i++) {
2840: if (*str == 0) return(NULL);
2841: str++;
2842: }
2843: if (*str == 0) return(NULL);
2844: return(xmlStrndup(str, len));
1.14 veillard 2845: }
1.28 daniel 2846:
1.50 daniel 2847: /**
2848: * xmlStrlen:
1.123 daniel 2849: * @str: the xmlChar * array
1.50 daniel 2850: *
1.127 daniel 2851: * length of a xmlChar's string
1.68 daniel 2852: *
1.123 daniel 2853: * Returns the number of xmlChar contained in the ARRAY.
1.45 daniel 2854: */
2855:
1.55 daniel 2856: int
1.123 daniel 2857: xmlStrlen(const xmlChar *str) {
1.45 daniel 2858: int len = 0;
2859:
2860: if (str == NULL) return(0);
2861: while (*str != 0) {
2862: str++;
2863: len++;
2864: }
2865: return(len);
2866: }
2867:
1.50 daniel 2868: /**
2869: * xmlStrncat:
1.123 daniel 2870: * @cur: the original xmlChar * array
2871: * @add: the xmlChar * array added
1.50 daniel 2872: * @len: the length of @add
2873: *
1.123 daniel 2874: * a strncat for array of xmlChar's
1.68 daniel 2875: *
1.123 daniel 2876: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 2877: */
2878:
1.123 daniel 2879: xmlChar *
2880: xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1.45 daniel 2881: int size;
1.123 daniel 2882: xmlChar *ret;
1.45 daniel 2883:
2884: if ((add == NULL) || (len == 0))
2885: return(cur);
2886: if (cur == NULL)
2887: return(xmlStrndup(add, len));
2888:
2889: size = xmlStrlen(cur);
1.204 veillard 2890: ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1.45 daniel 2891: if (ret == NULL) {
1.86 daniel 2892: fprintf(stderr, "xmlStrncat: realloc of %ld byte failed\n",
1.123 daniel 2893: (size + len + 1) * (long)sizeof(xmlChar));
1.45 daniel 2894: return(cur);
2895: }
1.123 daniel 2896: memcpy(&ret[size], add, len * sizeof(xmlChar));
1.45 daniel 2897: ret[size + len] = 0;
2898: return(ret);
2899: }
2900:
1.50 daniel 2901: /**
2902: * xmlStrcat:
1.123 daniel 2903: * @cur: the original xmlChar * array
2904: * @add: the xmlChar * array added
1.50 daniel 2905: *
1.152 daniel 2906: * a strcat for array of xmlChar's. Since they are supposed to be
2907: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2908: * a termination mark of '0'.
1.68 daniel 2909: *
1.123 daniel 2910: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 2911: */
1.123 daniel 2912: xmlChar *
2913: xmlStrcat(xmlChar *cur, const xmlChar *add) {
2914: const xmlChar *p = add;
1.45 daniel 2915:
2916: if (add == NULL) return(cur);
2917: if (cur == NULL)
2918: return(xmlStrdup(add));
2919:
1.152 daniel 2920: while (*p != 0) p++;
1.45 daniel 2921: return(xmlStrncat(cur, add, p - add));
2922: }
2923:
2924: /************************************************************************
2925: * *
2926: * Commodity functions, cleanup needed ? *
2927: * *
2928: ************************************************************************/
2929:
1.50 daniel 2930: /**
2931: * areBlanks:
2932: * @ctxt: an XML parser context
1.123 daniel 2933: * @str: a xmlChar *
1.50 daniel 2934: * @len: the size of @str
2935: *
1.45 daniel 2936: * Is this a sequence of blank chars that one can ignore ?
1.50 daniel 2937: *
1.68 daniel 2938: * Returns 1 if ignorable 0 otherwise.
1.45 daniel 2939: */
2940:
1.123 daniel 2941: static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1.104 daniel 2942: int i, ret;
1.45 daniel 2943: xmlNodePtr lastChild;
2944:
1.176 daniel 2945: /*
2946: * Check for xml:space value.
2947: */
2948: if (*(ctxt->space) == 1)
2949: return(0);
2950:
2951: /*
2952: * Check that the string is made of blanks
2953: */
1.45 daniel 2954: for (i = 0;i < len;i++)
2955: if (!(IS_BLANK(str[i]))) return(0);
2956:
1.176 daniel 2957: /*
2958: * Look if the element is mixed content in the Dtd if available
2959: */
1.104 daniel 2960: if (ctxt->myDoc != NULL) {
2961: ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2962: if (ret == 0) return(1);
2963: if (ret == 1) return(0);
2964: }
1.176 daniel 2965:
1.104 daniel 2966: /*
1.176 daniel 2967: * Otherwise, heuristic :-\
1.104 daniel 2968: */
1.179 daniel 2969: if (ctxt->keepBlanks)
2970: return(0);
2971: if (RAW != '<') return(0);
2972: if (ctxt->node == NULL) return(0);
2973: if ((ctxt->node->children == NULL) &&
2974: (RAW == '<') && (NXT(1) == '/')) return(0);
2975:
1.45 daniel 2976: lastChild = xmlGetLastChild(ctxt->node);
2977: if (lastChild == NULL) {
2978: if (ctxt->node->content != NULL) return(0);
2979: } else if (xmlNodeIsText(lastChild))
2980: return(0);
1.157 daniel 2981: else if ((ctxt->node->children != NULL) &&
2982: (xmlNodeIsText(ctxt->node->children)))
1.104 daniel 2983: return(0);
1.45 daniel 2984: return(1);
2985: }
2986:
1.50 daniel 2987: /**
2988: * xmlHandleEntity:
2989: * @ctxt: an XML parser context
2990: * @entity: an XML entity pointer.
2991: *
2992: * Default handling of defined entities, when should we define a new input
1.45 daniel 2993: * stream ? When do we just handle that as a set of chars ?
1.99 daniel 2994: *
2995: * OBSOLETE: to be removed at some point.
1.45 daniel 2996: */
2997:
1.55 daniel 2998: void
2999: xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 3000: int len;
1.50 daniel 3001: xmlParserInputPtr input;
1.45 daniel 3002:
3003: if (entity->content == NULL) {
1.123 daniel 3004: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 3005: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3006: ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
1.45 daniel 3007: entity->name);
1.59 daniel 3008: ctxt->wellFormed = 0;
1.180 daniel 3009: ctxt->disableSAX = 1;
1.45 daniel 3010: return;
3011: }
3012: len = xmlStrlen(entity->content);
3013: if (len <= 2) goto handle_as_char;
3014:
3015: /*
3016: * Redefine its content as an input stream.
3017: */
1.50 daniel 3018: input = xmlNewEntityInputStream(ctxt, entity);
3019: xmlPushInput(ctxt, input);
1.45 daniel 3020: return;
3021:
3022: handle_as_char:
3023: /*
3024: * Just handle the content as a set of chars.
3025: */
1.171 daniel 3026: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3027: (ctxt->sax->characters != NULL))
1.74 daniel 3028: ctxt->sax->characters(ctxt->userData, entity->content, len);
1.45 daniel 3029:
3030: }
3031:
/*
 * Forward declarations needed for recursive behaviour.
 */
1.77 daniel 3035: void xmlParsePEReference(xmlParserCtxtPtr ctxt);
3036: void xmlParseReference(xmlParserCtxtPtr ctxt);
1.45 daniel 3037:
1.28 daniel 3038: /************************************************************************
3039: * *
3040: * Extra stuff for namespace support *
3041: * Relates to http://www.w3.org/TR/WD-xml-names *
3042: * *
3043: ************************************************************************/
3044:
1.50 daniel 3045: /**
3046: * xmlNamespaceParseNCName:
3047: * @ctxt: an XML parser context
3048: *
3049: * parse an XML namespace name.
1.28 daniel 3050: *
3051: * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
3052: *
3053: * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3054: * CombiningChar | Extender
1.68 daniel 3055: *
3056: * Returns the namespace name or NULL
1.28 daniel 3057: */
3058:
1.123 daniel 3059: xmlChar *
1.55 daniel 3060: xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
1.152 daniel 3061: xmlChar buf[XML_MAX_NAMELEN + 5];
3062: int len = 0, l;
3063: int cur = CUR_CHAR(l);
1.28 daniel 3064:
1.156 daniel 3065: /* load first the value of the char !!! */
1.152 daniel 3066: if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
1.28 daniel 3067:
1.152 daniel 3068: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
3069: (cur == '.') || (cur == '-') ||
3070: (cur == '_') ||
3071: (IS_COMBINING(cur)) ||
3072: (IS_EXTENDER(cur))) {
3073: COPY_BUF(l,buf,len,cur);
3074: NEXTL(l);
3075: cur = CUR_CHAR(l);
1.91 daniel 3076: if (len >= XML_MAX_NAMELEN) {
3077: fprintf(stderr,
3078: "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
1.152 daniel 3079: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
3080: (cur == '.') || (cur == '-') ||
3081: (cur == '_') ||
3082: (IS_COMBINING(cur)) ||
3083: (IS_EXTENDER(cur))) {
3084: NEXTL(l);
3085: cur = CUR_CHAR(l);
3086: }
1.91 daniel 3087: break;
3088: }
3089: }
3090: return(xmlStrndup(buf, len));
1.28 daniel 3091: }
3092:
1.50 daniel 3093: /**
3094: * xmlNamespaceParseQName:
3095: * @ctxt: an XML parser context
1.123 daniel 3096: * @prefix: a xmlChar **
1.50 daniel 3097: *
3098: * parse an XML qualified name
1.28 daniel 3099: *
3100: * [NS 5] QName ::= (Prefix ':')? LocalPart
3101: *
3102: * [NS 6] Prefix ::= NCName
3103: *
3104: * [NS 7] LocalPart ::= NCName
1.68 daniel 3105: *
1.127 daniel 3106: * Returns the local part, and prefix is updated
1.50 daniel 3107: * to get the Prefix if any.
1.28 daniel 3108: */
3109:
1.123 daniel 3110: xmlChar *
3111: xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, xmlChar **prefix) {
3112: xmlChar *ret = NULL;
1.28 daniel 3113:
3114: *prefix = NULL;
3115: ret = xmlNamespaceParseNCName(ctxt);
1.152 daniel 3116: if (RAW == ':') {
1.28 daniel 3117: *prefix = ret;
1.40 daniel 3118: NEXT;
1.28 daniel 3119: ret = xmlNamespaceParseNCName(ctxt);
3120: }
3121:
3122: return(ret);
3123: }
3124:
1.50 daniel 3125: /**
1.72 daniel 3126: * xmlSplitQName:
1.162 daniel 3127: * @ctxt: an XML parser context
1.72 daniel 3128: * @name: an XML parser context
1.123 daniel 3129: * @prefix: a xmlChar **
1.72 daniel 3130: *
1.206 veillard 3131: * parse an UTF8 encoded XML qualified name string
1.72 daniel 3132: *
3133: * [NS 5] QName ::= (Prefix ':')? LocalPart
3134: *
3135: * [NS 6] Prefix ::= NCName
3136: *
3137: * [NS 7] LocalPart ::= NCName
3138: *
1.127 daniel 3139: * Returns the local part, and prefix is updated
1.72 daniel 3140: * to get the Prefix if any.
3141: */
3142:
1.123 daniel 3143: xmlChar *
1.162 daniel 3144: xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
3145: xmlChar buf[XML_MAX_NAMELEN + 5];
3146: int len = 0;
1.123 daniel 3147: xmlChar *ret = NULL;
3148: const xmlChar *cur = name;
1.206 veillard 3149: int c;
1.72 daniel 3150:
3151: *prefix = NULL;
1.113 daniel 3152:
3153: /* xml: prefix is not really a namespace */
3154: if ((cur[0] == 'x') && (cur[1] == 'm') &&
3155: (cur[2] == 'l') && (cur[3] == ':'))
3156: return(xmlStrdup(name));
3157:
1.162 daniel 3158: /* nasty but valid */
3159: if (cur[0] == ':')
3160: return(xmlStrdup(name));
3161:
1.206 veillard 3162: c = *cur++;
3163: while ((c != 0) && (c != ':')) {
3164: buf[len++] = c;
3165: c = *cur++;
1.162 daniel 3166: }
1.72 daniel 3167:
1.162 daniel 3168: ret = xmlStrndup(buf, len);
1.72 daniel 3169:
1.162 daniel 3170: if (c == ':') {
1.206 veillard 3171: c = *cur++;
3172: if (c == 0) return(ret);
1.72 daniel 3173: *prefix = ret;
1.162 daniel 3174: len = 0;
1.72 daniel 3175:
1.206 veillard 3176: while (c != 0) {
3177: buf[len++] = c;
3178: c = *cur++;
1.162 daniel 3179: }
1.72 daniel 3180:
1.162 daniel 3181: ret = xmlStrndup(buf, len);
1.72 daniel 3182: }
3183:
3184: return(ret);
3185: }
1.206 veillard 3186:
1.72 daniel 3187: /**
1.50 daniel 3188: * xmlNamespaceParseNSDef:
3189: * @ctxt: an XML parser context
3190: *
3191: * parse a namespace prefix declaration
1.28 daniel 3192: *
3193: * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
3194: *
3195: * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
1.68 daniel 3196: *
3197: * Returns the namespace name
1.28 daniel 3198: */
3199:
1.123 daniel 3200: xmlChar *
1.55 daniel 3201: xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
1.123 daniel 3202: xmlChar *name = NULL;
1.28 daniel 3203:
1.152 daniel 3204: if ((RAW == 'x') && (NXT(1) == 'm') &&
1.40 daniel 3205: (NXT(2) == 'l') && (NXT(3) == 'n') &&
3206: (NXT(4) == 's')) {
3207: SKIP(5);
1.152 daniel 3208: if (RAW == ':') {
1.40 daniel 3209: NEXT;
1.28 daniel 3210: name = xmlNamespaceParseNCName(ctxt);
3211: }
3212: }
1.39 daniel 3213: return(name);
1.28 daniel 3214: }
3215:
1.50 daniel 3216: /**
3217: * xmlParseQuotedString:
3218: * @ctxt: an XML parser context
3219: *
1.45 daniel 3220: * [OLD] Parse and return a string between quotes or doublequotes
1.110 daniel 3221: * To be removed at next drop of binary compatibility
1.68 daniel 3222: *
3223: * Returns the string parser or NULL.
1.45 daniel 3224: */
1.123 daniel 3225: xmlChar *
1.55 daniel 3226: xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
1.135 daniel 3227: xmlChar *buf = NULL;
1.152 daniel 3228: int len = 0,l;
1.140 daniel 3229: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3230: int c;
1.45 daniel 3231:
1.135 daniel 3232: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3233: if (buf == NULL) {
3234: fprintf(stderr, "malloc of %d byte failed\n", size);
3235: return(NULL);
3236: }
1.152 daniel 3237: if (RAW == '"') {
1.45 daniel 3238: NEXT;
1.152 daniel 3239: c = CUR_CHAR(l);
1.135 daniel 3240: while (IS_CHAR(c) && (c != '"')) {
1.152 daniel 3241: if (len + 5 >= size) {
1.135 daniel 3242: size *= 2;
1.204 veillard 3243: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 3244: if (buf == NULL) {
3245: fprintf(stderr, "realloc of %d byte failed\n", size);
3246: return(NULL);
3247: }
3248: }
1.152 daniel 3249: COPY_BUF(l,buf,len,c);
3250: NEXTL(l);
3251: c = CUR_CHAR(l);
1.135 daniel 3252: }
3253: if (c != '"') {
1.123 daniel 3254: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 3255: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3256: ctxt->sax->error(ctxt->userData,
3257: "String not closed \"%.50s\"\n", buf);
1.59 daniel 3258: ctxt->wellFormed = 0;
1.180 daniel 3259: ctxt->disableSAX = 1;
1.55 daniel 3260: } else {
1.45 daniel 3261: NEXT;
3262: }
1.152 daniel 3263: } else if (RAW == '\''){
1.45 daniel 3264: NEXT;
1.135 daniel 3265: c = CUR;
3266: while (IS_CHAR(c) && (c != '\'')) {
3267: if (len + 1 >= size) {
3268: size *= 2;
1.204 veillard 3269: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 3270: if (buf == NULL) {
3271: fprintf(stderr, "realloc of %d byte failed\n", size);
3272: return(NULL);
3273: }
3274: }
3275: buf[len++] = c;
3276: NEXT;
3277: c = CUR;
3278: }
1.152 daniel 3279: if (RAW != '\'') {
1.123 daniel 3280: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 3281: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3282: ctxt->sax->error(ctxt->userData,
3283: "String not closed \"%.50s\"\n", buf);
1.59 daniel 3284: ctxt->wellFormed = 0;
1.180 daniel 3285: ctxt->disableSAX = 1;
1.55 daniel 3286: } else {
1.45 daniel 3287: NEXT;
3288: }
3289: }
1.135 daniel 3290: return(buf);
1.45 daniel 3291: }
3292:
1.50 daniel 3293: /**
3294: * xmlParseNamespace:
3295: * @ctxt: an XML parser context
3296: *
1.45 daniel 3297: * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
3298: *
3299: * This is what the older xml-name Working Draft specified, a bunch of
3300: * other stuff may still rely on it, so support is still here as
1.127 daniel 3301: * if it was declared on the root of the Tree:-(
1.110 daniel 3302: *
3303: * To be removed at next drop of binary compatibility
1.45 daniel 3304: */
3305:
1.55 daniel 3306: void
3307: xmlParseNamespace(xmlParserCtxtPtr ctxt) {
1.123 daniel 3308: xmlChar *href = NULL;
3309: xmlChar *prefix = NULL;
1.45 daniel 3310: int garbage = 0;
3311:
3312: /*
3313: * We just skipped "namespace" or "xml:namespace"
3314: */
3315: SKIP_BLANKS;
3316:
1.153 daniel 3317: while (IS_CHAR(RAW) && (RAW != '>')) {
1.45 daniel 3318: /*
3319: * We can have "ns" or "prefix" attributes
3320: * Old encoding as 'href' or 'AS' attributes is still supported
3321: */
1.152 daniel 3322: if ((RAW == 'n') && (NXT(1) == 's')) {
1.45 daniel 3323: garbage = 0;
3324: SKIP(2);
3325: SKIP_BLANKS;
3326:
1.152 daniel 3327: if (RAW != '=') continue;
1.45 daniel 3328: NEXT;
3329: SKIP_BLANKS;
3330:
3331: href = xmlParseQuotedString(ctxt);
3332: SKIP_BLANKS;
1.152 daniel 3333: } else if ((RAW == 'h') && (NXT(1) == 'r') &&
1.45 daniel 3334: (NXT(2) == 'e') && (NXT(3) == 'f')) {
3335: garbage = 0;
3336: SKIP(4);
3337: SKIP_BLANKS;
3338:
1.152 daniel 3339: if (RAW != '=') continue;
1.45 daniel 3340: NEXT;
3341: SKIP_BLANKS;
3342:
3343: href = xmlParseQuotedString(ctxt);
3344: SKIP_BLANKS;
1.152 daniel 3345: } else if ((RAW == 'p') && (NXT(1) == 'r') &&
1.45 daniel 3346: (NXT(2) == 'e') && (NXT(3) == 'f') &&
3347: (NXT(4) == 'i') && (NXT(5) == 'x')) {
3348: garbage = 0;
3349: SKIP(6);
3350: SKIP_BLANKS;
3351:
1.152 daniel 3352: if (RAW != '=') continue;
1.45 daniel 3353: NEXT;
3354: SKIP_BLANKS;
3355:
3356: prefix = xmlParseQuotedString(ctxt);
3357: SKIP_BLANKS;
1.152 daniel 3358: } else if ((RAW == 'A') && (NXT(1) == 'S')) {
1.45 daniel 3359: garbage = 0;
3360: SKIP(2);
3361: SKIP_BLANKS;
3362:
1.152 daniel 3363: if (RAW != '=') continue;
1.45 daniel 3364: NEXT;
3365: SKIP_BLANKS;
3366:
3367: prefix = xmlParseQuotedString(ctxt);
3368: SKIP_BLANKS;
1.152 daniel 3369: } else if ((RAW == '?') && (NXT(1) == '>')) {
1.45 daniel 3370: garbage = 0;
1.91 daniel 3371: NEXT;
1.45 daniel 3372: } else {
3373: /*
3374: * Found garbage when parsing the namespace
3375: */
1.122 daniel 3376: if (!garbage) {
1.55 daniel 3377: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3378: ctxt->sax->error(ctxt->userData,
3379: "xmlParseNamespace found garbage\n");
3380: }
1.123 daniel 3381: ctxt->errNo = XML_ERR_NS_DECL_ERROR;
1.59 daniel 3382: ctxt->wellFormed = 0;
1.180 daniel 3383: ctxt->disableSAX = 1;
1.45 daniel 3384: NEXT;
3385: }
3386: }
3387:
3388: MOVETO_ENDTAG(CUR_PTR);
3389: NEXT;
3390:
3391: /*
3392: * Register the DTD.
1.72 daniel 3393: if (href != NULL)
3394: if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
1.74 daniel 3395: ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
1.45 daniel 3396: */
3397:
1.119 daniel 3398: if (prefix != NULL) xmlFree(prefix);
3399: if (href != NULL) xmlFree(href);
1.45 daniel 3400: }
3401:
1.28 daniel 3402: /************************************************************************
3403: * *
3404: * The parser itself *
3405: * Relates to http://www.w3.org/TR/REC-xml *
3406: * *
3407: ************************************************************************/
1.14 veillard 3408:
1.50 daniel 3409: /**
1.97 daniel 3410: * xmlScanName:
3411: * @ctxt: an XML parser context
3412: *
3413: * Trickery: parse an XML name but without consuming the input flow
3414: * Needed for rollback cases.
3415: *
3416: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3417: * CombiningChar | Extender
3418: *
3419: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3420: *
3421: * [6] Names ::= Name (S Name)*
3422: *
3423: * Returns the Name parsed or NULL
3424: */
3425:
1.123 daniel 3426: xmlChar *
1.97 daniel 3427: xmlScanName(xmlParserCtxtPtr ctxt) {
1.123 daniel 3428: xmlChar buf[XML_MAX_NAMELEN];
1.97 daniel 3429: int len = 0;
3430:
3431: GROW;
1.152 daniel 3432: if (!IS_LETTER(RAW) && (RAW != '_') &&
3433: (RAW != ':')) {
1.97 daniel 3434: return(NULL);
3435: }
3436:
3437: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
3438: (NXT(len) == '.') || (NXT(len) == '-') ||
3439: (NXT(len) == '_') || (NXT(len) == ':') ||
3440: (IS_COMBINING(NXT(len))) ||
3441: (IS_EXTENDER(NXT(len)))) {
3442: buf[len] = NXT(len);
3443: len++;
3444: if (len >= XML_MAX_NAMELEN) {
3445: fprintf(stderr,
3446: "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3447: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
3448: (NXT(len) == '.') || (NXT(len) == '-') ||
3449: (NXT(len) == '_') || (NXT(len) == ':') ||
3450: (IS_COMBINING(NXT(len))) ||
3451: (IS_EXTENDER(NXT(len))))
3452: len++;
3453: break;
3454: }
3455: }
3456: return(xmlStrndup(buf, len));
3457: }
3458:
3459: /**
1.50 daniel 3460: * xmlParseName:
3461: * @ctxt: an XML parser context
3462: *
3463: * parse an XML name.
1.22 daniel 3464: *
3465: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3466: * CombiningChar | Extender
3467: *
3468: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3469: *
3470: * [6] Names ::= Name (S Name)*
1.68 daniel 3471: *
3472: * Returns the Name parsed or NULL
1.1 veillard 3473: */
3474:
1.123 daniel 3475: xmlChar *
1.55 daniel 3476: xmlParseName(xmlParserCtxtPtr ctxt) {
1.160 daniel 3477: xmlChar buf[XML_MAX_NAMELEN + 5];
3478: int len = 0, l;
3479: int c;
1.1 veillard 3480:
1.91 daniel 3481: GROW;
1.160 daniel 3482: c = CUR_CHAR(l);
1.190 daniel 3483: if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3484: (!IS_LETTER(c) && (c != '_') &&
3485: (c != ':'))) {
1.91 daniel 3486: return(NULL);
3487: }
1.40 daniel 3488:
1.190 daniel 3489: while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3490: ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3491: (c == '.') || (c == '-') ||
3492: (c == '_') || (c == ':') ||
3493: (IS_COMBINING(c)) ||
3494: (IS_EXTENDER(c)))) {
1.160 daniel 3495: COPY_BUF(l,buf,len,c);
3496: NEXTL(l);
3497: c = CUR_CHAR(l);
1.91 daniel 3498: if (len >= XML_MAX_NAMELEN) {
3499: fprintf(stderr,
3500: "xmlParseName: reached XML_MAX_NAMELEN limit\n");
1.160 daniel 3501: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3502: (c == '.') || (c == '-') ||
3503: (c == '_') || (c == ':') ||
3504: (IS_COMBINING(c)) ||
3505: (IS_EXTENDER(c))) {
3506: NEXTL(l);
3507: c = CUR_CHAR(l);
1.97 daniel 3508: }
1.91 daniel 3509: break;
3510: }
3511: }
3512: return(xmlStrndup(buf, len));
1.22 daniel 3513: }
3514:
1.50 daniel 3515: /**
1.135 daniel 3516: * xmlParseStringName:
3517: * @ctxt: an XML parser context
3518: * @str: a pointer to an index in the string
3519: *
3520: * parse an XML name.
3521: *
3522: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3523: * CombiningChar | Extender
3524: *
3525: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3526: *
3527: * [6] Names ::= Name (S Name)*
3528: *
3529: * Returns the Name parsed or NULL. The str pointer
3530: * is updated to the current location in the string.
3531: */
3532:
3533: xmlChar *
3534: xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1.176 daniel 3535: xmlChar buf[XML_MAX_NAMELEN + 5];
3536: const xmlChar *cur = *str;
3537: int len = 0, l;
3538: int c;
1.135 daniel 3539:
1.176 daniel 3540: c = CUR_SCHAR(cur, l);
3541: if (!IS_LETTER(c) && (c != '_') &&
3542: (c != ':')) {
1.135 daniel 3543: return(NULL);
3544: }
3545:
1.176 daniel 3546: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3547: (c == '.') || (c == '-') ||
3548: (c == '_') || (c == ':') ||
3549: (IS_COMBINING(c)) ||
3550: (IS_EXTENDER(c))) {
3551: COPY_BUF(l,buf,len,c);
3552: cur += l;
3553: c = CUR_SCHAR(cur, l);
3554: if (len >= XML_MAX_NAMELEN) {
3555: fprintf(stderr,
3556: "xmlParseName: reached XML_MAX_NAMELEN limit\n");
3557: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3558: (c == '.') || (c == '-') ||
3559: (c == '_') || (c == ':') ||
3560: (IS_COMBINING(c)) ||
3561: (IS_EXTENDER(c))) {
3562: cur += l;
3563: c = CUR_SCHAR(cur, l);
3564: }
3565: break;
3566: }
1.135 daniel 3567: }
1.176 daniel 3568: *str = cur;
3569: return(xmlStrndup(buf, len));
1.135 daniel 3570: }
3571:
3572: /**
1.50 daniel 3573: * xmlParseNmtoken:
3574: * @ctxt: an XML parser context
3575: *
3576: * parse an XML Nmtoken.
1.22 daniel 3577: *
3578: * [7] Nmtoken ::= (NameChar)+
3579: *
3580: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1.68 daniel 3581: *
3582: * Returns the Nmtoken parsed or NULL
1.22 daniel 3583: */
3584:
1.123 daniel 3585: xmlChar *
1.55 daniel 3586: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1.123 daniel 3587: xmlChar buf[XML_MAX_NAMELEN];
1.91 daniel 3588: int len = 0;
1.160 daniel 3589: int c,l;
1.22 daniel 3590:
1.91 daniel 3591: GROW;
1.160 daniel 3592: c = CUR_CHAR(l);
3593: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3594: (c == '.') || (c == '-') ||
3595: (c == '_') || (c == ':') ||
3596: (IS_COMBINING(c)) ||
3597: (IS_EXTENDER(c))) {
3598: COPY_BUF(l,buf,len,c);
3599: NEXTL(l);
3600: c = CUR_CHAR(l);
1.91 daniel 3601: if (len >= XML_MAX_NAMELEN) {
3602: fprintf(stderr,
3603: "xmlParseNmtoken: reached XML_MAX_NAMELEN limit\n");
1.160 daniel 3604: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3605: (c == '.') || (c == '-') ||
3606: (c == '_') || (c == ':') ||
3607: (IS_COMBINING(c)) ||
3608: (IS_EXTENDER(c))) {
3609: NEXTL(l);
3610: c = CUR_CHAR(l);
3611: }
1.91 daniel 3612: break;
3613: }
3614: }
1.168 daniel 3615: if (len == 0)
3616: return(NULL);
1.91 daniel 3617: return(xmlStrndup(buf, len));
1.1 veillard 3618: }
3619:
1.50 daniel 3620: /**
3621: * xmlParseEntityValue:
3622: * @ctxt: an XML parser context
1.78 daniel 3623: * @orig: if non-NULL store a copy of the original entity value
1.50 daniel 3624: *
3625: * parse a value for ENTITY decl.
1.24 daniel 3626: *
3627: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3628: * "'" ([^%&'] | PEReference | Reference)* "'"
1.68 daniel 3629: *
1.78 daniel 3630: * Returns the EntityValue parsed with reference substitued or NULL
1.24 daniel 3631: */
3632:
1.123 daniel 3633: xmlChar *
3634: xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1.135 daniel 3635: xmlChar *buf = NULL;
3636: int len = 0;
1.140 daniel 3637: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3638: int c, l;
1.135 daniel 3639: xmlChar stop;
1.123 daniel 3640: xmlChar *ret = NULL;
1.176 daniel 3641: const xmlChar *cur = NULL;
1.98 daniel 3642: xmlParserInputPtr input;
1.24 daniel 3643:
1.152 daniel 3644: if (RAW == '"') stop = '"';
3645: else if (RAW == '\'') stop = '\'';
1.135 daniel 3646: else {
3647: ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
3648: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3649: ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
3650: ctxt->wellFormed = 0;
1.180 daniel 3651: ctxt->disableSAX = 1;
1.135 daniel 3652: return(NULL);
3653: }
3654: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3655: if (buf == NULL) {
3656: fprintf(stderr, "malloc of %d byte failed\n", size);
3657: return(NULL);
3658: }
1.94 daniel 3659:
1.135 daniel 3660: /*
3661: * The content of the entity definition is copied in a buffer.
3662: */
1.94 daniel 3663:
1.135 daniel 3664: ctxt->instate = XML_PARSER_ENTITY_VALUE;
3665: input = ctxt->input;
3666: GROW;
3667: NEXT;
1.152 daniel 3668: c = CUR_CHAR(l);
1.135 daniel 3669: /*
3670: * NOTE: 4.4.5 Included in Literal
3671: * When a parameter entity reference appears in a literal entity
3672: * value, ... a single or double quote character in the replacement
3673: * text is always treated as a normal data character and will not
3674: * terminate the literal.
3675: * In practice it means we stop the loop only when back at parsing
3676: * the initial entity and the quote is found
3677: */
3678: while (IS_CHAR(c) && ((c != stop) || (ctxt->input != input))) {
1.152 daniel 3679: if (len + 5 >= size) {
1.135 daniel 3680: size *= 2;
1.204 veillard 3681: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 3682: if (buf == NULL) {
3683: fprintf(stderr, "realloc of %d byte failed\n", size);
3684: return(NULL);
1.94 daniel 3685: }
1.79 daniel 3686: }
1.152 daniel 3687: COPY_BUF(l,buf,len,c);
3688: NEXTL(l);
1.98 daniel 3689: /*
1.135 daniel 3690: * Pop-up of finished entities.
1.98 daniel 3691: */
1.152 daniel 3692: while ((RAW == 0) && (ctxt->inputNr > 1))
1.135 daniel 3693: xmlPopInput(ctxt);
1.152 daniel 3694:
3695: c = CUR_CHAR(l);
1.135 daniel 3696: if (c == 0) {
1.94 daniel 3697: GROW;
1.152 daniel 3698: c = CUR_CHAR(l);
1.79 daniel 3699: }
1.135 daniel 3700: }
3701: buf[len] = 0;
3702:
3703: /*
1.176 daniel 3704: * Raise problem w.r.t. '&' and '%' being used in non-entities
3705: * reference constructs. Note Charref will be handled in
3706: * xmlStringDecodeEntities()
3707: */
3708: cur = buf;
3709: while (*cur != 0) {
3710: if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3711: xmlChar *name;
3712: xmlChar tmp = *cur;
3713:
3714: cur++;
3715: name = xmlParseStringName(ctxt, &cur);
3716: if ((name == NULL) || (*cur != ';')) {
3717: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3718: ctxt->sax->error(ctxt->userData,
3719: "EntityValue: '%c' forbidden except for entities references\n",
3720: tmp);
3721: ctxt->wellFormed = 0;
1.180 daniel 3722: ctxt->disableSAX = 1;
1.176 daniel 3723: ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
3724: }
3725: if ((ctxt->inSubset == 1) && (tmp == '%')) {
3726: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3727: ctxt->sax->error(ctxt->userData,
3728: "EntityValue: PEReferences forbidden in internal subset\n",
3729: tmp);
3730: ctxt->wellFormed = 0;
1.180 daniel 3731: ctxt->disableSAX = 1;
1.176 daniel 3732: ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
3733: }
3734: if (name != NULL)
3735: xmlFree(name);
3736: }
3737: cur++;
3738: }
3739:
3740: /*
1.135 daniel 3741: * Then PEReference entities are substituted.
3742: */
3743: if (c != stop) {
3744: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.55 daniel 3745: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3746: ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
1.59 daniel 3747: ctxt->wellFormed = 0;
1.180 daniel 3748: ctxt->disableSAX = 1;
1.170 daniel 3749: xmlFree(buf);
1.135 daniel 3750: } else {
3751: NEXT;
3752: /*
3753: * NOTE: 4.4.7 Bypassed
3754: * When a general entity reference appears in the EntityValue in
3755: * an entity declaration, it is bypassed and left as is.
1.176 daniel 3756: * so XML_SUBSTITUTE_REF is not set here.
1.135 daniel 3757: */
3758: ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3759: 0, 0, 0);
3760: if (orig != NULL)
3761: *orig = buf;
3762: else
3763: xmlFree(buf);
1.24 daniel 3764: }
3765:
3766: return(ret);
3767: }
3768:
1.50 daniel 3769: /**
3770: * xmlParseAttValue:
3771: * @ctxt: an XML parser context
3772: *
3773: * parse a value for an attribute
1.78 daniel 3774: * Note: the parser won't do substitution of entities here, this
1.113 daniel 3775: * will be handled later in xmlStringGetNodeList
1.29 daniel 3776: *
3777: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3778: * "'" ([^<&'] | Reference)* "'"
1.68 daniel 3779: *
1.129 daniel 3780: * 3.3.3 Attribute-Value Normalization:
3781: * Before the value of an attribute is passed to the application or
3782: * checked for validity, the XML processor must normalize it as follows:
3783: * - a character reference is processed by appending the referenced
3784: * character to the attribute value
3785: * - an entity reference is processed by recursively processing the
3786: * replacement text of the entity
3787: * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3788: * appending #x20 to the normalized value, except that only a single
3789: * #x20 is appended for a "#xD#xA" sequence that is part of an external
3790: * parsed entity or the literal entity value of an internal parsed entity
3791: * - other characters are processed by appending them to the normalized value
1.130 daniel 3792: * If the declared value is not CDATA, then the XML processor must further
3793: * process the normalized attribute value by discarding any leading and
3794: * trailing space (#x20) characters, and by replacing sequences of space
3795: * (#x20) characters by a single space (#x20) character.
3796: * All attributes for which no declaration has been read should be treated
3797: * by a non-validating parser as if declared CDATA.
1.129 daniel 3798: *
3799: * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
1.29 daniel 3800: */
3801:
1.123 daniel 3802: xmlChar *
1.55 daniel 3803: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.129 daniel 3804: xmlChar limit = 0;
1.198 daniel 3805: xmlChar *buf = NULL;
3806: int len = 0;
3807: int buf_size = 0;
3808: int c, l;
1.129 daniel 3809: xmlChar *current = NULL;
3810: xmlEntityPtr ent;
3811:
1.29 daniel 3812:
1.91 daniel 3813: SHRINK;
1.151 daniel 3814: if (NXT(0) == '"') {
1.96 daniel 3815: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.129 daniel 3816: limit = '"';
1.40 daniel 3817: NEXT;
1.151 daniel 3818: } else if (NXT(0) == '\'') {
1.129 daniel 3819: limit = '\'';
1.96 daniel 3820: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.40 daniel 3821: NEXT;
1.29 daniel 3822: } else {
1.123 daniel 3823: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
1.55 daniel 3824: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3825: ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
1.59 daniel 3826: ctxt->wellFormed = 0;
1.180 daniel 3827: ctxt->disableSAX = 1;
1.129 daniel 3828: return(NULL);
1.29 daniel 3829: }
3830:
1.129 daniel 3831: /*
3832: * allocate a translation buffer.
3833: */
1.198 daniel 3834: buf_size = XML_PARSER_BUFFER_SIZE;
3835: buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
3836: if (buf == NULL) {
1.129 daniel 3837: perror("xmlParseAttValue: malloc failed");
3838: return(NULL);
3839: }
3840:
3841: /*
3842: * Ok loop until we reach one of the ending char or a size limit.
3843: */
1.198 daniel 3844: c = CUR_CHAR(l);
3845: while (((NXT(0) != limit) && (c != '<')) || (ctxt->token != 0)) {
3846: if (c == 0) break;
1.205 veillard 3847: if (ctxt->token == '&') {
3848: static xmlChar buffer[6] = "&";
3849:
3850: if (len > buf_size - 10) {
3851: growBuffer(buf);
3852: }
3853: current = &buffer[0];
3854: while (*current != 0) {
3855: buf[len++] = *current++;
3856: }
3857: ctxt->token = 0;
3858: } else if ((c == '&') && (NXT(1) == '#')) {
1.129 daniel 3859: int val = xmlParseCharRef(ctxt);
1.198 daniel 3860: COPY_BUF(l,buf,len,val);
3861: NEXTL(l);
3862: } else if (c == '&') {
1.129 daniel 3863: ent = xmlParseEntityRef(ctxt);
3864: if ((ent != NULL) &&
3865: (ctxt->replaceEntities != 0)) {
1.185 daniel 3866: xmlChar *rep;
3867:
1.186 daniel 3868: if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3869: rep = xmlStringDecodeEntities(ctxt, ent->content,
1.185 daniel 3870: XML_SUBSTITUTE_REF, 0, 0, 0);
1.186 daniel 3871: if (rep != NULL) {
3872: current = rep;
3873: while (*current != 0) {
1.198 daniel 3874: buf[len++] = *current++;
3875: if (len > buf_size - 10) {
3876: growBuffer(buf);
1.186 daniel 3877: }
1.185 daniel 3878: }
1.186 daniel 3879: xmlFree(rep);
1.129 daniel 3880: }
1.186 daniel 3881: } else {
3882: if (ent->content != NULL)
1.198 daniel 3883: buf[len++] = ent->content[0];
1.129 daniel 3884: }
3885: } else if (ent != NULL) {
3886: int i = xmlStrlen(ent->name);
3887: const xmlChar *cur = ent->name;
3888:
1.186 daniel 3889: /*
3890: * This may look absurd but is needed to detect
3891: * entities problems
3892: */
1.211 ! veillard 3893: if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
! 3894: (ent->content != NULL)) {
1.186 daniel 3895: xmlChar *rep;
3896: rep = xmlStringDecodeEntities(ctxt, ent->content,
3897: XML_SUBSTITUTE_REF, 0, 0, 0);
3898: if (rep != NULL)
3899: xmlFree(rep);
3900: }
3901:
3902: /*
3903: * Just output the reference
3904: */
1.198 daniel 3905: buf[len++] = '&';
3906: if (len > buf_size - i - 10) {
3907: growBuffer(buf);
1.129 daniel 3908: }
3909: for (;i > 0;i--)
1.198 daniel 3910: buf[len++] = *cur++;
3911: buf[len++] = ';';
1.129 daniel 3912: }
3913: } else {
1.198 daniel 3914: if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
3915: COPY_BUF(l,buf,len,0x20);
3916: if (len > buf_size - 10) {
3917: growBuffer(buf);
1.129 daniel 3918: }
3919: } else {
1.198 daniel 3920: COPY_BUF(l,buf,len,c);
3921: if (len > buf_size - 10) {
3922: growBuffer(buf);
1.129 daniel 3923: }
3924: }
1.198 daniel 3925: NEXTL(l);
1.129 daniel 3926: }
1.198 daniel 3927: GROW;
3928: c = CUR_CHAR(l);
1.129 daniel 3929: }
1.198 daniel 3930: buf[len++] = 0;
1.152 daniel 3931: if (RAW == '<') {
1.129 daniel 3932: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3933: ctxt->sax->error(ctxt->userData,
3934: "Unescaped '<' not allowed in attributes values\n");
3935: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
3936: ctxt->wellFormed = 0;
1.180 daniel 3937: ctxt->disableSAX = 1;
1.152 daniel 3938: } else if (RAW != limit) {
1.129 daniel 3939: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3940: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
3941: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
3942: ctxt->wellFormed = 0;
1.180 daniel 3943: ctxt->disableSAX = 1;
1.129 daniel 3944: } else
3945: NEXT;
1.198 daniel 3946: return(buf);
1.29 daniel 3947: }
3948:
1.50 daniel 3949: /**
3950: * xmlParseSystemLiteral:
3951: * @ctxt: an XML parser context
3952: *
3953: * parse an XML Literal
1.21 daniel 3954: *
1.22 daniel 3955: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.68 daniel 3956: *
3957: * Returns the SystemLiteral parsed or NULL
1.21 daniel 3958: */
3959:
1.123 daniel 3960: xmlChar *
1.55 daniel 3961: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 3962: xmlChar *buf = NULL;
3963: int len = 0;
1.140 daniel 3964: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3965: int cur, l;
1.135 daniel 3966: xmlChar stop;
1.168 daniel 3967: int state = ctxt->instate;
1.21 daniel 3968:
1.91 daniel 3969: SHRINK;
1.152 daniel 3970: if (RAW == '"') {
1.40 daniel 3971: NEXT;
1.135 daniel 3972: stop = '"';
1.152 daniel 3973: } else if (RAW == '\'') {
1.40 daniel 3974: NEXT;
1.135 daniel 3975: stop = '\'';
1.21 daniel 3976: } else {
1.55 daniel 3977: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3978: ctxt->sax->error(ctxt->userData,
3979: "SystemLiteral \" or ' expected\n");
1.123 daniel 3980: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.59 daniel 3981: ctxt->wellFormed = 0;
1.180 daniel 3982: ctxt->disableSAX = 1;
1.135 daniel 3983: return(NULL);
1.21 daniel 3984: }
3985:
1.135 daniel 3986: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3987: if (buf == NULL) {
3988: fprintf(stderr, "malloc of %d byte failed\n", size);
3989: return(NULL);
3990: }
1.168 daniel 3991: ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
1.152 daniel 3992: cur = CUR_CHAR(l);
1.135 daniel 3993: while ((IS_CHAR(cur)) && (cur != stop)) {
1.152 daniel 3994: if (len + 5 >= size) {
1.135 daniel 3995: size *= 2;
1.204 veillard 3996: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 3997: if (buf == NULL) {
3998: fprintf(stderr, "realloc of %d byte failed\n", size);
1.204 veillard 3999: ctxt->instate = (xmlParserInputState) state;
1.135 daniel 4000: return(NULL);
4001: }
4002: }
1.152 daniel 4003: COPY_BUF(l,buf,len,cur);
4004: NEXTL(l);
4005: cur = CUR_CHAR(l);
1.135 daniel 4006: if (cur == 0) {
4007: GROW;
4008: SHRINK;
1.152 daniel 4009: cur = CUR_CHAR(l);
1.135 daniel 4010: }
4011: }
4012: buf[len] = 0;
1.204 veillard 4013: ctxt->instate = (xmlParserInputState) state;
1.135 daniel 4014: if (!IS_CHAR(cur)) {
4015: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4016: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
4017: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
4018: ctxt->wellFormed = 0;
1.180 daniel 4019: ctxt->disableSAX = 1;
1.135 daniel 4020: } else {
4021: NEXT;
4022: }
4023: return(buf);
1.21 daniel 4024: }
4025:
1.50 daniel 4026: /**
4027: * xmlParsePubidLiteral:
4028: * @ctxt: an XML parser context
1.21 daniel 4029: *
1.50 daniel 4030: * parse an XML public literal
1.68 daniel 4031: *
4032: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4033: *
4034: * Returns the PubidLiteral parsed or NULL.
1.21 daniel 4035: */
4036:
1.123 daniel 4037: xmlChar *
1.55 daniel 4038: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 4039: xmlChar *buf = NULL;
4040: int len = 0;
1.140 daniel 4041: int size = XML_PARSER_BUFFER_SIZE;
1.135 daniel 4042: xmlChar cur;
4043: xmlChar stop;
1.125 daniel 4044:
1.91 daniel 4045: SHRINK;
1.152 daniel 4046: if (RAW == '"') {
1.40 daniel 4047: NEXT;
1.135 daniel 4048: stop = '"';
1.152 daniel 4049: } else if (RAW == '\'') {
1.40 daniel 4050: NEXT;
1.135 daniel 4051: stop = '\'';
1.21 daniel 4052: } else {
1.55 daniel 4053: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4054: ctxt->sax->error(ctxt->userData,
4055: "SystemLiteral \" or ' expected\n");
1.123 daniel 4056: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.59 daniel 4057: ctxt->wellFormed = 0;
1.180 daniel 4058: ctxt->disableSAX = 1;
1.135 daniel 4059: return(NULL);
4060: }
4061: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4062: if (buf == NULL) {
4063: fprintf(stderr, "malloc of %d byte failed\n", size);
4064: return(NULL);
4065: }
4066: cur = CUR;
4067: while ((IS_PUBIDCHAR(cur)) && (cur != stop)) {
4068: if (len + 1 >= size) {
4069: size *= 2;
1.204 veillard 4070: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 4071: if (buf == NULL) {
4072: fprintf(stderr, "realloc of %d byte failed\n", size);
4073: return(NULL);
4074: }
4075: }
4076: buf[len++] = cur;
4077: NEXT;
4078: cur = CUR;
4079: if (cur == 0) {
4080: GROW;
4081: SHRINK;
4082: cur = CUR;
4083: }
4084: }
4085: buf[len] = 0;
4086: if (cur != stop) {
4087: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4088: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
4089: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
4090: ctxt->wellFormed = 0;
1.180 daniel 4091: ctxt->disableSAX = 1;
1.135 daniel 4092: } else {
4093: NEXT;
1.21 daniel 4094: }
1.135 daniel 4095: return(buf);
1.21 daniel 4096: }
4097:
1.50 daniel 4098: /**
4099: * xmlParseCharData:
4100: * @ctxt: an XML parser context
4101: * @cdata: int indicating whether we are within a CDATA section
4102: *
4103: * parse a CharData section.
4104: * if we are within a CDATA section ']]>' marks an end of section.
1.27 daniel 4105: *
1.151 daniel 4106: * The right angle bracket (>) may be represented using the string ">",
4107: * and must, for compatibility, be escaped using ">" or a character
4108: * reference when it appears in the string "]]>" in content, when that
4109: * string is not marking the end of a CDATA section.
4110: *
1.27 daniel 4111: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4112: */
4113:
1.55 daniel 4114: void
4115: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1.152 daniel 4116: xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
1.91 daniel 4117: int nbchar = 0;
1.152 daniel 4118: int cur, l;
1.27 daniel 4119:
1.91 daniel 4120: SHRINK;
1.152 daniel 4121: cur = CUR_CHAR(l);
1.190 daniel 4122: while (((cur != '<') || (ctxt->token == '<')) &&
4123: ((cur != '&') || (ctxt->token == '&')) &&
4124: (IS_CHAR(cur))) {
1.97 daniel 4125: if ((cur == ']') && (NXT(1) == ']') &&
1.59 daniel 4126: (NXT(2) == '>')) {
4127: if (cdata) break;
4128: else {
4129: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.151 daniel 4130: ctxt->sax->error(ctxt->userData,
1.59 daniel 4131: "Sequence ']]>' not allowed in content\n");
1.123 daniel 4132: ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
1.151 daniel 4133: /* Should this be relaxed ??? I see a "must here */
4134: ctxt->wellFormed = 0;
1.180 daniel 4135: ctxt->disableSAX = 1;
1.59 daniel 4136: }
4137: }
1.152 daniel 4138: COPY_BUF(l,buf,nbchar,cur);
4139: if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
1.91 daniel 4140: /*
4141: * Ok the segment is to be consumed as chars.
4142: */
1.171 daniel 4143: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 4144: if (areBlanks(ctxt, buf, nbchar)) {
4145: if (ctxt->sax->ignorableWhitespace != NULL)
4146: ctxt->sax->ignorableWhitespace(ctxt->userData,
4147: buf, nbchar);
4148: } else {
4149: if (ctxt->sax->characters != NULL)
4150: ctxt->sax->characters(ctxt->userData, buf, nbchar);
4151: }
4152: }
4153: nbchar = 0;
4154: }
1.152 daniel 4155: NEXTL(l);
4156: cur = CUR_CHAR(l);
1.27 daniel 4157: }
1.91 daniel 4158: if (nbchar != 0) {
4159: /*
4160: * Ok the segment is to be consumed as chars.
4161: */
1.171 daniel 4162: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 4163: if (areBlanks(ctxt, buf, nbchar)) {
4164: if (ctxt->sax->ignorableWhitespace != NULL)
4165: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4166: } else {
4167: if (ctxt->sax->characters != NULL)
4168: ctxt->sax->characters(ctxt->userData, buf, nbchar);
4169: }
4170: }
1.45 daniel 4171: }
1.27 daniel 4172: }
4173:
1.50 daniel 4174: /**
4175: * xmlParseExternalID:
4176: * @ctxt: an XML parser context
1.123 daniel 4177: * @publicID: a xmlChar** receiving PubidLiteral
1.67 daniel 4178: * @strict: indicate whether we should restrict parsing to only
4179: * production [75], see NOTE below
1.50 daniel 4180: *
1.67 daniel 4181: * Parse an External ID or a Public ID
4182: *
4183: * NOTE: Productions [75] and [83] interract badly since [75] can generate
4184: * 'PUBLIC' S PubidLiteral S SystemLiteral
1.22 daniel 4185: *
4186: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4187: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1.67 daniel 4188: *
4189: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4190: *
1.68 daniel 4191: * Returns the function returns SystemLiteral and in the second
1.67 daniel 4192: * case publicID receives PubidLiteral, is strict is off
4193: * it is possible to return NULL and have publicID set.
1.22 daniel 4194: */
4195:
1.123 daniel 4196: xmlChar *
4197: xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4198: xmlChar *URI = NULL;
1.22 daniel 4199:
1.91 daniel 4200: SHRINK;
1.152 daniel 4201: if ((RAW == 'S') && (NXT(1) == 'Y') &&
1.40 daniel 4202: (NXT(2) == 'S') && (NXT(3) == 'T') &&
4203: (NXT(4) == 'E') && (NXT(5) == 'M')) {
4204: SKIP(6);
1.59 daniel 4205: if (!IS_BLANK(CUR)) {
4206: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4207: ctxt->sax->error(ctxt->userData,
1.59 daniel 4208: "Space required after 'SYSTEM'\n");
1.123 daniel 4209: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4210: ctxt->wellFormed = 0;
1.180 daniel 4211: ctxt->disableSAX = 1;
1.59 daniel 4212: }
1.42 daniel 4213: SKIP_BLANKS;
1.39 daniel 4214: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 4215: if (URI == NULL) {
1.55 daniel 4216: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4217: ctxt->sax->error(ctxt->userData,
1.39 daniel 4218: "xmlParseExternalID: SYSTEM, no URI\n");
1.123 daniel 4219: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.59 daniel 4220: ctxt->wellFormed = 0;
1.180 daniel 4221: ctxt->disableSAX = 1;
1.59 daniel 4222: }
1.152 daniel 4223: } else if ((RAW == 'P') && (NXT(1) == 'U') &&
1.40 daniel 4224: (NXT(2) == 'B') && (NXT(3) == 'L') &&
4225: (NXT(4) == 'I') && (NXT(5) == 'C')) {
4226: SKIP(6);
1.59 daniel 4227: if (!IS_BLANK(CUR)) {
4228: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4229: ctxt->sax->error(ctxt->userData,
1.59 daniel 4230: "Space required after 'PUBLIC'\n");
1.123 daniel 4231: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4232: ctxt->wellFormed = 0;
1.180 daniel 4233: ctxt->disableSAX = 1;
1.59 daniel 4234: }
1.42 daniel 4235: SKIP_BLANKS;
1.39 daniel 4236: *publicID = xmlParsePubidLiteral(ctxt);
1.59 daniel 4237: if (*publicID == NULL) {
1.55 daniel 4238: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4239: ctxt->sax->error(ctxt->userData,
1.39 daniel 4240: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.123 daniel 4241: ctxt->errNo = XML_ERR_PUBID_REQUIRED;
1.59 daniel 4242: ctxt->wellFormed = 0;
1.180 daniel 4243: ctxt->disableSAX = 1;
1.59 daniel 4244: }
1.67 daniel 4245: if (strict) {
4246: /*
4247: * We don't handle [83] so "S SystemLiteral" is required.
4248: */
4249: if (!IS_BLANK(CUR)) {
4250: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4251: ctxt->sax->error(ctxt->userData,
1.67 daniel 4252: "Space required after the Public Identifier\n");
1.123 daniel 4253: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 4254: ctxt->wellFormed = 0;
1.180 daniel 4255: ctxt->disableSAX = 1;
1.67 daniel 4256: }
4257: } else {
4258: /*
4259: * We handle [83] so we return immediately, if
4260: * "S SystemLiteral" is not detected. From a purely parsing
4261: * point of view that's a nice mess.
4262: */
1.135 daniel 4263: const xmlChar *ptr;
4264: GROW;
4265:
4266: ptr = CUR_PTR;
1.67 daniel 4267: if (!IS_BLANK(*ptr)) return(NULL);
4268:
4269: while (IS_BLANK(*ptr)) ptr++;
1.173 daniel 4270: if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
1.59 daniel 4271: }
1.42 daniel 4272: SKIP_BLANKS;
1.39 daniel 4273: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 4274: if (URI == NULL) {
1.55 daniel 4275: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4276: ctxt->sax->error(ctxt->userData,
1.39 daniel 4277: "xmlParseExternalID: PUBLIC, no URI\n");
1.123 daniel 4278: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.59 daniel 4279: ctxt->wellFormed = 0;
1.180 daniel 4280: ctxt->disableSAX = 1;
1.59 daniel 4281: }
1.22 daniel 4282: }
1.39 daniel 4283: return(URI);
1.22 daniel 4284: }
4285:
1.50 daniel 4286: /**
4287: * xmlParseComment:
1.69 daniel 4288: * @ctxt: an XML parser context
1.50 daniel 4289: *
1.3 veillard 4290: * Skip an XML (SGML) comment <!-- .... -->
1.38 daniel 4291: * The spec says that "For compatibility, the string "--" (double-hyphen)
4292: * must not occur within comments. "
1.22 daniel 4293: *
4294: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 4295: */
1.72 daniel 4296: void
1.114 daniel 4297: xmlParseComment(xmlParserCtxtPtr ctxt) {
1.135 daniel 4298: xmlChar *buf = NULL;
1.195 daniel 4299: int len;
1.140 daniel 4300: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 4301: int q, ql;
4302: int r, rl;
4303: int cur, l;
1.140 daniel 4304: xmlParserInputState state;
1.187 daniel 4305: xmlParserInputPtr input = ctxt->input;
1.3 veillard 4306:
4307: /*
1.22 daniel 4308: * Check that there is a comment right here.
1.3 veillard 4309: */
1.152 daniel 4310: if ((RAW != '<') || (NXT(1) != '!') ||
1.72 daniel 4311: (NXT(2) != '-') || (NXT(3) != '-')) return;
1.3 veillard 4312:
1.140 daniel 4313: state = ctxt->instate;
1.97 daniel 4314: ctxt->instate = XML_PARSER_COMMENT;
1.91 daniel 4315: SHRINK;
1.40 daniel 4316: SKIP(4);
1.135 daniel 4317: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4318: if (buf == NULL) {
4319: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 4320: ctxt->instate = state;
1.135 daniel 4321: return;
4322: }
1.152 daniel 4323: q = CUR_CHAR(ql);
4324: NEXTL(ql);
4325: r = CUR_CHAR(rl);
4326: NEXTL(rl);
4327: cur = CUR_CHAR(l);
1.195 daniel 4328: len = 0;
1.135 daniel 4329: while (IS_CHAR(cur) &&
4330: ((cur != '>') ||
4331: (r != '-') || (q != '-'))) {
1.195 daniel 4332: if ((r == '-') && (q == '-') && (len > 1)) {
1.55 daniel 4333: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4334: ctxt->sax->error(ctxt->userData,
1.38 daniel 4335: "Comment must not contain '--' (double-hyphen)`\n");
1.123 daniel 4336: ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
1.59 daniel 4337: ctxt->wellFormed = 0;
1.180 daniel 4338: ctxt->disableSAX = 1;
1.59 daniel 4339: }
1.152 daniel 4340: if (len + 5 >= size) {
1.135 daniel 4341: size *= 2;
1.204 veillard 4342: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 4343: if (buf == NULL) {
4344: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 4345: ctxt->instate = state;
1.135 daniel 4346: return;
4347: }
4348: }
1.152 daniel 4349: COPY_BUF(ql,buf,len,q);
1.135 daniel 4350: q = r;
1.152 daniel 4351: ql = rl;
1.135 daniel 4352: r = cur;
1.152 daniel 4353: rl = l;
4354: NEXTL(l);
4355: cur = CUR_CHAR(l);
1.135 daniel 4356: if (cur == 0) {
4357: SHRINK;
4358: GROW;
1.152 daniel 4359: cur = CUR_CHAR(l);
1.135 daniel 4360: }
1.3 veillard 4361: }
1.135 daniel 4362: buf[len] = 0;
4363: if (!IS_CHAR(cur)) {
1.55 daniel 4364: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4365: ctxt->sax->error(ctxt->userData,
1.135 daniel 4366: "Comment not terminated \n<!--%.50s\n", buf);
1.123 daniel 4367: ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
1.59 daniel 4368: ctxt->wellFormed = 0;
1.180 daniel 4369: ctxt->disableSAX = 1;
1.178 daniel 4370: xmlFree(buf);
1.3 veillard 4371: } else {
1.187 daniel 4372: if (input != ctxt->input) {
4373: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4374: ctxt->sax->error(ctxt->userData,
4375: "Comment doesn't start and stop in the same entity\n");
4376: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4377: ctxt->wellFormed = 0;
4378: ctxt->disableSAX = 1;
4379: }
1.40 daniel 4380: NEXT;
1.171 daniel 4381: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4382: (!ctxt->disableSAX))
1.135 daniel 4383: ctxt->sax->comment(ctxt->userData, buf);
4384: xmlFree(buf);
1.3 veillard 4385: }
1.140 daniel 4386: ctxt->instate = state;
1.3 veillard 4387: }
4388:
1.50 daniel 4389: /**
4390: * xmlParsePITarget:
4391: * @ctxt: an XML parser context
4392: *
4393: * parse the name of a PI
1.22 daniel 4394: *
4395: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1.68 daniel 4396: *
4397: * Returns the PITarget name or NULL
1.22 daniel 4398: */
4399:
1.123 daniel 4400: xmlChar *
1.55 daniel 4401: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1.123 daniel 4402: xmlChar *name;
1.22 daniel 4403:
4404: name = xmlParseName(ctxt);
1.139 daniel 4405: if ((name != NULL) &&
1.22 daniel 4406: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 4407: ((name[1] == 'm') || (name[1] == 'M')) &&
4408: ((name[2] == 'l') || (name[2] == 'L'))) {
1.139 daniel 4409: int i;
1.177 daniel 4410: if ((name[0] == 'x') && (name[1] == 'm') &&
4411: (name[2] == 'l') && (name[3] == 0)) {
1.151 daniel 4412: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4413: ctxt->sax->error(ctxt->userData,
4414: "XML declaration allowed only at the start of the document\n");
4415: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
4416: ctxt->wellFormed = 0;
1.180 daniel 4417: ctxt->disableSAX = 1;
1.151 daniel 4418: return(name);
4419: } else if (name[3] == 0) {
4420: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4421: ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
4422: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
4423: ctxt->wellFormed = 0;
1.180 daniel 4424: ctxt->disableSAX = 1;
1.151 daniel 4425: return(name);
4426: }
1.139 daniel 4427: for (i = 0;;i++) {
4428: if (xmlW3CPIs[i] == NULL) break;
4429: if (!xmlStrcmp(name, (const xmlChar *)xmlW3CPIs[i]))
4430: return(name);
4431: }
4432: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
4433: ctxt->sax->warning(ctxt->userData,
1.122 daniel 4434: "xmlParsePItarget: invalid name prefix 'xml'\n");
1.123 daniel 4435: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
1.122 daniel 4436: }
1.22 daniel 4437: }
4438: return(name);
4439: }
4440:
1.50 daniel 4441: /**
4442: * xmlParsePI:
4443: * @ctxt: an XML parser context
4444: *
4445: * parse an XML Processing Instruction.
1.22 daniel 4446: *
4447: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.68 daniel 4448: *
1.69 daniel 4449: * The processing is transfered to SAX once parsed.
1.3 veillard 4450: */
4451:
1.55 daniel 4452: void
4453: xmlParsePI(xmlParserCtxtPtr ctxt) {
1.135 daniel 4454: xmlChar *buf = NULL;
4455: int len = 0;
1.140 daniel 4456: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 4457: int cur, l;
1.123 daniel 4458: xmlChar *target;
1.140 daniel 4459: xmlParserInputState state;
1.22 daniel 4460:
1.152 daniel 4461: if ((RAW == '<') && (NXT(1) == '?')) {
1.187 daniel 4462: xmlParserInputPtr input = ctxt->input;
1.140 daniel 4463: state = ctxt->instate;
4464: ctxt->instate = XML_PARSER_PI;
1.3 veillard 4465: /*
4466: * this is a Processing Instruction.
4467: */
1.40 daniel 4468: SKIP(2);
1.91 daniel 4469: SHRINK;
1.3 veillard 4470:
4471: /*
1.22 daniel 4472: * Parse the target name and check for special support like
4473: * namespace.
1.3 veillard 4474: */
1.22 daniel 4475: target = xmlParsePITarget(ctxt);
4476: if (target != NULL) {
1.156 daniel 4477: if ((RAW == '?') && (NXT(1) == '>')) {
1.187 daniel 4478: if (input != ctxt->input) {
4479: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4480: ctxt->sax->error(ctxt->userData,
4481: "PI declaration doesn't start and stop in the same entity\n");
4482: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4483: ctxt->wellFormed = 0;
4484: ctxt->disableSAX = 1;
4485: }
1.156 daniel 4486: SKIP(2);
4487:
4488: /*
4489: * SAX: PI detected.
4490: */
1.171 daniel 4491: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.156 daniel 4492: (ctxt->sax->processingInstruction != NULL))
4493: ctxt->sax->processingInstruction(ctxt->userData,
4494: target, NULL);
4495: ctxt->instate = state;
1.170 daniel 4496: xmlFree(target);
1.156 daniel 4497: return;
4498: }
1.135 daniel 4499: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4500: if (buf == NULL) {
4501: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 4502: ctxt->instate = state;
1.135 daniel 4503: return;
4504: }
4505: cur = CUR;
4506: if (!IS_BLANK(cur)) {
1.114 daniel 4507: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4508: ctxt->sax->error(ctxt->userData,
4509: "xmlParsePI: PI %s space expected\n", target);
1.123 daniel 4510: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.114 daniel 4511: ctxt->wellFormed = 0;
1.180 daniel 4512: ctxt->disableSAX = 1;
1.114 daniel 4513: }
4514: SKIP_BLANKS;
1.152 daniel 4515: cur = CUR_CHAR(l);
1.135 daniel 4516: while (IS_CHAR(cur) &&
4517: ((cur != '?') || (NXT(1) != '>'))) {
1.152 daniel 4518: if (len + 5 >= size) {
1.135 daniel 4519: size *= 2;
1.204 veillard 4520: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 4521: if (buf == NULL) {
4522: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 4523: ctxt->instate = state;
1.135 daniel 4524: return;
4525: }
4526: }
1.152 daniel 4527: COPY_BUF(l,buf,len,cur);
4528: NEXTL(l);
4529: cur = CUR_CHAR(l);
1.135 daniel 4530: if (cur == 0) {
4531: SHRINK;
4532: GROW;
1.152 daniel 4533: cur = CUR_CHAR(l);
1.135 daniel 4534: }
4535: }
4536: buf[len] = 0;
1.152 daniel 4537: if (cur != '?') {
1.72 daniel 4538: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4539: ctxt->sax->error(ctxt->userData,
1.72 daniel 4540: "xmlParsePI: PI %s never end ...\n", target);
1.123 daniel 4541: ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
1.72 daniel 4542: ctxt->wellFormed = 0;
1.180 daniel 4543: ctxt->disableSAX = 1;
1.22 daniel 4544: } else {
1.187 daniel 4545: if (input != ctxt->input) {
4546: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4547: ctxt->sax->error(ctxt->userData,
4548: "PI declaration doesn't start and stop in the same entity\n");
4549: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4550: ctxt->wellFormed = 0;
4551: ctxt->disableSAX = 1;
4552: }
1.72 daniel 4553: SKIP(2);
1.44 daniel 4554:
1.72 daniel 4555: /*
4556: * SAX: PI detected.
4557: */
1.171 daniel 4558: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.72 daniel 4559: (ctxt->sax->processingInstruction != NULL))
1.99 daniel 4560: ctxt->sax->processingInstruction(ctxt->userData,
1.135 daniel 4561: target, buf);
1.22 daniel 4562: }
1.135 daniel 4563: xmlFree(buf);
1.119 daniel 4564: xmlFree(target);
1.3 veillard 4565: } else {
1.55 daniel 4566: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.99 daniel 4567: ctxt->sax->error(ctxt->userData,
4568: "xmlParsePI : no target name\n");
1.123 daniel 4569: ctxt->errNo = XML_ERR_PI_NOT_STARTED;
1.59 daniel 4570: ctxt->wellFormed = 0;
1.180 daniel 4571: ctxt->disableSAX = 1;
1.22 daniel 4572: }
1.140 daniel 4573: ctxt->instate = state;
1.22 daniel 4574: }
4575: }
4576:
1.50 daniel 4577: /**
4578: * xmlParseNotationDecl:
4579: * @ctxt: an XML parser context
4580: *
4581: * parse a notation declaration
1.22 daniel 4582: *
4583: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4584: *
4585: * Hence there is actually 3 choices:
4586: * 'PUBLIC' S PubidLiteral
4587: * 'PUBLIC' S PubidLiteral S SystemLiteral
4588: * and 'SYSTEM' S SystemLiteral
1.50 daniel 4589: *
1.67 daniel 4590: * See the NOTE on xmlParseExternalID().
1.22 daniel 4591: */
4592:
1.55 daniel 4593: void
4594: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4595: xmlChar *name;
4596: xmlChar *Pubid;
4597: xmlChar *Systemid;
1.22 daniel 4598:
1.152 daniel 4599: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4600: (NXT(2) == 'N') && (NXT(3) == 'O') &&
4601: (NXT(4) == 'T') && (NXT(5) == 'A') &&
4602: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1.67 daniel 4603: (NXT(8) == 'O') && (NXT(9) == 'N')) {
1.187 daniel 4604: xmlParserInputPtr input = ctxt->input;
1.91 daniel 4605: SHRINK;
1.40 daniel 4606: SKIP(10);
1.67 daniel 4607: if (!IS_BLANK(CUR)) {
4608: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4609: ctxt->sax->error(ctxt->userData,
4610: "Space required after '<!NOTATION'\n");
1.123 daniel 4611: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 4612: ctxt->wellFormed = 0;
1.180 daniel 4613: ctxt->disableSAX = 1;
1.67 daniel 4614: return;
4615: }
4616: SKIP_BLANKS;
1.22 daniel 4617:
4618: name = xmlParseName(ctxt);
4619: if (name == NULL) {
1.55 daniel 4620: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4621: ctxt->sax->error(ctxt->userData,
4622: "NOTATION: Name expected here\n");
1.123 daniel 4623: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.67 daniel 4624: ctxt->wellFormed = 0;
1.180 daniel 4625: ctxt->disableSAX = 1;
1.67 daniel 4626: return;
4627: }
4628: if (!IS_BLANK(CUR)) {
4629: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4630: ctxt->sax->error(ctxt->userData,
1.67 daniel 4631: "Space required after the NOTATION name'\n");
1.123 daniel 4632: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4633: ctxt->wellFormed = 0;
1.180 daniel 4634: ctxt->disableSAX = 1;
1.22 daniel 4635: return;
4636: }
1.42 daniel 4637: SKIP_BLANKS;
1.67 daniel 4638:
1.22 daniel 4639: /*
1.67 daniel 4640: * Parse the IDs.
1.22 daniel 4641: */
1.160 daniel 4642: Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
1.67 daniel 4643: SKIP_BLANKS;
4644:
1.152 daniel 4645: if (RAW == '>') {
1.187 daniel 4646: if (input != ctxt->input) {
4647: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4648: ctxt->sax->error(ctxt->userData,
4649: "Notation declaration doesn't start and stop in the same entity\n");
4650: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4651: ctxt->wellFormed = 0;
4652: ctxt->disableSAX = 1;
4653: }
1.40 daniel 4654: NEXT;
1.171 daniel 4655: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4656: (ctxt->sax->notationDecl != NULL))
1.74 daniel 4657: ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
1.67 daniel 4658: } else {
4659: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4660: ctxt->sax->error(ctxt->userData,
1.67 daniel 4661: "'>' required to close NOTATION declaration\n");
1.123 daniel 4662: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.67 daniel 4663: ctxt->wellFormed = 0;
1.180 daniel 4664: ctxt->disableSAX = 1;
1.67 daniel 4665: }
1.119 daniel 4666: xmlFree(name);
4667: if (Systemid != NULL) xmlFree(Systemid);
4668: if (Pubid != NULL) xmlFree(Pubid);
1.22 daniel 4669: }
4670: }
4671:
1.50 daniel 4672: /**
4673: * xmlParseEntityDecl:
4674: * @ctxt: an XML parser context
4675: *
4676: * parse <!ENTITY declarations
1.22 daniel 4677: *
4678: * [70] EntityDecl ::= GEDecl | PEDecl
4679: *
4680: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4681: *
4682: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4683: *
4684: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4685: *
4686: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 4687: *
4688: * [76] NDataDecl ::= S 'NDATA' S Name
1.99 daniel 4689: *
4690: * [ VC: Notation Declared ]
1.116 daniel 4691: * The Name must match the declared name of a notation.
1.22 daniel 4692: */
4693:
1.55 daniel 4694: void
4695: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4696: xmlChar *name = NULL;
4697: xmlChar *value = NULL;
4698: xmlChar *URI = NULL, *literal = NULL;
4699: xmlChar *ndata = NULL;
1.39 daniel 4700: int isParameter = 0;
1.123 daniel 4701: xmlChar *orig = NULL;
1.22 daniel 4702:
1.94 daniel 4703: GROW;
1.152 daniel 4704: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4705: (NXT(2) == 'E') && (NXT(3) == 'N') &&
4706: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1.59 daniel 4707: (NXT(6) == 'T') && (NXT(7) == 'Y')) {
1.187 daniel 4708: xmlParserInputPtr input = ctxt->input;
1.96 daniel 4709: ctxt->instate = XML_PARSER_ENTITY_DECL;
1.91 daniel 4710: SHRINK;
1.40 daniel 4711: SKIP(8);
1.59 daniel 4712: if (!IS_BLANK(CUR)) {
4713: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4714: ctxt->sax->error(ctxt->userData,
4715: "Space required after '<!ENTITY'\n");
1.123 daniel 4716: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4717: ctxt->wellFormed = 0;
1.180 daniel 4718: ctxt->disableSAX = 1;
1.59 daniel 4719: }
4720: SKIP_BLANKS;
1.40 daniel 4721:
1.152 daniel 4722: if (RAW == '%') {
1.40 daniel 4723: NEXT;
1.59 daniel 4724: if (!IS_BLANK(CUR)) {
4725: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4726: ctxt->sax->error(ctxt->userData,
4727: "Space required after '%'\n");
1.123 daniel 4728: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4729: ctxt->wellFormed = 0;
1.180 daniel 4730: ctxt->disableSAX = 1;
1.59 daniel 4731: }
1.42 daniel 4732: SKIP_BLANKS;
1.39 daniel 4733: isParameter = 1;
1.22 daniel 4734: }
4735:
4736: name = xmlParseName(ctxt);
1.24 daniel 4737: if (name == NULL) {
1.55 daniel 4738: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4739: ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
1.123 daniel 4740: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 4741: ctxt->wellFormed = 0;
1.180 daniel 4742: ctxt->disableSAX = 1;
1.24 daniel 4743: return;
4744: }
1.59 daniel 4745: if (!IS_BLANK(CUR)) {
4746: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4747: ctxt->sax->error(ctxt->userData,
1.59 daniel 4748: "Space required after the entity name\n");
1.123 daniel 4749: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4750: ctxt->wellFormed = 0;
1.180 daniel 4751: ctxt->disableSAX = 1;
1.59 daniel 4752: }
1.42 daniel 4753: SKIP_BLANKS;
1.24 daniel 4754:
1.22 daniel 4755: /*
1.68 daniel 4756: * handle the various case of definitions...
1.22 daniel 4757: */
1.39 daniel 4758: if (isParameter) {
1.152 daniel 4759: if ((RAW == '"') || (RAW == '\''))
1.78 daniel 4760: value = xmlParseEntityValue(ctxt, &orig);
1.39 daniel 4761: if (value) {
1.171 daniel 4762: if ((ctxt->sax != NULL) &&
4763: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4764: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4765: XML_INTERNAL_PARAMETER_ENTITY,
4766: NULL, NULL, value);
4767: }
1.24 daniel 4768: else {
1.67 daniel 4769: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 4770: if ((URI == NULL) && (literal == NULL)) {
4771: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4772: ctxt->sax->error(ctxt->userData,
4773: "Entity value required\n");
4774: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
4775: ctxt->wellFormed = 0;
1.180 daniel 4776: ctxt->disableSAX = 1;
1.169 daniel 4777: }
1.39 daniel 4778: if (URI) {
1.193 daniel 4779: xmlURIPtr uri;
4780:
4781: uri = xmlParseURI((const char *) URI);
4782: if (uri == NULL) {
4783: if ((ctxt->sax != NULL) &&
4784: (!ctxt->disableSAX) &&
4785: (ctxt->sax->error != NULL))
4786: ctxt->sax->error(ctxt->userData,
4787: "Invalid URI: %s\n", URI);
4788: ctxt->wellFormed = 0;
4789: ctxt->errNo = XML_ERR_INVALID_URI;
4790: } else {
4791: if (uri->fragment != NULL) {
4792: if ((ctxt->sax != NULL) &&
4793: (!ctxt->disableSAX) &&
4794: (ctxt->sax->error != NULL))
4795: ctxt->sax->error(ctxt->userData,
4796: "Fragment not allowed: %s\n", URI);
4797: ctxt->wellFormed = 0;
4798: ctxt->errNo = XML_ERR_URI_FRAGMENT;
4799: } else {
4800: if ((ctxt->sax != NULL) &&
4801: (!ctxt->disableSAX) &&
4802: (ctxt->sax->entityDecl != NULL))
4803: ctxt->sax->entityDecl(ctxt->userData, name,
4804: XML_EXTERNAL_PARAMETER_ENTITY,
4805: literal, URI, NULL);
4806: }
4807: xmlFreeURI(uri);
4808: }
1.39 daniel 4809: }
1.24 daniel 4810: }
4811: } else {
1.152 daniel 4812: if ((RAW == '"') || (RAW == '\'')) {
1.78 daniel 4813: value = xmlParseEntityValue(ctxt, &orig);
1.171 daniel 4814: if ((ctxt->sax != NULL) &&
4815: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4816: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4817: XML_INTERNAL_GENERAL_ENTITY,
4818: NULL, NULL, value);
4819: } else {
1.67 daniel 4820: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 4821: if ((URI == NULL) && (literal == NULL)) {
4822: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4823: ctxt->sax->error(ctxt->userData,
4824: "Entity value required\n");
4825: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
4826: ctxt->wellFormed = 0;
1.180 daniel 4827: ctxt->disableSAX = 1;
1.169 daniel 4828: }
1.193 daniel 4829: if (URI) {
4830: xmlURIPtr uri;
4831:
4832: uri = xmlParseURI((const char *)URI);
4833: if (uri == NULL) {
4834: if ((ctxt->sax != NULL) &&
4835: (!ctxt->disableSAX) &&
4836: (ctxt->sax->error != NULL))
4837: ctxt->sax->error(ctxt->userData,
4838: "Invalid URI: %s\n", URI);
4839: ctxt->wellFormed = 0;
4840: ctxt->errNo = XML_ERR_INVALID_URI;
4841: } else {
4842: if (uri->fragment != NULL) {
4843: if ((ctxt->sax != NULL) &&
4844: (!ctxt->disableSAX) &&
4845: (ctxt->sax->error != NULL))
4846: ctxt->sax->error(ctxt->userData,
4847: "Fragment not allowed: %s\n", URI);
4848: ctxt->wellFormed = 0;
4849: ctxt->errNo = XML_ERR_URI_FRAGMENT;
4850: }
4851: xmlFreeURI(uri);
4852: }
4853: }
1.152 daniel 4854: if ((RAW != '>') && (!IS_BLANK(CUR))) {
1.59 daniel 4855: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4856: ctxt->sax->error(ctxt->userData,
1.59 daniel 4857: "Space required before 'NDATA'\n");
1.123 daniel 4858: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4859: ctxt->wellFormed = 0;
1.180 daniel 4860: ctxt->disableSAX = 1;
1.59 daniel 4861: }
1.42 daniel 4862: SKIP_BLANKS;
1.152 daniel 4863: if ((RAW == 'N') && (NXT(1) == 'D') &&
1.40 daniel 4864: (NXT(2) == 'A') && (NXT(3) == 'T') &&
4865: (NXT(4) == 'A')) {
4866: SKIP(5);
1.59 daniel 4867: if (!IS_BLANK(CUR)) {
4868: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4869: ctxt->sax->error(ctxt->userData,
1.59 daniel 4870: "Space required after 'NDATA'\n");
1.123 daniel 4871: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4872: ctxt->wellFormed = 0;
1.180 daniel 4873: ctxt->disableSAX = 1;
1.59 daniel 4874: }
1.42 daniel 4875: SKIP_BLANKS;
1.24 daniel 4876: ndata = xmlParseName(ctxt);
1.171 daniel 4877: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.116 daniel 4878: (ctxt->sax->unparsedEntityDecl != NULL))
4879: ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
1.39 daniel 4880: literal, URI, ndata);
4881: } else {
1.171 daniel 4882: if ((ctxt->sax != NULL) &&
4883: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4884: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4885: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4886: literal, URI, NULL);
1.24 daniel 4887: }
4888: }
4889: }
1.42 daniel 4890: SKIP_BLANKS;
1.152 daniel 4891: if (RAW != '>') {
1.55 daniel 4892: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4893: ctxt->sax->error(ctxt->userData,
1.31 daniel 4894: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.123 daniel 4895: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.59 daniel 4896: ctxt->wellFormed = 0;
1.180 daniel 4897: ctxt->disableSAX = 1;
1.187 daniel 4898: } else {
4899: if (input != ctxt->input) {
4900: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4901: ctxt->sax->error(ctxt->userData,
4902: "Entity declaration doesn't start and stop in the same entity\n");
4903: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4904: ctxt->wellFormed = 0;
4905: ctxt->disableSAX = 1;
4906: }
1.40 daniel 4907: NEXT;
1.187 daniel 4908: }
1.78 daniel 4909: if (orig != NULL) {
4910: /*
1.98 daniel 4911: * Ugly mechanism to save the raw entity value.
1.78 daniel 4912: */
4913: xmlEntityPtr cur = NULL;
4914:
1.98 daniel 4915: if (isParameter) {
4916: if ((ctxt->sax != NULL) &&
4917: (ctxt->sax->getParameterEntity != NULL))
1.120 daniel 4918: cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.98 daniel 4919: } else {
4920: if ((ctxt->sax != NULL) &&
4921: (ctxt->sax->getEntity != NULL))
1.120 daniel 4922: cur = ctxt->sax->getEntity(ctxt->userData, name);
1.98 daniel 4923: }
4924: if (cur != NULL) {
4925: if (cur->orig != NULL)
1.119 daniel 4926: xmlFree(orig);
1.98 daniel 4927: else
4928: cur->orig = orig;
4929: } else
1.119 daniel 4930: xmlFree(orig);
1.78 daniel 4931: }
1.119 daniel 4932: if (name != NULL) xmlFree(name);
4933: if (value != NULL) xmlFree(value);
4934: if (URI != NULL) xmlFree(URI);
4935: if (literal != NULL) xmlFree(literal);
4936: if (ndata != NULL) xmlFree(ndata);
1.22 daniel 4937: }
4938: }
4939:
1.50 daniel 4940: /**
1.59 daniel 4941: * xmlParseDefaultDecl:
4942: * @ctxt: an XML parser context
4943: * @value: Receive a possible fixed default value for the attribute
4944: *
4945: * Parse an attribute default declaration
4946: *
4947: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4948: *
1.99 daniel 4949: * [ VC: Required Attribute ]
1.117 daniel 4950: * if the default declaration is the keyword #REQUIRED, then the
4951: * attribute must be specified for all elements of the type in the
4952: * attribute-list declaration.
1.99 daniel 4953: *
4954: * [ VC: Attribute Default Legal ]
1.102 daniel 4955: * The declared default value must meet the lexical constraints of
4956: * the declared attribute type c.f. xmlValidateAttributeDecl()
1.99 daniel 4957: *
4958: * [ VC: Fixed Attribute Default ]
1.117 daniel 4959: * if an attribute has a default value declared with the #FIXED
4960: * keyword, instances of that attribute must match the default value.
1.99 daniel 4961: *
4962: * [ WFC: No < in Attribute Values ]
4963: * handled in xmlParseAttValue()
4964: *
1.59 daniel 4965: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4966: * or XML_ATTRIBUTE_FIXED.
4967: */
4968:
4969: int
1.123 daniel 4970: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
1.59 daniel 4971: int val;
1.123 daniel 4972: xmlChar *ret;
1.59 daniel 4973:
4974: *value = NULL;
1.152 daniel 4975: if ((RAW == '#') && (NXT(1) == 'R') &&
1.59 daniel 4976: (NXT(2) == 'E') && (NXT(3) == 'Q') &&
4977: (NXT(4) == 'U') && (NXT(5) == 'I') &&
4978: (NXT(6) == 'R') && (NXT(7) == 'E') &&
4979: (NXT(8) == 'D')) {
4980: SKIP(9);
4981: return(XML_ATTRIBUTE_REQUIRED);
4982: }
1.152 daniel 4983: if ((RAW == '#') && (NXT(1) == 'I') &&
1.59 daniel 4984: (NXT(2) == 'M') && (NXT(3) == 'P') &&
4985: (NXT(4) == 'L') && (NXT(5) == 'I') &&
4986: (NXT(6) == 'E') && (NXT(7) == 'D')) {
4987: SKIP(8);
4988: return(XML_ATTRIBUTE_IMPLIED);
4989: }
4990: val = XML_ATTRIBUTE_NONE;
1.152 daniel 4991: if ((RAW == '#') && (NXT(1) == 'F') &&
1.59 daniel 4992: (NXT(2) == 'I') && (NXT(3) == 'X') &&
4993: (NXT(4) == 'E') && (NXT(5) == 'D')) {
4994: SKIP(6);
4995: val = XML_ATTRIBUTE_FIXED;
4996: if (!IS_BLANK(CUR)) {
4997: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4998: ctxt->sax->error(ctxt->userData,
4999: "Space required after '#FIXED'\n");
1.123 daniel 5000: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5001: ctxt->wellFormed = 0;
1.180 daniel 5002: ctxt->disableSAX = 1;
1.59 daniel 5003: }
5004: SKIP_BLANKS;
5005: }
5006: ret = xmlParseAttValue(ctxt);
1.96 daniel 5007: ctxt->instate = XML_PARSER_DTD;
1.59 daniel 5008: if (ret == NULL) {
5009: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5010: ctxt->sax->error(ctxt->userData,
1.59 daniel 5011: "Attribute default value declaration error\n");
5012: ctxt->wellFormed = 0;
1.180 daniel 5013: ctxt->disableSAX = 1;
1.59 daniel 5014: } else
5015: *value = ret;
5016: return(val);
5017: }
5018:
5019: /**
1.66 daniel 5020: * xmlParseNotationType:
5021: * @ctxt: an XML parser context
5022: *
5023: * parse an Notation attribute type.
5024: *
1.99 daniel 5025: * Note: the leading 'NOTATION' S part has already being parsed...
5026: *
1.66 daniel 5027: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5028: *
1.99 daniel 5029: * [ VC: Notation Attributes ]
1.117 daniel 5030: * Values of this type must match one of the notation names included
1.99 daniel 5031: * in the declaration; all notation names in the declaration must be declared.
1.66 daniel 5032: *
5033: * Returns: the notation attribute tree built while parsing
5034: */
5035:
5036: xmlEnumerationPtr
5037: xmlParseNotationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 5038: xmlChar *name;
1.66 daniel 5039: xmlEnumerationPtr ret = NULL, last = NULL, cur;
5040:
1.152 daniel 5041: if (RAW != '(') {
1.66 daniel 5042: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5043: ctxt->sax->error(ctxt->userData,
5044: "'(' required to start 'NOTATION'\n");
1.123 daniel 5045: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.66 daniel 5046: ctxt->wellFormed = 0;
1.180 daniel 5047: ctxt->disableSAX = 1;
1.66 daniel 5048: return(NULL);
5049: }
1.91 daniel 5050: SHRINK;
1.66 daniel 5051: do {
5052: NEXT;
5053: SKIP_BLANKS;
5054: name = xmlParseName(ctxt);
5055: if (name == NULL) {
5056: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5057: ctxt->sax->error(ctxt->userData,
1.66 daniel 5058: "Name expected in NOTATION declaration\n");
1.123 daniel 5059: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.66 daniel 5060: ctxt->wellFormed = 0;
1.180 daniel 5061: ctxt->disableSAX = 1;
1.66 daniel 5062: return(ret);
5063: }
5064: cur = xmlCreateEnumeration(name);
1.119 daniel 5065: xmlFree(name);
1.66 daniel 5066: if (cur == NULL) return(ret);
5067: if (last == NULL) ret = last = cur;
5068: else {
5069: last->next = cur;
5070: last = cur;
5071: }
5072: SKIP_BLANKS;
1.152 daniel 5073: } while (RAW == '|');
5074: if (RAW != ')') {
1.66 daniel 5075: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5076: ctxt->sax->error(ctxt->userData,
1.66 daniel 5077: "')' required to finish NOTATION declaration\n");
1.123 daniel 5078: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.66 daniel 5079: ctxt->wellFormed = 0;
1.180 daniel 5080: ctxt->disableSAX = 1;
1.170 daniel 5081: if ((last != NULL) && (last != ret))
5082: xmlFreeEnumeration(last);
1.66 daniel 5083: return(ret);
5084: }
5085: NEXT;
5086: return(ret);
5087: }
5088:
5089: /**
5090: * xmlParseEnumerationType:
5091: * @ctxt: an XML parser context
5092: *
5093: * parse an Enumeration attribute type.
5094: *
5095: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5096: *
1.99 daniel 5097: * [ VC: Enumeration ]
1.117 daniel 5098: * Values of this type must match one of the Nmtoken tokens in
1.99 daniel 5099: * the declaration
5100: *
1.66 daniel 5101: * Returns: the enumeration attribute tree built while parsing
5102: */
5103:
5104: xmlEnumerationPtr
5105: xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 5106: xmlChar *name;
1.66 daniel 5107: xmlEnumerationPtr ret = NULL, last = NULL, cur;
5108:
1.152 daniel 5109: if (RAW != '(') {
1.66 daniel 5110: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5111: ctxt->sax->error(ctxt->userData,
1.66 daniel 5112: "'(' required to start ATTLIST enumeration\n");
1.123 daniel 5113: ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
1.66 daniel 5114: ctxt->wellFormed = 0;
1.180 daniel 5115: ctxt->disableSAX = 1;
1.66 daniel 5116: return(NULL);
5117: }
1.91 daniel 5118: SHRINK;
1.66 daniel 5119: do {
5120: NEXT;
5121: SKIP_BLANKS;
5122: name = xmlParseNmtoken(ctxt);
5123: if (name == NULL) {
5124: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5125: ctxt->sax->error(ctxt->userData,
1.66 daniel 5126: "NmToken expected in ATTLIST enumeration\n");
1.123 daniel 5127: ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
1.66 daniel 5128: ctxt->wellFormed = 0;
1.180 daniel 5129: ctxt->disableSAX = 1;
1.66 daniel 5130: return(ret);
5131: }
5132: cur = xmlCreateEnumeration(name);
1.119 daniel 5133: xmlFree(name);
1.66 daniel 5134: if (cur == NULL) return(ret);
5135: if (last == NULL) ret = last = cur;
5136: else {
5137: last->next = cur;
5138: last = cur;
5139: }
5140: SKIP_BLANKS;
1.152 daniel 5141: } while (RAW == '|');
5142: if (RAW != ')') {
1.66 daniel 5143: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5144: ctxt->sax->error(ctxt->userData,
1.66 daniel 5145: "')' required to finish ATTLIST enumeration\n");
1.123 daniel 5146: ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
1.66 daniel 5147: ctxt->wellFormed = 0;
1.180 daniel 5148: ctxt->disableSAX = 1;
1.66 daniel 5149: return(ret);
5150: }
5151: NEXT;
5152: return(ret);
5153: }
5154:
5155: /**
1.50 daniel 5156: * xmlParseEnumeratedType:
5157: * @ctxt: an XML parser context
1.66 daniel 5158: * @tree: the enumeration tree built while parsing
1.50 daniel 5159: *
1.66 daniel 5160: * parse an Enumerated attribute type.
1.22 daniel 5161: *
5162: * [57] EnumeratedType ::= NotationType | Enumeration
5163: *
5164: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5165: *
1.50 daniel 5166: *
1.66 daniel 5167: * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
1.22 daniel 5168: */
5169:
1.66 daniel 5170: int
5171: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.152 daniel 5172: if ((RAW == 'N') && (NXT(1) == 'O') &&
1.66 daniel 5173: (NXT(2) == 'T') && (NXT(3) == 'A') &&
5174: (NXT(4) == 'T') && (NXT(5) == 'I') &&
5175: (NXT(6) == 'O') && (NXT(7) == 'N')) {
5176: SKIP(8);
5177: if (!IS_BLANK(CUR)) {
5178: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5179: ctxt->sax->error(ctxt->userData,
5180: "Space required after 'NOTATION'\n");
1.123 daniel 5181: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.66 daniel 5182: ctxt->wellFormed = 0;
1.180 daniel 5183: ctxt->disableSAX = 1;
1.66 daniel 5184: return(0);
5185: }
5186: SKIP_BLANKS;
5187: *tree = xmlParseNotationType(ctxt);
5188: if (*tree == NULL) return(0);
5189: return(XML_ATTRIBUTE_NOTATION);
5190: }
5191: *tree = xmlParseEnumerationType(ctxt);
5192: if (*tree == NULL) return(0);
5193: return(XML_ATTRIBUTE_ENUMERATION);
1.22 daniel 5194: }
5195:
1.50 daniel 5196: /**
5197: * xmlParseAttributeType:
5198: * @ctxt: an XML parser context
1.66 daniel 5199: * @tree: the enumeration tree built while parsing
1.50 daniel 5200: *
1.59 daniel 5201: * parse the Attribute list def for an element
1.22 daniel 5202: *
5203: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5204: *
5205: * [55] StringType ::= 'CDATA'
5206: *
5207: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5208: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1.50 daniel 5209: *
1.102 daniel 5210: * Validity constraints for attribute values syntax are checked in
5211: * xmlValidateAttributeValue()
5212: *
1.99 daniel 5213: * [ VC: ID ]
1.117 daniel 5214: * Values of type ID must match the Name production. A name must not
1.99 daniel 5215: * appear more than once in an XML document as a value of this type;
5216: * i.e., ID values must uniquely identify the elements which bear them.
5217: *
5218: * [ VC: One ID per Element Type ]
1.117 daniel 5219: * No element type may have more than one ID attribute specified.
1.99 daniel 5220: *
5221: * [ VC: ID Attribute Default ]
1.117 daniel 5222: * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
1.99 daniel 5223: *
5224: * [ VC: IDREF ]
1.102 daniel 5225: * Values of type IDREF must match the Name production, and values
1.140 daniel 5226: * of type IDREFS must match Names; each IDREF Name must match the value
1.117 daniel 5227: * of an ID attribute on some element in the XML document; i.e. IDREF
1.99 daniel 5228: * values must match the value of some ID attribute.
5229: *
5230: * [ VC: Entity Name ]
1.102 daniel 5231: * Values of type ENTITY must match the Name production, values
1.140 daniel 5232: * of type ENTITIES must match Names; each Entity Name must match the
1.117 daniel 5233: * name of an unparsed entity declared in the DTD.
1.99 daniel 5234: *
5235: * [ VC: Name Token ]
1.102 daniel 5236: * Values of type NMTOKEN must match the Nmtoken production; values
1.99 daniel 5237: * of type NMTOKENS must match Nmtokens.
5238: *
1.69 daniel 5239: * Returns the attribute type
1.22 daniel 5240: */
1.59 daniel 5241: int
1.66 daniel 5242: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.91 daniel 5243: SHRINK;
1.152 daniel 5244: if ((RAW == 'C') && (NXT(1) == 'D') &&
1.40 daniel 5245: (NXT(2) == 'A') && (NXT(3) == 'T') &&
5246: (NXT(4) == 'A')) {
5247: SKIP(5);
1.66 daniel 5248: return(XML_ATTRIBUTE_CDATA);
1.152 daniel 5249: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.40 daniel 5250: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.97 daniel 5251: (NXT(4) == 'F') && (NXT(5) == 'S')) {
5252: SKIP(6);
5253: return(XML_ATTRIBUTE_IDREFS);
1.152 daniel 5254: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.97 daniel 5255: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.40 daniel 5256: (NXT(4) == 'F')) {
5257: SKIP(5);
1.59 daniel 5258: return(XML_ATTRIBUTE_IDREF);
1.152 daniel 5259: } else if ((RAW == 'I') && (NXT(1) == 'D')) {
1.66 daniel 5260: SKIP(2);
5261: return(XML_ATTRIBUTE_ID);
1.152 daniel 5262: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 5263: (NXT(2) == 'T') && (NXT(3) == 'I') &&
5264: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
5265: SKIP(6);
1.59 daniel 5266: return(XML_ATTRIBUTE_ENTITY);
1.152 daniel 5267: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 5268: (NXT(2) == 'T') && (NXT(3) == 'I') &&
5269: (NXT(4) == 'T') && (NXT(5) == 'I') &&
5270: (NXT(6) == 'E') && (NXT(7) == 'S')) {
5271: SKIP(8);
1.59 daniel 5272: return(XML_ATTRIBUTE_ENTITIES);
1.152 daniel 5273: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.40 daniel 5274: (NXT(2) == 'T') && (NXT(3) == 'O') &&
5275: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.66 daniel 5276: (NXT(6) == 'N') && (NXT(7) == 'S')) {
5277: SKIP(8);
5278: return(XML_ATTRIBUTE_NMTOKENS);
1.152 daniel 5279: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.66 daniel 5280: (NXT(2) == 'T') && (NXT(3) == 'O') &&
5281: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.40 daniel 5282: (NXT(6) == 'N')) {
5283: SKIP(7);
1.59 daniel 5284: return(XML_ATTRIBUTE_NMTOKEN);
1.22 daniel 5285: }
1.66 daniel 5286: return(xmlParseEnumeratedType(ctxt, tree));
1.22 daniel 5287: }
5288:
1.50 daniel 5289: /**
5290: * xmlParseAttributeListDecl:
5291: * @ctxt: an XML parser context
5292: *
5293: * : parse the Attribute list def for an element
1.22 daniel 5294: *
5295: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5296: *
5297: * [53] AttDef ::= S Name S AttType S DefaultDecl
1.50 daniel 5298: *
1.22 daniel 5299: */
1.55 daniel 5300: void
5301: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 5302: xmlChar *elemName;
5303: xmlChar *attrName;
1.103 daniel 5304: xmlEnumerationPtr tree;
1.22 daniel 5305:
1.152 daniel 5306: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 5307: (NXT(2) == 'A') && (NXT(3) == 'T') &&
5308: (NXT(4) == 'T') && (NXT(5) == 'L') &&
5309: (NXT(6) == 'I') && (NXT(7) == 'S') &&
1.59 daniel 5310: (NXT(8) == 'T')) {
1.187 daniel 5311: xmlParserInputPtr input = ctxt->input;
5312:
1.40 daniel 5313: SKIP(9);
1.59 daniel 5314: if (!IS_BLANK(CUR)) {
5315: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5316: ctxt->sax->error(ctxt->userData,
5317: "Space required after '<!ATTLIST'\n");
1.123 daniel 5318: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5319: ctxt->wellFormed = 0;
1.180 daniel 5320: ctxt->disableSAX = 1;
1.59 daniel 5321: }
1.42 daniel 5322: SKIP_BLANKS;
1.59 daniel 5323: elemName = xmlParseName(ctxt);
5324: if (elemName == NULL) {
1.55 daniel 5325: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5326: ctxt->sax->error(ctxt->userData,
5327: "ATTLIST: no name for Element\n");
1.123 daniel 5328: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5329: ctxt->wellFormed = 0;
1.180 daniel 5330: ctxt->disableSAX = 1;
1.22 daniel 5331: return;
5332: }
1.42 daniel 5333: SKIP_BLANKS;
1.152 daniel 5334: while (RAW != '>') {
1.123 daniel 5335: const xmlChar *check = CUR_PTR;
1.59 daniel 5336: int type;
5337: int def;
1.123 daniel 5338: xmlChar *defaultValue = NULL;
1.59 daniel 5339:
1.103 daniel 5340: tree = NULL;
1.59 daniel 5341: attrName = xmlParseName(ctxt);
5342: if (attrName == NULL) {
5343: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5344: ctxt->sax->error(ctxt->userData,
5345: "ATTLIST: no name for Attribute\n");
1.123 daniel 5346: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5347: ctxt->wellFormed = 0;
1.180 daniel 5348: ctxt->disableSAX = 1;
1.59 daniel 5349: break;
5350: }
1.97 daniel 5351: GROW;
1.59 daniel 5352: if (!IS_BLANK(CUR)) {
5353: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5354: ctxt->sax->error(ctxt->userData,
1.59 daniel 5355: "Space required after the attribute name\n");
1.123 daniel 5356: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5357: ctxt->wellFormed = 0;
1.180 daniel 5358: ctxt->disableSAX = 1;
1.170 daniel 5359: if (attrName != NULL)
5360: xmlFree(attrName);
5361: if (defaultValue != NULL)
5362: xmlFree(defaultValue);
1.59 daniel 5363: break;
5364: }
5365: SKIP_BLANKS;
5366:
1.66 daniel 5367: type = xmlParseAttributeType(ctxt, &tree);
1.170 daniel 5368: if (type <= 0) {
5369: if (attrName != NULL)
5370: xmlFree(attrName);
5371: if (defaultValue != NULL)
5372: xmlFree(defaultValue);
5373: break;
5374: }
1.22 daniel 5375:
1.97 daniel 5376: GROW;
1.59 daniel 5377: if (!IS_BLANK(CUR)) {
5378: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5379: ctxt->sax->error(ctxt->userData,
1.59 daniel 5380: "Space required after the attribute type\n");
1.123 daniel 5381: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5382: ctxt->wellFormed = 0;
1.180 daniel 5383: ctxt->disableSAX = 1;
1.170 daniel 5384: if (attrName != NULL)
5385: xmlFree(attrName);
5386: if (defaultValue != NULL)
5387: xmlFree(defaultValue);
5388: if (tree != NULL)
5389: xmlFreeEnumeration(tree);
1.59 daniel 5390: break;
5391: }
1.42 daniel 5392: SKIP_BLANKS;
1.59 daniel 5393:
5394: def = xmlParseDefaultDecl(ctxt, &defaultValue);
1.170 daniel 5395: if (def <= 0) {
5396: if (attrName != NULL)
5397: xmlFree(attrName);
5398: if (defaultValue != NULL)
5399: xmlFree(defaultValue);
5400: if (tree != NULL)
5401: xmlFreeEnumeration(tree);
5402: break;
5403: }
1.59 daniel 5404:
1.97 daniel 5405: GROW;
1.152 daniel 5406: if (RAW != '>') {
1.59 daniel 5407: if (!IS_BLANK(CUR)) {
5408: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5409: ctxt->sax->error(ctxt->userData,
1.59 daniel 5410: "Space required after the attribute default value\n");
1.123 daniel 5411: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5412: ctxt->wellFormed = 0;
1.180 daniel 5413: ctxt->disableSAX = 1;
1.170 daniel 5414: if (attrName != NULL)
5415: xmlFree(attrName);
5416: if (defaultValue != NULL)
5417: xmlFree(defaultValue);
5418: if (tree != NULL)
5419: xmlFreeEnumeration(tree);
1.59 daniel 5420: break;
5421: }
5422: SKIP_BLANKS;
5423: }
1.40 daniel 5424: if (check == CUR_PTR) {
1.55 daniel 5425: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5426: ctxt->sax->error(ctxt->userData,
1.59 daniel 5427: "xmlParseAttributeListDecl: detected internal error\n");
1.123 daniel 5428: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.170 daniel 5429: if (attrName != NULL)
5430: xmlFree(attrName);
5431: if (defaultValue != NULL)
5432: xmlFree(defaultValue);
5433: if (tree != NULL)
5434: xmlFreeEnumeration(tree);
1.22 daniel 5435: break;
5436: }
1.171 daniel 5437: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5438: (ctxt->sax->attributeDecl != NULL))
1.74 daniel 5439: ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
1.66 daniel 5440: type, def, defaultValue, tree);
1.59 daniel 5441: if (attrName != NULL)
1.119 daniel 5442: xmlFree(attrName);
1.59 daniel 5443: if (defaultValue != NULL)
1.119 daniel 5444: xmlFree(defaultValue);
1.97 daniel 5445: GROW;
1.22 daniel 5446: }
1.187 daniel 5447: if (RAW == '>') {
5448: if (input != ctxt->input) {
5449: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5450: ctxt->sax->error(ctxt->userData,
5451: "Attribute list declaration doesn't start and stop in the same entity\n");
5452: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5453: ctxt->wellFormed = 0;
5454: ctxt->disableSAX = 1;
5455: }
1.40 daniel 5456: NEXT;
1.187 daniel 5457: }
1.22 daniel 5458:
1.119 daniel 5459: xmlFree(elemName);
1.22 daniel 5460: }
5461: }
5462:
1.50 daniel 5463: /**
1.61 daniel 5464: * xmlParseElementMixedContentDecl:
5465: * @ctxt: an XML parser context
5466: *
5467: * parse the declaration for a Mixed Element content
5468: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5469: *
5470: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5471: * '(' S? '#PCDATA' S? ')'
5472: *
1.99 daniel 5473: * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5474: *
5475: * [ VC: No Duplicate Types ]
1.117 daniel 5476: * The same name must not appear more than once in a single
5477: * mixed-content declaration.
1.99 daniel 5478: *
1.61 daniel 5479: * returns: the list of the xmlElementContentPtr describing the element choices
5480: */
5481: xmlElementContentPtr
1.62 daniel 5482: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
1.64 daniel 5483: xmlElementContentPtr ret = NULL, cur = NULL, n;
1.123 daniel 5484: xmlChar *elem = NULL;
1.61 daniel 5485:
1.97 daniel 5486: GROW;
1.152 daniel 5487: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 5488: (NXT(2) == 'C') && (NXT(3) == 'D') &&
5489: (NXT(4) == 'A') && (NXT(5) == 'T') &&
5490: (NXT(6) == 'A')) {
5491: SKIP(7);
5492: SKIP_BLANKS;
1.91 daniel 5493: SHRINK;
1.152 daniel 5494: if (RAW == ')') {
1.187 daniel 5495: ctxt->entity = ctxt->input;
1.63 daniel 5496: NEXT;
5497: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
1.152 daniel 5498: if (RAW == '*') {
1.136 daniel 5499: ret->ocur = XML_ELEMENT_CONTENT_MULT;
5500: NEXT;
5501: }
1.63 daniel 5502: return(ret);
5503: }
1.152 daniel 5504: if ((RAW == '(') || (RAW == '|')) {
1.61 daniel 5505: ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
5506: if (ret == NULL) return(NULL);
1.99 daniel 5507: }
1.152 daniel 5508: while (RAW == '|') {
1.64 daniel 5509: NEXT;
1.61 daniel 5510: if (elem == NULL) {
5511: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5512: if (ret == NULL) return(NULL);
5513: ret->c1 = cur;
1.64 daniel 5514: cur = ret;
1.61 daniel 5515: } else {
1.64 daniel 5516: n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5517: if (n == NULL) return(NULL);
5518: n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
5519: cur->c2 = n;
5520: cur = n;
1.119 daniel 5521: xmlFree(elem);
1.61 daniel 5522: }
5523: SKIP_BLANKS;
5524: elem = xmlParseName(ctxt);
5525: if (elem == NULL) {
5526: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5527: ctxt->sax->error(ctxt->userData,
1.61 daniel 5528: "xmlParseElementMixedContentDecl : Name expected\n");
1.123 daniel 5529: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.61 daniel 5530: ctxt->wellFormed = 0;
1.180 daniel 5531: ctxt->disableSAX = 1;
1.61 daniel 5532: xmlFreeElementContent(cur);
5533: return(NULL);
5534: }
5535: SKIP_BLANKS;
1.97 daniel 5536: GROW;
1.61 daniel 5537: }
1.152 daniel 5538: if ((RAW == ')') && (NXT(1) == '*')) {
1.66 daniel 5539: if (elem != NULL) {
1.61 daniel 5540: cur->c2 = xmlNewElementContent(elem,
5541: XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 5542: xmlFree(elem);
1.66 daniel 5543: }
1.65 daniel 5544: ret->ocur = XML_ELEMENT_CONTENT_MULT;
1.187 daniel 5545: ctxt->entity = ctxt->input;
1.64 daniel 5546: SKIP(2);
1.61 daniel 5547: } else {
1.119 daniel 5548: if (elem != NULL) xmlFree(elem);
1.61 daniel 5549: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5550: ctxt->sax->error(ctxt->userData,
1.63 daniel 5551: "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
1.123 daniel 5552: ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
1.61 daniel 5553: ctxt->wellFormed = 0;
1.180 daniel 5554: ctxt->disableSAX = 1;
1.61 daniel 5555: xmlFreeElementContent(ret);
5556: return(NULL);
5557: }
5558:
5559: } else {
5560: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5561: ctxt->sax->error(ctxt->userData,
1.61 daniel 5562: "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
1.123 daniel 5563: ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
1.61 daniel 5564: ctxt->wellFormed = 0;
1.180 daniel 5565: ctxt->disableSAX = 1;
1.61 daniel 5566: }
5567: return(ret);
5568: }
5569:
5570: /**
5571: * xmlParseElementChildrenContentDecl:
1.50 daniel 5572: * @ctxt: an XML parser context
5573: *
1.61 daniel 5574: * parse the declaration for a Mixed Element content
5575: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
1.22 daniel 5576: *
1.61 daniel 5577: *
1.22 daniel 5578: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5579: *
5580: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5581: *
5582: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5583: *
5584: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5585: *
1.99 daniel 5586: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5587: * TODO Parameter-entity replacement text must be properly nested
5588: * with parenthetized groups. That is to say, if either of the
5589: * opening or closing parentheses in a choice, seq, or Mixed
5590: * construct is contained in the replacement text for a parameter
5591: * entity, both must be contained in the same replacement text. For
5592: * interoperability, if a parameter-entity reference appears in a
5593: * choice, seq, or Mixed construct, its replacement text should not
5594: * be empty, and neither the first nor last non-blank character of
5595: * the replacement text should be a connector (| or ,).
5596: *
1.62 daniel 5597: * returns: the tree of xmlElementContentPtr describing the element
1.61 daniel 5598: * hierarchy.
5599: */
5600: xmlElementContentPtr
1.62 daniel 5601: xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
1.63 daniel 5602: xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
1.123 daniel 5603: xmlChar *elem;
5604: xmlChar type = 0;
1.62 daniel 5605:
5606: SKIP_BLANKS;
1.94 daniel 5607: GROW;
1.152 daniel 5608: if (RAW == '(') {
1.63 daniel 5609: /* Recurse on first child */
1.62 daniel 5610: NEXT;
5611: SKIP_BLANKS;
5612: cur = ret = xmlParseElementChildrenContentDecl(ctxt);
5613: SKIP_BLANKS;
1.101 daniel 5614: GROW;
1.62 daniel 5615: } else {
5616: elem = xmlParseName(ctxt);
5617: if (elem == NULL) {
5618: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5619: ctxt->sax->error(ctxt->userData,
1.62 daniel 5620: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.123 daniel 5621: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 5622: ctxt->wellFormed = 0;
1.180 daniel 5623: ctxt->disableSAX = 1;
1.62 daniel 5624: return(NULL);
5625: }
5626: cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.101 daniel 5627: GROW;
1.152 daniel 5628: if (RAW == '?') {
1.104 daniel 5629: cur->ocur = XML_ELEMENT_CONTENT_OPT;
1.62 daniel 5630: NEXT;
1.152 daniel 5631: } else if (RAW == '*') {
1.104 daniel 5632: cur->ocur = XML_ELEMENT_CONTENT_MULT;
1.62 daniel 5633: NEXT;
1.152 daniel 5634: } else if (RAW == '+') {
1.104 daniel 5635: cur->ocur = XML_ELEMENT_CONTENT_PLUS;
1.62 daniel 5636: NEXT;
5637: } else {
1.104 daniel 5638: cur->ocur = XML_ELEMENT_CONTENT_ONCE;
1.62 daniel 5639: }
1.119 daniel 5640: xmlFree(elem);
1.101 daniel 5641: GROW;
1.62 daniel 5642: }
5643: SKIP_BLANKS;
1.91 daniel 5644: SHRINK;
1.152 daniel 5645: while (RAW != ')') {
1.63 daniel 5646: /*
5647: * Each loop we parse one separator and one element.
5648: */
1.152 daniel 5649: if (RAW == ',') {
1.62 daniel 5650: if (type == 0) type = CUR;
5651:
5652: /*
5653: * Detect "Name | Name , Name" error
5654: */
5655: else if (type != CUR) {
5656: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5657: ctxt->sax->error(ctxt->userData,
1.62 daniel 5658: "xmlParseElementChildrenContentDecl : '%c' expected\n",
5659: type);
1.123 daniel 5660: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 5661: ctxt->wellFormed = 0;
1.180 daniel 5662: ctxt->disableSAX = 1;
1.170 daniel 5663: if ((op != NULL) && (op != ret))
5664: xmlFreeElementContent(op);
1.211 ! veillard 5665: if ((last != NULL) && (last != ret) &&
! 5666: (last != ret->c1) && (last != ret->c2))
1.170 daniel 5667: xmlFreeElementContent(last);
5668: if (ret != NULL)
5669: xmlFreeElementContent(ret);
1.62 daniel 5670: return(NULL);
5671: }
1.64 daniel 5672: NEXT;
1.62 daniel 5673:
1.63 daniel 5674: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
5675: if (op == NULL) {
5676: xmlFreeElementContent(ret);
5677: return(NULL);
5678: }
5679: if (last == NULL) {
5680: op->c1 = ret;
1.65 daniel 5681: ret = cur = op;
1.63 daniel 5682: } else {
5683: cur->c2 = op;
5684: op->c1 = last;
5685: cur =op;
1.65 daniel 5686: last = NULL;
1.63 daniel 5687: }
1.152 daniel 5688: } else if (RAW == '|') {
1.62 daniel 5689: if (type == 0) type = CUR;
5690:
5691: /*
1.63 daniel 5692: * Detect "Name , Name | Name" error
1.62 daniel 5693: */
5694: else if (type != CUR) {
5695: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5696: ctxt->sax->error(ctxt->userData,
1.62 daniel 5697: "xmlParseElementChildrenContentDecl : '%c' expected\n",
5698: type);
1.123 daniel 5699: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 5700: ctxt->wellFormed = 0;
1.180 daniel 5701: ctxt->disableSAX = 1;
1.211 ! veillard 5702: if ((op != NULL) && (op != ret) && (op != last))
1.170 daniel 5703: xmlFreeElementContent(op);
1.211 ! veillard 5704: if ((last != NULL) && (last != ret) &&
! 5705: (last != ret->c1) && (last != ret->c2))
1.170 daniel 5706: xmlFreeElementContent(last);
5707: if (ret != NULL)
5708: xmlFreeElementContent(ret);
1.62 daniel 5709: return(NULL);
5710: }
1.64 daniel 5711: NEXT;
1.62 daniel 5712:
1.63 daniel 5713: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5714: if (op == NULL) {
1.170 daniel 5715: if ((op != NULL) && (op != ret))
5716: xmlFreeElementContent(op);
1.211 ! veillard 5717: if ((last != NULL) && (last != ret) &&
! 5718: (last != ret->c1) && (last != ret->c2))
1.170 daniel 5719: xmlFreeElementContent(last);
5720: if (ret != NULL)
5721: xmlFreeElementContent(ret);
1.63 daniel 5722: return(NULL);
5723: }
5724: if (last == NULL) {
5725: op->c1 = ret;
1.65 daniel 5726: ret = cur = op;
1.63 daniel 5727: } else {
5728: cur->c2 = op;
5729: op->c1 = last;
5730: cur =op;
1.65 daniel 5731: last = NULL;
1.63 daniel 5732: }
1.62 daniel 5733: } else {
5734: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5735: ctxt->sax->error(ctxt->userData,
1.62 daniel 5736: "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
5737: ctxt->wellFormed = 0;
1.180 daniel 5738: ctxt->disableSAX = 1;
1.123 daniel 5739: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
1.170 daniel 5740: if ((op != NULL) && (op != ret))
5741: xmlFreeElementContent(op);
1.211 ! veillard 5742: if ((last != NULL) && (last != ret) &&
! 5743: (last != ret->c1) && (last != ret->c2))
1.170 daniel 5744: xmlFreeElementContent(last);
5745: if (ret != NULL)
5746: xmlFreeElementContent(ret);
1.62 daniel 5747: return(NULL);
5748: }
1.101 daniel 5749: GROW;
1.62 daniel 5750: SKIP_BLANKS;
1.101 daniel 5751: GROW;
1.152 daniel 5752: if (RAW == '(') {
1.63 daniel 5753: /* Recurse on second child */
1.62 daniel 5754: NEXT;
5755: SKIP_BLANKS;
1.65 daniel 5756: last = xmlParseElementChildrenContentDecl(ctxt);
1.62 daniel 5757: SKIP_BLANKS;
5758: } else {
5759: elem = xmlParseName(ctxt);
5760: if (elem == NULL) {
5761: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5762: ctxt->sax->error(ctxt->userData,
1.122 daniel 5763: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.123 daniel 5764: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 5765: ctxt->wellFormed = 0;
1.180 daniel 5766: ctxt->disableSAX = 1;
1.170 daniel 5767: if ((op != NULL) && (op != ret))
5768: xmlFreeElementContent(op);
1.211 ! veillard 5769: if ((last != NULL) && (last != ret) &&
! 5770: (last != ret->c1) && (last != ret->c2))
1.170 daniel 5771: xmlFreeElementContent(last);
5772: if (ret != NULL)
5773: xmlFreeElementContent(ret);
1.62 daniel 5774: return(NULL);
5775: }
1.65 daniel 5776: last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 5777: xmlFree(elem);
1.152 daniel 5778: if (RAW == '?') {
1.105 daniel 5779: last->ocur = XML_ELEMENT_CONTENT_OPT;
5780: NEXT;
1.152 daniel 5781: } else if (RAW == '*') {
1.105 daniel 5782: last->ocur = XML_ELEMENT_CONTENT_MULT;
5783: NEXT;
1.152 daniel 5784: } else if (RAW == '+') {
1.105 daniel 5785: last->ocur = XML_ELEMENT_CONTENT_PLUS;
5786: NEXT;
5787: } else {
5788: last->ocur = XML_ELEMENT_CONTENT_ONCE;
5789: }
1.63 daniel 5790: }
5791: SKIP_BLANKS;
1.97 daniel 5792: GROW;
1.64 daniel 5793: }
1.65 daniel 5794: if ((cur != NULL) && (last != NULL)) {
5795: cur->c2 = last;
1.62 daniel 5796: }
1.187 daniel 5797: ctxt->entity = ctxt->input;
1.62 daniel 5798: NEXT;
1.152 daniel 5799: if (RAW == '?') {
1.62 daniel 5800: ret->ocur = XML_ELEMENT_CONTENT_OPT;
5801: NEXT;
1.152 daniel 5802: } else if (RAW == '*') {
1.62 daniel 5803: ret->ocur = XML_ELEMENT_CONTENT_MULT;
5804: NEXT;
1.152 daniel 5805: } else if (RAW == '+') {
1.62 daniel 5806: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
5807: NEXT;
5808: }
5809: return(ret);
1.61 daniel 5810: }
5811:
5812: /**
5813: * xmlParseElementContentDecl:
5814: * @ctxt: an XML parser context
5815: * @name: the name of the element being defined.
5816: * @result: the Element Content pointer will be stored here if any
1.22 daniel 5817: *
1.61 daniel 5818: * parse the declaration for an Element content either Mixed or Children,
5819: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5820: *
5821: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1.50 daniel 5822: *
1.61 daniel 5823: * returns: the type of element content XML_ELEMENT_TYPE_xxx
1.22 daniel 5824: */
5825:
1.61 daniel 5826: int
1.123 daniel 5827: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
1.61 daniel 5828: xmlElementContentPtr *result) {
5829:
5830: xmlElementContentPtr tree = NULL;
1.187 daniel 5831: xmlParserInputPtr input = ctxt->input;
1.61 daniel 5832: int res;
5833:
5834: *result = NULL;
5835:
1.152 daniel 5836: if (RAW != '(') {
1.61 daniel 5837: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5838: ctxt->sax->error(ctxt->userData,
1.61 daniel 5839: "xmlParseElementContentDecl : '(' expected\n");
1.123 daniel 5840: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.61 daniel 5841: ctxt->wellFormed = 0;
1.180 daniel 5842: ctxt->disableSAX = 1;
1.61 daniel 5843: return(-1);
5844: }
5845: NEXT;
1.97 daniel 5846: GROW;
1.61 daniel 5847: SKIP_BLANKS;
1.152 daniel 5848: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 5849: (NXT(2) == 'C') && (NXT(3) == 'D') &&
5850: (NXT(4) == 'A') && (NXT(5) == 'T') &&
5851: (NXT(6) == 'A')) {
1.62 daniel 5852: tree = xmlParseElementMixedContentDecl(ctxt);
1.61 daniel 5853: res = XML_ELEMENT_TYPE_MIXED;
5854: } else {
1.62 daniel 5855: tree = xmlParseElementChildrenContentDecl(ctxt);
1.61 daniel 5856: res = XML_ELEMENT_TYPE_ELEMENT;
5857: }
1.187 daniel 5858: if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
5859: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5860: ctxt->sax->error(ctxt->userData,
5861: "Element content declaration doesn't start and stop in the same entity\n");
5862: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5863: ctxt->wellFormed = 0;
5864: ctxt->disableSAX = 1;
5865: }
1.61 daniel 5866: SKIP_BLANKS;
1.63 daniel 5867: /****************************
1.152 daniel 5868: if (RAW != ')') {
1.61 daniel 5869: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5870: ctxt->sax->error(ctxt->userData,
1.61 daniel 5871: "xmlParseElementContentDecl : ')' expected\n");
5872: ctxt->wellFormed = 0;
1.180 daniel 5873: ctxt->disableSAX = 1;
1.61 daniel 5874: return(-1);
5875: }
1.63 daniel 5876: ****************************/
5877: *result = tree;
1.61 daniel 5878: return(res);
1.22 daniel 5879: }
5880:
1.50 daniel 5881: /**
5882: * xmlParseElementDecl:
5883: * @ctxt: an XML parser context
5884: *
5885: * parse an Element declaration.
1.22 daniel 5886: *
5887: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5888: *
1.99 daniel 5889: * [ VC: Unique Element Type Declaration ]
1.117 daniel 5890: * No element type may be declared more than once
1.69 daniel 5891: *
5892: * Returns the type of the element, or -1 in case of error
1.22 daniel 5893: */
1.59 daniel 5894: int
1.55 daniel 5895: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 5896: xmlChar *name;
1.59 daniel 5897: int ret = -1;
1.61 daniel 5898: xmlElementContentPtr content = NULL;
1.22 daniel 5899:
1.97 daniel 5900: GROW;
1.152 daniel 5901: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 5902: (NXT(2) == 'E') && (NXT(3) == 'L') &&
5903: (NXT(4) == 'E') && (NXT(5) == 'M') &&
5904: (NXT(6) == 'E') && (NXT(7) == 'N') &&
1.59 daniel 5905: (NXT(8) == 'T')) {
1.187 daniel 5906: xmlParserInputPtr input = ctxt->input;
5907:
1.40 daniel 5908: SKIP(9);
1.59 daniel 5909: if (!IS_BLANK(CUR)) {
5910: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5911: ctxt->sax->error(ctxt->userData,
1.59 daniel 5912: "Space required after 'ELEMENT'\n");
1.123 daniel 5913: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5914: ctxt->wellFormed = 0;
1.180 daniel 5915: ctxt->disableSAX = 1;
1.59 daniel 5916: }
1.42 daniel 5917: SKIP_BLANKS;
1.22 daniel 5918: name = xmlParseName(ctxt);
5919: if (name == NULL) {
1.55 daniel 5920: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5921: ctxt->sax->error(ctxt->userData,
1.59 daniel 5922: "xmlParseElementDecl: no name for Element\n");
1.123 daniel 5923: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5924: ctxt->wellFormed = 0;
1.180 daniel 5925: ctxt->disableSAX = 1;
1.59 daniel 5926: return(-1);
5927: }
5928: if (!IS_BLANK(CUR)) {
5929: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5930: ctxt->sax->error(ctxt->userData,
1.59 daniel 5931: "Space required after the element name\n");
1.123 daniel 5932: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5933: ctxt->wellFormed = 0;
1.180 daniel 5934: ctxt->disableSAX = 1;
1.22 daniel 5935: }
1.42 daniel 5936: SKIP_BLANKS;
1.152 daniel 5937: if ((RAW == 'E') && (NXT(1) == 'M') &&
1.40 daniel 5938: (NXT(2) == 'P') && (NXT(3) == 'T') &&
5939: (NXT(4) == 'Y')) {
5940: SKIP(5);
1.22 daniel 5941: /*
5942: * Element must always be empty.
5943: */
1.59 daniel 5944: ret = XML_ELEMENT_TYPE_EMPTY;
1.152 daniel 5945: } else if ((RAW == 'A') && (NXT(1) == 'N') &&
1.40 daniel 5946: (NXT(2) == 'Y')) {
5947: SKIP(3);
1.22 daniel 5948: /*
5949: * Element is a generic container.
5950: */
1.59 daniel 5951: ret = XML_ELEMENT_TYPE_ANY;
1.152 daniel 5952: } else if (RAW == '(') {
1.61 daniel 5953: ret = xmlParseElementContentDecl(ctxt, name, &content);
1.22 daniel 5954: } else {
1.98 daniel 5955: /*
5956: * [ WFC: PEs in Internal Subset ] error handling.
5957: */
1.152 daniel 5958: if ((RAW == '%') && (ctxt->external == 0) &&
1.98 daniel 5959: (ctxt->inputNr == 1)) {
5960: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5961: ctxt->sax->error(ctxt->userData,
5962: "PEReference: forbidden within markup decl in internal subset\n");
1.123 daniel 5963: ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
1.98 daniel 5964: } else {
5965: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5966: ctxt->sax->error(ctxt->userData,
5967: "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
1.123 daniel 5968: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.98 daniel 5969: }
1.61 daniel 5970: ctxt->wellFormed = 0;
1.180 daniel 5971: ctxt->disableSAX = 1;
1.119 daniel 5972: if (name != NULL) xmlFree(name);
1.61 daniel 5973: return(-1);
1.22 daniel 5974: }
1.142 daniel 5975:
5976: SKIP_BLANKS;
5977: /*
5978: * Pop-up of finished entities.
5979: */
1.152 daniel 5980: while ((RAW == 0) && (ctxt->inputNr > 1))
1.142 daniel 5981: xmlPopInput(ctxt);
1.42 daniel 5982: SKIP_BLANKS;
1.142 daniel 5983:
1.152 daniel 5984: if (RAW != '>') {
1.55 daniel 5985: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5986: ctxt->sax->error(ctxt->userData,
1.31 daniel 5987: "xmlParseElementDecl: expected '>' at the end\n");
1.123 daniel 5988: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.59 daniel 5989: ctxt->wellFormed = 0;
1.180 daniel 5990: ctxt->disableSAX = 1;
1.61 daniel 5991: } else {
1.187 daniel 5992: if (input != ctxt->input) {
5993: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5994: ctxt->sax->error(ctxt->userData,
5995: "Element declaration doesn't start and stop in the same entity\n");
5996: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5997: ctxt->wellFormed = 0;
5998: ctxt->disableSAX = 1;
5999: }
6000:
1.40 daniel 6001: NEXT;
1.171 daniel 6002: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6003: (ctxt->sax->elementDecl != NULL))
1.76 daniel 6004: ctxt->sax->elementDecl(ctxt->userData, name, ret,
6005: content);
1.61 daniel 6006: }
1.84 daniel 6007: if (content != NULL) {
6008: xmlFreeElementContent(content);
6009: }
1.61 daniel 6010: if (name != NULL) {
1.119 daniel 6011: xmlFree(name);
1.61 daniel 6012: }
1.22 daniel 6013: }
1.59 daniel 6014: return(ret);
1.22 daniel 6015: }
6016:
1.50 daniel 6017: /**
6018: * xmlParseMarkupDecl:
6019: * @ctxt: an XML parser context
6020: *
6021: * parse Markup declarations
1.22 daniel 6022: *
6023: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6024: * NotationDecl | PI | Comment
6025: *
1.98 daniel 6026: * [ VC: Proper Declaration/PE Nesting ]
6027: * TODO Parameter-entity replacement text must be properly nested with
6028: * markup declarations. That is to say, if either the first character
6029: * or the last character of a markup declaration (markupdecl above) is
6030: * contained in the replacement text for a parameter-entity reference,
6031: * both must be contained in the same replacement text.
6032: *
6033: * [ WFC: PEs in Internal Subset ]
6034: * In the internal DTD subset, parameter-entity references can occur
6035: * only where markup declarations can occur, not within markup declarations.
6036: * (This does not apply to references that occur in external parameter
6037: * entities or to the external subset.)
1.22 daniel 6038: */
1.55 daniel 6039: void
6040: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1.94 daniel 6041: GROW;
1.22 daniel 6042: xmlParseElementDecl(ctxt);
6043: xmlParseAttributeListDecl(ctxt);
6044: xmlParseEntityDecl(ctxt);
6045: xmlParseNotationDecl(ctxt);
6046: xmlParsePI(ctxt);
1.114 daniel 6047: xmlParseComment(ctxt);
1.98 daniel 6048: /*
6049: * This is only for internal subset. On external entities,
6050: * the replacement is done before parsing stage
6051: */
6052: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6053: xmlParsePEReference(ctxt);
1.97 daniel 6054: ctxt->instate = XML_PARSER_DTD;
1.22 daniel 6055: }
6056:
1.50 daniel 6057: /**
1.76 daniel 6058: * xmlParseTextDecl:
6059: * @ctxt: an XML parser context
6060: *
6061: * parse an XML declaration header for external entities
6062: *
6063: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
1.176 daniel 6064: *
6065: * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
1.76 daniel 6066: */
6067:
1.172 daniel 6068: void
1.76 daniel 6069: xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 6070: xmlChar *version;
1.76 daniel 6071:
6072: /*
6073: * We know that '<?xml' is here.
6074: */
1.193 daniel 6075: if ((RAW == '<') && (NXT(1) == '?') &&
6076: (NXT(2) == 'x') && (NXT(3) == 'm') &&
6077: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6078: SKIP(5);
6079: } else {
6080: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6081: ctxt->sax->error(ctxt->userData,
6082: "Text declaration '<?xml' required\n");
6083: ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
6084: ctxt->wellFormed = 0;
6085: ctxt->disableSAX = 1;
6086:
6087: return;
6088: }
1.76 daniel 6089:
6090: if (!IS_BLANK(CUR)) {
6091: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6092: ctxt->sax->error(ctxt->userData,
6093: "Space needed after '<?xml'\n");
1.123 daniel 6094: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 6095: ctxt->wellFormed = 0;
1.180 daniel 6096: ctxt->disableSAX = 1;
1.76 daniel 6097: }
6098: SKIP_BLANKS;
6099:
6100: /*
6101: * We may have the VersionInfo here.
6102: */
6103: version = xmlParseVersionInfo(ctxt);
6104: if (version == NULL)
6105: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.172 daniel 6106: ctxt->input->version = version;
1.76 daniel 6107:
6108: /*
6109: * We must have the encoding declaration
6110: */
6111: if (!IS_BLANK(CUR)) {
6112: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6113: ctxt->sax->error(ctxt->userData, "Space needed here\n");
1.123 daniel 6114: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 6115: ctxt->wellFormed = 0;
1.180 daniel 6116: ctxt->disableSAX = 1;
1.76 daniel 6117: }
1.195 daniel 6118: xmlParseEncodingDecl(ctxt);
1.193 daniel 6119: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6120: /*
6121: * The XML REC instructs us to stop parsing right here
6122: */
6123: return;
6124: }
1.76 daniel 6125:
6126: SKIP_BLANKS;
1.152 daniel 6127: if ((RAW == '?') && (NXT(1) == '>')) {
1.76 daniel 6128: SKIP(2);
1.152 daniel 6129: } else if (RAW == '>') {
1.76 daniel 6130: /* Deprecated old WD ... */
6131: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6132: ctxt->sax->error(ctxt->userData,
6133: "XML declaration must end-up with '?>'\n");
1.123 daniel 6134: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 6135: ctxt->wellFormed = 0;
1.180 daniel 6136: ctxt->disableSAX = 1;
1.76 daniel 6137: NEXT;
6138: } else {
6139: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6140: ctxt->sax->error(ctxt->userData,
6141: "parsing XML declaration: '?>' expected\n");
1.123 daniel 6142: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 6143: ctxt->wellFormed = 0;
1.180 daniel 6144: ctxt->disableSAX = 1;
1.76 daniel 6145: MOVETO_ENDTAG(CUR_PTR);
6146: NEXT;
6147: }
6148: }
6149:
6150: /*
6151: * xmlParseConditionalSections
6152: * @ctxt: an XML parser context
6153: *
6154: * TODO : Conditionnal section are not yet supported !
6155: *
6156: * [61] conditionalSect ::= includeSect | ignoreSect
6157: * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6158: * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6159: * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6160: * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6161: */
6162:
6163: void
6164: xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
1.165 daniel 6165: SKIP(3);
6166: SKIP_BLANKS;
1.168 daniel 6167: if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
6168: (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
6169: (NXT(6) == 'E')) {
1.165 daniel 6170: SKIP(7);
1.168 daniel 6171: SKIP_BLANKS;
6172: if (RAW != '[') {
6173: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6174: ctxt->sax->error(ctxt->userData,
6175: "XML conditional section '[' expected\n");
6176: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
6177: ctxt->wellFormed = 0;
1.180 daniel 6178: ctxt->disableSAX = 1;
1.168 daniel 6179: } else {
6180: NEXT;
6181: }
1.165 daniel 6182: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6183: (NXT(2) != '>'))) {
6184: const xmlChar *check = CUR_PTR;
6185: int cons = ctxt->input->consumed;
6186: int tok = ctxt->token;
6187:
6188: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6189: xmlParseConditionalSections(ctxt);
6190: } else if (IS_BLANK(CUR)) {
6191: NEXT;
6192: } else if (RAW == '%') {
6193: xmlParsePEReference(ctxt);
6194: } else
6195: xmlParseMarkupDecl(ctxt);
6196:
6197: /*
6198: * Pop-up of finished entities.
6199: */
6200: while ((RAW == 0) && (ctxt->inputNr > 1))
6201: xmlPopInput(ctxt);
6202:
6203: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
6204: (tok == ctxt->token)) {
6205: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6206: ctxt->sax->error(ctxt->userData,
6207: "Content error in the external subset\n");
6208: ctxt->wellFormed = 0;
1.180 daniel 6209: ctxt->disableSAX = 1;
1.165 daniel 6210: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
6211: break;
6212: }
6213: }
1.168 daniel 6214: } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
6215: (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
1.171 daniel 6216: int state;
6217:
1.168 daniel 6218: SKIP(6);
6219: SKIP_BLANKS;
6220: if (RAW != '[') {
6221: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6222: ctxt->sax->error(ctxt->userData,
6223: "XML conditional section '[' expected\n");
6224: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
6225: ctxt->wellFormed = 0;
1.180 daniel 6226: ctxt->disableSAX = 1;
1.168 daniel 6227: } else {
6228: NEXT;
6229: }
1.171 daniel 6230:
1.143 daniel 6231: /*
1.171 daniel 6232: * Parse up to the end of the conditionnal section
6233: * But disable SAX event generating DTD building in the meantime
1.143 daniel 6234: */
1.171 daniel 6235: state = ctxt->disableSAX;
1.165 daniel 6236: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6237: (NXT(2) != '>'))) {
1.171 daniel 6238: const xmlChar *check = CUR_PTR;
6239: int cons = ctxt->input->consumed;
6240: int tok = ctxt->token;
6241:
6242: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6243: xmlParseConditionalSections(ctxt);
6244: } else if (IS_BLANK(CUR)) {
6245: NEXT;
6246: } else if (RAW == '%') {
6247: xmlParsePEReference(ctxt);
6248: } else
6249: xmlParseMarkupDecl(ctxt);
6250:
1.165 daniel 6251: /*
6252: * Pop-up of finished entities.
6253: */
6254: while ((RAW == 0) && (ctxt->inputNr > 1))
6255: xmlPopInput(ctxt);
1.143 daniel 6256:
1.171 daniel 6257: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
6258: (tok == ctxt->token)) {
6259: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6260: ctxt->sax->error(ctxt->userData,
6261: "Content error in the external subset\n");
6262: ctxt->wellFormed = 0;
1.180 daniel 6263: ctxt->disableSAX = 1;
1.171 daniel 6264: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
6265: break;
6266: }
1.165 daniel 6267: }
1.171 daniel 6268: ctxt->disableSAX = state;
1.168 daniel 6269: } else {
6270: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6271: ctxt->sax->error(ctxt->userData,
6272: "XML conditional section INCLUDE or IGNORE keyword expected\n");
6273: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
6274: ctxt->wellFormed = 0;
1.180 daniel 6275: ctxt->disableSAX = 1;
1.143 daniel 6276: }
6277:
1.152 daniel 6278: if (RAW == 0)
1.143 daniel 6279: SHRINK;
6280:
1.152 daniel 6281: if (RAW == 0) {
1.76 daniel 6282: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6283: ctxt->sax->error(ctxt->userData,
6284: "XML conditional section not closed\n");
1.123 daniel 6285: ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
1.76 daniel 6286: ctxt->wellFormed = 0;
1.180 daniel 6287: ctxt->disableSAX = 1;
1.143 daniel 6288: } else {
6289: SKIP(3);
1.76 daniel 6290: }
6291: }
6292:
6293: /**
1.124 daniel 6294: * xmlParseExternalSubset:
1.76 daniel 6295: * @ctxt: an XML parser context
1.124 daniel 6296: * @ExternalID: the external identifier
6297: * @SystemID: the system identifier (or URL)
1.76 daniel 6298: *
6299: * parse Markup declarations from an external subset
6300: *
6301: * [30] extSubset ::= textDecl? extSubsetDecl
6302: *
6303: * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6304: */
6305: void
1.123 daniel 6306: xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6307: const xmlChar *SystemID) {
1.132 daniel 6308: GROW;
1.152 daniel 6309: if ((RAW == '<') && (NXT(1) == '?') &&
1.76 daniel 6310: (NXT(2) == 'x') && (NXT(3) == 'm') &&
6311: (NXT(4) == 'l')) {
1.172 daniel 6312: xmlParseTextDecl(ctxt);
1.193 daniel 6313: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6314: /*
6315: * The XML REC instructs us to stop parsing right here
6316: */
6317: ctxt->instate = XML_PARSER_EOF;
6318: return;
6319: }
1.76 daniel 6320: }
1.79 daniel 6321: if (ctxt->myDoc == NULL) {
1.116 daniel 6322: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
1.79 daniel 6323: }
6324: if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6325: xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6326:
1.96 daniel 6327: ctxt->instate = XML_PARSER_DTD;
1.101 daniel 6328: ctxt->external = 1;
1.152 daniel 6329: while (((RAW == '<') && (NXT(1) == '?')) ||
6330: ((RAW == '<') && (NXT(1) == '!')) ||
1.164 daniel 6331: IS_BLANK(CUR)) {
1.123 daniel 6332: const xmlChar *check = CUR_PTR;
1.115 daniel 6333: int cons = ctxt->input->consumed;
1.164 daniel 6334: int tok = ctxt->token;
1.115 daniel 6335:
1.152 daniel 6336: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
1.76 daniel 6337: xmlParseConditionalSections(ctxt);
6338: } else if (IS_BLANK(CUR)) {
6339: NEXT;
1.152 daniel 6340: } else if (RAW == '%') {
1.76 daniel 6341: xmlParsePEReference(ctxt);
6342: } else
6343: xmlParseMarkupDecl(ctxt);
1.77 daniel 6344:
6345: /*
6346: * Pop-up of finished entities.
6347: */
1.166 daniel 6348: while ((RAW == 0) && (ctxt->inputNr > 1))
1.77 daniel 6349: xmlPopInput(ctxt);
6350:
1.164 daniel 6351: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
6352: (tok == ctxt->token)) {
1.115 daniel 6353: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6354: ctxt->sax->error(ctxt->userData,
6355: "Content error in the external subset\n");
6356: ctxt->wellFormed = 0;
1.180 daniel 6357: ctxt->disableSAX = 1;
1.123 daniel 6358: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.115 daniel 6359: break;
6360: }
1.76 daniel 6361: }
6362:
1.152 daniel 6363: if (RAW != 0) {
1.76 daniel 6364: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6365: ctxt->sax->error(ctxt->userData,
6366: "Extra content at the end of the document\n");
1.123 daniel 6367: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.76 daniel 6368: ctxt->wellFormed = 0;
1.180 daniel 6369: ctxt->disableSAX = 1;
1.76 daniel 6370: }
6371:
6372: }
6373:
6374: /**
1.77 daniel 6375: * xmlParseReference:
6376: * @ctxt: an XML parser context
6377: *
6378: * parse and handle entity references in content, depending on the SAX
6379: * interface, this may end-up in a call to character() if this is a
1.79 daniel 6380: * CharRef, a predefined entity, if there is no reference() callback.
6381: * or if the parser was asked to switch to that mode.
1.77 daniel 6382: *
6383: * [67] Reference ::= EntityRef | CharRef
6384: */
6385: void
6386: xmlParseReference(xmlParserCtxtPtr ctxt) {
6387: xmlEntityPtr ent;
1.123 daniel 6388: xmlChar *val;
1.152 daniel 6389: if (RAW != '&') return;
1.77 daniel 6390:
1.113 daniel 6391: if (ctxt->inputNr > 1) {
1.123 daniel 6392: xmlChar cur[2] = { '&' , 0 } ;
1.113 daniel 6393:
1.171 daniel 6394: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6395: (!ctxt->disableSAX))
1.113 daniel 6396: ctxt->sax->characters(ctxt->userData, cur, 1);
6397: if (ctxt->token == '&')
6398: ctxt->token = 0;
6399: else {
6400: SKIP(1);
6401: }
6402: return;
6403: }
1.77 daniel 6404: if (NXT(1) == '#') {
1.152 daniel 6405: int i = 0;
1.153 daniel 6406: xmlChar out[10];
6407: int hex = NXT(2);
1.77 daniel 6408: int val = xmlParseCharRef(ctxt);
1.152 daniel 6409:
1.198 daniel 6410: if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
1.153 daniel 6411: /*
6412: * So we are using non-UTF-8 buffers
6413: * Check that the char fit on 8bits, if not
6414: * generate a CharRef.
6415: */
6416: if (val <= 0xFF) {
6417: out[0] = val;
6418: out[1] = 0;
1.171 daniel 6419: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6420: (!ctxt->disableSAX))
1.153 daniel 6421: ctxt->sax->characters(ctxt->userData, out, 1);
6422: } else {
6423: if ((hex == 'x') || (hex == 'X'))
6424: sprintf((char *)out, "#x%X", val);
6425: else
6426: sprintf((char *)out, "#%d", val);
1.171 daniel 6427: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6428: (!ctxt->disableSAX))
1.153 daniel 6429: ctxt->sax->reference(ctxt->userData, out);
6430: }
6431: } else {
6432: /*
6433: * Just encode the value in UTF-8
6434: */
6435: COPY_BUF(0 ,out, i, val);
6436: out[i] = 0;
1.171 daniel 6437: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6438: (!ctxt->disableSAX))
1.153 daniel 6439: ctxt->sax->characters(ctxt->userData, out, i);
6440: }
1.77 daniel 6441: } else {
6442: ent = xmlParseEntityRef(ctxt);
6443: if (ent == NULL) return;
6444: if ((ent->name != NULL) &&
1.159 daniel 6445: (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
1.180 daniel 6446: xmlNodePtr list = NULL;
6447: int ret;
6448:
6449:
6450: /*
6451: * The first reference to the entity trigger a parsing phase
6452: * where the ent->children is filled with the result from
6453: * the parsing.
6454: */
6455: if (ent->children == NULL) {
6456: xmlChar *value;
6457: value = ent->content;
6458:
6459: /*
6460: * Check that this entity is well formed
6461: */
6462: if ((value != NULL) &&
6463: (value[1] == 0) && (value[0] == '<') &&
6464: (!xmlStrcmp(ent->name, BAD_CAST "lt"))) {
6465: /*
6466: * TODO: get definite answer on this !!!
6467: * Lots of entity decls are used to declare a single
6468: * char
6469: * <!ENTITY lt "<">
6470: * Which seems to be valid since
6471: * 2.4: The ampersand character (&) and the left angle
6472: * bracket (<) may appear in their literal form only
6473: * when used ... They are also legal within the literal
6474: * entity value of an internal entity declaration;i
6475: * see "4.3.2 Well-Formed Parsed Entities".
6476: * IMHO 2.4 and 4.3.2 are directly in contradiction.
6477: * Looking at the OASIS test suite and James Clark
6478: * tests, this is broken. However the XML REC uses
6479: * it. Is the XML REC not well-formed ????
6480: * This is a hack to avoid this problem
6481: */
6482: list = xmlNewDocText(ctxt->myDoc, value);
6483: if (list != NULL) {
6484: if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6485: (ent->children == NULL)) {
6486: ent->children = list;
6487: ent->last = list;
6488: list->parent = (xmlNodePtr) ent;
6489: } else {
6490: xmlFreeNodeList(list);
6491: }
6492: } else if (list != NULL) {
6493: xmlFreeNodeList(list);
6494: }
1.181 daniel 6495: } else {
1.180 daniel 6496: /*
6497: * 4.3.2: An internal general parsed entity is well-formed
6498: * if its replacement text matches the production labeled
6499: * content.
6500: */
1.185 daniel 6501: if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6502: ctxt->depth++;
1.180 daniel 6503: ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
1.185 daniel 6504: ctxt->sax, NULL, ctxt->depth,
6505: value, &list);
6506: ctxt->depth--;
6507: } else if (ent->etype ==
6508: XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6509: ctxt->depth++;
1.180 daniel 6510: ret = xmlParseExternalEntity(ctxt->myDoc,
1.185 daniel 6511: ctxt->sax, NULL, ctxt->depth,
6512: ent->SystemID, ent->ExternalID, &list);
6513: ctxt->depth--;
6514: } else {
1.180 daniel 6515: ret = -1;
6516: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6517: ctxt->sax->error(ctxt->userData,
6518: "Internal: invalid entity type\n");
6519: }
1.185 daniel 6520: if (ret == XML_ERR_ENTITY_LOOP) {
6521: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6522: ctxt->sax->error(ctxt->userData,
6523: "Detected entity reference loop\n");
6524: ctxt->wellFormed = 0;
6525: ctxt->disableSAX = 1;
6526: ctxt->errNo = XML_ERR_ENTITY_LOOP;
6527: } else if ((ret == 0) && (list != NULL)) {
1.180 daniel 6528: if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6529: (ent->children == NULL)) {
6530: ent->children = list;
6531: while (list != NULL) {
6532: list->parent = (xmlNodePtr) ent;
6533: if (list->next == NULL)
6534: ent->last = list;
6535: list = list->next;
6536: }
6537: } else {
6538: xmlFreeNodeList(list);
6539: }
6540: } else if (ret > 0) {
6541: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6542: ctxt->sax->error(ctxt->userData,
6543: "Entity value required\n");
6544: ctxt->errNo = ret;
6545: ctxt->wellFormed = 0;
6546: ctxt->disableSAX = 1;
6547: } else if (list != NULL) {
6548: xmlFreeNodeList(list);
6549: }
6550: }
6551: }
1.113 daniel 6552: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
1.171 daniel 6553: (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
1.113 daniel 6554: /*
6555: * Create a node.
6556: */
6557: ctxt->sax->reference(ctxt->userData, ent->name);
6558: return;
6559: } else if (ctxt->replaceEntities) {
6560: xmlParserInputPtr input;
1.79 daniel 6561:
1.113 daniel 6562: input = xmlNewEntityInputStream(ctxt, ent);
6563: xmlPushInput(ctxt, input);
1.167 daniel 6564: if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
6565: (RAW == '<') && (NXT(1) == '?') &&
6566: (NXT(2) == 'x') && (NXT(3) == 'm') &&
6567: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 6568: xmlParseTextDecl(ctxt);
1.193 daniel 6569: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6570: /*
6571: * The XML REC instructs us to stop parsing right here
6572: */
6573: ctxt->instate = XML_PARSER_EOF;
6574: return;
6575: }
1.199 daniel 6576: if (input->standalone == 1) {
1.167 daniel 6577: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6578: ctxt->sax->error(ctxt->userData,
6579: "external parsed entities cannot be standalone\n");
6580: ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
6581: ctxt->wellFormed = 0;
1.180 daniel 6582: ctxt->disableSAX = 1;
1.167 daniel 6583: }
6584: }
1.179 daniel 6585: /*
6586: * !!! TODO: build the tree under the entity first
6587: * 1234
6588: */
1.113 daniel 6589: return;
6590: }
1.77 daniel 6591: }
6592: val = ent->content;
6593: if (val == NULL) return;
6594: /*
6595: * inline the entity.
6596: */
1.171 daniel 6597: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6598: (!ctxt->disableSAX))
1.77 daniel 6599: ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6600: }
1.24 daniel 6601: }
6602:
1.50 daniel 6603: /**
6604: * xmlParseEntityRef:
6605: * @ctxt: an XML parser context
6606: *
6607: * parse ENTITY references declarations
1.24 daniel 6608: *
6609: * [68] EntityRef ::= '&' Name ';'
1.68 daniel 6610: *
1.98 daniel 6611: * [ WFC: Entity Declared ]
6612: * In a document without any DTD, a document with only an internal DTD
6613: * subset which contains no parameter entity references, or a document
6614: * with "standalone='yes'", the Name given in the entity reference
6615: * must match that in an entity declaration, except that well-formed
6616: * documents need not declare any of the following entities: amp, lt,
6617: * gt, apos, quot. The declaration of a parameter entity must precede
6618: * any reference to it. Similarly, the declaration of a general entity
6619: * must precede any reference to it which appears in a default value in an
6620: * attribute-list declaration. Note that if entities are declared in the
6621: * external subset or in external parameter entities, a non-validating
6622: * processor is not obligated to read and process their declarations;
6623: * for such documents, the rule that an entity must be declared is a
6624: * well-formedness constraint only if standalone='yes'.
6625: *
6626: * [ WFC: Parsed Entity ]
6627: * An entity reference must not contain the name of an unparsed entity
6628: *
1.77 daniel 6629: * Returns the xmlEntityPtr if found, or NULL otherwise.
1.24 daniel 6630: */
1.77 daniel 6631: xmlEntityPtr
1.55 daniel 6632: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1.123 daniel 6633: xmlChar *name;
1.72 daniel 6634: xmlEntityPtr ent = NULL;
1.24 daniel 6635:
1.91 daniel 6636: GROW;
1.111 daniel 6637:
1.152 daniel 6638: if (RAW == '&') {
1.40 daniel 6639: NEXT;
1.24 daniel 6640: name = xmlParseName(ctxt);
6641: if (name == NULL) {
1.55 daniel 6642: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 6643: ctxt->sax->error(ctxt->userData,
6644: "xmlParseEntityRef: no name\n");
1.123 daniel 6645: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 6646: ctxt->wellFormed = 0;
1.180 daniel 6647: ctxt->disableSAX = 1;
1.24 daniel 6648: } else {
1.152 daniel 6649: if (RAW == ';') {
1.40 daniel 6650: NEXT;
1.24 daniel 6651: /*
1.77 daniel 6652: * Ask first SAX for entity resolution, otherwise try the
6653: * predefined set.
6654: */
6655: if (ctxt->sax != NULL) {
6656: if (ctxt->sax->getEntity != NULL)
6657: ent = ctxt->sax->getEntity(ctxt->userData, name);
6658: if (ent == NULL)
6659: ent = xmlGetPredefinedEntity(name);
6660: }
6661: /*
1.98 daniel 6662: * [ WFC: Entity Declared ]
6663: * In a document without any DTD, a document with only an
6664: * internal DTD subset which contains no parameter entity
6665: * references, or a document with "standalone='yes'", the
6666: * Name given in the entity reference must match that in an
6667: * entity declaration, except that well-formed documents
6668: * need not declare any of the following entities: amp, lt,
6669: * gt, apos, quot.
6670: * The declaration of a parameter entity must precede any
6671: * reference to it.
6672: * Similarly, the declaration of a general entity must
6673: * precede any reference to it which appears in a default
6674: * value in an attribute-list declaration. Note that if
6675: * entities are declared in the external subset or in
6676: * external parameter entities, a non-validating processor
6677: * is not obligated to read and process their declarations;
6678: * for such documents, the rule that an entity must be
6679: * declared is a well-formedness constraint only if
6680: * standalone='yes'.
1.59 daniel 6681: */
1.77 daniel 6682: if (ent == NULL) {
1.98 daniel 6683: if ((ctxt->standalone == 1) ||
6684: ((ctxt->hasExternalSubset == 0) &&
6685: (ctxt->hasPErefs == 0))) {
6686: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 6687: ctxt->sax->error(ctxt->userData,
6688: "Entity '%s' not defined\n", name);
1.123 daniel 6689: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.77 daniel 6690: ctxt->wellFormed = 0;
1.180 daniel 6691: ctxt->disableSAX = 1;
1.77 daniel 6692: } else {
1.98 daniel 6693: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6694: ctxt->sax->warning(ctxt->userData,
6695: "Entity '%s' not defined\n", name);
1.123 daniel 6696: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
1.59 daniel 6697: }
1.77 daniel 6698: }
1.59 daniel 6699:
6700: /*
1.98 daniel 6701: * [ WFC: Parsed Entity ]
6702: * An entity reference must not contain the name of an
6703: * unparsed entity
6704: */
1.159 daniel 6705: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.98 daniel 6706: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6707: ctxt->sax->error(ctxt->userData,
6708: "Entity reference to unparsed entity %s\n", name);
1.123 daniel 6709: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 6710: ctxt->wellFormed = 0;
1.180 daniel 6711: ctxt->disableSAX = 1;
1.98 daniel 6712: }
6713:
6714: /*
6715: * [ WFC: No External Entity References ]
6716: * Attribute values cannot contain direct or indirect
6717: * entity references to external entities.
6718: */
6719: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 6720: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.98 daniel 6721: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6722: ctxt->sax->error(ctxt->userData,
6723: "Attribute references external entity '%s'\n", name);
1.123 daniel 6724: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
1.98 daniel 6725: ctxt->wellFormed = 0;
1.180 daniel 6726: ctxt->disableSAX = 1;
1.98 daniel 6727: }
6728: /*
6729: * [ WFC: No < in Attribute Values ]
6730: * The replacement text of any entity referred to directly or
6731: * indirectly in an attribute value (other than "<") must
6732: * not contain a <.
1.59 daniel 6733: */
1.98 daniel 6734: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.116 daniel 6735: (ent != NULL) &&
6736: (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
1.98 daniel 6737: (ent->content != NULL) &&
6738: (xmlStrchr(ent->content, '<'))) {
6739: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6740: ctxt->sax->error(ctxt->userData,
6741: "'<' in entity '%s' is not allowed in attributes values\n", name);
1.123 daniel 6742: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.98 daniel 6743: ctxt->wellFormed = 0;
1.180 daniel 6744: ctxt->disableSAX = 1;
1.98 daniel 6745: }
6746:
6747: /*
6748: * Internal check, no parameter entities here ...
6749: */
6750: else {
1.159 daniel 6751: switch (ent->etype) {
1.59 daniel 6752: case XML_INTERNAL_PARAMETER_ENTITY:
6753: case XML_EXTERNAL_PARAMETER_ENTITY:
6754: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6755: ctxt->sax->error(ctxt->userData,
1.59 daniel 6756: "Attempt to reference the parameter entity '%s'\n", name);
1.123 daniel 6757: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
1.59 daniel 6758: ctxt->wellFormed = 0;
1.180 daniel 6759: ctxt->disableSAX = 1;
6760: break;
6761: default:
1.59 daniel 6762: break;
6763: }
6764: }
6765:
6766: /*
1.98 daniel 6767: * [ WFC: No Recursion ]
1.117 daniel 6768: * TODO A parsed entity must not contain a recursive reference
6769: * to itself, either directly or indirectly.
1.59 daniel 6770: */
1.77 daniel 6771:
1.24 daniel 6772: } else {
1.55 daniel 6773: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6774: ctxt->sax->error(ctxt->userData,
1.59 daniel 6775: "xmlParseEntityRef: expecting ';'\n");
1.123 daniel 6776: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.59 daniel 6777: ctxt->wellFormed = 0;
1.180 daniel 6778: ctxt->disableSAX = 1;
1.24 daniel 6779: }
1.119 daniel 6780: xmlFree(name);
1.24 daniel 6781: }
6782: }
1.77 daniel 6783: return(ent);
1.24 daniel 6784: }
1.135 daniel 6785: /**
6786: * xmlParseStringEntityRef:
6787: * @ctxt: an XML parser context
6788: * @str: a pointer to an index in the string
6789: *
6790: * parse ENTITY references declarations, but this version parses it from
6791: * a string value.
6792: *
6793: * [68] EntityRef ::= '&' Name ';'
6794: *
6795: * [ WFC: Entity Declared ]
6796: * In a document without any DTD, a document with only an internal DTD
6797: * subset which contains no parameter entity references, or a document
6798: * with "standalone='yes'", the Name given in the entity reference
6799: * must match that in an entity declaration, except that well-formed
6800: * documents need not declare any of the following entities: amp, lt,
6801: * gt, apos, quot. The declaration of a parameter entity must precede
6802: * any reference to it. Similarly, the declaration of a general entity
6803: * must precede any reference to it which appears in a default value in an
6804: * attribute-list declaration. Note that if entities are declared in the
6805: * external subset or in external parameter entities, a non-validating
6806: * processor is not obligated to read and process their declarations;
6807: * for such documents, the rule that an entity must be declared is a
6808: * well-formedness constraint only if standalone='yes'.
6809: *
6810: * [ WFC: Parsed Entity ]
6811: * An entity reference must not contain the name of an unparsed entity
6812: *
6813: * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6814: * is updated to the current location in the string.
6815: */
6816: xmlEntityPtr
6817: xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6818: xmlChar *name;
6819: const xmlChar *ptr;
6820: xmlChar cur;
6821: xmlEntityPtr ent = NULL;
6822:
1.156 daniel 6823: if ((str == NULL) || (*str == NULL))
6824: return(NULL);
1.135 daniel 6825: ptr = *str;
6826: cur = *ptr;
6827: if (cur == '&') {
6828: ptr++;
6829: cur = *ptr;
6830: name = xmlParseStringName(ctxt, &ptr);
6831: if (name == NULL) {
6832: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6833: ctxt->sax->error(ctxt->userData,
6834: "xmlParseEntityRef: no name\n");
6835: ctxt->errNo = XML_ERR_NAME_REQUIRED;
6836: ctxt->wellFormed = 0;
1.180 daniel 6837: ctxt->disableSAX = 1;
1.135 daniel 6838: } else {
1.185 daniel 6839: if (*ptr == ';') {
6840: ptr++;
1.135 daniel 6841: /*
6842: * Ask first SAX for entity resolution, otherwise try the
6843: * predefined set.
6844: */
6845: if (ctxt->sax != NULL) {
6846: if (ctxt->sax->getEntity != NULL)
6847: ent = ctxt->sax->getEntity(ctxt->userData, name);
6848: if (ent == NULL)
6849: ent = xmlGetPredefinedEntity(name);
6850: }
6851: /*
6852: * [ WFC: Entity Declared ]
6853: * In a document without any DTD, a document with only an
6854: * internal DTD subset which contains no parameter entity
6855: * references, or a document with "standalone='yes'", the
6856: * Name given in the entity reference must match that in an
6857: * entity declaration, except that well-formed documents
6858: * need not declare any of the following entities: amp, lt,
6859: * gt, apos, quot.
6860: * The declaration of a parameter entity must precede any
6861: * reference to it.
6862: * Similarly, the declaration of a general entity must
6863: * precede any reference to it which appears in a default
6864: * value in an attribute-list declaration. Note that if
6865: * entities are declared in the external subset or in
6866: * external parameter entities, a non-validating processor
6867: * is not obligated to read and process their declarations;
6868: * for such documents, the rule that an entity must be
6869: * declared is a well-formedness constraint only if
6870: * standalone='yes'.
6871: */
6872: if (ent == NULL) {
6873: if ((ctxt->standalone == 1) ||
6874: ((ctxt->hasExternalSubset == 0) &&
6875: (ctxt->hasPErefs == 0))) {
6876: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6877: ctxt->sax->error(ctxt->userData,
6878: "Entity '%s' not defined\n", name);
6879: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6880: ctxt->wellFormed = 0;
1.180 daniel 6881: ctxt->disableSAX = 1;
1.135 daniel 6882: } else {
6883: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6884: ctxt->sax->warning(ctxt->userData,
6885: "Entity '%s' not defined\n", name);
6886: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
6887: }
6888: }
6889:
6890: /*
6891: * [ WFC: Parsed Entity ]
6892: * An entity reference must not contain the name of an
6893: * unparsed entity
6894: */
1.159 daniel 6895: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.135 daniel 6896: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6897: ctxt->sax->error(ctxt->userData,
6898: "Entity reference to unparsed entity %s\n", name);
6899: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
6900: ctxt->wellFormed = 0;
1.180 daniel 6901: ctxt->disableSAX = 1;
1.135 daniel 6902: }
6903:
6904: /*
6905: * [ WFC: No External Entity References ]
6906: * Attribute values cannot contain direct or indirect
6907: * entity references to external entities.
6908: */
6909: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 6910: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.135 daniel 6911: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6912: ctxt->sax->error(ctxt->userData,
6913: "Attribute references external entity '%s'\n", name);
6914: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
6915: ctxt->wellFormed = 0;
1.180 daniel 6916: ctxt->disableSAX = 1;
1.135 daniel 6917: }
6918: /*
6919: * [ WFC: No < in Attribute Values ]
6920: * The replacement text of any entity referred to directly or
6921: * indirectly in an attribute value (other than "<") must
6922: * not contain a <.
6923: */
6924: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6925: (ent != NULL) &&
6926: (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
6927: (ent->content != NULL) &&
6928: (xmlStrchr(ent->content, '<'))) {
6929: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6930: ctxt->sax->error(ctxt->userData,
6931: "'<' in entity '%s' is not allowed in attributes values\n", name);
6932: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
6933: ctxt->wellFormed = 0;
1.180 daniel 6934: ctxt->disableSAX = 1;
1.135 daniel 6935: }
6936:
6937: /*
6938: * Internal check, no parameter entities here ...
6939: */
6940: else {
1.159 daniel 6941: switch (ent->etype) {
1.135 daniel 6942: case XML_INTERNAL_PARAMETER_ENTITY:
6943: case XML_EXTERNAL_PARAMETER_ENTITY:
6944: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6945: ctxt->sax->error(ctxt->userData,
6946: "Attempt to reference the parameter entity '%s'\n", name);
6947: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
6948: ctxt->wellFormed = 0;
1.180 daniel 6949: ctxt->disableSAX = 1;
6950: break;
6951: default:
1.135 daniel 6952: break;
6953: }
6954: }
6955:
6956: /*
6957: * [ WFC: No Recursion ]
6958: * TODO A parsed entity must not contain a recursive reference
6959: * to itself, either directly or indirectly.
6960: */
6961:
6962: } else {
6963: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6964: ctxt->sax->error(ctxt->userData,
6965: "xmlParseEntityRef: expecting ';'\n");
6966: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6967: ctxt->wellFormed = 0;
1.180 daniel 6968: ctxt->disableSAX = 1;
1.135 daniel 6969: }
6970: xmlFree(name);
6971: }
6972: }
1.185 daniel 6973: *str = ptr;
1.135 daniel 6974: return(ent);
6975: }
1.24 daniel 6976:
1.50 daniel 6977: /**
6978: * xmlParsePEReference:
6979: * @ctxt: an XML parser context
6980: *
6981: * parse PEReference declarations
1.77 daniel 6982: * The entity content is handled directly by pushing it's content as
6983: * a new input stream.
1.22 daniel 6984: *
6985: * [69] PEReference ::= '%' Name ';'
1.68 daniel 6986: *
1.98 daniel 6987: * [ WFC: No Recursion ]
6988: * TODO A parsed entity must not contain a recursive
6989: * reference to itself, either directly or indirectly.
6990: *
6991: * [ WFC: Entity Declared ]
6992: * In a document without any DTD, a document with only an internal DTD
6993: * subset which contains no parameter entity references, or a document
6994: * with "standalone='yes'", ... ... The declaration of a parameter
6995: * entity must precede any reference to it...
6996: *
6997: * [ VC: Entity Declared ]
6998: * In a document with an external subset or external parameter entities
6999: * with "standalone='no'", ... ... The declaration of a parameter entity
7000: * must precede any reference to it...
7001: *
7002: * [ WFC: In DTD ]
7003: * Parameter-entity references may only appear in the DTD.
7004: * NOTE: misleading but this is handled.
1.22 daniel 7005: */
1.77 daniel 7006: void
1.55 daniel 7007: xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 7008: xmlChar *name;
1.72 daniel 7009: xmlEntityPtr entity = NULL;
1.50 daniel 7010: xmlParserInputPtr input;
1.22 daniel 7011:
1.152 daniel 7012: if (RAW == '%') {
1.40 daniel 7013: NEXT;
1.22 daniel 7014: name = xmlParseName(ctxt);
7015: if (name == NULL) {
1.55 daniel 7016: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7017: ctxt->sax->error(ctxt->userData,
7018: "xmlParsePEReference: no name\n");
1.123 daniel 7019: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 7020: ctxt->wellFormed = 0;
1.180 daniel 7021: ctxt->disableSAX = 1;
1.22 daniel 7022: } else {
1.152 daniel 7023: if (RAW == ';') {
1.40 daniel 7024: NEXT;
1.98 daniel 7025: if ((ctxt->sax != NULL) &&
7026: (ctxt->sax->getParameterEntity != NULL))
7027: entity = ctxt->sax->getParameterEntity(ctxt->userData,
7028: name);
1.45 daniel 7029: if (entity == NULL) {
1.98 daniel 7030: /*
7031: * [ WFC: Entity Declared ]
7032: * In a document without any DTD, a document with only an
7033: * internal DTD subset which contains no parameter entity
7034: * references, or a document with "standalone='yes'", ...
7035: * ... The declaration of a parameter entity must precede
7036: * any reference to it...
7037: */
7038: if ((ctxt->standalone == 1) ||
7039: ((ctxt->hasExternalSubset == 0) &&
7040: (ctxt->hasPErefs == 0))) {
7041: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7042: ctxt->sax->error(ctxt->userData,
7043: "PEReference: %%%s; not found\n", name);
1.123 daniel 7044: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.98 daniel 7045: ctxt->wellFormed = 0;
1.180 daniel 7046: ctxt->disableSAX = 1;
1.98 daniel 7047: } else {
7048: /*
7049: * [ VC: Entity Declared ]
7050: * In a document with an external subset or external
7051: * parameter entities with "standalone='no'", ...
7052: * ... The declaration of a parameter entity must precede
7053: * any reference to it...
7054: */
7055: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7056: ctxt->sax->warning(ctxt->userData,
7057: "PEReference: %%%s; not found\n", name);
7058: ctxt->valid = 0;
7059: }
1.50 daniel 7060: } else {
1.98 daniel 7061: /*
7062: * Internal checking in case the entity quest barfed
7063: */
1.159 daniel 7064: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7065: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.98 daniel 7066: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7067: ctxt->sax->warning(ctxt->userData,
7068: "Internal: %%%s; is not a parameter entity\n", name);
7069: } else {
1.164 daniel 7070: /*
7071: * TODO !!!
7072: * handle the extra spaces added before and after
7073: * c.f. http://www.w3.org/TR/REC-xml#as-PE
7074: */
1.98 daniel 7075: input = xmlNewEntityInputStream(ctxt, entity);
7076: xmlPushInput(ctxt, input);
1.164 daniel 7077: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7078: (RAW == '<') && (NXT(1) == '?') &&
7079: (NXT(2) == 'x') && (NXT(3) == 'm') &&
7080: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 7081: xmlParseTextDecl(ctxt);
1.193 daniel 7082: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7083: /*
7084: * The XML REC instructs us to stop parsing
7085: * right here
7086: */
7087: ctxt->instate = XML_PARSER_EOF;
7088: xmlFree(name);
7089: return;
7090: }
1.164 daniel 7091: }
7092: if (ctxt->token == 0)
7093: ctxt->token = ' ';
1.98 daniel 7094: }
1.45 daniel 7095: }
1.98 daniel 7096: ctxt->hasPErefs = 1;
1.22 daniel 7097: } else {
1.55 daniel 7098: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7099: ctxt->sax->error(ctxt->userData,
1.59 daniel 7100: "xmlParsePEReference: expecting ';'\n");
1.123 daniel 7101: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.59 daniel 7102: ctxt->wellFormed = 0;
1.180 daniel 7103: ctxt->disableSAX = 1;
1.22 daniel 7104: }
1.119 daniel 7105: xmlFree(name);
1.3 veillard 7106: }
7107: }
7108: }
7109:
1.50 daniel 7110: /**
1.135 daniel 7111: * xmlParseStringPEReference:
7112: * @ctxt: an XML parser context
7113: * @str: a pointer to an index in the string
7114: *
7115: * parse PEReference declarations
7116: *
7117: * [69] PEReference ::= '%' Name ';'
7118: *
7119: * [ WFC: No Recursion ]
7120: * TODO A parsed entity must not contain a recursive
7121: * reference to itself, either directly or indirectly.
7122: *
7123: * [ WFC: Entity Declared ]
7124: * In a document without any DTD, a document with only an internal DTD
7125: * subset which contains no parameter entity references, or a document
7126: * with "standalone='yes'", ... ... The declaration of a parameter
7127: * entity must precede any reference to it...
7128: *
7129: * [ VC: Entity Declared ]
7130: * In a document with an external subset or external parameter entities
7131: * with "standalone='no'", ... ... The declaration of a parameter entity
7132: * must precede any reference to it...
7133: *
7134: * [ WFC: In DTD ]
7135: * Parameter-entity references may only appear in the DTD.
7136: * NOTE: misleading but this is handled.
7137: *
7138: * Returns the string of the entity content.
7139: * str is updated to the current value of the index
7140: */
7141: xmlEntityPtr
7142: xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7143: const xmlChar *ptr;
7144: xmlChar cur;
7145: xmlChar *name;
7146: xmlEntityPtr entity = NULL;
7147:
7148: if ((str == NULL) || (*str == NULL)) return(NULL);
7149: ptr = *str;
7150: cur = *ptr;
7151: if (cur == '%') {
7152: ptr++;
7153: cur = *ptr;
7154: name = xmlParseStringName(ctxt, &ptr);
7155: if (name == NULL) {
7156: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7157: ctxt->sax->error(ctxt->userData,
7158: "xmlParseStringPEReference: no name\n");
7159: ctxt->errNo = XML_ERR_NAME_REQUIRED;
7160: ctxt->wellFormed = 0;
1.180 daniel 7161: ctxt->disableSAX = 1;
1.135 daniel 7162: } else {
7163: cur = *ptr;
7164: if (cur == ';') {
7165: ptr++;
7166: cur = *ptr;
7167: if ((ctxt->sax != NULL) &&
7168: (ctxt->sax->getParameterEntity != NULL))
7169: entity = ctxt->sax->getParameterEntity(ctxt->userData,
7170: name);
7171: if (entity == NULL) {
7172: /*
7173: * [ WFC: Entity Declared ]
7174: * In a document without any DTD, a document with only an
7175: * internal DTD subset which contains no parameter entity
7176: * references, or a document with "standalone='yes'", ...
7177: * ... The declaration of a parameter entity must precede
7178: * any reference to it...
7179: */
7180: if ((ctxt->standalone == 1) ||
7181: ((ctxt->hasExternalSubset == 0) &&
7182: (ctxt->hasPErefs == 0))) {
7183: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7184: ctxt->sax->error(ctxt->userData,
7185: "PEReference: %%%s; not found\n", name);
7186: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
7187: ctxt->wellFormed = 0;
1.180 daniel 7188: ctxt->disableSAX = 1;
1.135 daniel 7189: } else {
7190: /*
7191: * [ VC: Entity Declared ]
7192: * In a document with an external subset or external
7193: * parameter entities with "standalone='no'", ...
7194: * ... The declaration of a parameter entity must
7195: * precede any reference to it...
7196: */
7197: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7198: ctxt->sax->warning(ctxt->userData,
7199: "PEReference: %%%s; not found\n", name);
7200: ctxt->valid = 0;
7201: }
7202: } else {
7203: /*
7204: * Internal checking in case the entity quest barfed
7205: */
1.159 daniel 7206: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7207: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.135 daniel 7208: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7209: ctxt->sax->warning(ctxt->userData,
7210: "Internal: %%%s; is not a parameter entity\n", name);
7211: }
7212: }
7213: ctxt->hasPErefs = 1;
7214: } else {
7215: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7216: ctxt->sax->error(ctxt->userData,
7217: "xmlParseStringPEReference: expecting ';'\n");
7218: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
7219: ctxt->wellFormed = 0;
1.180 daniel 7220: ctxt->disableSAX = 1;
1.135 daniel 7221: }
7222: xmlFree(name);
7223: }
7224: }
7225: *str = ptr;
7226: return(entity);
7227: }
7228:
7229: /**
1.181 daniel 7230: * xmlParseDocTypeDecl:
1.50 daniel 7231: * @ctxt: an XML parser context
7232: *
7233: * parse a DOCTYPE declaration
1.21 daniel 7234: *
1.22 daniel 7235: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7236: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.98 daniel 7237: *
7238: * [ VC: Root Element Type ]
1.99 daniel 7239: * The Name in the document type declaration must match the element
1.98 daniel 7240: * type of the root element.
1.21 daniel 7241: */
7242:
1.55 daniel 7243: void
7244: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.165 daniel 7245: xmlChar *name = NULL;
1.123 daniel 7246: xmlChar *ExternalID = NULL;
7247: xmlChar *URI = NULL;
1.21 daniel 7248:
7249: /*
7250: * We know that '<!DOCTYPE' has been detected.
7251: */
1.40 daniel 7252: SKIP(9);
1.21 daniel 7253:
1.42 daniel 7254: SKIP_BLANKS;
1.21 daniel 7255:
7256: /*
7257: * Parse the DOCTYPE name.
7258: */
7259: name = xmlParseName(ctxt);
7260: if (name == NULL) {
1.55 daniel 7261: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7262: ctxt->sax->error(ctxt->userData,
7263: "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.59 daniel 7264: ctxt->wellFormed = 0;
1.180 daniel 7265: ctxt->disableSAX = 1;
1.123 daniel 7266: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.21 daniel 7267: }
1.165 daniel 7268: ctxt->intSubName = name;
1.21 daniel 7269:
1.42 daniel 7270: SKIP_BLANKS;
1.21 daniel 7271:
7272: /*
1.22 daniel 7273: * Check for SystemID and ExternalID
7274: */
1.67 daniel 7275: URI = xmlParseExternalID(ctxt, &ExternalID, 1);
1.98 daniel 7276:
7277: if ((URI != NULL) || (ExternalID != NULL)) {
7278: ctxt->hasExternalSubset = 1;
7279: }
1.165 daniel 7280: ctxt->extSubURI = URI;
7281: ctxt->extSubSystem = ExternalID;
1.98 daniel 7282:
1.42 daniel 7283: SKIP_BLANKS;
1.36 daniel 7284:
1.76 daniel 7285: /*
1.165 daniel 7286: * Create and update the internal subset.
1.76 daniel 7287: */
1.171 daniel 7288: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7289: (!ctxt->disableSAX))
1.74 daniel 7290: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
1.22 daniel 7291:
7292: /*
1.140 daniel 7293: * Is there any internal subset declarations ?
7294: * they are handled separately in xmlParseInternalSubset()
7295: */
1.152 daniel 7296: if (RAW == '[')
1.140 daniel 7297: return;
7298:
7299: /*
7300: * We should be at the end of the DOCTYPE declaration.
7301: */
1.152 daniel 7302: if (RAW != '>') {
1.140 daniel 7303: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7304: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
7305: ctxt->wellFormed = 0;
1.180 daniel 7306: ctxt->disableSAX = 1;
1.140 daniel 7307: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
7308: }
7309: NEXT;
7310: }
7311:
7312: /**
1.181 daniel 7313: * xmlParseInternalsubset:
1.140 daniel 7314: * @ctxt: an XML parser context
7315: *
7316: * parse the internal subset declaration
7317: *
7318: * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7319: */
7320:
7321: void
7322: xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7323: /*
1.22 daniel 7324: * Is there any DTD definition ?
7325: */
1.152 daniel 7326: if (RAW == '[') {
1.96 daniel 7327: ctxt->instate = XML_PARSER_DTD;
1.40 daniel 7328: NEXT;
1.22 daniel 7329: /*
7330: * Parse the succession of Markup declarations and
7331: * PEReferences.
7332: * Subsequence (markupdecl | PEReference | S)*
7333: */
1.152 daniel 7334: while (RAW != ']') {
1.123 daniel 7335: const xmlChar *check = CUR_PTR;
1.115 daniel 7336: int cons = ctxt->input->consumed;
1.22 daniel 7337:
1.42 daniel 7338: SKIP_BLANKS;
1.22 daniel 7339: xmlParseMarkupDecl(ctxt);
1.50 daniel 7340: xmlParsePEReference(ctxt);
1.22 daniel 7341:
1.115 daniel 7342: /*
7343: * Pop-up of finished entities.
7344: */
1.152 daniel 7345: while ((RAW == 0) && (ctxt->inputNr > 1))
1.115 daniel 7346: xmlPopInput(ctxt);
7347:
1.118 daniel 7348: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
1.55 daniel 7349: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7350: ctxt->sax->error(ctxt->userData,
1.140 daniel 7351: "xmlParseInternalSubset: error detected in Markup declaration\n");
1.59 daniel 7352: ctxt->wellFormed = 0;
1.180 daniel 7353: ctxt->disableSAX = 1;
1.123 daniel 7354: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.22 daniel 7355: break;
7356: }
7357: }
1.209 veillard 7358: if (RAW == ']') {
7359: NEXT;
7360: SKIP_BLANKS;
7361: }
1.22 daniel 7362: }
7363:
7364: /*
7365: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 7366: */
1.152 daniel 7367: if (RAW != '>') {
1.55 daniel 7368: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7369: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
1.59 daniel 7370: ctxt->wellFormed = 0;
1.180 daniel 7371: ctxt->disableSAX = 1;
1.123 daniel 7372: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
1.21 daniel 7373: }
1.40 daniel 7374: NEXT;
1.21 daniel 7375: }
7376:
1.50 daniel 7377: /**
7378: * xmlParseAttribute:
7379: * @ctxt: an XML parser context
1.123 daniel 7380: * @value: a xmlChar ** used to store the value of the attribute
1.50 daniel 7381: *
7382: * parse an attribute
1.3 veillard 7383: *
1.22 daniel 7384: * [41] Attribute ::= Name Eq AttValue
7385: *
1.98 daniel 7386: * [ WFC: No External Entity References ]
7387: * Attribute values cannot contain direct or indirect entity references
7388: * to external entities.
7389: *
7390: * [ WFC: No < in Attribute Values ]
7391: * The replacement text of any entity referred to directly or indirectly in
7392: * an attribute value (other than "<") must not contain a <.
7393: *
7394: * [ VC: Attribute Value Type ]
1.117 daniel 7395: * The attribute must have been declared; the value must be of the type
1.99 daniel 7396: * declared for it.
1.98 daniel 7397: *
1.22 daniel 7398: * [25] Eq ::= S? '=' S?
7399: *
1.29 daniel 7400: * With namespace:
7401: *
7402: * [NS 11] Attribute ::= QName Eq AttValue
1.43 daniel 7403: *
7404: * Also the case QName == xmlns:??? is handled independently as a namespace
7405: * definition.
1.69 daniel 7406: *
1.72 daniel 7407: * Returns the attribute name, and the value in *value.
1.3 veillard 7408: */
7409:
1.123 daniel 7410: xmlChar *
7411: xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
7412: xmlChar *name, *val;
1.3 veillard 7413:
1.72 daniel 7414: *value = NULL;
7415: name = xmlParseName(ctxt);
1.22 daniel 7416: if (name == NULL) {
1.55 daniel 7417: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7418: ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
1.59 daniel 7419: ctxt->wellFormed = 0;
1.180 daniel 7420: ctxt->disableSAX = 1;
1.123 daniel 7421: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.52 daniel 7422: return(NULL);
1.3 veillard 7423: }
7424:
7425: /*
1.29 daniel 7426: * read the value
1.3 veillard 7427: */
1.42 daniel 7428: SKIP_BLANKS;
1.152 daniel 7429: if (RAW == '=') {
1.40 daniel 7430: NEXT;
1.42 daniel 7431: SKIP_BLANKS;
1.72 daniel 7432: val = xmlParseAttValue(ctxt);
1.96 daniel 7433: ctxt->instate = XML_PARSER_CONTENT;
1.29 daniel 7434: } else {
1.55 daniel 7435: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7436: ctxt->sax->error(ctxt->userData,
1.59 daniel 7437: "Specification mandate value for attribute %s\n", name);
1.123 daniel 7438: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
1.59 daniel 7439: ctxt->wellFormed = 0;
1.180 daniel 7440: ctxt->disableSAX = 1;
1.170 daniel 7441: xmlFree(name);
1.52 daniel 7442: return(NULL);
1.43 daniel 7443: }
7444:
1.172 daniel 7445: /*
7446: * Check that xml:lang conforms to the specification
7447: */
7448: if (!xmlStrcmp(name, BAD_CAST "xml:lang")) {
7449: if (!xmlCheckLanguageID(val)) {
7450: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7451: ctxt->sax->error(ctxt->userData,
7452: "Invalid value for xml:lang : %s\n", val);
7453: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
7454: ctxt->wellFormed = 0;
1.180 daniel 7455: ctxt->disableSAX = 1;
1.172 daniel 7456: }
7457: }
7458:
1.176 daniel 7459: /*
7460: * Check that xml:space conforms to the specification
7461: */
7462: if (!xmlStrcmp(name, BAD_CAST "xml:space")) {
7463: if (!xmlStrcmp(val, BAD_CAST "default"))
7464: *(ctxt->space) = 0;
7465: else if (!xmlStrcmp(val, BAD_CAST "preserve"))
7466: *(ctxt->space) = 1;
7467: else {
7468: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7469: ctxt->sax->error(ctxt->userData,
7470: "Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
7471: val);
7472: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
7473: ctxt->wellFormed = 0;
1.180 daniel 7474: ctxt->disableSAX = 1;
1.176 daniel 7475: }
7476: }
7477:
1.72 daniel 7478: *value = val;
7479: return(name);
1.3 veillard 7480: }
7481:
1.50 daniel 7482: /**
7483: * xmlParseStartTag:
7484: * @ctxt: an XML parser context
7485: *
7486: * parse a start of tag either for rule element or
7487: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 7488: *
7489: * [40] STag ::= '<' Name (S Attribute)* S? '>'
7490: *
1.98 daniel 7491: * [ WFC: Unique Att Spec ]
7492: * No attribute name may appear more than once in the same start-tag or
7493: * empty-element tag.
7494: *
1.29 daniel 7495: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7496: *
1.98 daniel 7497: * [ WFC: Unique Att Spec ]
7498: * No attribute name may appear more than once in the same start-tag or
7499: * empty-element tag.
7500: *
1.29 daniel 7501: * With namespace:
7502: *
7503: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7504: *
7505: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.83 daniel 7506: *
1.192 daniel 7507: * Returns the element name parsed
1.2 veillard 7508: */
7509:
1.123 daniel 7510: xmlChar *
1.69 daniel 7511: xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 7512: xmlChar *name;
7513: xmlChar *attname;
7514: xmlChar *attvalue;
7515: const xmlChar **atts = NULL;
1.72 daniel 7516: int nbatts = 0;
7517: int maxatts = 0;
7518: int i;
1.2 veillard 7519:
1.152 daniel 7520: if (RAW != '<') return(NULL);
1.40 daniel 7521: NEXT;
1.3 veillard 7522:
1.72 daniel 7523: name = xmlParseName(ctxt);
1.59 daniel 7524: if (name == NULL) {
7525: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7526: ctxt->sax->error(ctxt->userData,
1.59 daniel 7527: "xmlParseStartTag: invalid element name\n");
1.123 daniel 7528: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 7529: ctxt->wellFormed = 0;
1.180 daniel 7530: ctxt->disableSAX = 1;
1.83 daniel 7531: return(NULL);
1.50 daniel 7532: }
7533:
7534: /*
1.3 veillard 7535: * Now parse the attributes, it ends up with the ending
7536: *
7537: * (S Attribute)* S?
7538: */
1.42 daniel 7539: SKIP_BLANKS;
1.91 daniel 7540: GROW;
1.168 daniel 7541:
1.153 daniel 7542: while ((IS_CHAR(RAW)) &&
1.152 daniel 7543: (RAW != '>') &&
7544: ((RAW != '/') || (NXT(1) != '>'))) {
1.123 daniel 7545: const xmlChar *q = CUR_PTR;
1.91 daniel 7546: int cons = ctxt->input->consumed;
1.29 daniel 7547:
1.72 daniel 7548: attname = xmlParseAttribute(ctxt, &attvalue);
7549: if ((attname != NULL) && (attvalue != NULL)) {
7550: /*
1.98 daniel 7551: * [ WFC: Unique Att Spec ]
7552: * No attribute name may appear more than once in the same
7553: * start-tag or empty-element tag.
1.72 daniel 7554: */
7555: for (i = 0; i < nbatts;i += 2) {
7556: if (!xmlStrcmp(atts[i], attname)) {
7557: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 7558: ctxt->sax->error(ctxt->userData,
7559: "Attribute %s redefined\n",
7560: attname);
1.72 daniel 7561: ctxt->wellFormed = 0;
1.180 daniel 7562: ctxt->disableSAX = 1;
1.123 daniel 7563: ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
1.119 daniel 7564: xmlFree(attname);
7565: xmlFree(attvalue);
1.98 daniel 7566: goto failed;
1.72 daniel 7567: }
7568: }
7569:
7570: /*
7571: * Add the pair to atts
7572: */
7573: if (atts == NULL) {
7574: maxatts = 10;
1.123 daniel 7575: atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
1.72 daniel 7576: if (atts == NULL) {
1.86 daniel 7577: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 7578: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 7579: return(NULL);
1.72 daniel 7580: }
1.127 daniel 7581: } else if (nbatts + 4 > maxatts) {
1.72 daniel 7582: maxatts *= 2;
1.123 daniel 7583: atts = (const xmlChar **) xmlRealloc(atts,
7584: maxatts * sizeof(xmlChar *));
1.72 daniel 7585: if (atts == NULL) {
1.86 daniel 7586: fprintf(stderr, "realloc of %ld byte failed\n",
1.123 daniel 7587: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 7588: return(NULL);
1.72 daniel 7589: }
7590: }
7591: atts[nbatts++] = attname;
7592: atts[nbatts++] = attvalue;
7593: atts[nbatts] = NULL;
7594: atts[nbatts + 1] = NULL;
1.176 daniel 7595: } else {
7596: if (attname != NULL)
7597: xmlFree(attname);
7598: if (attvalue != NULL)
7599: xmlFree(attvalue);
1.72 daniel 7600: }
7601:
1.116 daniel 7602: failed:
1.168 daniel 7603:
7604: if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7605: break;
7606: if (!IS_BLANK(RAW)) {
7607: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7608: ctxt->sax->error(ctxt->userData,
7609: "attributes construct error\n");
7610: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7611: ctxt->wellFormed = 0;
1.180 daniel 7612: ctxt->disableSAX = 1;
1.168 daniel 7613: }
1.42 daniel 7614: SKIP_BLANKS;
1.91 daniel 7615: if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
1.55 daniel 7616: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7617: ctxt->sax->error(ctxt->userData,
1.31 daniel 7618: "xmlParseStartTag: problem parsing attributes\n");
1.123 daniel 7619: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 7620: ctxt->wellFormed = 0;
1.180 daniel 7621: ctxt->disableSAX = 1;
1.29 daniel 7622: break;
1.3 veillard 7623: }
1.91 daniel 7624: GROW;
1.3 veillard 7625: }
7626:
1.43 daniel 7627: /*
1.72 daniel 7628: * SAX: Start of Element !
1.43 daniel 7629: */
1.171 daniel 7630: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
7631: (!ctxt->disableSAX))
1.74 daniel 7632: ctxt->sax->startElement(ctxt->userData, name, atts);
1.43 daniel 7633:
1.72 daniel 7634: if (atts != NULL) {
1.123 daniel 7635: for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
1.119 daniel 7636: xmlFree(atts);
1.72 daniel 7637: }
1.83 daniel 7638: return(name);
1.3 veillard 7639: }
7640:
1.50 daniel 7641: /**
7642: * xmlParseEndTag:
7643: * @ctxt: an XML parser context
7644: *
7645: * parse an end of tag
1.27 daniel 7646: *
7647: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 7648: *
7649: * With namespace
7650: *
1.72 daniel 7651: * [NS 9] ETag ::= '</' QName S? '>'
1.7 veillard 7652: */
7653:
1.55 daniel 7654: void
1.140 daniel 7655: xmlParseEndTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 7656: xmlChar *name;
1.140 daniel 7657: xmlChar *oldname;
1.7 veillard 7658:
1.91 daniel 7659: GROW;
1.152 daniel 7660: if ((RAW != '<') || (NXT(1) != '/')) {
1.55 daniel 7661: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7662: ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
1.59 daniel 7663: ctxt->wellFormed = 0;
1.180 daniel 7664: ctxt->disableSAX = 1;
1.123 daniel 7665: ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
1.27 daniel 7666: return;
7667: }
1.40 daniel 7668: SKIP(2);
1.7 veillard 7669:
1.72 daniel 7670: name = xmlParseName(ctxt);
1.7 veillard 7671:
7672: /*
7673: * We should definitely be at the ending "S? '>'" part
7674: */
1.91 daniel 7675: GROW;
1.42 daniel 7676: SKIP_BLANKS;
1.153 daniel 7677: if ((!IS_CHAR(RAW)) || (RAW != '>')) {
1.55 daniel 7678: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7679: ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
1.123 daniel 7680: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.59 daniel 7681: ctxt->wellFormed = 0;
1.180 daniel 7682: ctxt->disableSAX = 1;
1.7 veillard 7683: } else
1.40 daniel 7684: NEXT;
1.7 veillard 7685:
1.72 daniel 7686: /*
1.98 daniel 7687: * [ WFC: Element Type Match ]
7688: * The Name in an element's end-tag must match the element type in the
7689: * start-tag.
7690: *
1.83 daniel 7691: */
1.147 daniel 7692: if ((name == NULL) || (ctxt->name == NULL) ||
7693: (xmlStrcmp(name, ctxt->name))) {
7694: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
7695: if ((name != NULL) && (ctxt->name != NULL)) {
7696: ctxt->sax->error(ctxt->userData,
7697: "Opening and ending tag mismatch: %s and %s\n",
7698: ctxt->name, name);
7699: } else if (ctxt->name != NULL) {
7700: ctxt->sax->error(ctxt->userData,
7701: "Ending tag eror for: %s\n", ctxt->name);
7702: } else {
7703: ctxt->sax->error(ctxt->userData,
7704: "Ending tag error: internal error ???\n");
7705: }
1.122 daniel 7706:
1.147 daniel 7707: }
1.123 daniel 7708: ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
1.83 daniel 7709: ctxt->wellFormed = 0;
1.180 daniel 7710: ctxt->disableSAX = 1;
1.83 daniel 7711: }
7712:
7713: /*
1.72 daniel 7714: * SAX: End of Tag
7715: */
1.171 daniel 7716: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7717: (!ctxt->disableSAX))
1.74 daniel 7718: ctxt->sax->endElement(ctxt->userData, name);
1.72 daniel 7719:
7720: if (name != NULL)
1.119 daniel 7721: xmlFree(name);
1.140 daniel 7722: oldname = namePop(ctxt);
1.176 daniel 7723: spacePop(ctxt);
1.140 daniel 7724: if (oldname != NULL) {
7725: #ifdef DEBUG_STACK
7726: fprintf(stderr,"Close: popped %s\n", oldname);
7727: #endif
7728: xmlFree(oldname);
7729: }
1.7 veillard 7730: return;
7731: }
7732:
1.50 daniel 7733: /**
7734: * xmlParseCDSect:
7735: * @ctxt: an XML parser context
7736: *
7737: * Parse escaped pure raw content.
1.29 daniel 7738: *
7739: * [18] CDSect ::= CDStart CData CDEnd
7740: *
7741: * [19] CDStart ::= '<![CDATA['
7742: *
7743: * [20] Data ::= (Char* - (Char* ']]>' Char*))
7744: *
7745: * [21] CDEnd ::= ']]>'
1.3 veillard 7746: */
1.55 daniel 7747: void
7748: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.135 daniel 7749: xmlChar *buf = NULL;
7750: int len = 0;
1.140 daniel 7751: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 7752: int r, rl;
7753: int s, sl;
7754: int cur, l;
1.3 veillard 7755:
1.106 daniel 7756: if ((NXT(0) == '<') && (NXT(1) == '!') &&
1.40 daniel 7757: (NXT(2) == '[') && (NXT(3) == 'C') &&
7758: (NXT(4) == 'D') && (NXT(5) == 'A') &&
7759: (NXT(6) == 'T') && (NXT(7) == 'A') &&
7760: (NXT(8) == '[')) {
7761: SKIP(9);
1.29 daniel 7762: } else
1.45 daniel 7763: return;
1.109 daniel 7764:
7765: ctxt->instate = XML_PARSER_CDATA_SECTION;
1.152 daniel 7766: r = CUR_CHAR(rl);
7767: if (!IS_CHAR(r)) {
1.55 daniel 7768: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7769: ctxt->sax->error(ctxt->userData,
1.135 daniel 7770: "CData section not finished\n");
1.59 daniel 7771: ctxt->wellFormed = 0;
1.180 daniel 7772: ctxt->disableSAX = 1;
1.123 daniel 7773: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.109 daniel 7774: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 7775: return;
1.3 veillard 7776: }
1.152 daniel 7777: NEXTL(rl);
7778: s = CUR_CHAR(sl);
7779: if (!IS_CHAR(s)) {
1.55 daniel 7780: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7781: ctxt->sax->error(ctxt->userData,
1.135 daniel 7782: "CData section not finished\n");
1.123 daniel 7783: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.59 daniel 7784: ctxt->wellFormed = 0;
1.180 daniel 7785: ctxt->disableSAX = 1;
1.109 daniel 7786: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 7787: return;
1.3 veillard 7788: }
1.152 daniel 7789: NEXTL(sl);
7790: cur = CUR_CHAR(l);
1.135 daniel 7791: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7792: if (buf == NULL) {
7793: fprintf(stderr, "malloc of %d byte failed\n", size);
7794: return;
7795: }
1.108 veillard 7796: while (IS_CHAR(cur) &&
1.110 daniel 7797: ((r != ']') || (s != ']') || (cur != '>'))) {
1.152 daniel 7798: if (len + 5 >= size) {
1.135 daniel 7799: size *= 2;
1.204 veillard 7800: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 7801: if (buf == NULL) {
7802: fprintf(stderr, "realloc of %d byte failed\n", size);
7803: return;
7804: }
7805: }
1.152 daniel 7806: COPY_BUF(rl,buf,len,r);
1.110 daniel 7807: r = s;
1.152 daniel 7808: rl = sl;
1.110 daniel 7809: s = cur;
1.152 daniel 7810: sl = l;
7811: NEXTL(l);
7812: cur = CUR_CHAR(l);
1.3 veillard 7813: }
1.135 daniel 7814: buf[len] = 0;
1.109 daniel 7815: ctxt->instate = XML_PARSER_CONTENT;
1.152 daniel 7816: if (cur != '>') {
1.55 daniel 7817: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7818: ctxt->sax->error(ctxt->userData,
1.135 daniel 7819: "CData section not finished\n%.50s\n", buf);
1.123 daniel 7820: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.59 daniel 7821: ctxt->wellFormed = 0;
1.180 daniel 7822: ctxt->disableSAX = 1;
1.135 daniel 7823: xmlFree(buf);
1.45 daniel 7824: return;
1.3 veillard 7825: }
1.152 daniel 7826: NEXTL(l);
1.16 daniel 7827:
1.45 daniel 7828: /*
1.135 daniel 7829: * Ok the buffer is to be consumed as cdata.
1.45 daniel 7830: */
1.171 daniel 7831: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.107 daniel 7832: if (ctxt->sax->cdataBlock != NULL)
1.135 daniel 7833: ctxt->sax->cdataBlock(ctxt->userData, buf, len);
1.45 daniel 7834: }
1.135 daniel 7835: xmlFree(buf);
1.2 veillard 7836: }
7837:
1.50 daniel 7838: /**
7839: * xmlParseContent:
7840: * @ctxt: an XML parser context
7841: *
7842: * Parse a content:
1.2 veillard 7843: *
1.27 daniel 7844: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 7845: */
7846:
1.55 daniel 7847: void
7848: xmlParseContent(xmlParserCtxtPtr ctxt) {
1.97 daniel 7849: GROW;
1.176 daniel 7850: while (((RAW != 0) || (ctxt->token != 0)) &&
7851: ((RAW != '<') || (NXT(1) != '/'))) {
1.123 daniel 7852: const xmlChar *test = CUR_PTR;
1.91 daniel 7853: int cons = ctxt->input->consumed;
1.123 daniel 7854: xmlChar tok = ctxt->token;
1.27 daniel 7855:
7856: /*
1.152 daniel 7857: * Handle possible processed charrefs.
7858: */
7859: if (ctxt->token != 0) {
7860: xmlParseCharData(ctxt, 0);
7861: }
7862: /*
1.27 daniel 7863: * First case : a Processing Instruction.
7864: */
1.152 daniel 7865: else if ((RAW == '<') && (NXT(1) == '?')) {
1.27 daniel 7866: xmlParsePI(ctxt);
7867: }
1.72 daniel 7868:
1.27 daniel 7869: /*
7870: * Second case : a CDSection
7871: */
1.152 daniel 7872: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7873: (NXT(2) == '[') && (NXT(3) == 'C') &&
7874: (NXT(4) == 'D') && (NXT(5) == 'A') &&
7875: (NXT(6) == 'T') && (NXT(7) == 'A') &&
7876: (NXT(8) == '[')) {
1.45 daniel 7877: xmlParseCDSect(ctxt);
1.27 daniel 7878: }
1.72 daniel 7879:
1.27 daniel 7880: /*
7881: * Third case : a comment
7882: */
1.152 daniel 7883: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7884: (NXT(2) == '-') && (NXT(3) == '-')) {
1.114 daniel 7885: xmlParseComment(ctxt);
1.97 daniel 7886: ctxt->instate = XML_PARSER_CONTENT;
1.27 daniel 7887: }
1.72 daniel 7888:
1.27 daniel 7889: /*
7890: * Fourth case : a sub-element.
7891: */
1.152 daniel 7892: else if (RAW == '<') {
1.72 daniel 7893: xmlParseElement(ctxt);
1.45 daniel 7894: }
1.72 daniel 7895:
1.45 daniel 7896: /*
1.50 daniel 7897: * Fifth case : a reference. If if has not been resolved,
7898: * parsing returns it's Name, create the node
1.45 daniel 7899: */
1.97 daniel 7900:
1.152 daniel 7901: else if (RAW == '&') {
1.77 daniel 7902: xmlParseReference(ctxt);
1.27 daniel 7903: }
1.72 daniel 7904:
1.27 daniel 7905: /*
7906: * Last case, text. Note that References are handled directly.
7907: */
7908: else {
1.45 daniel 7909: xmlParseCharData(ctxt, 0);
1.3 veillard 7910: }
1.14 veillard 7911:
1.91 daniel 7912: GROW;
1.14 veillard 7913: /*
1.45 daniel 7914: * Pop-up of finished entities.
1.14 veillard 7915: */
1.152 daniel 7916: while ((RAW == 0) && (ctxt->inputNr > 1))
1.69 daniel 7917: xmlPopInput(ctxt);
1.135 daniel 7918: SHRINK;
1.45 daniel 7919:
1.113 daniel 7920: if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
7921: (tok == ctxt->token)) {
1.55 daniel 7922: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7923: ctxt->sax->error(ctxt->userData,
1.59 daniel 7924: "detected an error in element content\n");
1.123 daniel 7925: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 7926: ctxt->wellFormed = 0;
1.180 daniel 7927: ctxt->disableSAX = 1;
1.29 daniel 7928: break;
7929: }
1.3 veillard 7930: }
1.2 veillard 7931: }
7932:
1.50 daniel 7933: /**
7934: * xmlParseElement:
7935: * @ctxt: an XML parser context
7936: *
7937: * parse an XML element, this is highly recursive
1.26 daniel 7938: *
7939: * [39] element ::= EmptyElemTag | STag content ETag
7940: *
1.98 daniel 7941: * [ WFC: Element Type Match ]
7942: * The Name in an element's end-tag must match the element type in the
7943: * start-tag.
7944: *
7945: * [ VC: Element Valid ]
1.117 daniel 7946: * An element is valid if there is a declaration matching elementdecl
1.99 daniel 7947: * where the Name matches the element type and one of the following holds:
7948: * - The declaration matches EMPTY and the element has no content.
7949: * - The declaration matches children and the sequence of child elements
7950: * belongs to the language generated by the regular expression in the
7951: * content model, with optional white space (characters matching the
7952: * nonterminal S) between each pair of child elements.
7953: * - The declaration matches Mixed and the content consists of character
7954: * data and child elements whose types match names in the content model.
7955: * - The declaration matches ANY, and the types of any child elements have
7956: * been declared.
1.2 veillard 7957: */
1.26 daniel 7958:
1.72 daniel 7959: void
1.69 daniel 7960: xmlParseElement(xmlParserCtxtPtr ctxt) {
1.123 daniel 7961: const xmlChar *openTag = CUR_PTR;
7962: xmlChar *name;
1.140 daniel 7963: xmlChar *oldname;
1.32 daniel 7964: xmlParserNodeInfo node_info;
1.118 daniel 7965: xmlNodePtr ret;
1.2 veillard 7966:
1.32 daniel 7967: /* Capture start position */
1.118 daniel 7968: if (ctxt->record_info) {
7969: node_info.begin_pos = ctxt->input->consumed +
7970: (CUR_PTR - ctxt->input->base);
7971: node_info.begin_line = ctxt->input->line;
7972: }
1.32 daniel 7973:
1.176 daniel 7974: if (ctxt->spaceNr == 0)
7975: spacePush(ctxt, -1);
7976: else
7977: spacePush(ctxt, *ctxt->space);
7978:
1.83 daniel 7979: name = xmlParseStartTag(ctxt);
7980: if (name == NULL) {
1.176 daniel 7981: spacePop(ctxt);
1.83 daniel 7982: return;
7983: }
1.140 daniel 7984: namePush(ctxt, name);
1.118 daniel 7985: ret = ctxt->node;
1.2 veillard 7986:
7987: /*
1.99 daniel 7988: * [ VC: Root Element Type ]
7989: * The Name in the document type declaration must match the element
7990: * type of the root element.
7991: */
1.105 daniel 7992: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 7993: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.102 daniel 7994: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
1.99 daniel 7995:
7996: /*
1.2 veillard 7997: * Check for an Empty Element.
7998: */
1.152 daniel 7999: if ((RAW == '/') && (NXT(1) == '>')) {
1.40 daniel 8000: SKIP(2);
1.171 daniel 8001: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8002: (!ctxt->disableSAX))
1.83 daniel 8003: ctxt->sax->endElement(ctxt->userData, name);
1.140 daniel 8004: oldname = namePop(ctxt);
1.176 daniel 8005: spacePop(ctxt);
1.140 daniel 8006: if (oldname != NULL) {
8007: #ifdef DEBUG_STACK
8008: fprintf(stderr,"Close: popped %s\n", oldname);
8009: #endif
8010: xmlFree(oldname);
1.211 ! veillard 8011: }
! 8012: if ( ret != NULL && ctxt->record_info ) {
! 8013: node_info.end_pos = ctxt->input->consumed +
! 8014: (CUR_PTR - ctxt->input->base);
! 8015: node_info.end_line = ctxt->input->line;
! 8016: node_info.node = ret;
! 8017: xmlParserAddNodeInfo(ctxt, &node_info);
1.140 daniel 8018: }
1.72 daniel 8019: return;
1.2 veillard 8020: }
1.152 daniel 8021: if (RAW == '>') {
1.91 daniel 8022: NEXT;
8023: } else {
1.55 daniel 8024: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8025: ctxt->sax->error(ctxt->userData,
8026: "Couldn't find end of Start Tag\n%.30s\n",
1.57 daniel 8027: openTag);
1.59 daniel 8028: ctxt->wellFormed = 0;
1.180 daniel 8029: ctxt->disableSAX = 1;
1.123 daniel 8030: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.45 daniel 8031:
8032: /*
8033: * end of parsing of this node.
8034: */
8035: nodePop(ctxt);
1.140 daniel 8036: oldname = namePop(ctxt);
1.176 daniel 8037: spacePop(ctxt);
1.140 daniel 8038: if (oldname != NULL) {
8039: #ifdef DEBUG_STACK
8040: fprintf(stderr,"Close: popped %s\n", oldname);
8041: #endif
8042: xmlFree(oldname);
8043: }
1.118 daniel 8044:
8045: /*
8046: * Capture end position and add node
8047: */
8048: if ( ret != NULL && ctxt->record_info ) {
8049: node_info.end_pos = ctxt->input->consumed +
8050: (CUR_PTR - ctxt->input->base);
8051: node_info.end_line = ctxt->input->line;
8052: node_info.node = ret;
8053: xmlParserAddNodeInfo(ctxt, &node_info);
8054: }
1.72 daniel 8055: return;
1.2 veillard 8056: }
8057:
8058: /*
8059: * Parse the content of the element:
8060: */
1.45 daniel 8061: xmlParseContent(ctxt);
1.153 daniel 8062: if (!IS_CHAR(RAW)) {
1.55 daniel 8063: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8064: ctxt->sax->error(ctxt->userData,
1.57 daniel 8065: "Premature end of data in tag %.30s\n", openTag);
1.59 daniel 8066: ctxt->wellFormed = 0;
1.180 daniel 8067: ctxt->disableSAX = 1;
1.123 daniel 8068: ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
1.45 daniel 8069:
8070: /*
8071: * end of parsing of this node.
8072: */
8073: nodePop(ctxt);
1.140 daniel 8074: oldname = namePop(ctxt);
1.176 daniel 8075: spacePop(ctxt);
1.140 daniel 8076: if (oldname != NULL) {
8077: #ifdef DEBUG_STACK
8078: fprintf(stderr,"Close: popped %s\n", oldname);
8079: #endif
8080: xmlFree(oldname);
8081: }
1.72 daniel 8082: return;
1.2 veillard 8083: }
8084:
8085: /*
1.27 daniel 8086: * parse the end of tag: '</' should be here.
1.2 veillard 8087: */
1.140 daniel 8088: xmlParseEndTag(ctxt);
1.118 daniel 8089:
8090: /*
8091: * Capture end position and add node
8092: */
8093: if ( ret != NULL && ctxt->record_info ) {
8094: node_info.end_pos = ctxt->input->consumed +
8095: (CUR_PTR - ctxt->input->base);
8096: node_info.end_line = ctxt->input->line;
8097: node_info.node = ret;
8098: xmlParserAddNodeInfo(ctxt, &node_info);
8099: }
1.2 veillard 8100: }
8101:
1.50 daniel 8102: /**
8103: * xmlParseVersionNum:
8104: * @ctxt: an XML parser context
8105: *
8106: * parse the XML version value.
1.29 daniel 8107: *
8108: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
1.68 daniel 8109: *
8110: * Returns the string giving the XML version number, or NULL
1.29 daniel 8111: */
1.123 daniel 8112: xmlChar *
1.55 daniel 8113: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.135 daniel 8114: xmlChar *buf = NULL;
8115: int len = 0;
8116: int size = 10;
8117: xmlChar cur;
1.29 daniel 8118:
1.135 daniel 8119: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
8120: if (buf == NULL) {
8121: fprintf(stderr, "malloc of %d byte failed\n", size);
8122: return(NULL);
8123: }
8124: cur = CUR;
1.152 daniel 8125: while (((cur >= 'a') && (cur <= 'z')) ||
8126: ((cur >= 'A') && (cur <= 'Z')) ||
8127: ((cur >= '0') && (cur <= '9')) ||
8128: (cur == '_') || (cur == '.') ||
8129: (cur == ':') || (cur == '-')) {
1.135 daniel 8130: if (len + 1 >= size) {
8131: size *= 2;
1.204 veillard 8132: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 8133: if (buf == NULL) {
8134: fprintf(stderr, "realloc of %d byte failed\n", size);
8135: return(NULL);
8136: }
8137: }
8138: buf[len++] = cur;
8139: NEXT;
8140: cur=CUR;
8141: }
8142: buf[len] = 0;
8143: return(buf);
1.29 daniel 8144: }
8145:
1.50 daniel 8146: /**
8147: * xmlParseVersionInfo:
8148: * @ctxt: an XML parser context
8149: *
8150: * parse the XML version.
1.29 daniel 8151: *
8152: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8153: *
8154: * [25] Eq ::= S? '=' S?
1.50 daniel 8155: *
1.68 daniel 8156: * Returns the version string, e.g. "1.0"
1.29 daniel 8157: */
8158:
1.123 daniel 8159: xmlChar *
1.55 daniel 8160: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
1.123 daniel 8161: xmlChar *version = NULL;
8162: const xmlChar *q;
1.29 daniel 8163:
1.152 daniel 8164: if ((RAW == 'v') && (NXT(1) == 'e') &&
1.40 daniel 8165: (NXT(2) == 'r') && (NXT(3) == 's') &&
8166: (NXT(4) == 'i') && (NXT(5) == 'o') &&
8167: (NXT(6) == 'n')) {
8168: SKIP(7);
1.42 daniel 8169: SKIP_BLANKS;
1.152 daniel 8170: if (RAW != '=') {
1.55 daniel 8171: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8172: ctxt->sax->error(ctxt->userData,
8173: "xmlParseVersionInfo : expected '='\n");
1.59 daniel 8174: ctxt->wellFormed = 0;
1.180 daniel 8175: ctxt->disableSAX = 1;
1.123 daniel 8176: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.31 daniel 8177: return(NULL);
8178: }
1.40 daniel 8179: NEXT;
1.42 daniel 8180: SKIP_BLANKS;
1.152 daniel 8181: if (RAW == '"') {
1.40 daniel 8182: NEXT;
8183: q = CUR_PTR;
1.29 daniel 8184: version = xmlParseVersionNum(ctxt);
1.152 daniel 8185: if (RAW != '"') {
1.55 daniel 8186: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8187: ctxt->sax->error(ctxt->userData,
8188: "String not closed\n%.50s\n", q);
1.59 daniel 8189: ctxt->wellFormed = 0;
1.180 daniel 8190: ctxt->disableSAX = 1;
1.123 daniel 8191: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8192: } else
1.40 daniel 8193: NEXT;
1.152 daniel 8194: } else if (RAW == '\''){
1.40 daniel 8195: NEXT;
8196: q = CUR_PTR;
1.29 daniel 8197: version = xmlParseVersionNum(ctxt);
1.152 daniel 8198: if (RAW != '\'') {
1.55 daniel 8199: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8200: ctxt->sax->error(ctxt->userData,
8201: "String not closed\n%.50s\n", q);
1.123 daniel 8202: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.59 daniel 8203: ctxt->wellFormed = 0;
1.180 daniel 8204: ctxt->disableSAX = 1;
1.55 daniel 8205: } else
1.40 daniel 8206: NEXT;
1.31 daniel 8207: } else {
1.55 daniel 8208: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8209: ctxt->sax->error(ctxt->userData,
1.59 daniel 8210: "xmlParseVersionInfo : expected ' or \"\n");
1.122 daniel 8211: ctxt->wellFormed = 0;
1.180 daniel 8212: ctxt->disableSAX = 1;
1.123 daniel 8213: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.29 daniel 8214: }
8215: }
8216: return(version);
8217: }
8218:
1.50 daniel 8219: /**
8220: * xmlParseEncName:
8221: * @ctxt: an XML parser context
8222: *
8223: * parse the XML encoding name
1.29 daniel 8224: *
8225: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1.50 daniel 8226: *
1.68 daniel 8227: * Returns the encoding name value or NULL
1.29 daniel 8228: */
1.123 daniel 8229: xmlChar *
1.55 daniel 8230: xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.135 daniel 8231: xmlChar *buf = NULL;
8232: int len = 0;
8233: int size = 10;
8234: xmlChar cur;
1.29 daniel 8235:
1.135 daniel 8236: cur = CUR;
8237: if (((cur >= 'a') && (cur <= 'z')) ||
8238: ((cur >= 'A') && (cur <= 'Z'))) {
8239: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
8240: if (buf == NULL) {
8241: fprintf(stderr, "malloc of %d byte failed\n", size);
8242: return(NULL);
8243: }
8244:
8245: buf[len++] = cur;
1.40 daniel 8246: NEXT;
1.135 daniel 8247: cur = CUR;
1.152 daniel 8248: while (((cur >= 'a') && (cur <= 'z')) ||
8249: ((cur >= 'A') && (cur <= 'Z')) ||
8250: ((cur >= '0') && (cur <= '9')) ||
8251: (cur == '.') || (cur == '_') ||
8252: (cur == '-')) {
1.135 daniel 8253: if (len + 1 >= size) {
8254: size *= 2;
1.204 veillard 8255: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 8256: if (buf == NULL) {
8257: fprintf(stderr, "realloc of %d byte failed\n", size);
8258: return(NULL);
8259: }
8260: }
8261: buf[len++] = cur;
8262: NEXT;
8263: cur = CUR;
8264: if (cur == 0) {
8265: SHRINK;
8266: GROW;
8267: cur = CUR;
8268: }
8269: }
8270: buf[len] = 0;
1.29 daniel 8271: } else {
1.55 daniel 8272: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8273: ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
1.59 daniel 8274: ctxt->wellFormed = 0;
1.180 daniel 8275: ctxt->disableSAX = 1;
1.123 daniel 8276: ctxt->errNo = XML_ERR_ENCODING_NAME;
1.29 daniel 8277: }
1.135 daniel 8278: return(buf);
1.29 daniel 8279: }
8280:
1.50 daniel 8281: /**
8282: * xmlParseEncodingDecl:
8283: * @ctxt: an XML parser context
8284: *
8285: * parse the XML encoding declaration
1.29 daniel 8286: *
8287: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
1.50 daniel 8288: *
8289: * TODO: this should setup the conversion filters.
8290: *
1.68 daniel 8291: * Returns the encoding value or NULL
1.29 daniel 8292: */
8293:
1.123 daniel 8294: xmlChar *
1.55 daniel 8295: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 8296: xmlChar *encoding = NULL;
8297: const xmlChar *q;
1.29 daniel 8298:
1.42 daniel 8299: SKIP_BLANKS;
1.152 daniel 8300: if ((RAW == 'e') && (NXT(1) == 'n') &&
1.40 daniel 8301: (NXT(2) == 'c') && (NXT(3) == 'o') &&
8302: (NXT(4) == 'd') && (NXT(5) == 'i') &&
8303: (NXT(6) == 'n') && (NXT(7) == 'g')) {
8304: SKIP(8);
1.42 daniel 8305: SKIP_BLANKS;
1.152 daniel 8306: if (RAW != '=') {
1.55 daniel 8307: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8308: ctxt->sax->error(ctxt->userData,
8309: "xmlParseEncodingDecl : expected '='\n");
1.59 daniel 8310: ctxt->wellFormed = 0;
1.180 daniel 8311: ctxt->disableSAX = 1;
1.123 daniel 8312: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.31 daniel 8313: return(NULL);
8314: }
1.40 daniel 8315: NEXT;
1.42 daniel 8316: SKIP_BLANKS;
1.152 daniel 8317: if (RAW == '"') {
1.40 daniel 8318: NEXT;
8319: q = CUR_PTR;
1.29 daniel 8320: encoding = xmlParseEncName(ctxt);
1.152 daniel 8321: if (RAW != '"') {
1.55 daniel 8322: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8323: ctxt->sax->error(ctxt->userData,
8324: "String not closed\n%.50s\n", q);
1.59 daniel 8325: ctxt->wellFormed = 0;
1.180 daniel 8326: ctxt->disableSAX = 1;
1.123 daniel 8327: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8328: } else
1.40 daniel 8329: NEXT;
1.152 daniel 8330: } else if (RAW == '\''){
1.40 daniel 8331: NEXT;
8332: q = CUR_PTR;
1.29 daniel 8333: encoding = xmlParseEncName(ctxt);
1.152 daniel 8334: if (RAW != '\'') {
1.55 daniel 8335: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8336: ctxt->sax->error(ctxt->userData,
8337: "String not closed\n%.50s\n", q);
1.59 daniel 8338: ctxt->wellFormed = 0;
1.180 daniel 8339: ctxt->disableSAX = 1;
1.123 daniel 8340: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8341: } else
1.40 daniel 8342: NEXT;
1.152 daniel 8343: } else if (RAW == '"'){
1.55 daniel 8344: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8345: ctxt->sax->error(ctxt->userData,
1.59 daniel 8346: "xmlParseEncodingDecl : expected ' or \"\n");
8347: ctxt->wellFormed = 0;
1.180 daniel 8348: ctxt->disableSAX = 1;
1.123 daniel 8349: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.29 daniel 8350: }
1.193 daniel 8351: if (encoding != NULL) {
8352: xmlCharEncoding enc;
8353: xmlCharEncodingHandlerPtr handler;
8354:
1.195 daniel 8355: if (ctxt->input->encoding != NULL)
8356: xmlFree((xmlChar *) ctxt->input->encoding);
8357: ctxt->input->encoding = encoding;
8358:
1.193 daniel 8359: enc = xmlParseCharEncoding((const char *) encoding);
8360: /*
8361: * registered set of known encodings
8362: */
8363: if (enc != XML_CHAR_ENCODING_ERROR) {
8364: xmlSwitchEncoding(ctxt, enc);
8365: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8366: xmlFree(encoding);
8367: return(NULL);
8368: }
8369: } else {
8370: /*
8371: * fallback for unknown encodings
8372: */
8373: handler = xmlFindCharEncodingHandler((const char *) encoding);
8374: if (handler != NULL) {
8375: xmlSwitchToEncoding(ctxt, handler);
8376: } else {
8377: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.208 veillard 8378: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8379: ctxt->sax->error(ctxt->userData,
8380: "Unsupported encoding %s\n", encoding);
1.193 daniel 8381: return(NULL);
8382: }
8383: }
8384: }
1.29 daniel 8385: }
8386: return(encoding);
8387: }
8388:
1.50 daniel 8389: /**
8390: * xmlParseSDDecl:
8391: * @ctxt: an XML parser context
8392: *
8393: * parse the XML standalone declaration
1.29 daniel 8394: *
8395: * [32] SDDecl ::= S 'standalone' Eq
8396: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
1.99 daniel 8397: *
8398: * [ VC: Standalone Document Declaration ]
8399: * TODO The standalone document declaration must have the value "no"
8400: * if any external markup declarations contain declarations of:
8401: * - attributes with default values, if elements to which these
8402: * attributes apply appear in the document without specifications
8403: * of values for these attributes, or
8404: * - entities (other than amp, lt, gt, apos, quot), if references
8405: * to those entities appear in the document, or
8406: * - attributes with values subject to normalization, where the
8407: * attribute appears in the document with a value which will change
8408: * as a result of normalization, or
8409: * - element types with element content, if white space occurs directly
8410: * within any instance of those types.
1.68 daniel 8411: *
8412: * Returns 1 if standalone, 0 otherwise
1.29 daniel 8413: */
8414:
1.55 daniel 8415: int
8416: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 8417: int standalone = -1;
8418:
1.42 daniel 8419: SKIP_BLANKS;
1.152 daniel 8420: if ((RAW == 's') && (NXT(1) == 't') &&
1.40 daniel 8421: (NXT(2) == 'a') && (NXT(3) == 'n') &&
8422: (NXT(4) == 'd') && (NXT(5) == 'a') &&
8423: (NXT(6) == 'l') && (NXT(7) == 'o') &&
8424: (NXT(8) == 'n') && (NXT(9) == 'e')) {
8425: SKIP(10);
1.81 daniel 8426: SKIP_BLANKS;
1.152 daniel 8427: if (RAW != '=') {
1.55 daniel 8428: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8429: ctxt->sax->error(ctxt->userData,
1.59 daniel 8430: "XML standalone declaration : expected '='\n");
1.123 daniel 8431: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.59 daniel 8432: ctxt->wellFormed = 0;
1.180 daniel 8433: ctxt->disableSAX = 1;
1.32 daniel 8434: return(standalone);
8435: }
1.40 daniel 8436: NEXT;
1.42 daniel 8437: SKIP_BLANKS;
1.152 daniel 8438: if (RAW == '\''){
1.40 daniel 8439: NEXT;
1.152 daniel 8440: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 8441: standalone = 0;
1.40 daniel 8442: SKIP(2);
1.152 daniel 8443: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 8444: (NXT(2) == 's')) {
1.29 daniel 8445: standalone = 1;
1.40 daniel 8446: SKIP(3);
1.29 daniel 8447: } else {
1.55 daniel 8448: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8449: ctxt->sax->error(ctxt->userData,
8450: "standalone accepts only 'yes' or 'no'\n");
1.123 daniel 8451: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.59 daniel 8452: ctxt->wellFormed = 0;
1.180 daniel 8453: ctxt->disableSAX = 1;
1.29 daniel 8454: }
1.152 daniel 8455: if (RAW != '\'') {
1.55 daniel 8456: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8457: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.123 daniel 8458: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.59 daniel 8459: ctxt->wellFormed = 0;
1.180 daniel 8460: ctxt->disableSAX = 1;
1.55 daniel 8461: } else
1.40 daniel 8462: NEXT;
1.152 daniel 8463: } else if (RAW == '"'){
1.40 daniel 8464: NEXT;
1.152 daniel 8465: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 8466: standalone = 0;
1.40 daniel 8467: SKIP(2);
1.152 daniel 8468: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 8469: (NXT(2) == 's')) {
1.29 daniel 8470: standalone = 1;
1.40 daniel 8471: SKIP(3);
1.29 daniel 8472: } else {
1.55 daniel 8473: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8474: ctxt->sax->error(ctxt->userData,
1.59 daniel 8475: "standalone accepts only 'yes' or 'no'\n");
1.123 daniel 8476: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.59 daniel 8477: ctxt->wellFormed = 0;
1.180 daniel 8478: ctxt->disableSAX = 1;
1.29 daniel 8479: }
1.152 daniel 8480: if (RAW != '"') {
1.55 daniel 8481: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8482: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 8483: ctxt->wellFormed = 0;
1.180 daniel 8484: ctxt->disableSAX = 1;
1.123 daniel 8485: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8486: } else
1.40 daniel 8487: NEXT;
1.37 daniel 8488: } else {
1.55 daniel 8489: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8490: ctxt->sax->error(ctxt->userData,
8491: "Standalone value not found\n");
1.59 daniel 8492: ctxt->wellFormed = 0;
1.180 daniel 8493: ctxt->disableSAX = 1;
1.123 daniel 8494: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.37 daniel 8495: }
1.29 daniel 8496: }
8497: return(standalone);
8498: }
8499:
1.50 daniel 8500: /**
8501: * xmlParseXMLDecl:
8502: * @ctxt: an XML parser context
8503: *
8504: * parse an XML declaration header
1.29 daniel 8505: *
8506: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 8507: */
8508:
1.55 daniel 8509: void
8510: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 8511: xmlChar *version;
1.1 veillard 8512:
8513: /*
1.19 daniel 8514: * We know that '<?xml' is here.
1.1 veillard 8515: */
1.40 daniel 8516: SKIP(5);
1.1 veillard 8517:
1.153 daniel 8518: if (!IS_BLANK(RAW)) {
1.59 daniel 8519: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8520: ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
1.123 daniel 8521: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 8522: ctxt->wellFormed = 0;
1.180 daniel 8523: ctxt->disableSAX = 1;
1.59 daniel 8524: }
1.42 daniel 8525: SKIP_BLANKS;
1.1 veillard 8526:
8527: /*
1.29 daniel 8528: * We should have the VersionInfo here.
1.1 veillard 8529: */
1.29 daniel 8530: version = xmlParseVersionInfo(ctxt);
8531: if (version == NULL)
1.45 daniel 8532: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.72 daniel 8533: ctxt->version = xmlStrdup(version);
1.119 daniel 8534: xmlFree(version);
1.29 daniel 8535:
8536: /*
8537: * We may have the encoding declaration
8538: */
1.153 daniel 8539: if (!IS_BLANK(RAW)) {
1.152 daniel 8540: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 8541: SKIP(2);
8542: return;
8543: }
8544: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8545: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.123 daniel 8546: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 8547: ctxt->wellFormed = 0;
1.180 daniel 8548: ctxt->disableSAX = 1;
1.59 daniel 8549: }
1.195 daniel 8550: xmlParseEncodingDecl(ctxt);
1.193 daniel 8551: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8552: /*
8553: * The XML REC instructs us to stop parsing right here
8554: */
8555: return;
8556: }
1.1 veillard 8557:
8558: /*
1.29 daniel 8559: * We may have the standalone status.
1.1 veillard 8560: */
1.164 daniel 8561: if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
1.152 daniel 8562: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 8563: SKIP(2);
8564: return;
8565: }
8566: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8567: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 8568: ctxt->wellFormed = 0;
1.180 daniel 8569: ctxt->disableSAX = 1;
1.123 daniel 8570: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 8571: }
8572: SKIP_BLANKS;
1.167 daniel 8573: ctxt->input->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 8574:
1.42 daniel 8575: SKIP_BLANKS;
1.152 daniel 8576: if ((RAW == '?') && (NXT(1) == '>')) {
1.40 daniel 8577: SKIP(2);
1.152 daniel 8578: } else if (RAW == '>') {
1.31 daniel 8579: /* Deprecated old WD ... */
1.55 daniel 8580: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8581: ctxt->sax->error(ctxt->userData,
8582: "XML declaration must end-up with '?>'\n");
1.59 daniel 8583: ctxt->wellFormed = 0;
1.180 daniel 8584: ctxt->disableSAX = 1;
1.123 daniel 8585: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.40 daniel 8586: NEXT;
1.29 daniel 8587: } else {
1.55 daniel 8588: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8589: ctxt->sax->error(ctxt->userData,
8590: "parsing XML declaration: '?>' expected\n");
1.59 daniel 8591: ctxt->wellFormed = 0;
1.180 daniel 8592: ctxt->disableSAX = 1;
1.123 daniel 8593: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.40 daniel 8594: MOVETO_ENDTAG(CUR_PTR);
8595: NEXT;
1.29 daniel 8596: }
1.1 veillard 8597: }
8598:
1.50 daniel 8599: /**
8600: * xmlParseMisc:
8601: * @ctxt: an XML parser context
8602: *
8603: * parse an XML Misc* optionnal field.
1.21 daniel 8604: *
1.22 daniel 8605: * [27] Misc ::= Comment | PI | S
1.1 veillard 8606: */
8607:
1.55 daniel 8608: void
8609: xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.152 daniel 8610: while (((RAW == '<') && (NXT(1) == '?')) ||
8611: ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 8612: (NXT(2) == '-') && (NXT(3) == '-')) ||
8613: IS_BLANK(CUR)) {
1.152 daniel 8614: if ((RAW == '<') && (NXT(1) == '?')) {
1.16 daniel 8615: xmlParsePI(ctxt);
1.40 daniel 8616: } else if (IS_BLANK(CUR)) {
8617: NEXT;
1.1 veillard 8618: } else
1.114 daniel 8619: xmlParseComment(ctxt);
1.1 veillard 8620: }
8621: }
8622:
1.50 daniel 8623: /**
1.181 daniel 8624: * xmlParseDocument:
1.50 daniel 8625: * @ctxt: an XML parser context
8626: *
8627: * parse an XML document (and build a tree if using the standard SAX
8628: * interface).
1.21 daniel 8629: *
1.22 daniel 8630: * [1] document ::= prolog element Misc*
1.29 daniel 8631: *
8632: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.50 daniel 8633: *
1.68 daniel 8634: * Returns 0, -1 in case of error. the parser context is augmented
1.50 daniel 8635: * as a result of the parsing.
1.1 veillard 8636: */
8637:
1.55 daniel 8638: int
8639: xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.156 daniel 8640: xmlChar start[4];
8641: xmlCharEncoding enc;
8642:
1.45 daniel 8643: xmlDefaultSAXHandlerInit();
8644:
1.91 daniel 8645: GROW;
8646:
1.14 veillard 8647: /*
1.44 daniel 8648: * SAX: beginning of the document processing.
8649: */
1.72 daniel 8650: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
1.74 daniel 8651: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
1.44 daniel 8652:
1.156 daniel 8653: /*
8654: * Get the 4 first bytes and decode the charset
8655: * if enc != XML_CHAR_ENCODING_NONE
8656: * plug some encoding conversion routines.
8657: */
8658: start[0] = RAW;
8659: start[1] = NXT(1);
8660: start[2] = NXT(2);
8661: start[3] = NXT(3);
8662: enc = xmlDetectCharEncoding(start, 4);
8663: if (enc != XML_CHAR_ENCODING_NONE) {
8664: xmlSwitchEncoding(ctxt, enc);
8665: }
8666:
1.1 veillard 8667:
1.59 daniel 8668: if (CUR == 0) {
8669: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8670: ctxt->sax->error(ctxt->userData, "Document is empty\n");
1.123 daniel 8671: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 8672: ctxt->wellFormed = 0;
1.180 daniel 8673: ctxt->disableSAX = 1;
1.59 daniel 8674: }
1.1 veillard 8675:
8676: /*
8677: * Check for the XMLDecl in the Prolog.
8678: */
1.91 daniel 8679: GROW;
1.152 daniel 8680: if ((RAW == '<') && (NXT(1) == '?') &&
1.40 daniel 8681: (NXT(2) == 'x') && (NXT(3) == 'm') &&
1.142 daniel 8682: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.196 daniel 8683:
8684: /*
8685: * Note that we will switch encoding on the fly.
8686: */
1.19 daniel 8687: xmlParseXMLDecl(ctxt);
1.193 daniel 8688: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8689: /*
8690: * The XML REC instructs us to stop parsing right here
8691: */
8692: return(-1);
8693: }
1.167 daniel 8694: ctxt->standalone = ctxt->input->standalone;
1.42 daniel 8695: SKIP_BLANKS;
1.1 veillard 8696: } else {
1.72 daniel 8697: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.1 veillard 8698: }
1.171 daniel 8699: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
1.74 daniel 8700: ctxt->sax->startDocument(ctxt->userData);
1.1 veillard 8701:
8702: /*
8703: * The Misc part of the Prolog
8704: */
1.91 daniel 8705: GROW;
1.16 daniel 8706: xmlParseMisc(ctxt);
1.1 veillard 8707:
8708: /*
1.29 daniel 8709: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 8710: * (doctypedecl Misc*)?
8711: */
1.91 daniel 8712: GROW;
1.152 daniel 8713: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 8714: (NXT(2) == 'D') && (NXT(3) == 'O') &&
8715: (NXT(4) == 'C') && (NXT(5) == 'T') &&
8716: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
8717: (NXT(8) == 'E')) {
1.165 daniel 8718:
1.166 daniel 8719: ctxt->inSubset = 1;
1.22 daniel 8720: xmlParseDocTypeDecl(ctxt);
1.152 daniel 8721: if (RAW == '[') {
1.140 daniel 8722: ctxt->instate = XML_PARSER_DTD;
8723: xmlParseInternalSubset(ctxt);
8724: }
1.165 daniel 8725:
8726: /*
8727: * Create and update the external subset.
8728: */
1.166 daniel 8729: ctxt->inSubset = 2;
1.171 daniel 8730: if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8731: (!ctxt->disableSAX))
1.165 daniel 8732: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8733: ctxt->extSubSystem, ctxt->extSubURI);
1.166 daniel 8734: ctxt->inSubset = 0;
1.165 daniel 8735:
8736:
1.96 daniel 8737: ctxt->instate = XML_PARSER_PROLOG;
1.22 daniel 8738: xmlParseMisc(ctxt);
1.21 daniel 8739: }
8740:
8741: /*
8742: * Time to start parsing the tree itself
1.1 veillard 8743: */
1.91 daniel 8744: GROW;
1.152 daniel 8745: if (RAW != '<') {
1.59 daniel 8746: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8747: ctxt->sax->error(ctxt->userData,
1.151 daniel 8748: "Start tag expected, '<' not found\n");
1.140 daniel 8749: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 8750: ctxt->wellFormed = 0;
1.180 daniel 8751: ctxt->disableSAX = 1;
1.140 daniel 8752: ctxt->instate = XML_PARSER_EOF;
8753: } else {
8754: ctxt->instate = XML_PARSER_CONTENT;
8755: xmlParseElement(ctxt);
8756: ctxt->instate = XML_PARSER_EPILOG;
8757:
8758:
8759: /*
8760: * The Misc part at the end
8761: */
8762: xmlParseMisc(ctxt);
8763:
1.152 daniel 8764: if (RAW != 0) {
1.140 daniel 8765: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8766: ctxt->sax->error(ctxt->userData,
8767: "Extra content at the end of the document\n");
8768: ctxt->wellFormed = 0;
1.180 daniel 8769: ctxt->disableSAX = 1;
1.140 daniel 8770: ctxt->errNo = XML_ERR_DOCUMENT_END;
8771: }
8772: ctxt->instate = XML_PARSER_EOF;
1.59 daniel 8773: }
8774:
1.44 daniel 8775: /*
8776: * SAX: end of the document processing.
8777: */
1.171 daniel 8778: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8779: (!ctxt->disableSAX))
1.74 daniel 8780: ctxt->sax->endDocument(ctxt->userData);
1.151 daniel 8781:
1.59 daniel 8782: if (! ctxt->wellFormed) return(-1);
1.16 daniel 8783: return(0);
8784: }
8785:
1.98 daniel 8786: /************************************************************************
8787: * *
1.128 daniel 8788: * Progressive parsing interfaces *
8789: * *
8790: ************************************************************************/
8791:
8792: /**
8793: * xmlParseLookupSequence:
8794: * @ctxt: an XML parser context
8795: * @first: the first char to lookup
1.140 daniel 8796: * @next: the next char to lookup or zero
8797: * @third: the next char to lookup or zero
1.128 daniel 8798: *
1.140 daniel 8799: * Try to find if a sequence (first, next, third) or just (first next) or
8800: * (first) is available in the input stream.
8801: * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8802: * to avoid rescanning sequences of bytes, it DOES change the state of the
8803: * parser, do not use liberally.
1.128 daniel 8804: *
1.140 daniel 8805: * Returns the index to the current parsing point if the full sequence
8806: * is available, -1 otherwise.
1.128 daniel 8807: */
8808: int
1.140 daniel 8809: xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8810: xmlChar next, xmlChar third) {
8811: int base, len;
8812: xmlParserInputPtr in;
8813: const xmlChar *buf;
8814:
8815: in = ctxt->input;
8816: if (in == NULL) return(-1);
8817: base = in->cur - in->base;
8818: if (base < 0) return(-1);
8819: if (ctxt->checkIndex > base)
8820: base = ctxt->checkIndex;
8821: if (in->buf == NULL) {
8822: buf = in->base;
8823: len = in->length;
8824: } else {
8825: buf = in->buf->buffer->content;
8826: len = in->buf->buffer->use;
8827: }
8828: /* take into account the sequence length */
8829: if (third) len -= 2;
8830: else if (next) len --;
8831: for (;base < len;base++) {
8832: if (buf[base] == first) {
8833: if (third != 0) {
8834: if ((buf[base + 1] != next) ||
8835: (buf[base + 2] != third)) continue;
8836: } else if (next != 0) {
8837: if (buf[base + 1] != next) continue;
8838: }
8839: ctxt->checkIndex = 0;
8840: #ifdef DEBUG_PUSH
8841: if (next == 0)
8842: fprintf(stderr, "PP: lookup '%c' found at %d\n",
8843: first, base);
8844: else if (third == 0)
8845: fprintf(stderr, "PP: lookup '%c%c' found at %d\n",
8846: first, next, base);
8847: else
8848: fprintf(stderr, "PP: lookup '%c%c%c' found at %d\n",
8849: first, next, third, base);
8850: #endif
8851: return(base - (in->cur - in->base));
8852: }
8853: }
8854: ctxt->checkIndex = base;
8855: #ifdef DEBUG_PUSH
8856: if (next == 0)
8857: fprintf(stderr, "PP: lookup '%c' failed\n", first);
8858: else if (third == 0)
8859: fprintf(stderr, "PP: lookup '%c%c' failed\n", first, next);
8860: else
8861: fprintf(stderr, "PP: lookup '%c%c%c' failed\n", first, next, third);
8862: #endif
8863: return(-1);
1.128 daniel 8864: }
8865:
8866: /**
1.143 daniel 8867: * xmlParseTryOrFinish:
1.128 daniel 8868: * @ctxt: an XML parser context
1.143 daniel 8869: * @terminate: last chunk indicator
1.128 daniel 8870: *
8871: * Try to progress on parsing
8872: *
8873: * Returns zero if no parsing was possible
8874: */
8875: int
1.143 daniel 8876: xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
1.128 daniel 8877: int ret = 0;
1.140 daniel 8878: int avail;
8879: xmlChar cur, next;
8880:
8881: #ifdef DEBUG_PUSH
8882: switch (ctxt->instate) {
8883: case XML_PARSER_EOF:
8884: fprintf(stderr, "PP: try EOF\n"); break;
8885: case XML_PARSER_START:
8886: fprintf(stderr, "PP: try START\n"); break;
8887: case XML_PARSER_MISC:
8888: fprintf(stderr, "PP: try MISC\n");break;
8889: case XML_PARSER_COMMENT:
8890: fprintf(stderr, "PP: try COMMENT\n");break;
8891: case XML_PARSER_PROLOG:
8892: fprintf(stderr, "PP: try PROLOG\n");break;
8893: case XML_PARSER_START_TAG:
8894: fprintf(stderr, "PP: try START_TAG\n");break;
8895: case XML_PARSER_CONTENT:
8896: fprintf(stderr, "PP: try CONTENT\n");break;
8897: case XML_PARSER_CDATA_SECTION:
8898: fprintf(stderr, "PP: try CDATA_SECTION\n");break;
8899: case XML_PARSER_END_TAG:
8900: fprintf(stderr, "PP: try END_TAG\n");break;
8901: case XML_PARSER_ENTITY_DECL:
8902: fprintf(stderr, "PP: try ENTITY_DECL\n");break;
8903: case XML_PARSER_ENTITY_VALUE:
8904: fprintf(stderr, "PP: try ENTITY_VALUE\n");break;
8905: case XML_PARSER_ATTRIBUTE_VALUE:
8906: fprintf(stderr, "PP: try ATTRIBUTE_VALUE\n");break;
8907: case XML_PARSER_DTD:
8908: fprintf(stderr, "PP: try DTD\n");break;
8909: case XML_PARSER_EPILOG:
8910: fprintf(stderr, "PP: try EPILOG\n");break;
8911: case XML_PARSER_PI:
8912: fprintf(stderr, "PP: try PI\n");break;
8913: }
8914: #endif
1.128 daniel 8915:
8916: while (1) {
1.140 daniel 8917: /*
8918: * Pop-up of finished entities.
8919: */
1.152 daniel 8920: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 8921: xmlPopInput(ctxt);
8922:
1.184 daniel 8923: if (ctxt->input ==NULL) break;
8924: if (ctxt->input->buf == NULL)
8925: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8926: else
1.184 daniel 8927: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8928: if (avail < 1)
8929: goto done;
1.128 daniel 8930: switch (ctxt->instate) {
8931: case XML_PARSER_EOF:
1.140 daniel 8932: /*
8933: * Document parsing is done !
8934: */
8935: goto done;
8936: case XML_PARSER_START:
8937: /*
8938: * Very first chars read from the document flow.
8939: */
1.184 daniel 8940: cur = ctxt->input->cur[0];
1.140 daniel 8941: if (IS_BLANK(cur)) {
8942: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8943: ctxt->sax->setDocumentLocator(ctxt->userData,
8944: &xmlDefaultSAXLocator);
8945: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8946: ctxt->sax->error(ctxt->userData,
8947: "Extra spaces at the beginning of the document are not allowed\n");
8948: ctxt->errNo = XML_ERR_DOCUMENT_START;
8949: ctxt->wellFormed = 0;
1.180 daniel 8950: ctxt->disableSAX = 1;
1.140 daniel 8951: SKIP_BLANKS;
8952: ret++;
1.184 daniel 8953: if (ctxt->input->buf == NULL)
8954: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8955: else
1.184 daniel 8956: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8957: }
8958: if (avail < 2)
8959: goto done;
8960:
1.184 daniel 8961: cur = ctxt->input->cur[0];
8962: next = ctxt->input->cur[1];
1.140 daniel 8963: if (cur == 0) {
8964: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8965: ctxt->sax->setDocumentLocator(ctxt->userData,
8966: &xmlDefaultSAXLocator);
8967: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8968: ctxt->sax->error(ctxt->userData, "Document is empty\n");
8969: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8970: ctxt->wellFormed = 0;
1.180 daniel 8971: ctxt->disableSAX = 1;
1.140 daniel 8972: ctxt->instate = XML_PARSER_EOF;
8973: #ifdef DEBUG_PUSH
8974: fprintf(stderr, "PP: entering EOF\n");
8975: #endif
8976: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8977: ctxt->sax->endDocument(ctxt->userData);
8978: goto done;
8979: }
8980: if ((cur == '<') && (next == '?')) {
8981: /* PI or XML decl */
8982: if (avail < 5) return(ret);
1.143 daniel 8983: if ((!terminate) &&
8984: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8985: return(ret);
8986: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8987: ctxt->sax->setDocumentLocator(ctxt->userData,
8988: &xmlDefaultSAXLocator);
1.184 daniel 8989: if ((ctxt->input->cur[2] == 'x') &&
8990: (ctxt->input->cur[3] == 'm') &&
8991: (ctxt->input->cur[4] == 'l') &&
8992: (IS_BLANK(ctxt->input->cur[5]))) {
1.140 daniel 8993: ret += 5;
8994: #ifdef DEBUG_PUSH
8995: fprintf(stderr, "PP: Parsing XML Decl\n");
8996: #endif
8997: xmlParseXMLDecl(ctxt);
1.193 daniel 8998: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8999: /*
9000: * The XML REC instructs us to stop parsing right
9001: * here
9002: */
9003: ctxt->instate = XML_PARSER_EOF;
9004: return(0);
9005: }
1.167 daniel 9006: ctxt->standalone = ctxt->input->standalone;
1.164 daniel 9007: if ((ctxt->encoding == NULL) &&
9008: (ctxt->input->encoding != NULL))
9009: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1.171 daniel 9010: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9011: (!ctxt->disableSAX))
1.140 daniel 9012: ctxt->sax->startDocument(ctxt->userData);
9013: ctxt->instate = XML_PARSER_MISC;
9014: #ifdef DEBUG_PUSH
9015: fprintf(stderr, "PP: entering MISC\n");
9016: #endif
9017: } else {
9018: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 daniel 9019: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9020: (!ctxt->disableSAX))
1.140 daniel 9021: ctxt->sax->startDocument(ctxt->userData);
9022: ctxt->instate = XML_PARSER_MISC;
9023: #ifdef DEBUG_PUSH
9024: fprintf(stderr, "PP: entering MISC\n");
9025: #endif
9026: }
9027: } else {
9028: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9029: ctxt->sax->setDocumentLocator(ctxt->userData,
9030: &xmlDefaultSAXLocator);
9031: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 daniel 9032: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9033: (!ctxt->disableSAX))
1.140 daniel 9034: ctxt->sax->startDocument(ctxt->userData);
9035: ctxt->instate = XML_PARSER_MISC;
9036: #ifdef DEBUG_PUSH
9037: fprintf(stderr, "PP: entering MISC\n");
9038: #endif
9039: }
9040: break;
9041: case XML_PARSER_MISC:
9042: SKIP_BLANKS;
1.184 daniel 9043: if (ctxt->input->buf == NULL)
9044: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9045: else
1.184 daniel 9046: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9047: if (avail < 2)
9048: goto done;
1.184 daniel 9049: cur = ctxt->input->cur[0];
9050: next = ctxt->input->cur[1];
1.140 daniel 9051: if ((cur == '<') && (next == '?')) {
1.143 daniel 9052: if ((!terminate) &&
9053: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 9054: goto done;
9055: #ifdef DEBUG_PUSH
9056: fprintf(stderr, "PP: Parsing PI\n");
9057: #endif
9058: xmlParsePI(ctxt);
9059: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 9060: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 9061: if ((!terminate) &&
9062: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 9063: goto done;
9064: #ifdef DEBUG_PUSH
9065: fprintf(stderr, "PP: Parsing Comment\n");
9066: #endif
9067: xmlParseComment(ctxt);
9068: ctxt->instate = XML_PARSER_MISC;
9069: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 9070: (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
9071: (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
9072: (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
9073: (ctxt->input->cur[8] == 'E')) {
1.143 daniel 9074: if ((!terminate) &&
9075: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 9076: goto done;
9077: #ifdef DEBUG_PUSH
9078: fprintf(stderr, "PP: Parsing internal subset\n");
9079: #endif
1.166 daniel 9080: ctxt->inSubset = 1;
1.140 daniel 9081: xmlParseDocTypeDecl(ctxt);
1.152 daniel 9082: if (RAW == '[') {
1.140 daniel 9083: ctxt->instate = XML_PARSER_DTD;
9084: #ifdef DEBUG_PUSH
9085: fprintf(stderr, "PP: entering DTD\n");
9086: #endif
9087: } else {
1.166 daniel 9088: /*
9089: * Create and update the external subset.
9090: */
9091: ctxt->inSubset = 2;
1.171 daniel 9092: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 9093: (ctxt->sax->externalSubset != NULL))
9094: ctxt->sax->externalSubset(ctxt->userData,
9095: ctxt->intSubName, ctxt->extSubSystem,
9096: ctxt->extSubURI);
9097: ctxt->inSubset = 0;
1.140 daniel 9098: ctxt->instate = XML_PARSER_PROLOG;
9099: #ifdef DEBUG_PUSH
9100: fprintf(stderr, "PP: entering PROLOG\n");
9101: #endif
9102: }
9103: } else if ((cur == '<') && (next == '!') &&
9104: (avail < 9)) {
9105: goto done;
9106: } else {
9107: ctxt->instate = XML_PARSER_START_TAG;
9108: #ifdef DEBUG_PUSH
9109: fprintf(stderr, "PP: entering START_TAG\n");
9110: #endif
9111: }
9112: break;
1.128 daniel 9113: case XML_PARSER_PROLOG:
1.140 daniel 9114: SKIP_BLANKS;
1.184 daniel 9115: if (ctxt->input->buf == NULL)
9116: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9117: else
1.184 daniel 9118: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9119: if (avail < 2)
9120: goto done;
1.184 daniel 9121: cur = ctxt->input->cur[0];
9122: next = ctxt->input->cur[1];
1.140 daniel 9123: if ((cur == '<') && (next == '?')) {
1.143 daniel 9124: if ((!terminate) &&
9125: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 9126: goto done;
9127: #ifdef DEBUG_PUSH
9128: fprintf(stderr, "PP: Parsing PI\n");
9129: #endif
9130: xmlParsePI(ctxt);
9131: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 9132: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 9133: if ((!terminate) &&
9134: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 9135: goto done;
9136: #ifdef DEBUG_PUSH
9137: fprintf(stderr, "PP: Parsing Comment\n");
9138: #endif
9139: xmlParseComment(ctxt);
9140: ctxt->instate = XML_PARSER_PROLOG;
9141: } else if ((cur == '<') && (next == '!') &&
9142: (avail < 4)) {
9143: goto done;
9144: } else {
9145: ctxt->instate = XML_PARSER_START_TAG;
9146: #ifdef DEBUG_PUSH
9147: fprintf(stderr, "PP: entering START_TAG\n");
9148: #endif
9149: }
9150: break;
9151: case XML_PARSER_EPILOG:
9152: SKIP_BLANKS;
1.184 daniel 9153: if (ctxt->input->buf == NULL)
9154: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9155: else
1.184 daniel 9156: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9157: if (avail < 2)
9158: goto done;
1.184 daniel 9159: cur = ctxt->input->cur[0];
9160: next = ctxt->input->cur[1];
1.140 daniel 9161: if ((cur == '<') && (next == '?')) {
1.143 daniel 9162: if ((!terminate) &&
9163: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 9164: goto done;
9165: #ifdef DEBUG_PUSH
9166: fprintf(stderr, "PP: Parsing PI\n");
9167: #endif
9168: xmlParsePI(ctxt);
9169: ctxt->instate = XML_PARSER_EPILOG;
9170: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 9171: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 9172: if ((!terminate) &&
9173: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 9174: goto done;
9175: #ifdef DEBUG_PUSH
9176: fprintf(stderr, "PP: Parsing Comment\n");
9177: #endif
9178: xmlParseComment(ctxt);
9179: ctxt->instate = XML_PARSER_EPILOG;
9180: } else if ((cur == '<') && (next == '!') &&
9181: (avail < 4)) {
9182: goto done;
9183: } else {
9184: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9185: ctxt->sax->error(ctxt->userData,
9186: "Extra content at the end of the document\n");
9187: ctxt->wellFormed = 0;
1.180 daniel 9188: ctxt->disableSAX = 1;
1.140 daniel 9189: ctxt->errNo = XML_ERR_DOCUMENT_END;
9190: ctxt->instate = XML_PARSER_EOF;
9191: #ifdef DEBUG_PUSH
9192: fprintf(stderr, "PP: entering EOF\n");
9193: #endif
1.171 daniel 9194: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9195: (!ctxt->disableSAX))
1.140 daniel 9196: ctxt->sax->endDocument(ctxt->userData);
9197: goto done;
9198: }
9199: break;
9200: case XML_PARSER_START_TAG: {
9201: xmlChar *name, *oldname;
9202:
1.184 daniel 9203: if ((avail < 2) && (ctxt->inputNr == 1))
1.140 daniel 9204: goto done;
1.184 daniel 9205: cur = ctxt->input->cur[0];
1.140 daniel 9206: if (cur != '<') {
9207: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9208: ctxt->sax->error(ctxt->userData,
9209: "Start tag expect, '<' not found\n");
9210: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
9211: ctxt->wellFormed = 0;
1.180 daniel 9212: ctxt->disableSAX = 1;
1.140 daniel 9213: ctxt->instate = XML_PARSER_EOF;
9214: #ifdef DEBUG_PUSH
9215: fprintf(stderr, "PP: entering EOF\n");
9216: #endif
1.171 daniel 9217: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9218: (!ctxt->disableSAX))
1.140 daniel 9219: ctxt->sax->endDocument(ctxt->userData);
9220: goto done;
9221: }
1.143 daniel 9222: if ((!terminate) &&
9223: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 9224: goto done;
1.176 daniel 9225: if (ctxt->spaceNr == 0)
9226: spacePush(ctxt, -1);
9227: else
9228: spacePush(ctxt, *ctxt->space);
1.140 daniel 9229: name = xmlParseStartTag(ctxt);
9230: if (name == NULL) {
1.176 daniel 9231: spacePop(ctxt);
1.140 daniel 9232: ctxt->instate = XML_PARSER_EOF;
9233: #ifdef DEBUG_PUSH
9234: fprintf(stderr, "PP: entering EOF\n");
9235: #endif
1.171 daniel 9236: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9237: (!ctxt->disableSAX))
1.140 daniel 9238: ctxt->sax->endDocument(ctxt->userData);
9239: goto done;
9240: }
9241: namePush(ctxt, xmlStrdup(name));
9242:
9243: /*
9244: * [ VC: Root Element Type ]
9245: * The Name in the document type declaration must match
9246: * the element type of the root element.
9247: */
9248: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 9249: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.140 daniel 9250: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9251:
9252: /*
9253: * Check for an Empty Element.
9254: */
1.152 daniel 9255: if ((RAW == '/') && (NXT(1) == '>')) {
1.140 daniel 9256: SKIP(2);
1.171 daniel 9257: if ((ctxt->sax != NULL) &&
9258: (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
1.140 daniel 9259: ctxt->sax->endElement(ctxt->userData, name);
9260: xmlFree(name);
9261: oldname = namePop(ctxt);
1.176 daniel 9262: spacePop(ctxt);
1.140 daniel 9263: if (oldname != NULL) {
9264: #ifdef DEBUG_STACK
9265: fprintf(stderr,"Close: popped %s\n", oldname);
9266: #endif
9267: xmlFree(oldname);
9268: }
9269: if (ctxt->name == NULL) {
9270: ctxt->instate = XML_PARSER_EPILOG;
9271: #ifdef DEBUG_PUSH
9272: fprintf(stderr, "PP: entering EPILOG\n");
9273: #endif
9274: } else {
9275: ctxt->instate = XML_PARSER_CONTENT;
9276: #ifdef DEBUG_PUSH
9277: fprintf(stderr, "PP: entering CONTENT\n");
9278: #endif
9279: }
9280: break;
9281: }
1.152 daniel 9282: if (RAW == '>') {
1.140 daniel 9283: NEXT;
9284: } else {
9285: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9286: ctxt->sax->error(ctxt->userData,
9287: "Couldn't find end of Start Tag %s\n",
9288: name);
9289: ctxt->wellFormed = 0;
1.180 daniel 9290: ctxt->disableSAX = 1;
1.140 daniel 9291: ctxt->errNo = XML_ERR_GT_REQUIRED;
9292:
9293: /*
9294: * end of parsing of this node.
9295: */
9296: nodePop(ctxt);
9297: oldname = namePop(ctxt);
1.176 daniel 9298: spacePop(ctxt);
1.140 daniel 9299: if (oldname != NULL) {
9300: #ifdef DEBUG_STACK
9301: fprintf(stderr,"Close: popped %s\n", oldname);
9302: #endif
9303: xmlFree(oldname);
9304: }
9305: }
9306: xmlFree(name);
9307: ctxt->instate = XML_PARSER_CONTENT;
9308: #ifdef DEBUG_PUSH
9309: fprintf(stderr, "PP: entering CONTENT\n");
9310: #endif
9311: break;
9312: }
1.128 daniel 9313: case XML_PARSER_CONTENT:
1.140 daniel 9314: /*
9315: * Handle preparsed entities and charRef
9316: */
9317: if (ctxt->token != 0) {
9318: xmlChar cur[2] = { 0 , 0 } ;
9319:
9320: cur[0] = (xmlChar) ctxt->token;
1.171 daniel 9321: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9322: (ctxt->sax->characters != NULL))
1.140 daniel 9323: ctxt->sax->characters(ctxt->userData, cur, 1);
9324: ctxt->token = 0;
9325: }
1.184 daniel 9326: if ((avail < 2) && (ctxt->inputNr == 1))
1.140 daniel 9327: goto done;
1.184 daniel 9328: cur = ctxt->input->cur[0];
9329: next = ctxt->input->cur[1];
1.140 daniel 9330: if ((cur == '<') && (next == '?')) {
1.143 daniel 9331: if ((!terminate) &&
9332: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 9333: goto done;
9334: #ifdef DEBUG_PUSH
9335: fprintf(stderr, "PP: Parsing PI\n");
9336: #endif
9337: xmlParsePI(ctxt);
9338: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 9339: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 9340: if ((!terminate) &&
9341: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 9342: goto done;
9343: #ifdef DEBUG_PUSH
9344: fprintf(stderr, "PP: Parsing Comment\n");
9345: #endif
9346: xmlParseComment(ctxt);
9347: ctxt->instate = XML_PARSER_CONTENT;
1.184 daniel 9348: } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9349: (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
9350: (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
9351: (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
9352: (ctxt->input->cur[8] == '[')) {
1.140 daniel 9353: SKIP(9);
9354: ctxt->instate = XML_PARSER_CDATA_SECTION;
9355: #ifdef DEBUG_PUSH
9356: fprintf(stderr, "PP: entering CDATA_SECTION\n");
9357: #endif
9358: break;
9359: } else if ((cur == '<') && (next == '!') &&
9360: (avail < 9)) {
9361: goto done;
9362: } else if ((cur == '<') && (next == '/')) {
9363: ctxt->instate = XML_PARSER_END_TAG;
9364: #ifdef DEBUG_PUSH
9365: fprintf(stderr, "PP: entering END_TAG\n");
9366: #endif
9367: break;
9368: } else if (cur == '<') {
9369: ctxt->instate = XML_PARSER_START_TAG;
9370: #ifdef DEBUG_PUSH
9371: fprintf(stderr, "PP: entering START_TAG\n");
9372: #endif
9373: break;
9374: } else if (cur == '&') {
1.143 daniel 9375: if ((!terminate) &&
9376: (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
1.140 daniel 9377: goto done;
9378: #ifdef DEBUG_PUSH
9379: fprintf(stderr, "PP: Parsing Reference\n");
9380: #endif
9381: /* TODO: check generation of subtrees if noent !!! */
9382: xmlParseReference(ctxt);
9383: } else {
1.156 daniel 9384: /* TODO Avoid the extra copy, handle directly !!! */
1.140 daniel 9385: /*
1.181 daniel 9386: * Goal of the following test is:
1.140 daniel 9387: * - minimize calls to the SAX 'character' callback
9388: * when they are mergeable
9389: * - handle a problem for isBlank when we only parse
9390: * a sequence of blank chars and the next one is
9391: * not available to check against '<' presence.
9392: * - tries to homogenize the differences in SAX
9393: * callbacks between the push and pull versions
9394: * of the parser.
9395: */
9396: if ((ctxt->inputNr == 1) &&
9397: (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
1.143 daniel 9398: if ((!terminate) &&
9399: (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
1.140 daniel 9400: goto done;
9401: }
9402: ctxt->checkIndex = 0;
9403: #ifdef DEBUG_PUSH
9404: fprintf(stderr, "PP: Parsing char data\n");
9405: #endif
9406: xmlParseCharData(ctxt, 0);
9407: }
9408: /*
9409: * Pop-up of finished entities.
9410: */
1.152 daniel 9411: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 9412: xmlPopInput(ctxt);
9413: break;
9414: case XML_PARSER_CDATA_SECTION: {
9415: /*
9416: * The Push mode need to have the SAX callback for
9417: * cdataBlock merge back contiguous callbacks.
9418: */
9419: int base;
9420:
9421: base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9422: if (base < 0) {
9423: if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
1.171 daniel 9424: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.140 daniel 9425: if (ctxt->sax->cdataBlock != NULL)
1.184 daniel 9426: ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
1.140 daniel 9427: XML_PARSER_BIG_BUFFER_SIZE);
9428: }
9429: SKIP(XML_PARSER_BIG_BUFFER_SIZE);
9430: ctxt->checkIndex = 0;
9431: }
9432: goto done;
9433: } else {
1.171 daniel 9434: if ((ctxt->sax != NULL) && (base > 0) &&
9435: (!ctxt->disableSAX)) {
1.140 daniel 9436: if (ctxt->sax->cdataBlock != NULL)
9437: ctxt->sax->cdataBlock(ctxt->userData,
1.184 daniel 9438: ctxt->input->cur, base);
1.140 daniel 9439: }
9440: SKIP(base + 3);
9441: ctxt->checkIndex = 0;
9442: ctxt->instate = XML_PARSER_CONTENT;
9443: #ifdef DEBUG_PUSH
9444: fprintf(stderr, "PP: entering CONTENT\n");
9445: #endif
9446: }
9447: break;
9448: }
1.141 daniel 9449: case XML_PARSER_END_TAG:
1.140 daniel 9450: if (avail < 2)
9451: goto done;
1.143 daniel 9452: if ((!terminate) &&
9453: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 9454: goto done;
9455: xmlParseEndTag(ctxt);
9456: if (ctxt->name == NULL) {
9457: ctxt->instate = XML_PARSER_EPILOG;
9458: #ifdef DEBUG_PUSH
9459: fprintf(stderr, "PP: entering EPILOG\n");
9460: #endif
9461: } else {
9462: ctxt->instate = XML_PARSER_CONTENT;
9463: #ifdef DEBUG_PUSH
9464: fprintf(stderr, "PP: entering CONTENT\n");
9465: #endif
9466: }
9467: break;
9468: case XML_PARSER_DTD: {
9469: /*
9470: * Sorry but progressive parsing of the internal subset
9471: * is not expected to be supported. We first check that
9472: * the full content of the internal subset is available and
9473: * the parsing is launched only at that point.
9474: * Internal subset ends up with "']' S? '>'" in an unescaped
9475: * section and not in a ']]>' sequence which are conditional
9476: * sections (whoever argued to keep that crap in XML deserve
9477: * a place in hell !).
9478: */
9479: int base, i;
9480: xmlChar *buf;
9481: xmlChar quote = 0;
9482:
1.184 daniel 9483: base = ctxt->input->cur - ctxt->input->base;
1.140 daniel 9484: if (base < 0) return(0);
9485: if (ctxt->checkIndex > base)
9486: base = ctxt->checkIndex;
1.184 daniel 9487: buf = ctxt->input->buf->buffer->content;
1.202 daniel 9488: for (;(unsigned int) base < ctxt->input->buf->buffer->use;
9489: base++) {
1.140 daniel 9490: if (quote != 0) {
9491: if (buf[base] == quote)
9492: quote = 0;
9493: continue;
9494: }
9495: if (buf[base] == '"') {
9496: quote = '"';
9497: continue;
9498: }
9499: if (buf[base] == '\'') {
9500: quote = '\'';
9501: continue;
9502: }
9503: if (buf[base] == ']') {
1.202 daniel 9504: if ((unsigned int) base +1 >=
9505: ctxt->input->buf->buffer->use)
1.140 daniel 9506: break;
9507: if (buf[base + 1] == ']') {
9508: /* conditional crap, skip both ']' ! */
9509: base++;
9510: continue;
9511: }
1.202 daniel 9512: for (i = 0;
9513: (unsigned int) base + i < ctxt->input->buf->buffer->use;
9514: i++) {
1.140 daniel 9515: if (buf[base + i] == '>')
9516: goto found_end_int_subset;
9517: }
9518: break;
9519: }
9520: }
9521: /*
9522: * We didn't find the end of the Internal subset
9523: */
9524: if (quote == 0)
9525: ctxt->checkIndex = base;
9526: #ifdef DEBUG_PUSH
9527: if (next == 0)
9528: fprintf(stderr, "PP: lookup of int subset end filed\n");
9529: #endif
9530: goto done;
9531:
9532: found_end_int_subset:
9533: xmlParseInternalSubset(ctxt);
1.166 daniel 9534: ctxt->inSubset = 2;
1.171 daniel 9535: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 9536: (ctxt->sax->externalSubset != NULL))
9537: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9538: ctxt->extSubSystem, ctxt->extSubURI);
9539: ctxt->inSubset = 0;
1.140 daniel 9540: ctxt->instate = XML_PARSER_PROLOG;
9541: ctxt->checkIndex = 0;
9542: #ifdef DEBUG_PUSH
9543: fprintf(stderr, "PP: entering PROLOG\n");
9544: #endif
9545: break;
9546: }
9547: case XML_PARSER_COMMENT:
9548: fprintf(stderr, "PP: internal error, state == COMMENT\n");
9549: ctxt->instate = XML_PARSER_CONTENT;
9550: #ifdef DEBUG_PUSH
9551: fprintf(stderr, "PP: entering CONTENT\n");
9552: #endif
9553: break;
9554: case XML_PARSER_PI:
9555: fprintf(stderr, "PP: internal error, state == PI\n");
9556: ctxt->instate = XML_PARSER_CONTENT;
9557: #ifdef DEBUG_PUSH
9558: fprintf(stderr, "PP: entering CONTENT\n");
9559: #endif
9560: break;
1.128 daniel 9561: case XML_PARSER_ENTITY_DECL:
1.140 daniel 9562: fprintf(stderr, "PP: internal error, state == ENTITY_DECL\n");
9563: ctxt->instate = XML_PARSER_DTD;
9564: #ifdef DEBUG_PUSH
9565: fprintf(stderr, "PP: entering DTD\n");
9566: #endif
9567: break;
1.128 daniel 9568: case XML_PARSER_ENTITY_VALUE:
1.140 daniel 9569: fprintf(stderr, "PP: internal error, state == ENTITY_VALUE\n");
9570: ctxt->instate = XML_PARSER_CONTENT;
9571: #ifdef DEBUG_PUSH
9572: fprintf(stderr, "PP: entering DTD\n");
9573: #endif
9574: break;
1.128 daniel 9575: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 9576: fprintf(stderr, "PP: internal error, state == ATTRIBUTE_VALUE\n");
1.168 daniel 9577: ctxt->instate = XML_PARSER_START_TAG;
9578: #ifdef DEBUG_PUSH
9579: fprintf(stderr, "PP: entering START_TAG\n");
9580: #endif
9581: break;
9582: case XML_PARSER_SYSTEM_LITERAL:
9583: fprintf(stderr, "PP: internal error, state == SYSTEM_LITERAL\n");
1.140 daniel 9584: ctxt->instate = XML_PARSER_START_TAG;
9585: #ifdef DEBUG_PUSH
9586: fprintf(stderr, "PP: entering START_TAG\n");
9587: #endif
9588: break;
1.128 daniel 9589: }
9590: }
1.140 daniel 9591: done:
9592: #ifdef DEBUG_PUSH
9593: fprintf(stderr, "PP: done %d\n", ret);
9594: #endif
1.128 daniel 9595: return(ret);
9596: }
9597:
9598: /**
1.143 daniel 9599: * xmlParseTry:
9600: * @ctxt: an XML parser context
9601: *
9602: * Try to progress on parsing
9603: *
9604: * Returns zero if no parsing was possible
9605: */
9606: int
9607: xmlParseTry(xmlParserCtxtPtr ctxt) {
9608: return(xmlParseTryOrFinish(ctxt, 0));
9609: }
9610:
9611: /**
1.128 daniel 9612: * xmlParseChunk:
9613: * @ctxt: an XML parser context
9614: * @chunk: an char array
9615: * @size: the size in byte of the chunk
9616: * @terminate: last chunk indicator
9617: *
9618: * Parse a Chunk of memory
9619: *
9620: * Returns zero if no error, the xmlParserErrors otherwise.
9621: */
1.140 daniel 9622: int
1.128 daniel 9623: xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
9624: int terminate) {
1.132 daniel 9625: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
1.140 daniel 9626: (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
9627: int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9628: int cur = ctxt->input->cur - ctxt->input->base;
9629:
1.132 daniel 9630: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
1.140 daniel 9631: ctxt->input->base = ctxt->input->buf->buffer->content + base;
9632: ctxt->input->cur = ctxt->input->base + cur;
9633: #ifdef DEBUG_PUSH
9634: fprintf(stderr, "PP: pushed %d\n", size);
9635: #endif
9636:
1.150 daniel 9637: if ((terminate) || (ctxt->input->buf->buffer->use > 80))
9638: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 9639: } else if (ctxt->instate != XML_PARSER_EOF)
1.143 daniel 9640: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 9641: if (terminate) {
1.151 daniel 9642: /*
9643: * Check for termination
9644: */
1.140 daniel 9645: if ((ctxt->instate != XML_PARSER_EOF) &&
9646: (ctxt->instate != XML_PARSER_EPILOG)) {
9647: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9648: ctxt->sax->error(ctxt->userData,
9649: "Extra content at the end of the document\n");
9650: ctxt->wellFormed = 0;
1.180 daniel 9651: ctxt->disableSAX = 1;
1.140 daniel 9652: ctxt->errNo = XML_ERR_DOCUMENT_END;
9653: }
9654: if (ctxt->instate != XML_PARSER_EOF) {
1.171 daniel 9655: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9656: (!ctxt->disableSAX))
1.140 daniel 9657: ctxt->sax->endDocument(ctxt->userData);
9658: }
9659: ctxt->instate = XML_PARSER_EOF;
1.128 daniel 9660: }
9661: return((xmlParserErrors) ctxt->errNo);
9662: }
9663:
9664: /************************************************************************
9665: * *
1.98 daniel 9666: * I/O front end functions to the parser *
9667: * *
9668: ************************************************************************/
1.201 daniel 9669:
9670: /**
9671: * xmlCreatePushParserCtxt:
9672: * @ctxt: an XML parser context
9673: *
9674: * Blocks further parser processing
9675: */
9676: void
9677: xmlStopParser(xmlParserCtxtPtr ctxt) {
9678: ctxt->instate = XML_PARSER_EOF;
9679: if (ctxt->input != NULL)
9680: ctxt->input->cur = BAD_CAST"";
9681: }
1.98 daniel 9682:
1.50 daniel 9683: /**
1.181 daniel 9684: * xmlCreatePushParserCtxt:
1.140 daniel 9685: * @sax: a SAX handler
9686: * @user_data: The user data returned on SAX callbacks
9687: * @chunk: a pointer to an array of chars
9688: * @size: number of chars in the array
9689: * @filename: an optional file name or URI
9690: *
9691: * Create a parser context for using the XML parser in push mode
9692: * To allow content encoding detection, @size should be >= 4
9693: * The value of @filename is used for fetching external entities
9694: * and error/warning reports.
9695: *
9696: * Returns the new parser context or NULL
9697: */
9698: xmlParserCtxtPtr
9699: xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9700: const char *chunk, int size, const char *filename) {
9701: xmlParserCtxtPtr ctxt;
9702: xmlParserInputPtr inputStream;
9703: xmlParserInputBufferPtr buf;
9704: xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9705:
9706: /*
1.156 daniel 9707: * plug some encoding conversion routines
1.140 daniel 9708: */
9709: if ((chunk != NULL) && (size >= 4))
1.156 daniel 9710: enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
1.140 daniel 9711:
9712: buf = xmlAllocParserInputBuffer(enc);
9713: if (buf == NULL) return(NULL);
9714:
9715: ctxt = xmlNewParserCtxt();
9716: if (ctxt == NULL) {
9717: xmlFree(buf);
9718: return(NULL);
9719: }
9720: if (sax != NULL) {
9721: if (ctxt->sax != &xmlDefaultSAXHandler)
9722: xmlFree(ctxt->sax);
9723: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9724: if (ctxt->sax == NULL) {
9725: xmlFree(buf);
9726: xmlFree(ctxt);
9727: return(NULL);
9728: }
9729: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9730: if (user_data != NULL)
9731: ctxt->userData = user_data;
9732: }
9733: if (filename == NULL) {
9734: ctxt->directory = NULL;
9735: } else {
9736: ctxt->directory = xmlParserGetDirectory(filename);
9737: }
9738:
9739: inputStream = xmlNewInputStream(ctxt);
9740: if (inputStream == NULL) {
9741: xmlFreeParserCtxt(ctxt);
9742: return(NULL);
9743: }
9744:
9745: if (filename == NULL)
9746: inputStream->filename = NULL;
9747: else
9748: inputStream->filename = xmlMemStrdup(filename);
9749: inputStream->buf = buf;
9750: inputStream->base = inputStream->buf->buffer->content;
9751: inputStream->cur = inputStream->buf->buffer->content;
1.156 daniel 9752: if (enc != XML_CHAR_ENCODING_NONE) {
9753: xmlSwitchEncoding(ctxt, enc);
9754: }
1.140 daniel 9755:
9756: inputPush(ctxt, inputStream);
9757:
9758: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9759: (ctxt->input->buf != NULL)) {
9760: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9761: #ifdef DEBUG_PUSH
9762: fprintf(stderr, "PP: pushed %d\n", size);
9763: #endif
9764: }
1.190 daniel 9765:
9766: return(ctxt);
9767: }
9768:
9769: /**
9770: * xmlCreateIOParserCtxt:
9771: * @sax: a SAX handler
9772: * @user_data: The user data returned on SAX callbacks
9773: * @ioread: an I/O read function
9774: * @ioclose: an I/O close function
9775: * @ioctx: an I/O handler
9776: * @enc: the charset encoding if known
9777: *
9778: * Create a parser context for using the XML parser with an existing
9779: * I/O stream
9780: *
9781: * Returns the new parser context or NULL
9782: */
9783: xmlParserCtxtPtr
9784: xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9785: xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9786: void *ioctx, xmlCharEncoding enc) {
9787: xmlParserCtxtPtr ctxt;
9788: xmlParserInputPtr inputStream;
9789: xmlParserInputBufferPtr buf;
9790:
9791: buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9792: if (buf == NULL) return(NULL);
9793:
9794: ctxt = xmlNewParserCtxt();
9795: if (ctxt == NULL) {
9796: xmlFree(buf);
9797: return(NULL);
9798: }
9799: if (sax != NULL) {
9800: if (ctxt->sax != &xmlDefaultSAXHandler)
9801: xmlFree(ctxt->sax);
9802: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9803: if (ctxt->sax == NULL) {
9804: xmlFree(buf);
9805: xmlFree(ctxt);
9806: return(NULL);
9807: }
9808: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9809: if (user_data != NULL)
9810: ctxt->userData = user_data;
9811: }
9812:
9813: inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9814: if (inputStream == NULL) {
9815: xmlFreeParserCtxt(ctxt);
9816: return(NULL);
9817: }
9818: inputPush(ctxt, inputStream);
1.140 daniel 9819:
9820: return(ctxt);
9821: }
9822:
9823: /**
1.181 daniel 9824: * xmlCreateDocParserCtxt:
1.123 daniel 9825: * @cur: a pointer to an array of xmlChar
1.50 daniel 9826: *
1.192 daniel 9827: * Creates a parser context for an XML in-memory document.
1.69 daniel 9828: *
9829: * Returns the new parser context or NULL
1.16 daniel 9830: */
1.69 daniel 9831: xmlParserCtxtPtr
1.123 daniel 9832: xmlCreateDocParserCtxt(xmlChar *cur) {
1.16 daniel 9833: xmlParserCtxtPtr ctxt;
1.40 daniel 9834: xmlParserInputPtr input;
1.16 daniel 9835:
1.97 daniel 9836: ctxt = xmlNewParserCtxt();
1.16 daniel 9837: if (ctxt == NULL) {
9838: return(NULL);
9839: }
1.96 daniel 9840: input = xmlNewInputStream(ctxt);
1.40 daniel 9841: if (input == NULL) {
1.97 daniel 9842: xmlFreeParserCtxt(ctxt);
1.40 daniel 9843: return(NULL);
9844: }
9845:
9846: input->base = cur;
9847: input->cur = cur;
9848:
9849: inputPush(ctxt, input);
1.69 daniel 9850: return(ctxt);
9851: }
9852:
9853: /**
1.181 daniel 9854: * xmlSAXParseDoc:
1.69 daniel 9855: * @sax: the SAX handler block
1.123 daniel 9856: * @cur: a pointer to an array of xmlChar
1.69 daniel 9857: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9858: * documents
9859: *
9860: * parse an XML in-memory document and build a tree.
9861: * It use the given SAX function block to handle the parsing callback.
9862: * If sax is NULL, fallback to the default DOM tree building routines.
9863: *
9864: * Returns the resulting document tree
9865: */
9866:
9867: xmlDocPtr
1.123 daniel 9868: xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
1.69 daniel 9869: xmlDocPtr ret;
9870: xmlParserCtxtPtr ctxt;
9871:
9872: if (cur == NULL) return(NULL);
1.16 daniel 9873:
9874:
1.69 daniel 9875: ctxt = xmlCreateDocParserCtxt(cur);
9876: if (ctxt == NULL) return(NULL);
1.74 daniel 9877: if (sax != NULL) {
9878: ctxt->sax = sax;
9879: ctxt->userData = NULL;
9880: }
1.69 daniel 9881:
1.16 daniel 9882: xmlParseDocument(ctxt);
1.72 daniel 9883: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 9884: else {
9885: ret = NULL;
1.72 daniel 9886: xmlFreeDoc(ctxt->myDoc);
9887: ctxt->myDoc = NULL;
1.59 daniel 9888: }
1.86 daniel 9889: if (sax != NULL)
9890: ctxt->sax = NULL;
1.69 daniel 9891: xmlFreeParserCtxt(ctxt);
1.16 daniel 9892:
1.1 veillard 9893: return(ret);
9894: }
9895:
1.50 daniel 9896: /**
1.181 daniel 9897: * xmlParseDoc:
1.123 daniel 9898: * @cur: a pointer to an array of xmlChar
1.55 daniel 9899: *
9900: * parse an XML in-memory document and build a tree.
9901: *
1.68 daniel 9902: * Returns the resulting document tree
1.55 daniel 9903: */
9904:
1.69 daniel 9905: xmlDocPtr
1.123 daniel 9906: xmlParseDoc(xmlChar *cur) {
1.59 daniel 9907: return(xmlSAXParseDoc(NULL, cur, 0));
1.76 daniel 9908: }
9909:
9910: /**
1.181 daniel 9911: * xmlSAXParseDTD:
1.76 daniel 9912: * @sax: the SAX handler block
9913: * @ExternalID: a NAME* containing the External ID of the DTD
9914: * @SystemID: a NAME* containing the URL to the DTD
9915: *
9916: * Load and parse an external subset.
9917: *
9918: * Returns the resulting xmlDtdPtr or NULL in case of error.
9919: */
9920:
9921: xmlDtdPtr
1.123 daniel 9922: xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9923: const xmlChar *SystemID) {
1.76 daniel 9924: xmlDtdPtr ret = NULL;
9925: xmlParserCtxtPtr ctxt;
1.83 daniel 9926: xmlParserInputPtr input = NULL;
1.76 daniel 9927: xmlCharEncoding enc;
9928:
9929: if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9930:
1.97 daniel 9931: ctxt = xmlNewParserCtxt();
1.76 daniel 9932: if (ctxt == NULL) {
9933: return(NULL);
9934: }
9935:
9936: /*
9937: * Set-up the SAX context
9938: */
9939: if (ctxt == NULL) return(NULL);
9940: if (sax != NULL) {
1.93 veillard 9941: if (ctxt->sax != NULL)
1.119 daniel 9942: xmlFree(ctxt->sax);
1.76 daniel 9943: ctxt->sax = sax;
9944: ctxt->userData = NULL;
9945: }
9946:
9947: /*
9948: * Ask the Entity resolver to load the damn thing
9949: */
9950:
9951: if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9952: input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9953: if (input == NULL) {
1.86 daniel 9954: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 9955: xmlFreeParserCtxt(ctxt);
9956: return(NULL);
9957: }
9958:
9959: /*
1.156 daniel 9960: * plug some encoding conversion routines here.
1.76 daniel 9961: */
9962: xmlPushInput(ctxt, input);
1.156 daniel 9963: enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
1.76 daniel 9964: xmlSwitchEncoding(ctxt, enc);
9965:
1.95 veillard 9966: if (input->filename == NULL)
1.156 daniel 9967: input->filename = (char *) xmlStrdup(SystemID);
1.76 daniel 9968: input->line = 1;
9969: input->col = 1;
9970: input->base = ctxt->input->cur;
9971: input->cur = ctxt->input->cur;
9972: input->free = NULL;
9973:
9974: /*
9975: * let's parse that entity knowing it's an external subset.
9976: */
1.191 daniel 9977: ctxt->inSubset = 2;
9978: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9979: ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9980: ExternalID, SystemID);
1.79 daniel 9981: xmlParseExternalSubset(ctxt, ExternalID, SystemID);
1.76 daniel 9982:
9983: if (ctxt->myDoc != NULL) {
9984: if (ctxt->wellFormed) {
1.191 daniel 9985: ret = ctxt->myDoc->extSubset;
9986: ctxt->myDoc->extSubset = NULL;
1.76 daniel 9987: } else {
9988: ret = NULL;
9989: }
9990: xmlFreeDoc(ctxt->myDoc);
9991: ctxt->myDoc = NULL;
9992: }
1.86 daniel 9993: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 9994: xmlFreeParserCtxt(ctxt);
9995:
9996: return(ret);
9997: }
9998:
9999: /**
1.181 daniel 10000: * xmlParseDTD:
1.76 daniel 10001: * @ExternalID: a NAME* containing the External ID of the DTD
10002: * @SystemID: a NAME* containing the URL to the DTD
10003: *
10004: * Load and parse an external subset.
10005: *
10006: * Returns the resulting xmlDtdPtr or NULL in case of error.
10007: */
10008:
10009: xmlDtdPtr
1.123 daniel 10010: xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
1.76 daniel 10011: return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
1.59 daniel 10012: }
10013:
10014: /**
1.181 daniel 10015: * xmlSAXParseBalancedChunk:
1.144 daniel 10016: * @ctx: an XML parser context (possibly NULL)
10017: * @sax: the SAX handler bloc (possibly NULL)
10018: * @user_data: The user data returned on SAX callbacks (possibly NULL)
10019: * @input: a parser input stream
10020: * @enc: the encoding
10021: *
10022: * Parse a well-balanced chunk of an XML document
10023: * The user has to provide SAX callback block whose routines will be
10024: * called by the parser
10025: * The allowed sequence for the Well Balanced Chunk is the one defined by
10026: * the content production in the XML grammar:
10027: *
10028: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10029: *
1.176 daniel 10030: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
1.144 daniel 10031: * the error code otherwise
10032: */
10033:
10034: int
10035: xmlSAXParseBalancedChunk(xmlParserCtxtPtr ctx, xmlSAXHandlerPtr sax,
10036: void *user_data, xmlParserInputPtr input,
10037: xmlCharEncoding enc) {
10038: xmlParserCtxtPtr ctxt;
10039: int ret;
10040:
10041: if (input == NULL) return(-1);
10042:
10043: if (ctx != NULL)
10044: ctxt = ctx;
10045: else {
10046: ctxt = xmlNewParserCtxt();
10047: if (ctxt == NULL)
10048: return(-1);
10049: if (sax == NULL)
10050: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10051: }
10052:
10053: /*
10054: * Set-up the SAX context
10055: */
10056: if (sax != NULL) {
10057: if (ctxt->sax != NULL)
10058: xmlFree(ctxt->sax);
10059: ctxt->sax = sax;
10060: ctxt->userData = user_data;
10061: }
10062:
10063: /*
10064: * plug some encoding conversion routines here.
10065: */
10066: xmlPushInput(ctxt, input);
10067: if (enc != XML_CHAR_ENCODING_NONE)
10068: xmlSwitchEncoding(ctxt, enc);
10069:
10070: /*
10071: * let's parse that entity knowing it's an external subset.
10072: */
10073: xmlParseContent(ctxt);
10074: ret = ctxt->errNo;
10075:
10076: if (ctx == NULL) {
10077: if (sax != NULL)
10078: ctxt->sax = NULL;
10079: else
10080: xmlFreeDoc(ctxt->myDoc);
10081: xmlFreeParserCtxt(ctxt);
10082: }
10083: return(ret);
10084: }
10085:
10086: /**
1.181 daniel 10087: * xmlParseExternalEntity:
10088: * @doc: the document the chunk pertains to
10089: * @sax: the SAX handler bloc (possibly NULL)
10090: * @user_data: The user data returned on SAX callbacks (possibly NULL)
1.185 daniel 10091: * @depth: Used for loop detection, use 0
1.181 daniel 10092: * @URL: the URL for the entity to load
10093: * @ID: the System ID for the entity to load
10094: * @list: the return value for the set of parsed nodes
10095: *
10096: * Parse an external general entity
10097: * An external general parsed entity is well-formed if it matches the
10098: * production labeled extParsedEnt.
10099: *
10100: * [78] extParsedEnt ::= TextDecl? content
10101: *
10102: * Returns 0 if the entity is well formed, -1 in case of args problem and
10103: * the parser error code otherwise
10104: */
10105:
10106: int
10107: xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
1.185 daniel 10108: int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
1.181 daniel 10109: xmlParserCtxtPtr ctxt;
10110: xmlDocPtr newDoc;
10111: xmlSAXHandlerPtr oldsax = NULL;
10112: int ret = 0;
10113:
1.185 daniel 10114: if (depth > 40) {
10115: return(XML_ERR_ENTITY_LOOP);
10116: }
10117:
10118:
1.181 daniel 10119:
10120: if (list != NULL)
10121: *list = NULL;
10122: if ((URL == NULL) && (ID == NULL))
10123: return(-1);
10124:
10125:
10126: ctxt = xmlCreateEntityParserCtxt(URL, ID, doc->URL);
10127: if (ctxt == NULL) return(-1);
10128: ctxt->userData = ctxt;
10129: if (sax != NULL) {
10130: oldsax = ctxt->sax;
10131: ctxt->sax = sax;
10132: if (user_data != NULL)
10133: ctxt->userData = user_data;
10134: }
10135: newDoc = xmlNewDoc(BAD_CAST "1.0");
10136: if (newDoc == NULL) {
10137: xmlFreeParserCtxt(ctxt);
10138: return(-1);
10139: }
10140: if (doc != NULL) {
10141: newDoc->intSubset = doc->intSubset;
10142: newDoc->extSubset = doc->extSubset;
10143: }
10144: if (doc->URL != NULL) {
10145: newDoc->URL = xmlStrdup(doc->URL);
10146: }
10147: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10148: if (newDoc->children == NULL) {
10149: if (sax != NULL)
10150: ctxt->sax = oldsax;
10151: xmlFreeParserCtxt(ctxt);
10152: newDoc->intSubset = NULL;
10153: newDoc->extSubset = NULL;
10154: xmlFreeDoc(newDoc);
10155: return(-1);
10156: }
10157: nodePush(ctxt, newDoc->children);
10158: if (doc == NULL) {
10159: ctxt->myDoc = newDoc;
10160: } else {
10161: ctxt->myDoc = doc;
10162: newDoc->children->doc = doc;
10163: }
10164:
10165: /*
10166: * Parse a possible text declaration first
10167: */
10168: GROW;
10169: if ((RAW == '<') && (NXT(1) == '?') &&
10170: (NXT(2) == 'x') && (NXT(3) == 'm') &&
10171: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
10172: xmlParseTextDecl(ctxt);
10173: }
10174:
10175: /*
10176: * Doing validity checking on chunk doesn't make sense
10177: */
10178: ctxt->instate = XML_PARSER_CONTENT;
10179: ctxt->validate = 0;
1.185 daniel 10180: ctxt->depth = depth;
1.181 daniel 10181:
10182: xmlParseContent(ctxt);
10183:
10184: if ((RAW == '<') && (NXT(1) == '/')) {
10185: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10186: ctxt->sax->error(ctxt->userData,
10187: "chunk is not well balanced\n");
10188: ctxt->wellFormed = 0;
10189: ctxt->disableSAX = 1;
10190: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10191: } else if (RAW != 0) {
10192: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10193: ctxt->sax->error(ctxt->userData,
10194: "extra content at the end of well balanced chunk\n");
10195: ctxt->wellFormed = 0;
10196: ctxt->disableSAX = 1;
10197: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10198: }
10199: if (ctxt->node != newDoc->children) {
10200: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10201: ctxt->sax->error(ctxt->userData,
10202: "chunk is not well balanced\n");
10203: ctxt->wellFormed = 0;
10204: ctxt->disableSAX = 1;
10205: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10206: }
10207:
10208: if (!ctxt->wellFormed) {
10209: if (ctxt->errNo == 0)
10210: ret = 1;
10211: else
10212: ret = ctxt->errNo;
10213: } else {
10214: if (list != NULL) {
10215: xmlNodePtr cur;
10216:
10217: /*
10218: * Return the newly created nodeset after unlinking it from
10219: * they pseudo parent.
10220: */
10221: cur = newDoc->children->children;
10222: *list = cur;
10223: while (cur != NULL) {
10224: cur->parent = NULL;
10225: cur = cur->next;
10226: }
10227: newDoc->children->children = NULL;
10228: }
10229: ret = 0;
10230: }
10231: if (sax != NULL)
10232: ctxt->sax = oldsax;
10233: xmlFreeParserCtxt(ctxt);
10234: newDoc->intSubset = NULL;
10235: newDoc->extSubset = NULL;
10236: xmlFreeDoc(newDoc);
10237:
10238: return(ret);
10239: }
10240:
10241: /**
10242: * xmlParseBalancedChunk:
1.176 daniel 10243: * @doc: the document the chunk pertains to
10244: * @sax: the SAX handler bloc (possibly NULL)
10245: * @user_data: The user data returned on SAX callbacks (possibly NULL)
1.185 daniel 10246: * @depth: Used for loop detection, use 0
1.176 daniel 10247: * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10248: * @list: the return value for the set of parsed nodes
10249: *
10250: * Parse a well-balanced chunk of an XML document
10251: * called by the parser
10252: * The allowed sequence for the Well Balanced Chunk is the one defined by
10253: * the content production in the XML grammar:
1.144 daniel 10254: *
1.175 daniel 10255: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10256: *
1.176 daniel 10257: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10258: * the parser error code otherwise
1.144 daniel 10259: */
10260:
1.175 daniel 10261: int
10262: xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
1.185 daniel 10263: void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
1.176 daniel 10264: xmlParserCtxtPtr ctxt;
1.175 daniel 10265: xmlDocPtr newDoc;
1.181 daniel 10266: xmlSAXHandlerPtr oldsax = NULL;
1.175 daniel 10267: int size;
1.176 daniel 10268: int ret = 0;
1.175 daniel 10269:
1.185 daniel 10270: if (depth > 40) {
10271: return(XML_ERR_ENTITY_LOOP);
10272: }
10273:
1.175 daniel 10274:
1.176 daniel 10275: if (list != NULL)
10276: *list = NULL;
10277: if (string == NULL)
10278: return(-1);
10279:
10280: size = xmlStrlen(string);
10281:
1.183 daniel 10282: ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
1.176 daniel 10283: if (ctxt == NULL) return(-1);
10284: ctxt->userData = ctxt;
1.175 daniel 10285: if (sax != NULL) {
1.176 daniel 10286: oldsax = ctxt->sax;
10287: ctxt->sax = sax;
10288: if (user_data != NULL)
10289: ctxt->userData = user_data;
1.175 daniel 10290: }
10291: newDoc = xmlNewDoc(BAD_CAST "1.0");
1.176 daniel 10292: if (newDoc == NULL) {
10293: xmlFreeParserCtxt(ctxt);
10294: return(-1);
10295: }
1.175 daniel 10296: if (doc != NULL) {
10297: newDoc->intSubset = doc->intSubset;
10298: newDoc->extSubset = doc->extSubset;
10299: }
1.176 daniel 10300: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10301: if (newDoc->children == NULL) {
10302: if (sax != NULL)
10303: ctxt->sax = oldsax;
10304: xmlFreeParserCtxt(ctxt);
10305: newDoc->intSubset = NULL;
10306: newDoc->extSubset = NULL;
10307: xmlFreeDoc(newDoc);
10308: return(-1);
10309: }
10310: nodePush(ctxt, newDoc->children);
10311: if (doc == NULL) {
10312: ctxt->myDoc = newDoc;
10313: } else {
10314: ctxt->myDoc = doc;
10315: newDoc->children->doc = doc;
10316: }
10317: ctxt->instate = XML_PARSER_CONTENT;
1.185 daniel 10318: ctxt->depth = depth;
1.176 daniel 10319:
10320: /*
10321: * Doing validity checking on chunk doesn't make sense
10322: */
10323: ctxt->validate = 0;
10324:
1.175 daniel 10325: xmlParseContent(ctxt);
1.176 daniel 10326:
10327: if ((RAW == '<') && (NXT(1) == '/')) {
10328: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10329: ctxt->sax->error(ctxt->userData,
10330: "chunk is not well balanced\n");
10331: ctxt->wellFormed = 0;
1.180 daniel 10332: ctxt->disableSAX = 1;
1.176 daniel 10333: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10334: } else if (RAW != 0) {
10335: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10336: ctxt->sax->error(ctxt->userData,
10337: "extra content at the end of well balanced chunk\n");
10338: ctxt->wellFormed = 0;
1.180 daniel 10339: ctxt->disableSAX = 1;
1.176 daniel 10340: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10341: }
10342: if (ctxt->node != newDoc->children) {
10343: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10344: ctxt->sax->error(ctxt->userData,
10345: "chunk is not well balanced\n");
10346: ctxt->wellFormed = 0;
1.180 daniel 10347: ctxt->disableSAX = 1;
1.176 daniel 10348: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10349: }
1.175 daniel 10350:
1.176 daniel 10351: if (!ctxt->wellFormed) {
10352: if (ctxt->errNo == 0)
10353: ret = 1;
10354: else
10355: ret = ctxt->errNo;
10356: } else {
10357: if (list != NULL) {
10358: xmlNodePtr cur;
1.175 daniel 10359:
1.176 daniel 10360: /*
10361: * Return the newly created nodeset after unlinking it from
10362: * they pseudo parent.
10363: */
10364: cur = newDoc->children->children;
10365: *list = cur;
10366: while (cur != NULL) {
10367: cur->parent = NULL;
10368: cur = cur->next;
10369: }
10370: newDoc->children->children = NULL;
10371: }
10372: ret = 0;
1.175 daniel 10373: }
1.176 daniel 10374: if (sax != NULL)
10375: ctxt->sax = oldsax;
1.175 daniel 10376: xmlFreeParserCtxt(ctxt);
10377: newDoc->intSubset = NULL;
10378: newDoc->extSubset = NULL;
1.176 daniel 10379: xmlFreeDoc(newDoc);
1.175 daniel 10380:
1.176 daniel 10381: return(ret);
1.144 daniel 10382: }
10383:
10384: /**
1.181 daniel 10385: * xmlParseBalancedChunkFile:
1.144 daniel 10386: * @doc: the document the chunk pertains to
10387: *
10388: * Parse a well-balanced chunk of an XML document contained in a file
10389: *
10390: * Returns the resulting list of nodes resulting from the parsing,
10391: * they are not added to @node
10392: */
10393:
10394: xmlNodePtr
10395: xmlParseBalancedChunkFile(xmlDocPtr doc, xmlNodePtr node) {
1.156 daniel 10396: /* TODO !!! */
10397: return(NULL);
1.144 daniel 10398: }
10399:
10400: /**
1.181 daniel 10401: * xmlRecoverDoc:
1.123 daniel 10402: * @cur: a pointer to an array of xmlChar
1.59 daniel 10403: *
10404: * parse an XML in-memory document and build a tree.
10405: * In the case the document is not Well Formed, a tree is built anyway
10406: *
1.68 daniel 10407: * Returns the resulting document tree
1.59 daniel 10408: */
10409:
1.69 daniel 10410: xmlDocPtr
1.123 daniel 10411: xmlRecoverDoc(xmlChar *cur) {
1.59 daniel 10412: return(xmlSAXParseDoc(NULL, cur, 1));
1.55 daniel 10413: }
10414:
10415: /**
1.181 daniel 10416: * xmlCreateEntityParserCtxt:
10417: * @URL: the entity URL
10418: * @ID: the entity PUBLIC ID
10419: * @base: a posible base for the target URI
10420: *
10421: * Create a parser context for an external entity
10422: * Automatic support for ZLIB/Compress compressed document is provided
10423: * by default if found at compile-time.
10424: *
10425: * Returns the new parser context or NULL
10426: */
10427: xmlParserCtxtPtr
10428: xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
10429: const xmlChar *base) {
10430: xmlParserCtxtPtr ctxt;
10431: xmlParserInputPtr inputStream;
10432: char *directory = NULL;
1.210 veillard 10433: xmlChar *uri;
10434:
1.181 daniel 10435: ctxt = xmlNewParserCtxt();
10436: if (ctxt == NULL) {
10437: return(NULL);
10438: }
10439:
1.210 veillard 10440: uri = xmlBuildURI(URL, base);
10441:
10442: if (uri == NULL) {
10443: inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
10444: if (inputStream == NULL) {
10445: xmlFreeParserCtxt(ctxt);
10446: return(NULL);
10447: }
10448:
10449: inputPush(ctxt, inputStream);
10450:
10451: if ((ctxt->directory == NULL) && (directory == NULL))
10452: directory = xmlParserGetDirectory((char *)URL);
10453: if ((ctxt->directory == NULL) && (directory != NULL))
10454: ctxt->directory = directory;
10455: } else {
10456: inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
10457: if (inputStream == NULL) {
10458: xmlFreeParserCtxt(ctxt);
10459: return(NULL);
10460: }
1.181 daniel 10461:
1.210 veillard 10462: inputPush(ctxt, inputStream);
1.181 daniel 10463:
1.210 veillard 10464: if ((ctxt->directory == NULL) && (directory == NULL))
10465: directory = xmlParserGetDirectory((char *)uri);
10466: if ((ctxt->directory == NULL) && (directory != NULL))
10467: ctxt->directory = directory;
10468: xmlFree(uri);
10469: }
1.181 daniel 10470:
10471: return(ctxt);
10472: }
10473:
10474: /**
10475: * xmlCreateFileParserCtxt:
1.50 daniel 10476: * @filename: the filename
10477: *
1.69 daniel 10478: * Create a parser context for a file content.
10479: * Automatic support for ZLIB/Compress compressed document is provided
10480: * by default if found at compile-time.
1.50 daniel 10481: *
1.69 daniel 10482: * Returns the new parser context or NULL
1.9 httpng 10483: */
1.69 daniel 10484: xmlParserCtxtPtr
10485: xmlCreateFileParserCtxt(const char *filename)
10486: {
10487: xmlParserCtxtPtr ctxt;
1.40 daniel 10488: xmlParserInputPtr inputStream;
1.91 daniel 10489: xmlParserInputBufferPtr buf;
1.111 daniel 10490: char *directory = NULL;
1.9 httpng 10491:
1.91 daniel 10492: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
10493: if (buf == NULL) return(NULL);
1.9 httpng 10494:
1.97 daniel 10495: ctxt = xmlNewParserCtxt();
1.16 daniel 10496: if (ctxt == NULL) {
10497: return(NULL);
10498: }
1.97 daniel 10499:
1.96 daniel 10500: inputStream = xmlNewInputStream(ctxt);
1.40 daniel 10501: if (inputStream == NULL) {
1.97 daniel 10502: xmlFreeParserCtxt(ctxt);
1.40 daniel 10503: return(NULL);
10504: }
10505:
1.119 daniel 10506: inputStream->filename = xmlMemStrdup(filename);
1.91 daniel 10507: inputStream->buf = buf;
10508: inputStream->base = inputStream->buf->buffer->content;
10509: inputStream->cur = inputStream->buf->buffer->content;
1.16 daniel 10510:
1.40 daniel 10511: inputPush(ctxt, inputStream);
1.110 daniel 10512: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 10513: directory = xmlParserGetDirectory(filename);
10514: if ((ctxt->directory == NULL) && (directory != NULL))
1.110 daniel 10515: ctxt->directory = directory;
1.106 daniel 10516:
1.69 daniel 10517: return(ctxt);
10518: }
10519:
10520: /**
1.181 daniel 10521: * xmlSAXParseFile:
1.69 daniel 10522: * @sax: the SAX handler block
10523: * @filename: the filename
10524: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10525: * documents
10526: *
10527: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10528: * compressed document is provided by default if found at compile-time.
10529: * It use the given SAX function block to handle the parsing callback.
10530: * If sax is NULL, fallback to the default DOM tree building routines.
10531: *
10532: * Returns the resulting document tree
10533: */
10534:
1.79 daniel 10535: xmlDocPtr
10536: xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
1.69 daniel 10537: int recovery) {
10538: xmlDocPtr ret;
10539: xmlParserCtxtPtr ctxt;
1.111 daniel 10540: char *directory = NULL;
1.69 daniel 10541:
10542: ctxt = xmlCreateFileParserCtxt(filename);
10543: if (ctxt == NULL) return(NULL);
1.74 daniel 10544: if (sax != NULL) {
1.93 veillard 10545: if (ctxt->sax != NULL)
1.119 daniel 10546: xmlFree(ctxt->sax);
1.74 daniel 10547: ctxt->sax = sax;
10548: ctxt->userData = NULL;
10549: }
1.106 daniel 10550:
1.110 daniel 10551: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 10552: directory = xmlParserGetDirectory(filename);
10553: if ((ctxt->directory == NULL) && (directory != NULL))
1.156 daniel 10554: ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
1.16 daniel 10555:
10556: xmlParseDocument(ctxt);
1.40 daniel 10557:
1.72 daniel 10558: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 10559: else {
10560: ret = NULL;
1.72 daniel 10561: xmlFreeDoc(ctxt->myDoc);
10562: ctxt->myDoc = NULL;
1.59 daniel 10563: }
1.86 daniel 10564: if (sax != NULL)
10565: ctxt->sax = NULL;
1.69 daniel 10566: xmlFreeParserCtxt(ctxt);
1.20 daniel 10567:
10568: return(ret);
10569: }
10570:
1.55 daniel 10571: /**
1.181 daniel 10572: * xmlParseFile:
1.55 daniel 10573: * @filename: the filename
10574: *
10575: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10576: * compressed document is provided by default if found at compile-time.
10577: *
1.68 daniel 10578: * Returns the resulting document tree
1.55 daniel 10579: */
10580:
1.79 daniel 10581: xmlDocPtr
10582: xmlParseFile(const char *filename) {
1.59 daniel 10583: return(xmlSAXParseFile(NULL, filename, 0));
10584: }
10585:
10586: /**
1.181 daniel 10587: * xmlRecoverFile:
1.59 daniel 10588: * @filename: the filename
10589: *
10590: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10591: * compressed document is provided by default if found at compile-time.
10592: * In the case the document is not Well Formed, a tree is built anyway
10593: *
1.68 daniel 10594: * Returns the resulting document tree
1.59 daniel 10595: */
10596:
1.79 daniel 10597: xmlDocPtr
10598: xmlRecoverFile(const char *filename) {
1.59 daniel 10599: return(xmlSAXParseFile(NULL, filename, 1));
1.55 daniel 10600: }
1.32 daniel 10601:
1.50 daniel 10602: /**
1.181 daniel 10603: * xmlCreateMemoryParserCtxt:
10604: * @buffer: a pointer to a zero terminated char array
10605: * @size: the size of the array (without the trailing 0)
1.50 daniel 10606: *
1.69 daniel 10607: * Create a parser context for an XML in-memory document.
1.50 daniel 10608: *
1.69 daniel 10609: * Returns the new parser context or NULL
1.20 daniel 10610: */
1.69 daniel 10611: xmlParserCtxtPtr
10612: xmlCreateMemoryParserCtxt(char *buffer, int size) {
1.20 daniel 10613: xmlParserCtxtPtr ctxt;
1.40 daniel 10614: xmlParserInputPtr input;
1.209 veillard 10615: xmlParserInputBufferPtr buf;
1.40 daniel 10616:
1.179 daniel 10617: if (buffer[size] != 0)
1.181 daniel 10618: return(NULL);
1.40 daniel 10619:
1.97 daniel 10620: ctxt = xmlNewParserCtxt();
1.181 daniel 10621: if (ctxt == NULL)
1.20 daniel 10622: return(NULL);
1.97 daniel 10623:
1.209 veillard 10624: buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
10625: if (buf == NULL) return(NULL);
10626:
1.96 daniel 10627: input = xmlNewInputStream(ctxt);
1.40 daniel 10628: if (input == NULL) {
1.97 daniel 10629: xmlFreeParserCtxt(ctxt);
1.40 daniel 10630: return(NULL);
10631: }
1.20 daniel 10632:
1.40 daniel 10633: input->filename = NULL;
1.209 veillard 10634: input->buf = buf;
10635: input->base = input->buf->buffer->content;
10636: input->cur = input->buf->buffer->content;
1.20 daniel 10637:
1.40 daniel 10638: inputPush(ctxt, input);
1.69 daniel 10639: return(ctxt);
10640: }
10641:
10642: /**
1.181 daniel 10643: * xmlSAXParseMemory:
1.69 daniel 10644: * @sax: the SAX handler block
10645: * @buffer: an pointer to a char array
1.127 daniel 10646: * @size: the size of the array
10647: * @recovery: work in recovery mode, i.e. tries to read not Well Formed
1.69 daniel 10648: * documents
10649: *
10650: * parse an XML in-memory block and use the given SAX function block
10651: * to handle the parsing callback. If sax is NULL, fallback to the default
10652: * DOM tree building routines.
10653: *
10654: * Returns the resulting document tree
10655: */
10656: xmlDocPtr
10657: xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
10658: xmlDocPtr ret;
10659: xmlParserCtxtPtr ctxt;
10660:
10661: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10662: if (ctxt == NULL) return(NULL);
1.74 daniel 10663: if (sax != NULL) {
10664: ctxt->sax = sax;
10665: ctxt->userData = NULL;
10666: }
1.20 daniel 10667:
10668: xmlParseDocument(ctxt);
1.40 daniel 10669:
1.72 daniel 10670: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 10671: else {
10672: ret = NULL;
1.72 daniel 10673: xmlFreeDoc(ctxt->myDoc);
10674: ctxt->myDoc = NULL;
1.59 daniel 10675: }
1.86 daniel 10676: if (sax != NULL)
10677: ctxt->sax = NULL;
1.69 daniel 10678: xmlFreeParserCtxt(ctxt);
1.16 daniel 10679:
1.9 httpng 10680: return(ret);
1.17 daniel 10681: }
10682:
1.55 daniel 10683: /**
1.181 daniel 10684: * xmlParseMemory:
1.68 daniel 10685: * @buffer: an pointer to a char array
1.55 daniel 10686: * @size: the size of the array
10687: *
10688: * parse an XML in-memory block and build a tree.
10689: *
1.68 daniel 10690: * Returns the resulting document tree
1.55 daniel 10691: */
10692:
10693: xmlDocPtr xmlParseMemory(char *buffer, int size) {
1.59 daniel 10694: return(xmlSAXParseMemory(NULL, buffer, size, 0));
10695: }
10696:
10697: /**
1.181 daniel 10698: * xmlRecoverMemory:
1.68 daniel 10699: * @buffer: an pointer to a char array
1.59 daniel 10700: * @size: the size of the array
10701: *
10702: * parse an XML in-memory block and build a tree.
10703: * In the case the document is not Well Formed, a tree is built anyway
10704: *
1.68 daniel 10705: * Returns the resulting document tree
1.59 daniel 10706: */
10707:
10708: xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
10709: return(xmlSAXParseMemory(NULL, buffer, size, 1));
1.17 daniel 10710: }
10711:
10712:
1.50 daniel 10713: /**
10714: * xmlSetupParserForBuffer:
10715: * @ctxt: an XML parser context
1.123 daniel 10716: * @buffer: a xmlChar * buffer
1.50 daniel 10717: * @filename: a file name
10718: *
1.19 daniel 10719: * Setup the parser context to parse a new buffer; Clears any prior
10720: * contents from the parser context. The buffer parameter must not be
10721: * NULL, but the filename parameter can be
10722: */
1.55 daniel 10723: void
1.123 daniel 10724: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
1.17 daniel 10725: const char* filename)
10726: {
1.96 daniel 10727: xmlParserInputPtr input;
1.40 daniel 10728:
1.96 daniel 10729: input = xmlNewInputStream(ctxt);
10730: if (input == NULL) {
10731: perror("malloc");
1.119 daniel 10732: xmlFree(ctxt);
1.145 daniel 10733: return;
1.96 daniel 10734: }
10735:
10736: xmlClearParserCtxt(ctxt);
10737: if (filename != NULL)
1.119 daniel 10738: input->filename = xmlMemStrdup(filename);
1.96 daniel 10739: input->base = buffer;
10740: input->cur = buffer;
10741: inputPush(ctxt, input);
1.17 daniel 10742: }
10743:
1.123 daniel 10744: /**
10745: * xmlSAXUserParseFile:
10746: * @sax: a SAX handler
10747: * @user_data: The user data returned on SAX callbacks
10748: * @filename: a file name
10749: *
10750: * parse an XML file and call the given SAX handler routines.
10751: * Automatic support for ZLIB/Compress compressed document is provided
10752: *
10753: * Returns 0 in case of success or a error number otherwise
10754: */
1.131 daniel 10755: int
10756: xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10757: const char *filename) {
1.123 daniel 10758: int ret = 0;
10759: xmlParserCtxtPtr ctxt;
10760:
10761: ctxt = xmlCreateFileParserCtxt(filename);
10762: if (ctxt == NULL) return -1;
1.134 daniel 10763: if (ctxt->sax != &xmlDefaultSAXHandler)
10764: xmlFree(ctxt->sax);
1.123 daniel 10765: ctxt->sax = sax;
1.140 daniel 10766: if (user_data != NULL)
10767: ctxt->userData = user_data;
1.123 daniel 10768:
10769: xmlParseDocument(ctxt);
10770:
10771: if (ctxt->wellFormed)
10772: ret = 0;
10773: else {
10774: if (ctxt->errNo != 0)
10775: ret = ctxt->errNo;
10776: else
10777: ret = -1;
10778: }
10779: if (sax != NULL)
10780: ctxt->sax = NULL;
10781: xmlFreeParserCtxt(ctxt);
10782:
10783: return ret;
10784: }
10785:
10786: /**
10787: * xmlSAXUserParseMemory:
10788: * @sax: a SAX handler
10789: * @user_data: The user data returned on SAX callbacks
10790: * @buffer: an in-memory XML document input
1.127 daniel 10791: * @size: the length of the XML document in bytes
1.123 daniel 10792: *
10793: * A better SAX parsing routine.
10794: * parse an XML in-memory buffer and call the given SAX handler routines.
10795: *
10796: * Returns 0 in case of success or a error number otherwise
10797: */
10798: int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
10799: char *buffer, int size) {
10800: int ret = 0;
10801: xmlParserCtxtPtr ctxt;
10802:
10803: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10804: if (ctxt == NULL) return -1;
10805: ctxt->sax = sax;
10806: ctxt->userData = user_data;
10807:
10808: xmlParseDocument(ctxt);
10809:
10810: if (ctxt->wellFormed)
10811: ret = 0;
10812: else {
10813: if (ctxt->errNo != 0)
10814: ret = ctxt->errNo;
10815: else
10816: ret = -1;
10817: }
10818: if (sax != NULL)
10819: ctxt->sax = NULL;
10820: xmlFreeParserCtxt(ctxt);
10821:
10822: return ret;
10823: }
10824:
1.32 daniel 10825:
1.98 daniel 10826: /************************************************************************
10827: * *
1.127 daniel 10828: * Miscellaneous *
1.98 daniel 10829: * *
10830: ************************************************************************/
10831:
/**
 * xmlCleanupParser:
 *
 * Cleanup function for the XML parser. It tries to reclaim all
 * parsing related global memory allocated for the parser processing.
 * It doesn't deallocate any document related memory. Calling this
 * function should not prevent reusing the parser.
 */

void
xmlCleanupParser(void)
{
    /* registered character encoding handlers first ... */
    xmlCleanupCharEncodingHandlers();
    /* ... then the predefined entities */
    xmlCleanupPredefinedEntities();
}
1.98 daniel 10846:
1.50 daniel 10847: /**
10848: * xmlParserFindNodeInfo:
10849: * @ctxt: an XML parser context
10850: * @node: an XML node within the tree
10851: *
10852: * Find the parser node info struct for a given node
10853: *
1.68 daniel 10854: * Returns an xmlParserNodeInfo block pointer or NULL
1.32 daniel 10855: */
10856: const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
10857: const xmlNode* node)
10858: {
10859: unsigned long pos;
10860:
10861: /* Find position where node should be at */
10862: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
10863: if ( ctx->node_seq.buffer[pos].node == node )
10864: return &ctx->node_seq.buffer[pos];
10865: else
10866: return NULL;
10867: }
10868:
10869:
1.50 daniel 10870: /**
1.181 daniel 10871: * xmlInitNodeInfoSeq:
1.50 daniel 10872: * @seq: a node info sequence pointer
10873: *
10874: * -- Initialize (set to initial state) node info sequence
1.32 daniel 10875: */
1.55 daniel 10876: void
10877: xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 10878: {
10879: seq->length = 0;
10880: seq->maximum = 0;
10881: seq->buffer = NULL;
10882: }
10883:
1.50 daniel 10884: /**
1.181 daniel 10885: * xmlClearNodeInfoSeq:
1.50 daniel 10886: * @seq: a node info sequence pointer
10887: *
10888: * -- Clear (release memory and reinitialize) node
1.32 daniel 10889: * info sequence
10890: */
1.55 daniel 10891: void
10892: xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 10893: {
10894: if ( seq->buffer != NULL )
1.119 daniel 10895: xmlFree(seq->buffer);
1.32 daniel 10896: xmlInitNodeInfoSeq(seq);
10897: }
10898:
10899:
1.50 daniel 10900: /**
10901: * xmlParserFindNodeInfoIndex:
10902: * @seq: a node info sequence pointer
10903: * @node: an XML node pointer
10904: *
10905: *
1.32 daniel 10906: * xmlParserFindNodeInfoIndex : Find the index that the info record for
10907: * the given node is or should be at in a sorted sequence
1.68 daniel 10908: *
10909: * Returns a long indicating the position of the record
1.32 daniel 10910: */
10911: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
10912: const xmlNode* node)
10913: {
10914: unsigned long upper, lower, middle;
10915: int found = 0;
10916:
10917: /* Do a binary search for the key */
10918: lower = 1;
10919: upper = seq->length;
10920: middle = 0;
10921: while ( lower <= upper && !found) {
10922: middle = lower + (upper - lower) / 2;
10923: if ( node == seq->buffer[middle - 1].node )
10924: found = 1;
10925: else if ( node < seq->buffer[middle - 1].node )
10926: upper = middle - 1;
10927: else
10928: lower = middle + 1;
10929: }
10930:
10931: /* Return position */
10932: if ( middle == 0 || seq->buffer[middle - 1].node < node )
10933: return middle;
10934: else
10935: return middle - 1;
10936: }
10937:
10938:
1.50 daniel 10939: /**
10940: * xmlParserAddNodeInfo:
10941: * @ctxt: an XML parser context
1.68 daniel 10942: * @info: a node info sequence pointer
1.50 daniel 10943: *
10944: * Insert node info record into the sorted sequence
1.32 daniel 10945: */
1.55 daniel 10946: void
10947: xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
1.68 daniel 10948: const xmlParserNodeInfo* info)
1.32 daniel 10949: {
10950: unsigned long pos;
10951: static unsigned int block_size = 5;
10952:
10953: /* Find pos and check to see if node is already in the sequence */
1.55 daniel 10954: pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
10955: if ( pos < ctxt->node_seq.length
10956: && ctxt->node_seq.buffer[pos].node == info->node ) {
10957: ctxt->node_seq.buffer[pos] = *info;
1.32 daniel 10958: }
10959:
10960: /* Otherwise, we need to add new node to buffer */
10961: else {
10962: /* Expand buffer by 5 if needed */
1.55 daniel 10963: if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
1.32 daniel 10964: xmlParserNodeInfo* tmp_buffer;
1.55 daniel 10965: unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
10966: *(ctxt->node_seq.maximum + block_size));
1.32 daniel 10967:
1.55 daniel 10968: if ( ctxt->node_seq.buffer == NULL )
1.119 daniel 10969: tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
1.32 daniel 10970: else
1.119 daniel 10971: tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
1.32 daniel 10972:
10973: if ( tmp_buffer == NULL ) {
1.55 daniel 10974: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 10975: ctxt->sax->error(ctxt->userData, "Out of memory\n");
1.123 daniel 10976: ctxt->errNo = XML_ERR_NO_MEMORY;
1.32 daniel 10977: return;
10978: }
1.55 daniel 10979: ctxt->node_seq.buffer = tmp_buffer;
10980: ctxt->node_seq.maximum += block_size;
1.32 daniel 10981: }
10982:
10983: /* If position is not at end, move elements out of the way */
1.55 daniel 10984: if ( pos != ctxt->node_seq.length ) {
1.32 daniel 10985: unsigned long i;
10986:
1.55 daniel 10987: for ( i = ctxt->node_seq.length; i > pos; i-- )
10988: ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
1.32 daniel 10989: }
10990:
10991: /* Copy element and increase length */
1.55 daniel 10992: ctxt->node_seq.buffer[pos] = *info;
10993: ctxt->node_seq.length++;
1.32 daniel 10994: }
10995: }
1.77 daniel 10996:
1.98 daniel 10997:
10998: /**
1.181 daniel 10999: * xmlSubstituteEntitiesDefault:
1.98 daniel 11000: * @val: int 0 or 1
11001: *
11002: * Set and return the previous value for default entity support.
11003: * Initially the parser always keep entity references instead of substituting
11004: * entity values in the output. This function has to be used to change the
11005: * default parser behaviour
11006: * SAX::subtituteEntities() has to be used for changing that on a file by
11007: * file basis.
11008: *
11009: * Returns the last value for 0 for no substitution, 1 for substitution.
11010: */
11011:
11012: int
11013: xmlSubstituteEntitiesDefault(int val) {
11014: int old = xmlSubstituteEntitiesDefaultValue;
11015:
11016: xmlSubstituteEntitiesDefaultValue = val;
1.180 daniel 11017: return(old);
11018: }
11019:
11020: /**
11021: * xmlKeepBlanksDefault:
11022: * @val: int 0 or 1
11023: *
11024: * Set and return the previous value for default blanks text nodes support.
11025: * The 1.x version of the parser used an heuristic to try to detect
11026: * ignorable white spaces. As a result the SAX callback was generating
11027: * ignorableWhitespace() callbacks instead of characters() one, and when
11028: * using the DOM output text nodes containing those blanks were not generated.
11029: * The 2.x and later version will switch to the XML standard way and
11030: * ignorableWhitespace() are only generated when running the parser in
11031: * validating mode and when the current element doesn't allow CDATA or
11032: * mixed content.
11033: * This function is provided as a way to force the standard behaviour
11034: * on 1.X libs and to switch back to the old mode for compatibility when
11035: * running 1.X client code on 2.X . Upgrade of 1.X code should be done
11036: * by using xmlIsBlankNode() commodity function to detect the "empty"
11037: * nodes generated.
11038: * This value also affect autogeneration of indentation when saving code
11039: * if blanks sections are kept, indentation is not generated.
11040: *
11041: * Returns the last value for 0 for no substitution, 1 for substitution.
11042: */
11043:
11044: int
11045: xmlKeepBlanksDefault(int val) {
11046: int old = xmlKeepBlanksDefaultValue;
11047:
11048: xmlKeepBlanksDefaultValue = val;
11049: xmlIndentTreeOutput = !val;
1.98 daniel 11050: return(old);
11051: }
1.77 daniel 11052:
Webmaster