Annotation of XML/parser.c, revision 1.213
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.15 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.60 daniel 6: * Daniel.Veillard@w3.org
1.1 veillard 7: */
8:
1.26 daniel 9: #ifdef WIN32
1.138 daniel 10: #include "win32config.h"
1.26 daniel 11: #else
1.121 daniel 12: #include "config.h"
1.26 daniel 13: #endif
1.121 daniel 14:
1.1 veillard 15: #include <stdio.h>
1.204 veillard 16: #include <string.h>
1.121 daniel 17: #ifdef HAVE_CTYPE_H
1.1 veillard 18: #include <ctype.h>
1.121 daniel 19: #endif
20: #ifdef HAVE_STDLIB_H
1.50 daniel 21: #include <stdlib.h>
1.121 daniel 22: #endif
23: #ifdef HAVE_SYS_STAT_H
1.9 httpng 24: #include <sys/stat.h>
1.121 daniel 25: #endif
1.9 httpng 26: #ifdef HAVE_FCNTL_H
27: #include <fcntl.h>
28: #endif
1.10 httpng 29: #ifdef HAVE_UNISTD_H
30: #include <unistd.h>
31: #endif
1.20 daniel 32: #ifdef HAVE_ZLIB_H
33: #include <zlib.h>
34: #endif
1.1 veillard 35:
1.188 daniel 36: #include <libxml/xmlmemory.h>
37: #include <libxml/tree.h>
38: #include <libxml/parser.h>
39: #include <libxml/entities.h>
40: #include <libxml/encoding.h>
41: #include <libxml/valid.h>
42: #include <libxml/parserInternals.h>
43: #include <libxml/xmlIO.h>
1.193 daniel 44: #include <libxml/uri.h>
1.122 daniel 45: #include "xml-error.h"
1.1 veillard 46:
1.140 daniel 47: #define XML_PARSER_BIG_BUFFER_SIZE 1000
48: #define XML_PARSER_BUFFER_SIZE 100
49:
1.160 daniel 50: int xmlGetWarningsDefaultValue = 1;
1.86 daniel 51:
1.139 daniel 52: /*
53: * List of XML prefixed PI allowed by W3C specs
54: */
55:
56: const char *xmlW3CPIs[] = {
57: "xml-stylesheet",
58: NULL
59: };
1.91 daniel 60:
1.151 daniel 61: void xmlParserHandleReference(xmlParserCtxtPtr ctxt);
62: void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
63: xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
64: const xmlChar **str);
1.200 daniel 65:
66: /*
67: * Version handling
68: */
69: const char *xmlParserVersion = LIBXML_VERSION_STRING;
70:
71: /*
72: * xmlCheckVersion:
73: * @version: the include version number
74: *
75: * check the compiled lib version against the include one.
76: * This can warn or immediately kill the application
77: */
78: void
79: xmlCheckVersion(int version) {
1.202 daniel 80: int myversion = (int) LIBXML_VERSION;
1.200 daniel 81:
82: if ((myversion / 10000) != (version / 10000)) {
83: fprintf(stderr,
84: "Fatal: program compiled against libxml %d using libxml %d\n",
85: (version / 10000), (myversion / 10000));
86: exit(1);
87: }
88: if ((myversion / 100) < (version / 100)) {
89: fprintf(stderr,
90: "Warning: program compiled against libxml %d using older %d\n",
91: (version / 100), (myversion / 100));
92: }
93: }
94:
95:
1.91 daniel 96: /************************************************************************
97: * *
98: * Input handling functions for progressive parsing *
99: * *
100: ************************************************************************/
101:
/* #define DEBUG_INPUT */
/* #define DEBUG_STACK */
/* #define DEBUG_PUSH */


#define INPUT_CHUNK 250
/* we need to keep enough input to show errors in context */
#define LINE_LEN    80

#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/*
 * check_buffer:
 * @in:  an XML parser input
 *
 * Debug helper, only compiled when DEBUG_INPUT is defined.
 * Reports on stderr when in->base is not the buffer content, or when
 * in->cur lies before in->base or after the used part of the buffer,
 * then dumps the buffer state.
 */
void check_buffer(xmlParserInputPtr in) {
    if (in->base != in->buf->buffer->content) {
        fprintf(stderr, "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        fprintf(stderr, "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + in->buf->buffer->use) {
        fprintf(stderr, "xmlParserInput: cur > base + use problem\n");
    }
    /*
     * Pointers are printed with %p and the pointer difference as a long:
     * casting a pointer to int truncates it where pointers are wider
     * than int.
     */
    fprintf(stderr,"buffer %p : content %p, cur %ld, use %u, size %u\n",
            (void *) in, (void *) in->buf->buffer->content,
            (long) (in->cur - in->base),
            (unsigned int) in->buf->buffer->use,
            (unsigned int) in->buf->buffer->size);
}

#else
#define CHECK_BUFFER(in) 
#endif
132:
1.91 daniel 133:
134: /**
135: * xmlParserInputRead:
136: * @in: an XML parser input
137: * @len: an indicative size for the lookahead
138: *
139: * This function refresh the input for the parser. It doesn't try to
140: * preserve pointers to the input buffer, and discard already read data
141: *
1.123 daniel 142: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
1.91 daniel 143: * end of this entity
144: */
145: int
146: xmlParserInputRead(xmlParserInputPtr in, int len) {
147: int ret;
148: int used;
149: int index;
150:
151: #ifdef DEBUG_INPUT
152: fprintf(stderr, "Read\n");
153: #endif
154: if (in->buf == NULL) return(-1);
155: if (in->base == NULL) return(-1);
156: if (in->cur == NULL) return(-1);
157: if (in->buf->buffer == NULL) return(-1);
158:
159: CHECK_BUFFER(in);
160:
161: used = in->cur - in->buf->buffer->content;
162: ret = xmlBufferShrink(in->buf->buffer, used);
163: if (ret > 0) {
164: in->cur -= ret;
165: in->consumed += ret;
166: }
167: ret = xmlParserInputBufferRead(in->buf, len);
168: if (in->base != in->buf->buffer->content) {
169: /*
170: * the buffer has been realloced
171: */
172: index = in->cur - in->base;
173: in->base = in->buf->buffer->content;
174: in->cur = &in->buf->buffer->content[index];
175: }
176:
177: CHECK_BUFFER(in);
178:
179: return(ret);
180: }
181:
182: /**
183: * xmlParserInputGrow:
184: * @in: an XML parser input
185: * @len: an indicative size for the lookahead
186: *
187: * This function increase the input for the parser. It tries to
188: * preserve pointers to the input buffer, and keep already read data
189: *
1.123 daniel 190: * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
1.91 daniel 191: * end of this entity
192: */
193: int
194: xmlParserInputGrow(xmlParserInputPtr in, int len) {
195: int ret;
196: int index;
197:
198: #ifdef DEBUG_INPUT
199: fprintf(stderr, "Grow\n");
200: #endif
201: if (in->buf == NULL) return(-1);
202: if (in->base == NULL) return(-1);
203: if (in->cur == NULL) return(-1);
204: if (in->buf->buffer == NULL) return(-1);
205:
206: CHECK_BUFFER(in);
207:
208: index = in->cur - in->base;
1.202 daniel 209: if (in->buf->buffer->use > (unsigned int) index + INPUT_CHUNK) {
1.91 daniel 210:
211: CHECK_BUFFER(in);
212:
213: return(0);
214: }
1.189 daniel 215: if (in->buf->readcallback != NULL)
1.140 daniel 216: ret = xmlParserInputBufferGrow(in->buf, len);
217: else
218: return(0);
1.135 daniel 219:
220: /*
221: * NOTE : in->base may be a "dandling" i.e. freed pointer in this
222: * block, but we use it really as an integer to do some
223: * pointer arithmetic. Insure will raise it as a bug but in
224: * that specific case, that's not !
225: */
1.91 daniel 226: if (in->base != in->buf->buffer->content) {
227: /*
228: * the buffer has been realloced
229: */
230: index = in->cur - in->base;
231: in->base = in->buf->buffer->content;
232: in->cur = &in->buf->buffer->content[index];
233: }
234:
235: CHECK_BUFFER(in);
236:
237: return(ret);
238: }
239:
240: /**
241: * xmlParserInputShrink:
242: * @in: an XML parser input
243: *
244: * This function removes used input for the parser.
245: */
246: void
247: xmlParserInputShrink(xmlParserInputPtr in) {
248: int used;
249: int ret;
250: int index;
251:
252: #ifdef DEBUG_INPUT
253: fprintf(stderr, "Shrink\n");
254: #endif
255: if (in->buf == NULL) return;
256: if (in->base == NULL) return;
257: if (in->cur == NULL) return;
258: if (in->buf->buffer == NULL) return;
259:
260: CHECK_BUFFER(in);
261:
262: used = in->cur - in->buf->buffer->content;
263: if (used > INPUT_CHUNK) {
1.110 daniel 264: ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1.91 daniel 265: if (ret > 0) {
266: in->cur -= ret;
267: in->consumed += ret;
268: }
269: }
270:
271: CHECK_BUFFER(in);
272:
273: if (in->buf->buffer->use > INPUT_CHUNK) {
274: return;
275: }
276: xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
277: if (in->base != in->buf->buffer->content) {
278: /*
279: * the buffer has been realloced
280: */
281: index = in->cur - in->base;
282: in->base = in->buf->buffer->content;
283: in->cur = &in->buf->buffer->content[index];
284: }
285:
286: CHECK_BUFFER(in);
287: }
288:
1.45 daniel 289: /************************************************************************
290: * *
291: * Parser stacks related functions and macros *
292: * *
293: ************************************************************************/
1.79 daniel 294:
295: int xmlSubstituteEntitiesDefaultValue = 0;
1.100 daniel 296: int xmlDoValidityCheckingDefaultValue = 0;
1.180 daniel 297: int xmlKeepBlanksDefaultValue = 1;
1.135 daniel 298: xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
299: const xmlChar ** str);
1.79 daniel 300:
1.1 veillard 301: /*
1.40 daniel 302: * Generic function for accessing stacks in the Parser Context
1.1 veillard 303: */
304:
1.140 daniel 305: #define PUSH_AND_POP(scope, type, name) \
306: scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
1.31 daniel 307: if (ctxt->name##Nr >= ctxt->name##Max) { \
308: ctxt->name##Max *= 2; \
1.204 veillard 309: ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
1.40 daniel 310: ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
311: if (ctxt->name##Tab == NULL) { \
1.31 daniel 312: fprintf(stderr, "realloc failed !\n"); \
1.145 daniel 313: return(0); \
1.31 daniel 314: } \
315: } \
1.40 daniel 316: ctxt->name##Tab[ctxt->name##Nr] = value; \
317: ctxt->name = value; \
318: return(ctxt->name##Nr++); \
1.31 daniel 319: } \
1.140 daniel 320: scope type name##Pop(xmlParserCtxtPtr ctxt) { \
1.69 daniel 321: type ret; \
1.40 daniel 322: if (ctxt->name##Nr <= 0) return(0); \
323: ctxt->name##Nr--; \
1.50 daniel 324: if (ctxt->name##Nr > 0) \
325: ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
326: else \
327: ctxt->name = NULL; \
1.69 daniel 328: ret = ctxt->name##Tab[ctxt->name##Nr]; \
329: ctxt->name##Tab[ctxt->name##Nr] = 0; \
330: return(ret); \
1.31 daniel 331: } \
332:
1.140 daniel 333: PUSH_AND_POP(extern, xmlParserInputPtr, input)
334: PUSH_AND_POP(extern, xmlNodePtr, node)
335: PUSH_AND_POP(extern, xmlChar*, name)
1.40 daniel 336:
1.176 daniel 337: int spacePush(xmlParserCtxtPtr ctxt, int val) {
338: if (ctxt->spaceNr >= ctxt->spaceMax) {
339: ctxt->spaceMax *= 2;
1.204 veillard 340: ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1.176 daniel 341: ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
342: if (ctxt->spaceTab == NULL) {
343: fprintf(stderr, "realloc failed !\n");
344: return(0);
345: }
346: }
347: ctxt->spaceTab[ctxt->spaceNr] = val;
348: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
349: return(ctxt->spaceNr++);
350: }
351:
352: int spacePop(xmlParserCtxtPtr ctxt) {
353: int ret;
354: if (ctxt->spaceNr <= 0) return(0);
355: ctxt->spaceNr--;
356: if (ctxt->spaceNr > 0)
357: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
358: else
359: ctxt->space = NULL;
360: ret = ctxt->spaceTab[ctxt->spaceNr];
361: ctxt->spaceTab[ctxt->spaceNr] = -1;
362: return(ret);
363: }
364:
1.55 daniel 365: /*
366: * Macros for accessing the content. Those should be used only by the parser,
367: * and not exported.
368: *
369: * Dirty macros, i.e. one need to make assumption on the context to use them
370: *
1.123 daniel 371: * CUR_PTR return the current pointer to the xmlChar to be parsed.
1.151 daniel 372: * To be used with extreme caution since operations consuming
373: * characters may move the input buffer to a different location !
1.123 daniel 374: * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1.152 daniel 375: * in ISO-Latin or UTF-8.
1.151 daniel 376: * This should be used internally by the parser
1.55 daniel 377: * only to compare to ASCII values otherwise it would break when
378: * running with UTF-8 encoding.
1.123 daniel 379: * NXT(n) returns the n'th next xmlChar. Same as CUR, it should be used only
1.55 daniel 380: * to compare on ASCII based substring.
1.123 daniel 381: * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1.55 daniel 382: * strings within the parser.
383: *
1.77 daniel 384: * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1.55 daniel 385: *
386: * NEXT Skip to the next character, this does the proper decoding
387: * in UTF-8 mode. It also pop-up unfinished entities on the fly.
1.77 daniel 388: * COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
1.155 daniel 389: * CUR_CHAR Return the current char as an int as well as its length.
1.55 daniel 390: */
1.45 daniel 391:
/*
 * All the macros below expect a variable named "ctxt" (the parser
 * context) to be in scope at the point of use.
 *
 * SKIP, SHRINK, GROW, NEXTL and COPY_BUF expand to several statements
 * and are NOT wrapped in a single block: do not use them as the
 * unbraced body of an if/else/loop.
 * SKIP_BLANKS, NEXT, NEXTL, CUR_CHAR, CUR_SCHAR and COPY_BUF carry their
 * own trailing semicolon.
 */

/* -1 when a token is pending, the current input byte otherwise */
#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
/* the pending token if any, the current input byte otherwise */
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR ctxt->input->cur

#define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val);     \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);       \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt)

#define SHRINK xmlParserInputShrink(ctxt->input);                       \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt)

#define GROW xmlParserInputGrow(ctxt->input, INPUT_CHUNK);              \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt)

#define SKIP_BLANKS xmlSkipBlankChars(ctxt);

#define NEXT xmlNextChar(ctxt);

/* advance by (l) bytes, keeping the line/col counters up to date */
#define NEXTL(l)                                                        \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->token = 0; ctxt->input->cur += (l);                           \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l));
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l));

/* append the char (v) of length (l) to buffer (b) at index (i) */
#define COPY_BUF(l,b,i,v)                                               \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);                           \
    else (i) += xmlCopyChar((l),&(b)[(i)],(v));
1.151 daniel 432:
433: /**
434: * xmlNextChar:
435: * @ctxt: the XML parser context
436: *
437: * Skip to the next char input char.
438: */
1.55 daniel 439:
1.151 daniel 440: void
441: xmlNextChar(xmlParserCtxtPtr ctxt) {
1.201 daniel 442: if (ctxt->instate == XML_PARSER_EOF)
443: return;
444:
1.176 daniel 445: /*
446: * TODO: 2.11 End-of-Line Handling
447: * the literal two-character sequence "#xD#xA" or a standalone
448: * literal #xD, an XML processor must pass to the application
449: * the single character #xA.
450: */
1.151 daniel 451: if (ctxt->token != 0) ctxt->token = 0;
1.208 veillard 452: else if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1.151 daniel 453: if ((*ctxt->input->cur == 0) &&
454: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
455: (ctxt->instate != XML_PARSER_COMMENT)) {
456: /*
457: * If we are at the end of the current entity and
458: * the context allows it, we pop consumed entities
459: * automatically.
460: * TODO: the auto closing should be blocked in other cases
461: */
462: xmlPopInput(ctxt);
463: } else {
464: if (*(ctxt->input->cur) == '\n') {
465: ctxt->input->line++; ctxt->input->col = 1;
466: } else ctxt->input->col++;
1.198 daniel 467: if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1.151 daniel 468: /*
469: * We are supposed to handle UTF8, check it's valid
470: * From rfc2044: encoding of the Unicode values on UTF-8:
471: *
472: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
473: * 0000 0000-0000 007F 0xxxxxxx
474: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
475: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
476: *
1.160 daniel 477: * Check for the 0x110000 limit too
1.151 daniel 478: */
479: const unsigned char *cur = ctxt->input->cur;
480: unsigned char c;
1.91 daniel 481:
1.151 daniel 482: c = *cur;
483: if (c & 0x80) {
484: if (cur[1] == 0)
485: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
486: if ((cur[1] & 0xc0) != 0x80)
487: goto encoding_error;
488: if ((c & 0xe0) == 0xe0) {
489: unsigned int val;
490:
491: if (cur[2] == 0)
492: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
493: if ((cur[2] & 0xc0) != 0x80)
494: goto encoding_error;
495: if ((c & 0xf0) == 0xf0) {
496: if (cur[3] == 0)
497: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
498: if (((c & 0xf8) != 0xf0) ||
499: ((cur[3] & 0xc0) != 0x80))
500: goto encoding_error;
501: /* 4-byte code */
502: ctxt->input->cur += 4;
503: val = (cur[0] & 0x7) << 18;
504: val |= (cur[1] & 0x3f) << 12;
505: val |= (cur[2] & 0x3f) << 6;
506: val |= cur[3] & 0x3f;
507: } else {
508: /* 3-byte code */
509: ctxt->input->cur += 3;
510: val = (cur[0] & 0xf) << 12;
511: val |= (cur[1] & 0x3f) << 6;
512: val |= cur[2] & 0x3f;
513: }
514: if (((val > 0xd7ff) && (val < 0xe000)) ||
515: ((val > 0xfffd) && (val < 0x10000)) ||
1.160 daniel 516: (val >= 0x110000)) {
1.151 daniel 517: if ((ctxt->sax != NULL) &&
518: (ctxt->sax->error != NULL))
519: ctxt->sax->error(ctxt->userData,
1.196 daniel 520: "Char 0x%X out of allowed range\n", val);
1.151 daniel 521: ctxt->errNo = XML_ERR_INVALID_ENCODING;
522: ctxt->wellFormed = 0;
1.180 daniel 523: ctxt->disableSAX = 1;
1.151 daniel 524: }
525: } else
526: /* 2-byte code */
527: ctxt->input->cur += 2;
528: } else
529: /* 1-byte code */
530: ctxt->input->cur++;
531: } else {
532: /*
533: * Assume it's a fixed lenght encoding (1) with
534: * a compatibke encoding for the ASCII set, since
535: * XML constructs only use < 128 chars
536: */
537: ctxt->input->cur++;
538: }
539: ctxt->nbChars++;
540: if (*ctxt->input->cur == 0)
541: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
542: }
1.208 veillard 543: } else {
544: ctxt->input->cur++;
545: ctxt->nbChars++;
546: if (*ctxt->input->cur == 0)
547: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1.151 daniel 548: }
1.207 veillard 549: if ((*ctxt->input->cur == '%') && (!ctxt->html))
550: xmlParserHandlePEReference(ctxt);
551: if ((*ctxt->input->cur == '&')&& (!ctxt->html))
552: xmlParserHandleReference(ctxt);
1.168 daniel 553: if ((*ctxt->input->cur == 0) &&
554: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
555: xmlPopInput(ctxt);
1.151 daniel 556: return;
557: encoding_error:
558: /*
559: * If we detect an UTF8 error that probably mean that the
560: * input encoding didn't get properly advertized in the
561: * declaration header. Report the error and switch the encoding
562: * to ISO-Latin-1 (if you don't like this policy, just declare the
563: * encoding !)
564: */
1.198 daniel 565: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1.151 daniel 566: ctxt->sax->error(ctxt->userData,
567: "Input is not proper UTF-8, indicate encoding !\n");
1.198 daniel 568: ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
569: ctxt->input->cur[0], ctxt->input->cur[1],
570: ctxt->input->cur[2], ctxt->input->cur[3]);
571: }
1.151 daniel 572: ctxt->errNo = XML_ERR_INVALID_ENCODING;
573:
1.198 daniel 574: ctxt->charset = XML_CHAR_ENCODING_8859_1;
1.151 daniel 575: ctxt->input->cur++;
576: return;
577: }
1.42 daniel 578:
1.152 daniel 579: /**
580: * xmlCurrentChar:
581: * @ctxt: the XML parser context
582: * @len: pointer to the length of the char read
583: *
584: * The current char value, if using UTF-8 this may actaully span multiple
1.180 daniel 585: * bytes in the input buffer. Implement the end of line normalization:
586: * 2.11 End-of-Line Handling
587: * Wherever an external parsed entity or the literal entity value
588: * of an internal parsed entity contains either the literal two-character
589: * sequence "#xD#xA" or a standalone literal #xD, an XML processor
590: * must pass to the application the single character #xA.
591: * This behavior can conveniently be produced by normalizing all
592: * line breaks to #xA on input, before parsing.)
1.152 daniel 593: *
594: * Returns the current char value and its lenght
595: */
596:
597: int
598: xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
1.201 daniel 599: if (ctxt->instate == XML_PARSER_EOF)
600: return(0);
601:
1.152 daniel 602: if (ctxt->token != 0) {
603: *len = 0;
604: return(ctxt->token);
605: }
1.198 daniel 606: if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1.152 daniel 607: /*
608: * We are supposed to handle UTF8, check it's valid
609: * From rfc2044: encoding of the Unicode values on UTF-8:
610: *
611: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
612: * 0000 0000-0000 007F 0xxxxxxx
613: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
614: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
615: *
1.160 daniel 616: * Check for the 0x110000 limit too
1.152 daniel 617: */
618: const unsigned char *cur = ctxt->input->cur;
619: unsigned char c;
620: unsigned int val;
621:
622: c = *cur;
623: if (c & 0x80) {
624: if (cur[1] == 0)
625: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
626: if ((cur[1] & 0xc0) != 0x80)
627: goto encoding_error;
628: if ((c & 0xe0) == 0xe0) {
629:
630: if (cur[2] == 0)
631: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
632: if ((cur[2] & 0xc0) != 0x80)
633: goto encoding_error;
634: if ((c & 0xf0) == 0xf0) {
635: if (cur[3] == 0)
636: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
637: if (((c & 0xf8) != 0xf0) ||
638: ((cur[3] & 0xc0) != 0x80))
639: goto encoding_error;
640: /* 4-byte code */
641: *len = 4;
642: val = (cur[0] & 0x7) << 18;
643: val |= (cur[1] & 0x3f) << 12;
644: val |= (cur[2] & 0x3f) << 6;
645: val |= cur[3] & 0x3f;
646: } else {
647: /* 3-byte code */
648: *len = 3;
649: val = (cur[0] & 0xf) << 12;
650: val |= (cur[1] & 0x3f) << 6;
651: val |= cur[2] & 0x3f;
652: }
653: } else {
654: /* 2-byte code */
655: *len = 2;
656: val = (cur[0] & 0x1f) << 6;
1.168 daniel 657: val |= cur[1] & 0x3f;
1.152 daniel 658: }
659: if (!IS_CHAR(val)) {
660: if ((ctxt->sax != NULL) &&
661: (ctxt->sax->error != NULL))
662: ctxt->sax->error(ctxt->userData,
1.196 daniel 663: "Char 0x%X out of allowed range\n", val);
1.152 daniel 664: ctxt->errNo = XML_ERR_INVALID_ENCODING;
665: ctxt->wellFormed = 0;
1.180 daniel 666: ctxt->disableSAX = 1;
1.152 daniel 667: }
668: return(val);
669: } else {
670: /* 1-byte code */
671: *len = 1;
1.180 daniel 672: if (*ctxt->input->cur == 0xD) {
673: if (ctxt->input->cur[1] == 0xA) {
674: ctxt->nbChars++;
675: ctxt->input->cur++;
676: }
677: return(0xA);
678: }
1.152 daniel 679: return((int) *ctxt->input->cur);
680: }
681: }
682: /*
683: * Assume it's a fixed lenght encoding (1) with
684: * a compatibke encoding for the ASCII set, since
685: * XML constructs only use < 128 chars
686: */
687: *len = 1;
1.180 daniel 688: if (*ctxt->input->cur == 0xD) {
689: if (ctxt->input->cur[1] == 0xA) {
690: ctxt->nbChars++;
691: ctxt->input->cur++;
692: }
693: return(0xA);
694: }
1.152 daniel 695: return((int) *ctxt->input->cur);
696: encoding_error:
697: /*
698: * If we detect an UTF8 error that probably mean that the
699: * input encoding didn't get properly advertized in the
700: * declaration header. Report the error and switch the encoding
701: * to ISO-Latin-1 (if you don't like this policy, just declare the
702: * encoding !)
703: */
1.198 daniel 704: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1.152 daniel 705: ctxt->sax->error(ctxt->userData,
706: "Input is not proper UTF-8, indicate encoding !\n");
1.198 daniel 707: ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
708: ctxt->input->cur[0], ctxt->input->cur[1],
709: ctxt->input->cur[2], ctxt->input->cur[3]);
710: }
1.152 daniel 711: ctxt->errNo = XML_ERR_INVALID_ENCODING;
712:
1.198 daniel 713: ctxt->charset = XML_CHAR_ENCODING_8859_1;
1.152 daniel 714: *len = 1;
715: return((int) *ctxt->input->cur);
716: }
717:
718: /**
1.162 daniel 719: * xmlStringCurrentChar:
720: * @ctxt: the XML parser context
721: * @cur: pointer to the beginning of the char
722: * @len: pointer to the length of the char read
723: *
724: * The current char value, if using UTF-8 this may actaully span multiple
725: * bytes in the input buffer.
726: *
727: * Returns the current char value and its lenght
728: */
729:
730: int
731: xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
1.198 daniel 732: if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
1.162 daniel 733: /*
734: * We are supposed to handle UTF8, check it's valid
735: * From rfc2044: encoding of the Unicode values on UTF-8:
736: *
737: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
738: * 0000 0000-0000 007F 0xxxxxxx
739: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
740: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
741: *
742: * Check for the 0x110000 limit too
743: */
744: unsigned char c;
745: unsigned int val;
746:
747: c = *cur;
748: if (c & 0x80) {
749: if ((cur[1] & 0xc0) != 0x80)
750: goto encoding_error;
751: if ((c & 0xe0) == 0xe0) {
752:
753: if ((cur[2] & 0xc0) != 0x80)
754: goto encoding_error;
755: if ((c & 0xf0) == 0xf0) {
756: if (((c & 0xf8) != 0xf0) ||
757: ((cur[3] & 0xc0) != 0x80))
758: goto encoding_error;
759: /* 4-byte code */
760: *len = 4;
761: val = (cur[0] & 0x7) << 18;
762: val |= (cur[1] & 0x3f) << 12;
763: val |= (cur[2] & 0x3f) << 6;
764: val |= cur[3] & 0x3f;
765: } else {
766: /* 3-byte code */
767: *len = 3;
768: val = (cur[0] & 0xf) << 12;
769: val |= (cur[1] & 0x3f) << 6;
770: val |= cur[2] & 0x3f;
771: }
772: } else {
773: /* 2-byte code */
774: *len = 2;
775: val = (cur[0] & 0x1f) << 6;
776: val |= cur[2] & 0x3f;
777: }
778: if (!IS_CHAR(val)) {
779: if ((ctxt->sax != NULL) &&
780: (ctxt->sax->error != NULL))
781: ctxt->sax->error(ctxt->userData,
1.196 daniel 782: "Char 0x%X out of allowed range\n", val);
1.162 daniel 783: ctxt->errNo = XML_ERR_INVALID_ENCODING;
784: ctxt->wellFormed = 0;
1.180 daniel 785: ctxt->disableSAX = 1;
1.162 daniel 786: }
787: return(val);
788: } else {
789: /* 1-byte code */
790: *len = 1;
791: return((int) *cur);
792: }
793: }
794: /*
795: * Assume it's a fixed lenght encoding (1) with
796: * a compatibke encoding for the ASCII set, since
797: * XML constructs only use < 128 chars
798: */
799: *len = 1;
800: return((int) *cur);
801: encoding_error:
802: /*
803: * If we detect an UTF8 error that probably mean that the
804: * input encoding didn't get properly advertized in the
805: * declaration header. Report the error and switch the encoding
806: * to ISO-Latin-1 (if you don't like this policy, just declare the
807: * encoding !)
808: */
1.198 daniel 809: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
1.162 daniel 810: ctxt->sax->error(ctxt->userData,
811: "Input is not proper UTF-8, indicate encoding !\n");
1.198 daniel 812: ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
813: ctxt->input->cur[0], ctxt->input->cur[1],
814: ctxt->input->cur[2], ctxt->input->cur[3]);
815: }
1.162 daniel 816: ctxt->errNo = XML_ERR_INVALID_ENCODING;
817:
818: *len = 1;
819: return((int) *cur);
820: }
821:
822: /**
1.152 daniel 823: * xmlCopyChar:
824: * @len: pointer to the length of the char read (or zero)
825: * @array: pointer to an arry of xmlChar
826: * @val: the char value
827: *
828: * append the char value in the array
829: *
830: * Returns the number of xmlChar written
831: */
832:
833: int
834: xmlCopyChar(int len, xmlChar *out, int val) {
835: /*
836: * We are supposed to handle UTF8, check it's valid
837: * From rfc2044: encoding of the Unicode values on UTF-8:
838: *
839: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
840: * 0000 0000-0000 007F 0xxxxxxx
841: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
842: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
843: */
844: if (len == 0) {
845: if (val < 0) len = 0;
1.160 daniel 846: else if (val < 0x80) len = 1;
847: else if (val < 0x800) len = 2;
848: else if (val < 0x10000) len = 3;
849: else if (val < 0x110000) len = 4;
1.152 daniel 850: if (len == 0) {
851: fprintf(stderr, "Internal error, xmlCopyChar 0x%X out of bound\n",
852: val);
853: return(0);
854: }
855: }
856: if (len > 1) {
857: int bits;
858:
859: if (val < 0x80) { *out++= val; bits= -6; }
860: else if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
861: else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6; }
862: else { *out++= (val >> 18) | 0xF0; bits= 12; }
863:
864: for ( ; bits >= 0; bits-= 6)
865: *out++= ((val >> bits) & 0x3F) | 0x80 ;
866:
867: return(len);
868: }
869: *out = (xmlChar) val;
870: return(1);
1.155 daniel 871: }
872:
873: /**
874: * xmlSkipBlankChars:
875: * @ctxt: the XML parser context
876: *
877: * skip all blanks character found at that point in the input streams.
878: * It pops up finished entities in the process if allowable at that point.
879: *
880: * Returns the number of space chars skipped
881: */
882:
883: int
884: xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
885: int cur, res = 0;
886:
887: do {
888: cur = CUR;
889: while (IS_BLANK(cur)) {
890: NEXT;
891: cur = CUR;
892: res++;
893: }
894: while ((cur == 0) && (ctxt->inputNr > 1) &&
895: (ctxt->instate != XML_PARSER_COMMENT)) {
896: xmlPopInput(ctxt);
897: cur = CUR;
898: }
899: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
900: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
901: } while (IS_BLANK(cur));
902: return(res);
1.152 daniel 903: }
904:
1.97 daniel 905: /************************************************************************
906: * *
907: * Commodity functions to handle entities processing *
908: * *
909: ************************************************************************/
1.40 daniel 910:
1.50 daniel 911: /**
912: * xmlPopInput:
913: * @ctxt: an XML parser context
914: *
1.40 daniel 915: * xmlPopInput: the current input pointed by ctxt->input came to an end
916: * pop it and return the next char.
1.45 daniel 917: *
1.123 daniel 918: * Returns the current xmlChar in the parser context
1.40 daniel 919: */
1.123 daniel 920: xmlChar
1.55 daniel 921: xmlPopInput(xmlParserCtxtPtr ctxt) {
1.40 daniel 922: if (ctxt->inputNr == 1) return(0); /* End of main Input */
1.69 daniel 923: xmlFreeInputStream(inputPop(ctxt));
1.97 daniel 924: if ((*ctxt->input->cur == 0) &&
925: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
926: return(xmlPopInput(ctxt));
1.40 daniel 927: return(CUR);
928: }
929:
1.50 daniel 930: /**
931: * xmlPushInput:
932: * @ctxt: an XML parser context
933: * @input: an XML parser input fragment (entity, XML fragment ...).
934: *
1.40 daniel 935: * xmlPushInput: switch to a new input stream which is stacked on top
936: * of the previous one(s).
937: */
1.55 daniel 938: void
939: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1.40 daniel 940: if (input == NULL) return;
941: inputPush(ctxt, input);
1.164 daniel 942: GROW;
1.40 daniel 943: }
944:
1.50 daniel 945: /**
1.69 daniel 946: * xmlFreeInputStream:
1.127 daniel 947: * @input: an xmlParserInputPtr
1.69 daniel 948: *
949: * Free up an input stream.
950: */
951: void
952: xmlFreeInputStream(xmlParserInputPtr input) {
953: if (input == NULL) return;
954:
1.119 daniel 955: if (input->filename != NULL) xmlFree((char *) input->filename);
956: if (input->directory != NULL) xmlFree((char *) input->directory);
1.164 daniel 957: if (input->encoding != NULL) xmlFree((char *) input->encoding);
1.165 daniel 958: if (input->version != NULL) xmlFree((char *) input->version);
1.69 daniel 959: if ((input->free != NULL) && (input->base != NULL))
1.123 daniel 960: input->free((xmlChar *) input->base);
1.93 veillard 961: if (input->buf != NULL)
962: xmlFreeParserInputBuffer(input->buf);
1.69 daniel 963: memset(input, -1, sizeof(xmlParserInput));
1.119 daniel 964: xmlFree(input);
1.69 daniel 965: }
966:
967: /**
1.96 daniel 968: * xmlNewInputStream:
969: * @ctxt: an XML parser context
970: *
971: * Create a new input stream structure
972: * Returns the new input stream or NULL
973: */
974: xmlParserInputPtr
975: xmlNewInputStream(xmlParserCtxtPtr ctxt) {
976: xmlParserInputPtr input;
977:
1.119 daniel 978: input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1.96 daniel 979: if (input == NULL) {
1.190 daniel 980: if (ctxt != NULL) {
981: ctxt->errNo = XML_ERR_NO_MEMORY;
982: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
983: ctxt->sax->error(ctxt->userData,
984: "malloc: couldn't allocate a new input stream\n");
985: ctxt->errNo = XML_ERR_NO_MEMORY;
986: }
1.96 daniel 987: return(NULL);
988: }
1.165 daniel 989: memset(input, 0, sizeof(xmlParserInput));
1.96 daniel 990: input->line = 1;
991: input->col = 1;
1.167 daniel 992: input->standalone = -1;
1.96 daniel 993: return(input);
994: }
995:
996: /**
1.190 daniel 997: * xmlNewIOInputStream:
998: * @ctxt: an XML parser context
999: * @input: an I/O Input
1000: * @enc: the charset encoding if known
1001: *
1002: * Create a new input stream structure encapsulating the @input into
1003: * a stream suitable for the parser.
1004: *
1005: * Returns the new input stream or NULL
1006: */
1007: xmlParserInputPtr
1008: xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1009: xmlCharEncoding enc) {
1010: xmlParserInputPtr inputStream;
1011:
1012: inputStream = xmlNewInputStream(ctxt);
1013: if (inputStream == NULL) {
1014: return(NULL);
1015: }
1016: inputStream->filename = NULL;
1017: inputStream->buf = input;
1018: inputStream->base = inputStream->buf->buffer->content;
1019: inputStream->cur = inputStream->buf->buffer->content;
1020: if (enc != XML_CHAR_ENCODING_NONE) {
1021: xmlSwitchEncoding(ctxt, enc);
1022: }
1023:
1024: return(inputStream);
1025: }
1026:
1027: /**
1.50 daniel 1028: * xmlNewEntityInputStream:
1029: * @ctxt: an XML parser context
1030: * @entity: an Entity pointer
1031: *
1.82 daniel 1032: * Create a new input stream based on an xmlEntityPtr
1.113 daniel 1033: *
1034: * Returns the new input stream or NULL
1.45 daniel 1035: */
1.50 daniel 1036: xmlParserInputPtr
1037: xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 1038: xmlParserInputPtr input;
1039:
1040: if (entity == NULL) {
1.123 daniel 1041: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 1042: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1043: ctxt->sax->error(ctxt->userData,
1.45 daniel 1044: "internal: xmlNewEntityInputStream entity = NULL\n");
1.123 daniel 1045: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.50 daniel 1046: return(NULL);
1.45 daniel 1047: }
1048: if (entity->content == NULL) {
1.159 daniel 1049: switch (entity->etype) {
1.113 daniel 1050: case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1.123 daniel 1051: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.113 daniel 1052: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1053: ctxt->sax->error(ctxt->userData,
1054: "xmlNewEntityInputStream unparsed entity !\n");
1055: break;
1056: case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1057: case XML_EXTERNAL_PARAMETER_ENTITY:
1.116 daniel 1058: return(xmlLoadExternalEntity((char *) entity->SystemID,
1.142 daniel 1059: (char *) entity->ExternalID, ctxt));
1.113 daniel 1060: case XML_INTERNAL_GENERAL_ENTITY:
1061: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1062: ctxt->sax->error(ctxt->userData,
1063: "Internal entity %s without content !\n", entity->name);
1064: break;
1065: case XML_INTERNAL_PARAMETER_ENTITY:
1.123 daniel 1066: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.113 daniel 1067: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1068: ctxt->sax->error(ctxt->userData,
1069: "Internal parameter entity %s without content !\n", entity->name);
1070: break;
1071: case XML_INTERNAL_PREDEFINED_ENTITY:
1.123 daniel 1072: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.113 daniel 1073: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1074: ctxt->sax->error(ctxt->userData,
1075: "Predefined entity %s without content !\n", entity->name);
1076: break;
1077: }
1.50 daniel 1078: return(NULL);
1.45 daniel 1079: }
1.96 daniel 1080: input = xmlNewInputStream(ctxt);
1.45 daniel 1081: if (input == NULL) {
1.50 daniel 1082: return(NULL);
1.45 daniel 1083: }
1.156 daniel 1084: input->filename = (char *) entity->SystemID;
1.45 daniel 1085: input->base = entity->content;
1086: input->cur = entity->content;
1.140 daniel 1087: input->length = entity->length;
1.50 daniel 1088: return(input);
1.45 daniel 1089: }
1090:
1.59 daniel 1091: /**
1092: * xmlNewStringInputStream:
1093: * @ctxt: an XML parser context
1.96 daniel 1094: * @buffer: an memory buffer
1.59 daniel 1095: *
1096: * Create a new input stream based on a memory buffer.
1.68 daniel 1097: * Returns the new input stream
1.59 daniel 1098: */
1099: xmlParserInputPtr
1.123 daniel 1100: xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1.59 daniel 1101: xmlParserInputPtr input;
1102:
1.96 daniel 1103: if (buffer == NULL) {
1.123 daniel 1104: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 1105: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1106: ctxt->sax->error(ctxt->userData,
1.59 daniel 1107: "internal: xmlNewStringInputStream string = NULL\n");
1108: return(NULL);
1109: }
1.96 daniel 1110: input = xmlNewInputStream(ctxt);
1.59 daniel 1111: if (input == NULL) {
1112: return(NULL);
1113: }
1.96 daniel 1114: input->base = buffer;
1115: input->cur = buffer;
1.140 daniel 1116: input->length = xmlStrlen(buffer);
1.59 daniel 1117: return(input);
1118: }
1119:
1.76 daniel 1120: /**
1121: * xmlNewInputFromFile:
1122: * @ctxt: an XML parser context
1123: * @filename: the filename to use as entity
1124: *
1125: * Create a new input stream based on a file.
1126: *
1127: * Returns the new input stream or NULL in case of error
1128: */
1129: xmlParserInputPtr
1.79 daniel 1130: xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1.91 daniel 1131: xmlParserInputBufferPtr buf;
1.76 daniel 1132: xmlParserInputPtr inputStream;
1.111 daniel 1133: char *directory = NULL;
1.76 daniel 1134:
1.96 daniel 1135: if (ctxt == NULL) return(NULL);
1.91 daniel 1136: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1.94 daniel 1137: if (buf == NULL) {
1.140 daniel 1138: char name[XML_PARSER_BIG_BUFFER_SIZE];
1.106 daniel 1139:
1.94 daniel 1140: if ((ctxt->input != NULL) && (ctxt->input->directory != NULL)) {
1141: #ifdef WIN32
1142: sprintf(name, "%s\\%s", ctxt->input->directory, filename);
1143: #else
1144: sprintf(name, "%s/%s", ctxt->input->directory, filename);
1145: #endif
1146: buf = xmlParserInputBufferCreateFilename(name,
1147: XML_CHAR_ENCODING_NONE);
1.106 daniel 1148: if (buf != NULL)
1.142 daniel 1149: directory = xmlParserGetDirectory(name);
1.106 daniel 1150: }
1151: if ((buf == NULL) && (ctxt->directory != NULL)) {
1152: #ifdef WIN32
1153: sprintf(name, "%s\\%s", ctxt->directory, filename);
1154: #else
1155: sprintf(name, "%s/%s", ctxt->directory, filename);
1156: #endif
1157: buf = xmlParserInputBufferCreateFilename(name,
1158: XML_CHAR_ENCODING_NONE);
1159: if (buf != NULL)
1.142 daniel 1160: directory = xmlParserGetDirectory(name);
1.106 daniel 1161: }
1162: if (buf == NULL)
1.94 daniel 1163: return(NULL);
1164: }
1165: if (directory == NULL)
1166: directory = xmlParserGetDirectory(filename);
1.76 daniel 1167:
1.96 daniel 1168: inputStream = xmlNewInputStream(ctxt);
1.76 daniel 1169: if (inputStream == NULL) {
1.119 daniel 1170: if (directory != NULL) xmlFree((char *) directory);
1.76 daniel 1171: return(NULL);
1172: }
1173:
1.119 daniel 1174: inputStream->filename = xmlMemStrdup(filename);
1.94 daniel 1175: inputStream->directory = directory;
1.91 daniel 1176: inputStream->buf = buf;
1.76 daniel 1177:
1.91 daniel 1178: inputStream->base = inputStream->buf->buffer->content;
1179: inputStream->cur = inputStream->buf->buffer->content;
1.106 daniel 1180: if ((ctxt->directory == NULL) && (directory != NULL))
1.134 daniel 1181: ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
1.76 daniel 1182: return(inputStream);
1183: }
1184:
1.77 daniel 1185: /************************************************************************
1186: * *
1.97 daniel 1187: * Commodity functions to handle parser contexts *
1188: * *
1189: ************************************************************************/
1190:
1191: /**
1192: * xmlInitParserCtxt:
1193: * @ctxt: an XML parser context
1194: *
1195: * Initialize a parser context
1196: */
1197:
1198: void
1199: xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1200: {
1201: xmlSAXHandler *sax;
1202:
1.168 daniel 1203: xmlDefaultSAXHandlerInit();
1204:
1.119 daniel 1205: sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
1.97 daniel 1206: if (sax == NULL) {
1207: fprintf(stderr, "xmlInitParserCtxt: out of memory\n");
1208: }
1.180 daniel 1209: memset(sax, 0, sizeof(xmlSAXHandler));
1.97 daniel 1210:
1211: /* Allocate the Input stack */
1.119 daniel 1212: ctxt->inputTab = (xmlParserInputPtr *) xmlMalloc(5 * sizeof(xmlParserInputPtr));
1.97 daniel 1213: ctxt->inputNr = 0;
1214: ctxt->inputMax = 5;
1215: ctxt->input = NULL;
1.165 daniel 1216:
1.97 daniel 1217: ctxt->version = NULL;
1218: ctxt->encoding = NULL;
1219: ctxt->standalone = -1;
1.98 daniel 1220: ctxt->hasExternalSubset = 0;
1221: ctxt->hasPErefs = 0;
1.97 daniel 1222: ctxt->html = 0;
1.98 daniel 1223: ctxt->external = 0;
1.140 daniel 1224: ctxt->instate = XML_PARSER_START;
1.97 daniel 1225: ctxt->token = 0;
1.106 daniel 1226: ctxt->directory = NULL;
1.97 daniel 1227:
1228: /* Allocate the Node stack */
1.119 daniel 1229: ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
1.97 daniel 1230: ctxt->nodeNr = 0;
1231: ctxt->nodeMax = 10;
1232: ctxt->node = NULL;
1233:
1.140 daniel 1234: /* Allocate the Name stack */
1235: ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
1236: ctxt->nameNr = 0;
1237: ctxt->nameMax = 10;
1238: ctxt->name = NULL;
1239:
1.176 daniel 1240: /* Allocate the space stack */
1241: ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
1242: ctxt->spaceNr = 1;
1243: ctxt->spaceMax = 10;
1244: ctxt->spaceTab[0] = -1;
1245: ctxt->space = &ctxt->spaceTab[0];
1246:
1.160 daniel 1247: if (sax == NULL) {
1248: ctxt->sax = &xmlDefaultSAXHandler;
1249: } else {
1.97 daniel 1250: ctxt->sax = sax;
1251: memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
1252: }
1253: ctxt->userData = ctxt;
1254: ctxt->myDoc = NULL;
1255: ctxt->wellFormed = 1;
1.99 daniel 1256: ctxt->valid = 1;
1.100 daniel 1257: ctxt->validate = xmlDoValidityCheckingDefaultValue;
1.179 daniel 1258: ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
1.100 daniel 1259: ctxt->vctxt.userData = ctxt;
1.149 daniel 1260: if (ctxt->validate) {
1261: ctxt->vctxt.error = xmlParserValidityError;
1.160 daniel 1262: if (xmlGetWarningsDefaultValue == 0)
1263: ctxt->vctxt.warning = NULL;
1264: else
1265: ctxt->vctxt.warning = xmlParserValidityWarning;
1.180 daniel 1266: /* Allocate the Node stack */
1267: ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
1268: ctxt->vctxt.nodeNr = 0;
1269: ctxt->vctxt.nodeMax = 4;
1270: ctxt->vctxt.node = NULL;
1.149 daniel 1271: } else {
1272: ctxt->vctxt.error = NULL;
1273: ctxt->vctxt.warning = NULL;
1274: }
1.97 daniel 1275: ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1276: ctxt->record_info = 0;
1.135 daniel 1277: ctxt->nbChars = 0;
1.140 daniel 1278: ctxt->checkIndex = 0;
1.180 daniel 1279: ctxt->inSubset = 0;
1.140 daniel 1280: ctxt->errNo = XML_ERR_OK;
1.185 daniel 1281: ctxt->depth = 0;
1.198 daniel 1282: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.97 daniel 1283: xmlInitNodeInfoSeq(&ctxt->node_seq);
1284: }
1285:
1286: /**
1287: * xmlFreeParserCtxt:
1288: * @ctxt: an XML parser context
1289: *
1290: * Free all the memory used by a parser context. However the parsed
1291: * document in ctxt->myDoc is not freed.
1292: */
1293:
1294: void
1295: xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
1296: {
1297: xmlParserInputPtr input;
1.140 daniel 1298: xmlChar *oldname;
1.97 daniel 1299:
1300: if (ctxt == NULL) return;
1301:
1302: while ((input = inputPop(ctxt)) != NULL) {
1303: xmlFreeInputStream(input);
1304: }
1.140 daniel 1305: while ((oldname = namePop(ctxt)) != NULL) {
1306: xmlFree(oldname);
1307: }
1.176 daniel 1308: if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
1.140 daniel 1309: if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
1.119 daniel 1310: if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
1311: if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
1312: if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
1313: if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
1.165 daniel 1314: if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
1315: if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
1316: if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
1.180 daniel 1317: if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
1.97 daniel 1318: if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
1.119 daniel 1319: xmlFree(ctxt->sax);
1320: if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
1321: xmlFree(ctxt);
1.97 daniel 1322: }
1323:
1324: /**
1325: * xmlNewParserCtxt:
1326: *
1327: * Allocate and initialize a new parser context.
1328: *
1329: * Returns the xmlParserCtxtPtr or NULL
1330: */
1331:
1332: xmlParserCtxtPtr
1333: xmlNewParserCtxt()
1334: {
1335: xmlParserCtxtPtr ctxt;
1336:
1.119 daniel 1337: ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
1.97 daniel 1338: if (ctxt == NULL) {
1339: fprintf(stderr, "xmlNewParserCtxt : cannot allocate context\n");
1340: perror("malloc");
1341: return(NULL);
1342: }
1.165 daniel 1343: memset(ctxt, 0, sizeof(xmlParserCtxt));
1.97 daniel 1344: xmlInitParserCtxt(ctxt);
1345: return(ctxt);
1346: }
1347:
1348: /**
1349: * xmlClearParserCtxt:
1350: * @ctxt: an XML parser context
1351: *
1352: * Clear (release owned resources) and reinitialize a parser context
1353: */
1354:
1355: void
1356: xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1357: {
1358: xmlClearNodeInfoSeq(&ctxt->node_seq);
1359: xmlInitParserCtxt(ctxt);
1360: }
1361:
1362: /************************************************************************
1363: * *
1.77 daniel 1364: * Commodity functions to handle entities *
1365: * *
1366: ************************************************************************/
1367:
1.174 daniel 1368: /**
1369: * xmlCheckEntity:
1370: * @ctxt: an XML parser context
1371: * @content: the entity content string
1372: *
1373: * Parse an entity content and checks the WF constraints
1374: *
1375: */
1376:
1377: void
1378: xmlCheckEntity(xmlParserCtxtPtr ctxt, const xmlChar *content) {
1379: }
1.97 daniel 1380:
1381: /**
1382: * xmlParseCharRef:
1383: * @ctxt: an XML parser context
1384: *
1385: * parse Reference declarations
1386: *
1387: * [66] CharRef ::= '&#' [0-9]+ ';' |
1388: * '&#x' [0-9a-fA-F]+ ';'
1389: *
1.98 daniel 1390: * [ WFC: Legal Character ]
1391: * Characters referred to using character references must match the
1392: * production for Char.
1393: *
1.135 daniel 1394: * Returns the value parsed (as an int), 0 in case of error
1.77 daniel 1395: */
1.97 daniel 1396: int
1397: xmlParseCharRef(xmlParserCtxtPtr ctxt) {
1398: int val = 0;
1399:
1.111 daniel 1400: if (ctxt->token != 0) {
1401: val = ctxt->token;
1402: ctxt->token = 0;
1403: return(val);
1404: }
1.152 daniel 1405: if ((RAW == '&') && (NXT(1) == '#') &&
1.97 daniel 1406: (NXT(2) == 'x')) {
1407: SKIP(3);
1.152 daniel 1408: while (RAW != ';') {
1409: if ((RAW >= '0') && (RAW <= '9'))
1.97 daniel 1410: val = val * 16 + (CUR - '0');
1.152 daniel 1411: else if ((RAW >= 'a') && (RAW <= 'f'))
1.97 daniel 1412: val = val * 16 + (CUR - 'a') + 10;
1.152 daniel 1413: else if ((RAW >= 'A') && (RAW <= 'F'))
1.97 daniel 1414: val = val * 16 + (CUR - 'A') + 10;
1415: else {
1.123 daniel 1416: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1.97 daniel 1417: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1418: ctxt->sax->error(ctxt->userData,
1419: "xmlParseCharRef: invalid hexadecimal value\n");
1420: ctxt->wellFormed = 0;
1.180 daniel 1421: ctxt->disableSAX = 1;
1.97 daniel 1422: val = 0;
1423: break;
1424: }
1425: NEXT;
1426: }
1.164 daniel 1427: if (RAW == ';') {
1428: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1429: ctxt->nbChars ++;
1430: ctxt->input->cur++;
1431: }
1.152 daniel 1432: } else if ((RAW == '&') && (NXT(1) == '#')) {
1.97 daniel 1433: SKIP(2);
1.152 daniel 1434: while (RAW != ';') {
1435: if ((RAW >= '0') && (RAW <= '9'))
1.97 daniel 1436: val = val * 10 + (CUR - '0');
1437: else {
1.123 daniel 1438: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1.97 daniel 1439: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1440: ctxt->sax->error(ctxt->userData,
1441: "xmlParseCharRef: invalid decimal value\n");
1442: ctxt->wellFormed = 0;
1.180 daniel 1443: ctxt->disableSAX = 1;
1.97 daniel 1444: val = 0;
1445: break;
1446: }
1447: NEXT;
1448: }
1.164 daniel 1449: if (RAW == ';') {
1450: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1451: ctxt->nbChars ++;
1452: ctxt->input->cur++;
1453: }
1.97 daniel 1454: } else {
1.123 daniel 1455: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1.97 daniel 1456: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 1457: ctxt->sax->error(ctxt->userData,
1458: "xmlParseCharRef: invalid value\n");
1.97 daniel 1459: ctxt->wellFormed = 0;
1.180 daniel 1460: ctxt->disableSAX = 1;
1.97 daniel 1461: }
1.98 daniel 1462:
1.97 daniel 1463: /*
1.98 daniel 1464: * [ WFC: Legal Character ]
1465: * Characters referred to using character references must match the
1466: * production for Char.
1.97 daniel 1467: */
1468: if (IS_CHAR(val)) {
1469: return(val);
1470: } else {
1.123 daniel 1471: ctxt->errNo = XML_ERR_INVALID_CHAR;
1.97 daniel 1472: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.123 daniel 1473: ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
1.97 daniel 1474: val);
1475: ctxt->wellFormed = 0;
1.180 daniel 1476: ctxt->disableSAX = 1;
1.97 daniel 1477: }
1478: return(0);
1.77 daniel 1479: }
1480:
1.96 daniel 1481: /**
1.135 daniel 1482: * xmlParseStringCharRef:
1483: * @ctxt: an XML parser context
1484: * @str: a pointer to an index in the string
1485: *
1486: * parse Reference declarations, variant parsing from a string rather
1487: * than an an input flow.
1488: *
1489: * [66] CharRef ::= '&#' [0-9]+ ';' |
1490: * '&#x' [0-9a-fA-F]+ ';'
1491: *
1492: * [ WFC: Legal Character ]
1493: * Characters referred to using character references must match the
1494: * production for Char.
1495: *
1496: * Returns the value parsed (as an int), 0 in case of error, str will be
1497: * updated to the current value of the index
1498: */
1499: int
1500: xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1501: const xmlChar *ptr;
1502: xmlChar cur;
1503: int val = 0;
1504:
1505: if ((str == NULL) || (*str == NULL)) return(0);
1506: ptr = *str;
1507: cur = *ptr;
1.137 daniel 1508: if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1.135 daniel 1509: ptr += 3;
1510: cur = *ptr;
1511: while (cur != ';') {
1512: if ((cur >= '0') && (cur <= '9'))
1513: val = val * 16 + (cur - '0');
1514: else if ((cur >= 'a') && (cur <= 'f'))
1515: val = val * 16 + (cur - 'a') + 10;
1516: else if ((cur >= 'A') && (cur <= 'F'))
1517: val = val * 16 + (cur - 'A') + 10;
1518: else {
1519: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1520: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1521: ctxt->sax->error(ctxt->userData,
1.198 daniel 1522: "xmlParseStringCharRef: invalid hexadecimal value\n");
1.135 daniel 1523: ctxt->wellFormed = 0;
1.180 daniel 1524: ctxt->disableSAX = 1;
1.135 daniel 1525: val = 0;
1526: break;
1527: }
1528: ptr++;
1529: cur = *ptr;
1530: }
1531: if (cur == ';')
1532: ptr++;
1.145 daniel 1533: } else if ((cur == '&') && (ptr[1] == '#')){
1.135 daniel 1534: ptr += 2;
1535: cur = *ptr;
1536: while (cur != ';') {
1537: if ((cur >= '0') && (cur <= '9'))
1538: val = val * 10 + (cur - '0');
1539: else {
1540: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1541: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1542: ctxt->sax->error(ctxt->userData,
1.198 daniel 1543: "xmlParseStringCharRef: invalid decimal value\n");
1.135 daniel 1544: ctxt->wellFormed = 0;
1.180 daniel 1545: ctxt->disableSAX = 1;
1.135 daniel 1546: val = 0;
1547: break;
1548: }
1549: ptr++;
1550: cur = *ptr;
1551: }
1552: if (cur == ';')
1553: ptr++;
1554: } else {
1555: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1556: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1557: ctxt->sax->error(ctxt->userData,
1558: "xmlParseCharRef: invalid value\n");
1559: ctxt->wellFormed = 0;
1.180 daniel 1560: ctxt->disableSAX = 1;
1.135 daniel 1561: return(0);
1562: }
1563: *str = ptr;
1564:
1565: /*
1566: * [ WFC: Legal Character ]
1567: * Characters referred to using character references must match the
1568: * production for Char.
1569: */
1570: if (IS_CHAR(val)) {
1571: return(val);
1572: } else {
1573: ctxt->errNo = XML_ERR_INVALID_CHAR;
1574: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1575: ctxt->sax->error(ctxt->userData,
1576: "CharRef: invalid xmlChar value %d\n", val);
1577: ctxt->wellFormed = 0;
1.180 daniel 1578: ctxt->disableSAX = 1;
1.135 daniel 1579: }
1580: return(0);
1581: }
1582:
1583: /**
1.96 daniel 1584: * xmlParserHandleReference:
1585: * @ctxt: the parser context
1586: *
1.97 daniel 1587: * [67] Reference ::= EntityRef | CharRef
1588: *
1.96 daniel 1589: * [68] EntityRef ::= '&' Name ';'
1590: *
1.98 daniel 1591: * [ WFC: Entity Declared ]
1592: * the Name given in the entity reference must match that in an entity
1593: * declaration, except that well-formed documents need not declare any
1594: * of the following entities: amp, lt, gt, apos, quot.
1595: *
1596: * [ WFC: Parsed Entity ]
1597: * An entity reference must not contain the name of an unparsed entity
1598: *
1.97 daniel 1599: * [66] CharRef ::= '&#' [0-9]+ ';' |
1600: * '&#x' [0-9a-fA-F]+ ';'
1601: *
1.96 daniel 1602: * A PEReference may have been detectect in the current input stream
1603: * the handling is done accordingly to
1604: * http://www.w3.org/TR/REC-xml#entproc
1605: */
1606: void
1607: xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
1.97 daniel 1608: xmlParserInputPtr input;
1.123 daniel 1609: xmlChar *name;
1.97 daniel 1610: xmlEntityPtr ent = NULL;
1611:
1.126 daniel 1612: if (ctxt->token != 0) {
1613: return;
1614: }
1.152 daniel 1615: if (RAW != '&') return;
1.97 daniel 1616: GROW;
1.152 daniel 1617: if ((RAW == '&') && (NXT(1) == '#')) {
1.97 daniel 1618: switch(ctxt->instate) {
1.140 daniel 1619: case XML_PARSER_ENTITY_DECL:
1620: case XML_PARSER_PI:
1.109 daniel 1621: case XML_PARSER_CDATA_SECTION:
1.140 daniel 1622: case XML_PARSER_COMMENT:
1.168 daniel 1623: case XML_PARSER_SYSTEM_LITERAL:
1.140 daniel 1624: /* we just ignore it there */
1625: return;
1626: case XML_PARSER_START_TAG:
1.109 daniel 1627: return;
1.140 daniel 1628: case XML_PARSER_END_TAG:
1.97 daniel 1629: return;
1630: case XML_PARSER_EOF:
1.123 daniel 1631: ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
1.97 daniel 1632: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1633: ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
1634: ctxt->wellFormed = 0;
1.180 daniel 1635: ctxt->disableSAX = 1;
1.97 daniel 1636: return;
1637: case XML_PARSER_PROLOG:
1.140 daniel 1638: case XML_PARSER_START:
1639: case XML_PARSER_MISC:
1.123 daniel 1640: ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
1.97 daniel 1641: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1642: ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
1643: ctxt->wellFormed = 0;
1.180 daniel 1644: ctxt->disableSAX = 1;
1.97 daniel 1645: return;
1646: case XML_PARSER_EPILOG:
1.123 daniel 1647: ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
1.97 daniel 1648: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1649: ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
1650: ctxt->wellFormed = 0;
1.180 daniel 1651: ctxt->disableSAX = 1;
1.97 daniel 1652: return;
1653: case XML_PARSER_DTD:
1.123 daniel 1654: ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
1.97 daniel 1655: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1656: ctxt->sax->error(ctxt->userData,
1657: "CharRef are forbiden in DTDs!\n");
1658: ctxt->wellFormed = 0;
1.180 daniel 1659: ctxt->disableSAX = 1;
1.97 daniel 1660: return;
1661: case XML_PARSER_ENTITY_VALUE:
1662: /*
1663: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1664: * substitution here since we need the literal
1.97 daniel 1665: * entity value to be able to save the internal
1666: * subset of the document.
1667: * This will be handled by xmlDecodeEntities
1668: */
1669: return;
1670: case XML_PARSER_CONTENT:
1671: case XML_PARSER_ATTRIBUTE_VALUE:
1672: ctxt->token = xmlParseCharRef(ctxt);
1673: return;
1674: }
1675: return;
1676: }
1677:
1678: switch(ctxt->instate) {
1.109 daniel 1679: case XML_PARSER_CDATA_SECTION:
1680: return;
1.140 daniel 1681: case XML_PARSER_PI:
1.97 daniel 1682: case XML_PARSER_COMMENT:
1.168 daniel 1683: case XML_PARSER_SYSTEM_LITERAL:
1684: case XML_PARSER_CONTENT:
1.97 daniel 1685: return;
1.140 daniel 1686: case XML_PARSER_START_TAG:
1687: return;
1688: case XML_PARSER_END_TAG:
1689: return;
1.97 daniel 1690: case XML_PARSER_EOF:
1.123 daniel 1691: ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
1.97 daniel 1692: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1693: ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
1694: ctxt->wellFormed = 0;
1.180 daniel 1695: ctxt->disableSAX = 1;
1.97 daniel 1696: return;
1697: case XML_PARSER_PROLOG:
1.140 daniel 1698: case XML_PARSER_START:
1699: case XML_PARSER_MISC:
1.123 daniel 1700: ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
1.97 daniel 1701: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1702: ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
1703: ctxt->wellFormed = 0;
1.180 daniel 1704: ctxt->disableSAX = 1;
1.97 daniel 1705: return;
1706: case XML_PARSER_EPILOG:
1.123 daniel 1707: ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
1.97 daniel 1708: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1709: ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
1710: ctxt->wellFormed = 0;
1.180 daniel 1711: ctxt->disableSAX = 1;
1.97 daniel 1712: return;
1713: case XML_PARSER_ENTITY_VALUE:
1714: /*
1715: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1716: * substitution here since we need the literal
1.97 daniel 1717: * entity value to be able to save the internal
1718: * subset of the document.
1719: * This will be handled by xmlDecodeEntities
1720: */
1721: return;
1722: case XML_PARSER_ATTRIBUTE_VALUE:
1723: /*
1724: * NOTE: in the case of attributes values, we don't do the
1725: * substitution here unless we are in a mode where
1726: * the parser is explicitely asked to substitute
1727: * entities. The SAX callback is called with values
1728: * without entity substitution.
1729: * This will then be handled by xmlDecodeEntities
1730: */
1.113 daniel 1731: return;
1.97 daniel 1732: case XML_PARSER_ENTITY_DECL:
1733: /*
1734: * we just ignore it there
1735: * the substitution will be done once the entity is referenced
1736: */
1737: return;
1738: case XML_PARSER_DTD:
1.123 daniel 1739: ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
1.97 daniel 1740: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1741: ctxt->sax->error(ctxt->userData,
1742: "Entity references are forbiden in DTDs!\n");
1743: ctxt->wellFormed = 0;
1.180 daniel 1744: ctxt->disableSAX = 1;
1.97 daniel 1745: return;
1746: }
1747:
1748: NEXT;
1749: name = xmlScanName(ctxt);
1750: if (name == NULL) {
1.123 daniel 1751: ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
1.97 daniel 1752: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1753: ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
1754: ctxt->wellFormed = 0;
1.180 daniel 1755: ctxt->disableSAX = 1;
1.97 daniel 1756: ctxt->token = '&';
1757: return;
1758: }
1759: if (NXT(xmlStrlen(name)) != ';') {
1.123 daniel 1760: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.97 daniel 1761: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1762: ctxt->sax->error(ctxt->userData,
1763: "Entity reference: ';' expected\n");
1764: ctxt->wellFormed = 0;
1.180 daniel 1765: ctxt->disableSAX = 1;
1.97 daniel 1766: ctxt->token = '&';
1.119 daniel 1767: xmlFree(name);
1.97 daniel 1768: return;
1769: }
1770: SKIP(xmlStrlen(name) + 1);
1771: if (ctxt->sax != NULL) {
1772: if (ctxt->sax->getEntity != NULL)
1773: ent = ctxt->sax->getEntity(ctxt->userData, name);
1774: }
1.98 daniel 1775:
1776: /*
1777: * [ WFC: Entity Declared ]
1778: * the Name given in the entity reference must match that in an entity
1779: * declaration, except that well-formed documents need not declare any
1780: * of the following entities: amp, lt, gt, apos, quot.
1781: */
1.97 daniel 1782: if (ent == NULL)
1783: ent = xmlGetPredefinedEntity(name);
1784: if (ent == NULL) {
1.123 daniel 1785: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.97 daniel 1786: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1787: ctxt->sax->error(ctxt->userData,
1.98 daniel 1788: "Entity reference: entity %s not declared\n",
1789: name);
1.97 daniel 1790: ctxt->wellFormed = 0;
1.180 daniel 1791: ctxt->disableSAX = 1;
1.119 daniel 1792: xmlFree(name);
1.97 daniel 1793: return;
1794: }
1.98 daniel 1795:
1796: /*
1797: * [ WFC: Parsed Entity ]
1798: * An entity reference must not contain the name of an unparsed entity
1799: */
1.159 daniel 1800: if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.123 daniel 1801: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 1802: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1803: ctxt->sax->error(ctxt->userData,
1804: "Entity reference to unparsed entity %s\n", name);
1805: ctxt->wellFormed = 0;
1.180 daniel 1806: ctxt->disableSAX = 1;
1.98 daniel 1807: }
1808:
1.159 daniel 1809: if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
1.97 daniel 1810: ctxt->token = ent->content[0];
1.119 daniel 1811: xmlFree(name);
1.97 daniel 1812: return;
1813: }
1814: input = xmlNewEntityInputStream(ctxt, ent);
1815: xmlPushInput(ctxt, input);
1.119 daniel 1816: xmlFree(name);
1.96 daniel 1817: return;
1818: }
1819:
1820: /**
1821: * xmlParserHandlePEReference:
1822: * @ctxt: the parser context
1823: *
1824: * [69] PEReference ::= '%' Name ';'
1825: *
1.98 daniel 1826: * [ WFC: No Recursion ]
1827: * TODO A parsed entity must not contain a recursive
1828: * reference to itself, either directly or indirectly.
1829: *
1830: * [ WFC: Entity Declared ]
1831: * In a document without any DTD, a document with only an internal DTD
1832: * subset which contains no parameter entity references, or a document
1833: * with "standalone='yes'", ... ... The declaration of a parameter
1834: * entity must precede any reference to it...
1835: *
1836: * [ VC: Entity Declared ]
1837: * In a document with an external subset or external parameter entities
1838: * with "standalone='no'", ... ... The declaration of a parameter entity
1839: * must precede any reference to it...
1840: *
1841: * [ WFC: In DTD ]
1842: * Parameter-entity references may only appear in the DTD.
1843: * NOTE: misleading but this is handled.
1844: *
1845: * A PEReference may have been detected in the current input stream
1.96 daniel 1846: * the handling is done accordingly to
1847: * http://www.w3.org/TR/REC-xml#entproc
1848: * i.e.
1849: * - Included in literal in entity values
1850: * - Included as Parameter Entity reference within DTDs
1851: */
1852: void
1853: xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 1854: xmlChar *name;
1.96 daniel 1855: xmlEntityPtr entity = NULL;
1856: xmlParserInputPtr input;
1857:
1.126 daniel 1858: if (ctxt->token != 0) {
1859: return;
1860: }
1.152 daniel 1861: if (RAW != '%') return;
1.96 daniel 1862: switch(ctxt->instate) {
1.109 daniel 1863: case XML_PARSER_CDATA_SECTION:
1864: return;
1.97 daniel 1865: case XML_PARSER_COMMENT:
1866: return;
1.140 daniel 1867: case XML_PARSER_START_TAG:
1868: return;
1869: case XML_PARSER_END_TAG:
1870: return;
1.96 daniel 1871: case XML_PARSER_EOF:
1.123 daniel 1872: ctxt->errNo = XML_ERR_PEREF_AT_EOF;
1.96 daniel 1873: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1874: ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
1875: ctxt->wellFormed = 0;
1.180 daniel 1876: ctxt->disableSAX = 1;
1.96 daniel 1877: return;
1878: case XML_PARSER_PROLOG:
1.140 daniel 1879: case XML_PARSER_START:
1880: case XML_PARSER_MISC:
1.123 daniel 1881: ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
1.96 daniel 1882: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1883: ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
1884: ctxt->wellFormed = 0;
1.180 daniel 1885: ctxt->disableSAX = 1;
1.96 daniel 1886: return;
1.97 daniel 1887: case XML_PARSER_ENTITY_DECL:
1.96 daniel 1888: case XML_PARSER_CONTENT:
1889: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 1890: case XML_PARSER_PI:
1.168 daniel 1891: case XML_PARSER_SYSTEM_LITERAL:
1.96 daniel 1892: /* we just ignore it there */
1893: return;
1894: case XML_PARSER_EPILOG:
1.123 daniel 1895: ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
1.96 daniel 1896: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.97 daniel 1897: ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
1.96 daniel 1898: ctxt->wellFormed = 0;
1.180 daniel 1899: ctxt->disableSAX = 1;
1.96 daniel 1900: return;
1.97 daniel 1901: case XML_PARSER_ENTITY_VALUE:
1902: /*
1903: * NOTE: in the case of entity values, we don't do the
1.127 daniel 1904: * substitution here since we need the literal
1.97 daniel 1905: * entity value to be able to save the internal
1906: * subset of the document.
1907: * This will be handled by xmlDecodeEntities
1908: */
1909: return;
1.96 daniel 1910: case XML_PARSER_DTD:
1.98 daniel 1911: /*
1912: * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1913: * In the internal DTD subset, parameter-entity references
1914: * can occur only where markup declarations can occur, not
1915: * within markup declarations.
1916: * In that case this is handled in xmlParseMarkupDecl
1917: */
1918: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1919: return;
1.96 daniel 1920: }
1921:
1922: NEXT;
1923: name = xmlParseName(ctxt);
1924: if (name == NULL) {
1.123 daniel 1925: ctxt->errNo = XML_ERR_PEREF_NO_NAME;
1.96 daniel 1926: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1927: ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
1928: ctxt->wellFormed = 0;
1.180 daniel 1929: ctxt->disableSAX = 1;
1.96 daniel 1930: } else {
1.152 daniel 1931: if (RAW == ';') {
1.96 daniel 1932: NEXT;
1.98 daniel 1933: if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1934: entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.96 daniel 1935: if (entity == NULL) {
1.98 daniel 1936:
1937: /*
1938: * [ WFC: Entity Declared ]
1939: * In a document without any DTD, a document with only an
1940: * internal DTD subset which contains no parameter entity
1941: * references, or a document with "standalone='yes'", ...
1942: * ... The declaration of a parameter entity must precede
1943: * any reference to it...
1944: */
1945: if ((ctxt->standalone == 1) ||
1946: ((ctxt->hasExternalSubset == 0) &&
1947: (ctxt->hasPErefs == 0))) {
1948: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1949: ctxt->sax->error(ctxt->userData,
1950: "PEReference: %%%s; not found\n", name);
1951: ctxt->wellFormed = 0;
1.180 daniel 1952: ctxt->disableSAX = 1;
1.98 daniel 1953: } else {
1954: /*
1955: * [ VC: Entity Declared ]
1956: * In a document with an external subset or external
1957: * parameter entities with "standalone='no'", ...
1958: * ... The declaration of a parameter entity must precede
1959: * any reference to it...
1960: */
1.212 veillard 1961: if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
1962: ctxt->vctxt.error(ctxt->vctxt.userData,
1963: "PEReference: %%%s; not found\n", name);
1964: } else
1.98 daniel 1965: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1966: ctxt->sax->warning(ctxt->userData,
1967: "PEReference: %%%s; not found\n", name);
1968: ctxt->valid = 0;
1969: }
1.96 daniel 1970: } else {
1.159 daniel 1971: if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1972: (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
1.96 daniel 1973: /*
1.156 daniel 1974: * TODO !!! handle the extra spaces added before and after
1.96 daniel 1975: * c.f. http://www.w3.org/TR/REC-xml#as-PE
1976: */
1977: input = xmlNewEntityInputStream(ctxt, entity);
1978: xmlPushInput(ctxt, input);
1.164 daniel 1979: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
1980: (RAW == '<') && (NXT(1) == '?') &&
1981: (NXT(2) == 'x') && (NXT(3) == 'm') &&
1982: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 1983: xmlParseTextDecl(ctxt);
1.164 daniel 1984: }
1985: if (ctxt->token == 0)
1986: ctxt->token = ' ';
1.96 daniel 1987: } else {
1988: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1989: ctxt->sax->error(ctxt->userData,
1990: "xmlHandlePEReference: %s is not a parameter entity\n",
1991: name);
1992: ctxt->wellFormed = 0;
1.180 daniel 1993: ctxt->disableSAX = 1;
1.96 daniel 1994: }
1995: }
1996: } else {
1.123 daniel 1997: ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
1.96 daniel 1998: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1999: ctxt->sax->error(ctxt->userData,
2000: "xmlHandlePEReference: expecting ';'\n");
2001: ctxt->wellFormed = 0;
1.180 daniel 2002: ctxt->disableSAX = 1;
1.96 daniel 2003: }
1.119 daniel 2004: xmlFree(name);
1.97 daniel 2005: }
2006: }
2007:
/*
 * growBuffer:
 * @buffer:  name of a local xmlChar * variable; a companion local
 *           variable named <buffer>_size must hold its allocated size
 *
 * Macro used to grow the current buffer: doubles <buffer>_size and
 * reallocates @buffer to the new size.
 *
 * On allocation failure the enclosing function returns NULL, so the
 * macro can only be used inside functions returning a pointer. The
 * reallocation goes through a temporary so that the old block can be
 * released before returning instead of being leaked.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *buffer##_grown;                                            \
    buffer##_size *= 2;                                                 \
    buffer##_grown = (xmlChar *)                                        \
        xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));            \
    if (buffer##_grown == NULL) {                                       \
        perror("realloc failed");                                       \
        xmlFree(buffer);                                                \
        return(NULL);                                                   \
    }                                                                   \
    buffer = buffer##_grown;                                            \
}
1.77 daniel 2020:
2021: /**
2022: * xmlDecodeEntities:
2023: * @ctxt: the parser context
2024: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2025: * @len: the len to decode (in bytes !), -1 for no size limit
1.123 daniel 2026: * @end: an end marker xmlChar, 0 if none
2027: * @end2: an end marker xmlChar, 0 if none
2028: * @end3: an end marker xmlChar, 0 if none
1.77 daniel 2029: *
2030: * [67] Reference ::= EntityRef | CharRef
2031: *
2032: * [69] PEReference ::= '%' Name ';'
2033: *
2034: * Returns A newly allocated string with the substitution done. The caller
2035: * must deallocate it !
2036: */
1.123 daniel 2037: xmlChar *
1.77 daniel 2038: xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
1.123 daniel 2039: xmlChar end, xmlChar end2, xmlChar end3) {
2040: xmlChar *buffer = NULL;
1.202 daniel 2041: unsigned int buffer_size = 0;
2042: unsigned int nbchars = 0;
1.78 daniel 2043:
1.123 daniel 2044: xmlChar *current = NULL;
1.77 daniel 2045: xmlEntityPtr ent;
2046: unsigned int max = (unsigned int) len;
1.161 daniel 2047: int c,l;
1.77 daniel 2048:
1.185 daniel 2049: if (ctxt->depth > 40) {
2050: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2051: ctxt->sax->error(ctxt->userData,
2052: "Detected entity reference loop\n");
2053: ctxt->wellFormed = 0;
2054: ctxt->disableSAX = 1;
2055: ctxt->errNo = XML_ERR_ENTITY_LOOP;
2056: return(NULL);
2057: }
2058:
1.77 daniel 2059: /*
2060: * allocate a translation buffer.
2061: */
1.140 daniel 2062: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.123 daniel 2063: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
1.77 daniel 2064: if (buffer == NULL) {
2065: perror("xmlDecodeEntities: malloc failed");
2066: return(NULL);
2067: }
2068:
1.78 daniel 2069: /*
2070: * Ok loop until we reach one of the ending char or a size limit.
2071: */
1.161 daniel 2072: c = CUR_CHAR(l);
2073: while ((nbchars < max) && (c != end) &&
2074: (c != end2) && (c != end3)) {
1.77 daniel 2075:
1.161 daniel 2076: if (c == 0) break;
2077: if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
1.98 daniel 2078: int val = xmlParseCharRef(ctxt);
1.161 daniel 2079: COPY_BUF(0,buffer,nbchars,val);
2080: NEXTL(l);
2081: } else if ((c == '&') && (ctxt->token != '&') &&
2082: (what & XML_SUBSTITUTE_REF)) {
1.98 daniel 2083: ent = xmlParseEntityRef(ctxt);
2084: if ((ent != NULL) &&
2085: (ctxt->replaceEntities != 0)) {
2086: current = ent->content;
2087: while (*current != 0) {
1.161 daniel 2088: buffer[nbchars++] = *current++;
2089: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.98 daniel 2090: growBuffer(buffer);
1.77 daniel 2091: }
2092: }
1.98 daniel 2093: } else if (ent != NULL) {
1.123 daniel 2094: const xmlChar *cur = ent->name;
1.98 daniel 2095:
1.161 daniel 2096: buffer[nbchars++] = '&';
2097: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.98 daniel 2098: growBuffer(buffer);
2099: }
1.161 daniel 2100: while (*cur != 0) {
2101: buffer[nbchars++] = *cur++;
2102: }
2103: buffer[nbchars++] = ';';
1.77 daniel 2104: }
1.161 daniel 2105: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1.97 daniel 2106: /*
1.77 daniel 2107: * a PEReference induce to switch the entity flow,
2108: * we break here to flush the current set of chars
2109: * parsed if any. We will be called back later.
1.97 daniel 2110: */
1.91 daniel 2111: if (nbchars != 0) break;
1.77 daniel 2112:
2113: xmlParsePEReference(ctxt);
1.79 daniel 2114:
1.97 daniel 2115: /*
1.79 daniel 2116: * Pop-up of finished entities.
1.97 daniel 2117: */
1.152 daniel 2118: while ((RAW == 0) && (ctxt->inputNr > 1))
1.79 daniel 2119: xmlPopInput(ctxt);
2120:
1.98 daniel 2121: break;
1.77 daniel 2122: } else {
1.161 daniel 2123: COPY_BUF(l,buffer,nbchars,c);
2124: NEXTL(l);
2125: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.86 daniel 2126: growBuffer(buffer);
2127: }
1.77 daniel 2128: }
1.161 daniel 2129: c = CUR_CHAR(l);
1.77 daniel 2130: }
1.161 daniel 2131: buffer[nbchars++] = 0;
1.77 daniel 2132: return(buffer);
2133: }
2134:
1.135 daniel 2135: /**
2136: * xmlStringDecodeEntities:
2137: * @ctxt: the parser context
2138: * @str: the input string
2139: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2140: * @end: an end marker xmlChar, 0 if none
2141: * @end2: an end marker xmlChar, 0 if none
2142: * @end3: an end marker xmlChar, 0 if none
2143: *
2144: * [67] Reference ::= EntityRef | CharRef
2145: *
2146: * [69] PEReference ::= '%' Name ';'
2147: *
2148: * Returns A newly allocated string with the substitution done. The caller
2149: * must deallocate it !
2150: */
2151: xmlChar *
2152: xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2153: xmlChar end, xmlChar end2, xmlChar end3) {
2154: xmlChar *buffer = NULL;
2155: int buffer_size = 0;
2156:
2157: xmlChar *current = NULL;
2158: xmlEntityPtr ent;
1.176 daniel 2159: int c,l;
2160: int nbchars = 0;
1.135 daniel 2161:
1.211 veillard 2162: if (str == NULL)
2163: return(NULL);
2164:
1.185 daniel 2165: if (ctxt->depth > 40) {
2166: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2167: ctxt->sax->error(ctxt->userData,
2168: "Detected entity reference loop\n");
2169: ctxt->wellFormed = 0;
2170: ctxt->disableSAX = 1;
2171: ctxt->errNo = XML_ERR_ENTITY_LOOP;
2172: return(NULL);
2173: }
2174:
1.135 daniel 2175: /*
2176: * allocate a translation buffer.
2177: */
1.140 daniel 2178: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.135 daniel 2179: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2180: if (buffer == NULL) {
2181: perror("xmlDecodeEntities: malloc failed");
2182: return(NULL);
2183: }
2184:
2185: /*
2186: * Ok loop until we reach one of the ending char or a size limit.
2187: */
1.176 daniel 2188: c = CUR_SCHAR(str, l);
2189: while ((c != 0) && (c != end) && (c != end2) && (c != end3)) {
1.135 daniel 2190:
1.176 daniel 2191: if (c == 0) break;
2192: if ((c == '&') && (str[1] == '#')) {
1.135 daniel 2193: int val = xmlParseStringCharRef(ctxt, &str);
1.176 daniel 2194: if (val != 0) {
2195: COPY_BUF(0,buffer,nbchars,val);
2196: }
2197: } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1.135 daniel 2198: ent = xmlParseStringEntityRef(ctxt, &str);
1.185 daniel 2199: if ((ent != NULL) && (ent->content != NULL)) {
2200: xmlChar *rep;
2201:
2202: ctxt->depth++;
2203: rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2204: 0, 0, 0);
2205: ctxt->depth--;
2206: if (rep != NULL) {
2207: current = rep;
2208: while (*current != 0) {
2209: buffer[nbchars++] = *current++;
2210: if (nbchars >
2211: buffer_size - XML_PARSER_BUFFER_SIZE) {
2212: growBuffer(buffer);
2213: }
1.135 daniel 2214: }
1.185 daniel 2215: xmlFree(rep);
1.135 daniel 2216: }
2217: } else if (ent != NULL) {
2218: int i = xmlStrlen(ent->name);
2219: const xmlChar *cur = ent->name;
2220:
1.176 daniel 2221: buffer[nbchars++] = '&';
2222: if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 2223: growBuffer(buffer);
2224: }
2225: for (;i > 0;i--)
1.176 daniel 2226: buffer[nbchars++] = *cur++;
2227: buffer[nbchars++] = ';';
1.135 daniel 2228: }
1.176 daniel 2229: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1.135 daniel 2230: ent = xmlParseStringPEReference(ctxt, &str);
2231: if (ent != NULL) {
1.185 daniel 2232: xmlChar *rep;
2233:
2234: ctxt->depth++;
2235: rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2236: 0, 0, 0);
2237: ctxt->depth--;
2238: if (rep != NULL) {
2239: current = rep;
2240: while (*current != 0) {
2241: buffer[nbchars++] = *current++;
2242: if (nbchars >
2243: buffer_size - XML_PARSER_BUFFER_SIZE) {
2244: growBuffer(buffer);
2245: }
1.135 daniel 2246: }
1.185 daniel 2247: xmlFree(rep);
1.135 daniel 2248: }
2249: }
2250: } else {
1.176 daniel 2251: COPY_BUF(l,buffer,nbchars,c);
2252: str += l;
2253: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 2254: growBuffer(buffer);
2255: }
2256: }
1.176 daniel 2257: c = CUR_SCHAR(str, l);
1.135 daniel 2258: }
1.176 daniel 2259: buffer[nbchars++] = 0;
1.135 daniel 2260: return(buffer);
2261: }
2262:
1.1 veillard 2263:
1.28 daniel 2264: /************************************************************************
2265: * *
1.75 daniel 2266: * Commodity functions to handle encodings *
2267: * *
2268: ************************************************************************/
2269:
/*
 * xmlLangIsLetter:
 * @c:  the character to test
 *
 * Returns 1 if @c is an ASCII letter ([a-z] | [A-Z]), 0 otherwise
 */
static int
xmlLangIsLetter(xmlChar c) {
    return(((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')));
}

/**
 * xmlCheckLanguageID:
 * @lang:  pointer to the string value
 *
 * Checks that the value conforms to the LanguageID production:
 *
 * [33] LanguageID ::= Langcode ('-' Subcode)*
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
 * [38] Subcode ::= ([a-z] | [A-Z])+
 *
 * Returns 1 if correct 0 otherwise (including when @lang is NULL)
 */
int
xmlCheckLanguageID(const xmlChar *lang) {
    const xmlChar *cur = lang;

    if (cur == NULL)
        return(0);

    if (((cur[0] == 'i') || (cur[0] == 'I') ||
         (cur[0] == 'x') || (cur[0] == 'X')) && (cur[1] == '-')) {
        /*
         * IANA code or User code: the '+' in the production means at
         * least one letter must follow the dash.
         */
        cur += 2;
        if (!xmlLangIsLetter(cur[0]))
            return(0);
        while (xmlLangIsLetter(cur[0]))
            cur++;
    } else if (xmlLangIsLetter(cur[0])) {
        /*
         * ISO639: exactly two letters
         */
        cur++;
        if (!xmlLangIsLetter(cur[0]))
            return(0);
        cur++;
    } else
        return(0);

    /*
     * Any number of '-' Subcode, each with at least one letter
     */
    while (cur[0] != 0) {
        if (cur[0] != '-')
            return(0);
        cur++;
        if (!xmlLangIsLetter(cur[0]))
            return(0);
        while (xmlLangIsLetter(cur[0]))
            cur++;
    }
    return(1);
}
2337:
1.75 daniel 2338: /**
2339: * xmlSwitchEncoding:
2340: * @ctxt: the parser context
1.124 daniel 2341: * @enc: the encoding value (number)
1.75 daniel 2342: *
2343: * change the input functions when discovering the character encoding
2344: * of a given entity.
1.193 daniel 2345: *
2346: * Returns 0 in case of success, -1 otherwise
1.75 daniel 2347: */
1.193 daniel 2348: int
1.75 daniel 2349: xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
2350: {
1.156 daniel 2351: xmlCharEncodingHandlerPtr handler;
2352:
1.193 daniel 2353: switch (enc) {
2354: case XML_CHAR_ENCODING_ERROR:
2355: ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
2356: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2357: ctxt->sax->error(ctxt->userData, "encoding unknown\n");
2358: ctxt->wellFormed = 0;
2359: ctxt->disableSAX = 1;
2360: break;
2361: case XML_CHAR_ENCODING_NONE:
2362: /* let's assume it's UTF-8 without the XML decl */
1.198 daniel 2363: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2364: return(0);
2365: case XML_CHAR_ENCODING_UTF8:
2366: /* default encoding, no conversion should be needed */
1.198 daniel 2367: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2368: return(0);
2369: default:
2370: break;
2371: }
1.156 daniel 2372: handler = xmlGetCharEncodingHandler(enc);
1.193 daniel 2373: if (handler == NULL) {
2374: /*
2375: * Default handlers.
2376: */
2377: switch (enc) {
2378: case XML_CHAR_ENCODING_ERROR:
2379: ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
2380: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2381: ctxt->sax->error(ctxt->userData, "encoding unknown\n");
2382: ctxt->wellFormed = 0;
2383: ctxt->disableSAX = 1;
1.198 daniel 2384: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2385: break;
2386: case XML_CHAR_ENCODING_NONE:
2387: /* let's assume it's UTF-8 without the XML decl */
1.198 daniel 2388: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2389: return(0);
2390: case XML_CHAR_ENCODING_UTF8:
1.211 veillard 2391: case XML_CHAR_ENCODING_ASCII:
1.193 daniel 2392: /* default encoding, no conversion should be needed */
1.198 daniel 2393: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2394: return(0);
2395: case XML_CHAR_ENCODING_UTF16LE:
2396: break;
2397: case XML_CHAR_ENCODING_UTF16BE:
2398: break;
2399: case XML_CHAR_ENCODING_UCS4LE:
2400: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2401: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2402: ctxt->sax->error(ctxt->userData,
2403: "char encoding USC4 little endian not supported\n");
2404: break;
2405: case XML_CHAR_ENCODING_UCS4BE:
2406: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2407: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2408: ctxt->sax->error(ctxt->userData,
2409: "char encoding USC4 big endian not supported\n");
2410: break;
2411: case XML_CHAR_ENCODING_EBCDIC:
2412: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2413: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2414: ctxt->sax->error(ctxt->userData,
2415: "char encoding EBCDIC not supported\n");
2416: break;
2417: case XML_CHAR_ENCODING_UCS4_2143:
2418: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2419: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2420: ctxt->sax->error(ctxt->userData,
2421: "char encoding UCS4 2143 not supported\n");
2422: break;
2423: case XML_CHAR_ENCODING_UCS4_3412:
2424: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2425: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2426: ctxt->sax->error(ctxt->userData,
2427: "char encoding UCS4 3412 not supported\n");
2428: break;
2429: case XML_CHAR_ENCODING_UCS2:
2430: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2431: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2432: ctxt->sax->error(ctxt->userData,
2433: "char encoding UCS2 not supported\n");
2434: break;
2435: case XML_CHAR_ENCODING_8859_1:
2436: case XML_CHAR_ENCODING_8859_2:
2437: case XML_CHAR_ENCODING_8859_3:
2438: case XML_CHAR_ENCODING_8859_4:
2439: case XML_CHAR_ENCODING_8859_5:
2440: case XML_CHAR_ENCODING_8859_6:
2441: case XML_CHAR_ENCODING_8859_7:
2442: case XML_CHAR_ENCODING_8859_8:
2443: case XML_CHAR_ENCODING_8859_9:
1.195 daniel 2444: /*
1.203 veillard 2445: * We used to keep the internal content in the
2446: * document encoding however this turns being unmaintainable
2447: * So xmlGetCharEncodingHandler() will return non-null
2448: * values for this now.
1.195 daniel 2449: */
2450: if ((ctxt->inputNr == 1) &&
2451: (ctxt->encoding == NULL) &&
2452: (ctxt->input->encoding != NULL)) {
2453: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
2454: }
1.198 daniel 2455: ctxt->charset = enc;
1.195 daniel 2456: return(0);
1.193 daniel 2457: case XML_CHAR_ENCODING_2022_JP:
2458: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2459: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2460: ctxt->sax->error(ctxt->userData,
2461: "char encoding ISO-2022-JPnot supported\n");
2462: break;
2463: case XML_CHAR_ENCODING_SHIFT_JIS:
2464: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2465: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2466: ctxt->sax->error(ctxt->userData,
2467: "char encoding Shift_JIS not supported\n");
2468: break;
2469: case XML_CHAR_ENCODING_EUC_JP:
2470: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2471: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2472: ctxt->sax->error(ctxt->userData,
2473: "char encoding EUC-JPnot supported\n");
2474: break;
2475: }
2476: }
2477: if (handler == NULL)
2478: return(-1);
1.198 daniel 2479: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2480: return(xmlSwitchToEncoding(ctxt, handler));
2481: }
2482:
2483: /**
2484: * xmlSwitchToEncoding:
2485: * @ctxt: the parser context
2486: * @handler: the encoding handler
2487: *
2488: * change the input functions when discovering the character encoding
2489: * of a given entity.
2490: *
2491: * Returns 0 in case of success, -1 otherwise
2492: */
2493: int
2494: xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
2495: {
1.194 daniel 2496: int nbchars;
2497:
1.156 daniel 2498: if (handler != NULL) {
2499: if (ctxt->input != NULL) {
2500: if (ctxt->input->buf != NULL) {
2501: if (ctxt->input->buf->encoder != NULL) {
1.193 daniel 2502: if (ctxt->input->buf->encoder == handler)
2503: return(0);
1.197 daniel 2504: /*
2505: * Note: this is a bit dangerous, but that's what it
2506: * takes to use nearly compatible signature for different
2507: * encodings.
2508: */
2509: xmlCharEncCloseFunc(ctxt->input->buf->encoder);
2510: ctxt->input->buf->encoder = handler;
2511: return(0);
1.156 daniel 2512: }
2513: ctxt->input->buf->encoder = handler;
2514:
2515: /*
1.194 daniel 2516: * Is there already some content down the pipe to convert ?
1.156 daniel 2517: */
2518: if ((ctxt->input->buf->buffer != NULL) &&
2519: (ctxt->input->buf->buffer->use > 0)) {
2520: int processed;
2521:
2522: /*
2523: * Specific handling of the Byte Order Mark for
2524: * UTF-16
2525: */
1.195 daniel 2526: if ((handler->name != NULL) &&
2527: (!strcmp(handler->name, "UTF-16LE")) &&
1.156 daniel 2528: (ctxt->input->cur[0] == 0xFF) &&
2529: (ctxt->input->cur[1] == 0xFE)) {
1.194 daniel 2530: ctxt->input->cur += 2;
1.156 daniel 2531: }
1.195 daniel 2532: if ((handler->name != NULL) &&
2533: (!strcmp(handler->name, "UTF-16BE")) &&
1.156 daniel 2534: (ctxt->input->cur[0] == 0xFE) &&
2535: (ctxt->input->cur[1] == 0xFF)) {
1.194 daniel 2536: ctxt->input->cur += 2;
1.156 daniel 2537: }
2538:
2539: /*
1.194 daniel 2540: * Shring the current input buffer.
2541: * Move it as the raw buffer and create a new input buffer
1.156 daniel 2542: */
2543: processed = ctxt->input->cur - ctxt->input->base;
1.194 daniel 2544: xmlBufferShrink(ctxt->input->buf->buffer, processed);
2545: ctxt->input->buf->raw = ctxt->input->buf->buffer;
2546: ctxt->input->buf->buffer = xmlBufferCreate();
2547:
2548: /*
1.197 daniel 2549: * convert just enough to get
2550: * '<?xml version="1.0" encoding="xxx"?>'
2551: * parsed with the autodetected encoding
2552: * into the parser reading buffer.
1.194 daniel 2553: */
1.197 daniel 2554: nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
2555: ctxt->input->buf->buffer,
2556: ctxt->input->buf->raw);
1.194 daniel 2557: if (nbchars < 0) {
2558: fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
2559: return(-1);
1.156 daniel 2560: }
1.194 daniel 2561: ctxt->input->base =
2562: ctxt->input->cur = ctxt->input->buf->buffer->content;
1.156 daniel 2563: }
1.193 daniel 2564: return(0);
1.156 daniel 2565: } else {
1.209 veillard 2566: if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
1.156 daniel 2567: /*
2568: * When parsing a static memory array one must know the
2569: * size to be able to convert the buffer.
2570: */
2571: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2572: ctxt->sax->error(ctxt->userData,
2573: "xmlSwitchEncoding : no input\n");
1.193 daniel 2574: return(-1);
1.156 daniel 2575: } else {
1.194 daniel 2576: int processed;
2577:
2578: /*
2579: * Shring the current input buffer.
2580: * Move it as the raw buffer and create a new input buffer
2581: */
2582: processed = ctxt->input->cur - ctxt->input->base;
1.209 veillard 2583:
1.194 daniel 2584: ctxt->input->buf->raw = xmlBufferCreate();
2585: xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
1.209 veillard 2586: ctxt->input->length - processed);
1.194 daniel 2587: ctxt->input->buf->buffer = xmlBufferCreate();
1.156 daniel 2588:
2589: /*
1.194 daniel 2590: * convert as much as possible of the raw input
2591: * to the parser reading buffer.
2592: */
2593: nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
2594: ctxt->input->buf->buffer,
2595: ctxt->input->buf->raw);
2596: if (nbchars < 0) {
2597: fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
2598: return(-1);
1.156 daniel 2599: }
1.194 daniel 2600:
1.156 daniel 2601: /*
2602: * Conversion succeeded, get rid of the old buffer
2603: */
2604: if ((ctxt->input->free != NULL) &&
2605: (ctxt->input->base != NULL))
2606: ctxt->input->free((xmlChar *) ctxt->input->base);
1.194 daniel 2607: ctxt->input->base =
2608: ctxt->input->cur = ctxt->input->buf->buffer->content;
1.156 daniel 2609: }
2610: }
2611: } else {
2612: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2613: ctxt->sax->error(ctxt->userData,
2614: "xmlSwitchEncoding : no input\n");
1.193 daniel 2615: return(-1);
1.156 daniel 2616: }
1.195 daniel 2617: /*
2618: * The parsing is now done in UTF8 natively
2619: */
1.198 daniel 2620: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 2621: } else
2622: return(-1);
2623: return(0);
1.156 daniel 2624:
1.75 daniel 2625: }
2626:
2627: /************************************************************************
2628: * *
1.123 daniel 2629: * Commodity functions to handle xmlChars *
1.28 daniel 2630: * *
2631: ************************************************************************/
2632:
1.50 daniel 2633: /**
2634: * xmlStrndup:
1.123 daniel 2635: * @cur: the input xmlChar *
1.50 daniel 2636: * @len: the len of @cur
2637: *
1.123 daniel 2638: * a strndup for array of xmlChar's
1.68 daniel 2639: *
1.123 daniel 2640: * Returns a new xmlChar * or NULL
1.1 veillard 2641: */
1.123 daniel 2642: xmlChar *
2643: xmlStrndup(const xmlChar *cur, int len) {
1.135 daniel 2644: xmlChar *ret;
2645:
2646: if ((cur == NULL) || (len < 0)) return(NULL);
1.204 veillard 2647: ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1.1 veillard 2648: if (ret == NULL) {
1.86 daniel 2649: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 2650: (len + 1) * (long)sizeof(xmlChar));
1.1 veillard 2651: return(NULL);
2652: }
1.123 daniel 2653: memcpy(ret, cur, len * sizeof(xmlChar));
1.1 veillard 2654: ret[len] = 0;
2655: return(ret);
2656: }
2657:
1.50 daniel 2658: /**
2659: * xmlStrdup:
1.123 daniel 2660: * @cur: the input xmlChar *
1.50 daniel 2661: *
1.152 daniel 2662: * a strdup for array of xmlChar's. Since they are supposed to be
2663: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2664: * a termination mark of '0'.
1.68 daniel 2665: *
1.123 daniel 2666: * Returns a new xmlChar * or NULL
1.1 veillard 2667: */
1.123 daniel 2668: xmlChar *
2669: xmlStrdup(const xmlChar *cur) {
2670: const xmlChar *p = cur;
1.1 veillard 2671:
1.135 daniel 2672: if (cur == NULL) return(NULL);
1.152 daniel 2673: while (*p != 0) p++;
1.1 veillard 2674: return(xmlStrndup(cur, p - cur));
2675: }
2676:
1.50 daniel 2677: /**
2678: * xmlCharStrndup:
2679: * @cur: the input char *
2680: * @len: the len of @cur
2681: *
1.123 daniel 2682: * a strndup for char's to xmlChar's
1.68 daniel 2683: *
1.123 daniel 2684: * Returns a new xmlChar * or NULL
1.45 daniel 2685: */
2686:
1.123 daniel 2687: xmlChar *
1.55 daniel 2688: xmlCharStrndup(const char *cur, int len) {
1.45 daniel 2689: int i;
1.135 daniel 2690: xmlChar *ret;
2691:
2692: if ((cur == NULL) || (len < 0)) return(NULL);
1.204 veillard 2693: ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1.45 daniel 2694: if (ret == NULL) {
1.86 daniel 2695: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 2696: (len + 1) * (long)sizeof(xmlChar));
1.45 daniel 2697: return(NULL);
2698: }
2699: for (i = 0;i < len;i++)
1.123 daniel 2700: ret[i] = (xmlChar) cur[i];
1.45 daniel 2701: ret[len] = 0;
2702: return(ret);
2703: }
2704:
1.50 daniel 2705: /**
2706: * xmlCharStrdup:
2707: * @cur: the input char *
2708: * @len: the len of @cur
2709: *
1.123 daniel 2710: * a strdup for char's to xmlChar's
1.68 daniel 2711: *
1.123 daniel 2712: * Returns a new xmlChar * or NULL
1.45 daniel 2713: */
2714:
1.123 daniel 2715: xmlChar *
1.55 daniel 2716: xmlCharStrdup(const char *cur) {
1.45 daniel 2717: const char *p = cur;
2718:
1.135 daniel 2719: if (cur == NULL) return(NULL);
1.45 daniel 2720: while (*p != '\0') p++;
2721: return(xmlCharStrndup(cur, p - cur));
2722: }
2723:
/**
 * xmlStrcmp:
 * @str1:  the first xmlChar *
 * @str2:  the second xmlChar *
 *
 * a strcmp for xmlChar's. A NULL string sorts before any non-NULL
 * string and two NULL strings are equal.
 *
 * Returns the integer result of the comparison: 0 if the strings are
 *         equal, otherwise the difference between the first pair of
 *         differing xmlChar's (-1 / 1 when only one string is NULL)
 */

int
xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
    register int tmp;

    if ((str1 == NULL) && (str2 == NULL)) return(0);
    if (str1 == NULL) return(-1);
    if (str2 == NULL) return(1);
    do {
        tmp = *str1++ - *str2;
        if (tmp != 0) return(tmp);
        /*
         * The items are equal here: stop as soon as that item is the
         * terminator, so that nothing past the end is ever read.
         */
    } while (*str2++ != 0);
    return(0);
}
2747:
/**
 * xmlStrncmp:
 * @str1:  the first xmlChar *
 * @str2:  the second xmlChar *
 * @len:  the max comparison length
 *
 * a strncmp for xmlChar's: at most @len items are compared. A NULL
 * string sorts before any non-NULL string and two NULL strings are
 * equal.
 *
 * Returns the integer result of the comparison: 0 if @len <= 0 or the
 *         compared parts are equal, otherwise the difference between
 *         the first pair of differing xmlChar's (-1 / 1 when only one
 *         string is NULL)
 */

int
xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
    register int tmp;

    if (len <= 0) return(0);
    if ((str1 == NULL) && (str2 == NULL)) return(0);
    if (str1 == NULL) return(-1);
    if (str2 == NULL) return(1);
    do {
        tmp = *str1++ - *str2;
        if ((tmp != 0) || (--len == 0)) return(tmp);
        /*
         * The items are equal here: stop as soon as that item is the
         * terminator, so that nothing past the end is ever read.
         */
    } while (*str2++ != 0);
    return(0);
}
2775:
/**
 * xmlStrchr:
 * @str:  the xmlChar * array
 * @val:  the xmlChar to search
 *
 * a strchr for xmlChar's. The terminating 0 is never matched, so
 * searching for 0 always fails.
 *
 * Returns the xmlChar * for the first occurrence or NULL.
 */

const xmlChar *
xmlStrchr(const xmlChar *str, xmlChar val) {
    const xmlChar *scan;

    if (str == NULL)
        return(NULL);

    for (scan = str; *scan != 0; scan++) {
        if (*scan == val)
            return(scan);
    }
    return(NULL);
}
2795:
2796: /**
2797: * xmlStrstr:
1.123 daniel 2798: * @str: the xmlChar * array (haystack)
2799: * @val: the xmlChar to search (needle)
1.89 daniel 2800: *
1.123 daniel 2801: * a strstr for xmlChar's
1.89 daniel 2802: *
1.123 daniel 2803: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 2804: */
2805:
1.123 daniel 2806: const xmlChar *
2807: xmlStrstr(const xmlChar *str, xmlChar *val) {
1.89 daniel 2808: int n;
2809:
2810: if (str == NULL) return(NULL);
2811: if (val == NULL) return(NULL);
2812: n = xmlStrlen(val);
2813:
2814: if (n == 0) return(str);
2815: while (*str != 0) {
2816: if (*str == *val) {
1.123 daniel 2817: if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1.89 daniel 2818: }
2819: str++;
2820: }
2821: return(NULL);
2822: }
2823:
2824: /**
2825: * xmlStrsub:
1.123 daniel 2826: * @str: the xmlChar * array (haystack)
1.89 daniel 2827: * @start: the index of the first char (zero based)
2828: * @len: the length of the substring
2829: *
2830: * Extract a substring of a given string
2831: *
1.123 daniel 2832: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 2833: */
2834:
1.123 daniel 2835: xmlChar *
2836: xmlStrsub(const xmlChar *str, int start, int len) {
1.89 daniel 2837: int i;
2838:
2839: if (str == NULL) return(NULL);
2840: if (start < 0) return(NULL);
1.90 daniel 2841: if (len < 0) return(NULL);
1.89 daniel 2842:
2843: for (i = 0;i < start;i++) {
2844: if (*str == 0) return(NULL);
2845: str++;
2846: }
2847: if (*str == 0) return(NULL);
2848: return(xmlStrndup(str, len));
1.14 veillard 2849: }
1.28 daniel 2850:
/**
 * xmlStrlen:
 * @str:  the xmlChar * array
 *
 * length of a xmlChar's string. A NULL @str has length 0.
 *
 * Returns the number of xmlChar found before the terminating 0.
 */

int
xmlStrlen(const xmlChar *str) {
    int count = 0;

    if (str == NULL)
        return(0);

    while (str[count] != 0)
        count++;
    return(count);
}
2871:
1.50 daniel 2872: /**
2873: * xmlStrncat:
1.123 daniel 2874: * @cur: the original xmlChar * array
2875: * @add: the xmlChar * array added
1.50 daniel 2876: * @len: the length of @add
2877: *
1.123 daniel 2878: * a strncat for array of xmlChar's
1.68 daniel 2879: *
1.123 daniel 2880: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 2881: */
2882:
1.123 daniel 2883: xmlChar *
2884: xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1.45 daniel 2885: int size;
1.123 daniel 2886: xmlChar *ret;
1.45 daniel 2887:
2888: if ((add == NULL) || (len == 0))
2889: return(cur);
2890: if (cur == NULL)
2891: return(xmlStrndup(add, len));
2892:
2893: size = xmlStrlen(cur);
1.204 veillard 2894: ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1.45 daniel 2895: if (ret == NULL) {
1.86 daniel 2896: fprintf(stderr, "xmlStrncat: realloc of %ld byte failed\n",
1.123 daniel 2897: (size + len + 1) * (long)sizeof(xmlChar));
1.45 daniel 2898: return(cur);
2899: }
1.123 daniel 2900: memcpy(&ret[size], add, len * sizeof(xmlChar));
1.45 daniel 2901: ret[size + len] = 0;
2902: return(ret);
2903: }
2904:
1.50 daniel 2905: /**
2906: * xmlStrcat:
1.123 daniel 2907: * @cur: the original xmlChar * array
2908: * @add: the xmlChar * array added
1.50 daniel 2909: *
1.152 daniel 2910: * a strcat for array of xmlChar's. Since they are supposed to be
2911: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2912: * a termination mark of '0'.
1.68 daniel 2913: *
1.123 daniel 2914: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 2915: */
1.123 daniel 2916: xmlChar *
2917: xmlStrcat(xmlChar *cur, const xmlChar *add) {
2918: const xmlChar *p = add;
1.45 daniel 2919:
2920: if (add == NULL) return(cur);
2921: if (cur == NULL)
2922: return(xmlStrdup(add));
2923:
1.152 daniel 2924: while (*p != 0) p++;
1.45 daniel 2925: return(xmlStrncat(cur, add, p - add));
2926: }
2927:
2928: /************************************************************************
2929: * *
2930: * Commodity functions, cleanup needed ? *
2931: * *
2932: ************************************************************************/
2933:
1.50 daniel 2934: /**
2935: * areBlanks:
2936: * @ctxt: an XML parser context
1.123 daniel 2937: * @str: a xmlChar *
1.50 daniel 2938: * @len: the size of @str
2939: *
1.45 daniel 2940: * Is this a sequence of blank chars that one can ignore ?
1.50 daniel 2941: *
1.68 daniel 2942: * Returns 1 if ignorable 0 otherwise.
1.45 daniel 2943: */
2944:
1.123 daniel 2945: static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1.104 daniel 2946: int i, ret;
1.45 daniel 2947: xmlNodePtr lastChild;
2948:
1.176 daniel 2949: /*
2950: * Check for xml:space value.
2951: */
2952: if (*(ctxt->space) == 1)
2953: return(0);
2954:
2955: /*
2956: * Check that the string is made of blanks
2957: */
1.45 daniel 2958: for (i = 0;i < len;i++)
2959: if (!(IS_BLANK(str[i]))) return(0);
2960:
1.176 daniel 2961: /*
2962: * Look if the element is mixed content in the Dtd if available
2963: */
1.104 daniel 2964: if (ctxt->myDoc != NULL) {
2965: ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2966: if (ret == 0) return(1);
2967: if (ret == 1) return(0);
2968: }
1.176 daniel 2969:
1.104 daniel 2970: /*
1.176 daniel 2971: * Otherwise, heuristic :-\
1.104 daniel 2972: */
1.179 daniel 2973: if (ctxt->keepBlanks)
2974: return(0);
2975: if (RAW != '<') return(0);
2976: if (ctxt->node == NULL) return(0);
2977: if ((ctxt->node->children == NULL) &&
2978: (RAW == '<') && (NXT(1) == '/')) return(0);
2979:
1.45 daniel 2980: lastChild = xmlGetLastChild(ctxt->node);
2981: if (lastChild == NULL) {
2982: if (ctxt->node->content != NULL) return(0);
2983: } else if (xmlNodeIsText(lastChild))
2984: return(0);
1.157 daniel 2985: else if ((ctxt->node->children != NULL) &&
2986: (xmlNodeIsText(ctxt->node->children)))
1.104 daniel 2987: return(0);
1.45 daniel 2988: return(1);
2989: }
2990:
1.50 daniel 2991: /**
2992: * xmlHandleEntity:
2993: * @ctxt: an XML parser context
2994: * @entity: an XML entity pointer.
2995: *
2996: * Default handling of defined entities, when should we define a new input
1.45 daniel 2997: * stream ? When do we just handle that as a set of chars ?
1.99 daniel 2998: *
2999: * OBSOLETE: to be removed at some point.
1.45 daniel 3000: */
3001:
1.55 daniel 3002: void
3003: xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 3004: int len;
1.50 daniel 3005: xmlParserInputPtr input;
1.45 daniel 3006:
3007: if (entity->content == NULL) {
1.123 daniel 3008: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 3009: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3010: ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
1.45 daniel 3011: entity->name);
1.59 daniel 3012: ctxt->wellFormed = 0;
1.180 daniel 3013: ctxt->disableSAX = 1;
1.45 daniel 3014: return;
3015: }
3016: len = xmlStrlen(entity->content);
3017: if (len <= 2) goto handle_as_char;
3018:
3019: /*
3020: * Redefine its content as an input stream.
3021: */
1.50 daniel 3022: input = xmlNewEntityInputStream(ctxt, entity);
3023: xmlPushInput(ctxt, input);
1.45 daniel 3024: return;
3025:
3026: handle_as_char:
3027: /*
3028: * Just handle the content as a set of chars.
3029: */
1.171 daniel 3030: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3031: (ctxt->sax->characters != NULL))
1.74 daniel 3032: ctxt->sax->characters(ctxt->userData, entity->content, len);
1.45 daniel 3033:
3034: }
3035:
3036: /*
3037: * Forward declarations for recursive behaviour.
3038: */
1.77 daniel 3039: void xmlParsePEReference(xmlParserCtxtPtr ctxt);
3040: void xmlParseReference(xmlParserCtxtPtr ctxt);
1.45 daniel 3041:
1.28 daniel 3042: /************************************************************************
3043: * *
3044: * Extra stuff for namespace support *
3045: * Relates to http://www.w3.org/TR/WD-xml-names *
3046: * *
3047: ************************************************************************/
3048:
1.50 daniel 3049: /**
3050: * xmlNamespaceParseNCName:
3051: * @ctxt: an XML parser context
3052: *
3053: * parse an XML namespace name.
1.28 daniel 3054: *
3055: * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
3056: *
3057: * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3058: * CombiningChar | Extender
1.68 daniel 3059: *
3060: * Returns the namespace name or NULL
1.28 daniel 3061: */
3062:
1.123 daniel 3063: xmlChar *
1.55 daniel 3064: xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
1.152 daniel 3065: xmlChar buf[XML_MAX_NAMELEN + 5];
3066: int len = 0, l;
3067: int cur = CUR_CHAR(l);
1.28 daniel 3068:
1.156 daniel 3069: /* load first the value of the char !!! */
1.152 daniel 3070: if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
1.28 daniel 3071:
1.152 daniel 3072: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
3073: (cur == '.') || (cur == '-') ||
3074: (cur == '_') ||
3075: (IS_COMBINING(cur)) ||
3076: (IS_EXTENDER(cur))) {
3077: COPY_BUF(l,buf,len,cur);
3078: NEXTL(l);
3079: cur = CUR_CHAR(l);
1.91 daniel 3080: if (len >= XML_MAX_NAMELEN) {
3081: fprintf(stderr,
3082: "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
1.152 daniel 3083: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
3084: (cur == '.') || (cur == '-') ||
3085: (cur == '_') ||
3086: (IS_COMBINING(cur)) ||
3087: (IS_EXTENDER(cur))) {
3088: NEXTL(l);
3089: cur = CUR_CHAR(l);
3090: }
1.91 daniel 3091: break;
3092: }
3093: }
3094: return(xmlStrndup(buf, len));
1.28 daniel 3095: }
3096:
1.50 daniel 3097: /**
3098: * xmlNamespaceParseQName:
3099: * @ctxt: an XML parser context
1.123 daniel 3100: * @prefix: a xmlChar **
1.50 daniel 3101: *
3102: * parse an XML qualified name
1.28 daniel 3103: *
3104: * [NS 5] QName ::= (Prefix ':')? LocalPart
3105: *
3106: * [NS 6] Prefix ::= NCName
3107: *
3108: * [NS 7] LocalPart ::= NCName
1.68 daniel 3109: *
1.127 daniel 3110: * Returns the local part, and prefix is updated
1.50 daniel 3111: * to get the Prefix if any.
1.28 daniel 3112: */
3113:
1.123 daniel 3114: xmlChar *
3115: xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, xmlChar **prefix) {
3116: xmlChar *ret = NULL;
1.28 daniel 3117:
3118: *prefix = NULL;
3119: ret = xmlNamespaceParseNCName(ctxt);
1.152 daniel 3120: if (RAW == ':') {
1.28 daniel 3121: *prefix = ret;
1.40 daniel 3122: NEXT;
1.28 daniel 3123: ret = xmlNamespaceParseNCName(ctxt);
3124: }
3125:
3126: return(ret);
3127: }
3128:
1.50 daniel 3129: /**
1.72 daniel 3130: * xmlSplitQName:
1.162 daniel 3131: * @ctxt: an XML parser context
1.72 daniel 3132: * @name: an XML parser context
1.123 daniel 3133: * @prefix: a xmlChar **
1.72 daniel 3134: *
1.206 veillard 3135: * parse an UTF8 encoded XML qualified name string
1.72 daniel 3136: *
3137: * [NS 5] QName ::= (Prefix ':')? LocalPart
3138: *
3139: * [NS 6] Prefix ::= NCName
3140: *
3141: * [NS 7] LocalPart ::= NCName
3142: *
1.127 daniel 3143: * Returns the local part, and prefix is updated
1.72 daniel 3144: * to get the Prefix if any.
3145: */
3146:
1.123 daniel 3147: xmlChar *
1.162 daniel 3148: xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
3149: xmlChar buf[XML_MAX_NAMELEN + 5];
3150: int len = 0;
1.123 daniel 3151: xmlChar *ret = NULL;
3152: const xmlChar *cur = name;
1.206 veillard 3153: int c;
1.72 daniel 3154:
3155: *prefix = NULL;
1.113 daniel 3156:
3157: /* xml: prefix is not really a namespace */
3158: if ((cur[0] == 'x') && (cur[1] == 'm') &&
3159: (cur[2] == 'l') && (cur[3] == ':'))
3160: return(xmlStrdup(name));
3161:
1.162 daniel 3162: /* nasty but valid */
3163: if (cur[0] == ':')
3164: return(xmlStrdup(name));
3165:
1.206 veillard 3166: c = *cur++;
3167: while ((c != 0) && (c != ':')) {
3168: buf[len++] = c;
3169: c = *cur++;
1.162 daniel 3170: }
1.72 daniel 3171:
1.162 daniel 3172: ret = xmlStrndup(buf, len);
1.72 daniel 3173:
1.162 daniel 3174: if (c == ':') {
1.206 veillard 3175: c = *cur++;
3176: if (c == 0) return(ret);
1.72 daniel 3177: *prefix = ret;
1.162 daniel 3178: len = 0;
1.72 daniel 3179:
1.206 veillard 3180: while (c != 0) {
3181: buf[len++] = c;
3182: c = *cur++;
1.162 daniel 3183: }
1.72 daniel 3184:
1.162 daniel 3185: ret = xmlStrndup(buf, len);
1.72 daniel 3186: }
3187:
3188: return(ret);
3189: }
1.206 veillard 3190:
1.72 daniel 3191: /**
1.50 daniel 3192: * xmlNamespaceParseNSDef:
3193: * @ctxt: an XML parser context
3194: *
3195: * parse a namespace prefix declaration
1.28 daniel 3196: *
3197: * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
3198: *
3199: * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
1.68 daniel 3200: *
3201: * Returns the namespace name
1.28 daniel 3202: */
3203:
1.123 daniel 3204: xmlChar *
1.55 daniel 3205: xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
1.123 daniel 3206: xmlChar *name = NULL;
1.28 daniel 3207:
1.152 daniel 3208: if ((RAW == 'x') && (NXT(1) == 'm') &&
1.40 daniel 3209: (NXT(2) == 'l') && (NXT(3) == 'n') &&
3210: (NXT(4) == 's')) {
3211: SKIP(5);
1.152 daniel 3212: if (RAW == ':') {
1.40 daniel 3213: NEXT;
1.28 daniel 3214: name = xmlNamespaceParseNCName(ctxt);
3215: }
3216: }
1.39 daniel 3217: return(name);
1.28 daniel 3218: }
3219:
1.50 daniel 3220: /**
3221: * xmlParseQuotedString:
3222: * @ctxt: an XML parser context
3223: *
1.45 daniel 3224: * [OLD] Parse and return a string between quotes or doublequotes
1.110 daniel 3225: * To be removed at next drop of binary compatibility
1.68 daniel 3226: *
3227: * Returns the string parser or NULL.
1.45 daniel 3228: */
1.123 daniel 3229: xmlChar *
1.55 daniel 3230: xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
1.135 daniel 3231: xmlChar *buf = NULL;
1.152 daniel 3232: int len = 0,l;
1.140 daniel 3233: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3234: int c;
1.45 daniel 3235:
1.135 daniel 3236: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3237: if (buf == NULL) {
3238: fprintf(stderr, "malloc of %d byte failed\n", size);
3239: return(NULL);
3240: }
1.152 daniel 3241: if (RAW == '"') {
1.45 daniel 3242: NEXT;
1.152 daniel 3243: c = CUR_CHAR(l);
1.135 daniel 3244: while (IS_CHAR(c) && (c != '"')) {
1.152 daniel 3245: if (len + 5 >= size) {
1.135 daniel 3246: size *= 2;
1.204 veillard 3247: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 3248: if (buf == NULL) {
3249: fprintf(stderr, "realloc of %d byte failed\n", size);
3250: return(NULL);
3251: }
3252: }
1.152 daniel 3253: COPY_BUF(l,buf,len,c);
3254: NEXTL(l);
3255: c = CUR_CHAR(l);
1.135 daniel 3256: }
3257: if (c != '"') {
1.123 daniel 3258: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 3259: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3260: ctxt->sax->error(ctxt->userData,
3261: "String not closed \"%.50s\"\n", buf);
1.59 daniel 3262: ctxt->wellFormed = 0;
1.180 daniel 3263: ctxt->disableSAX = 1;
1.55 daniel 3264: } else {
1.45 daniel 3265: NEXT;
3266: }
1.152 daniel 3267: } else if (RAW == '\''){
1.45 daniel 3268: NEXT;
1.135 daniel 3269: c = CUR;
3270: while (IS_CHAR(c) && (c != '\'')) {
3271: if (len + 1 >= size) {
3272: size *= 2;
1.204 veillard 3273: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 3274: if (buf == NULL) {
3275: fprintf(stderr, "realloc of %d byte failed\n", size);
3276: return(NULL);
3277: }
3278: }
3279: buf[len++] = c;
3280: NEXT;
3281: c = CUR;
3282: }
1.152 daniel 3283: if (RAW != '\'') {
1.123 daniel 3284: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 3285: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3286: ctxt->sax->error(ctxt->userData,
3287: "String not closed \"%.50s\"\n", buf);
1.59 daniel 3288: ctxt->wellFormed = 0;
1.180 daniel 3289: ctxt->disableSAX = 1;
1.55 daniel 3290: } else {
1.45 daniel 3291: NEXT;
3292: }
3293: }
1.135 daniel 3294: return(buf);
1.45 daniel 3295: }
3296:
1.50 daniel 3297: /**
3298: * xmlParseNamespace:
3299: * @ctxt: an XML parser context
3300: *
1.45 daniel 3301: * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
3302: *
3303: * This is what the older xml-name Working Draft specified, a bunch of
3304: * other stuff may still rely on it, so support is still here as
1.127 daniel 3305: * if it was declared on the root of the Tree:-(
1.110 daniel 3306: *
3307: * To be removed at next drop of binary compatibility
1.45 daniel 3308: */
3309:
1.55 daniel 3310: void
3311: xmlParseNamespace(xmlParserCtxtPtr ctxt) {
1.123 daniel 3312: xmlChar *href = NULL;
3313: xmlChar *prefix = NULL;
1.45 daniel 3314: int garbage = 0;
3315:
3316: /*
3317: * We just skipped "namespace" or "xml:namespace"
3318: */
3319: SKIP_BLANKS;
3320:
1.153 daniel 3321: while (IS_CHAR(RAW) && (RAW != '>')) {
1.45 daniel 3322: /*
3323: * We can have "ns" or "prefix" attributes
3324: * Old encoding as 'href' or 'AS' attributes is still supported
3325: */
1.152 daniel 3326: if ((RAW == 'n') && (NXT(1) == 's')) {
1.45 daniel 3327: garbage = 0;
3328: SKIP(2);
3329: SKIP_BLANKS;
3330:
1.152 daniel 3331: if (RAW != '=') continue;
1.45 daniel 3332: NEXT;
3333: SKIP_BLANKS;
3334:
3335: href = xmlParseQuotedString(ctxt);
3336: SKIP_BLANKS;
1.152 daniel 3337: } else if ((RAW == 'h') && (NXT(1) == 'r') &&
1.45 daniel 3338: (NXT(2) == 'e') && (NXT(3) == 'f')) {
3339: garbage = 0;
3340: SKIP(4);
3341: SKIP_BLANKS;
3342:
1.152 daniel 3343: if (RAW != '=') continue;
1.45 daniel 3344: NEXT;
3345: SKIP_BLANKS;
3346:
3347: href = xmlParseQuotedString(ctxt);
3348: SKIP_BLANKS;
1.152 daniel 3349: } else if ((RAW == 'p') && (NXT(1) == 'r') &&
1.45 daniel 3350: (NXT(2) == 'e') && (NXT(3) == 'f') &&
3351: (NXT(4) == 'i') && (NXT(5) == 'x')) {
3352: garbage = 0;
3353: SKIP(6);
3354: SKIP_BLANKS;
3355:
1.152 daniel 3356: if (RAW != '=') continue;
1.45 daniel 3357: NEXT;
3358: SKIP_BLANKS;
3359:
3360: prefix = xmlParseQuotedString(ctxt);
3361: SKIP_BLANKS;
1.152 daniel 3362: } else if ((RAW == 'A') && (NXT(1) == 'S')) {
1.45 daniel 3363: garbage = 0;
3364: SKIP(2);
3365: SKIP_BLANKS;
3366:
1.152 daniel 3367: if (RAW != '=') continue;
1.45 daniel 3368: NEXT;
3369: SKIP_BLANKS;
3370:
3371: prefix = xmlParseQuotedString(ctxt);
3372: SKIP_BLANKS;
1.152 daniel 3373: } else if ((RAW == '?') && (NXT(1) == '>')) {
1.45 daniel 3374: garbage = 0;
1.91 daniel 3375: NEXT;
1.45 daniel 3376: } else {
3377: /*
3378: * Found garbage when parsing the namespace
3379: */
1.122 daniel 3380: if (!garbage) {
1.55 daniel 3381: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3382: ctxt->sax->error(ctxt->userData,
3383: "xmlParseNamespace found garbage\n");
3384: }
1.123 daniel 3385: ctxt->errNo = XML_ERR_NS_DECL_ERROR;
1.59 daniel 3386: ctxt->wellFormed = 0;
1.180 daniel 3387: ctxt->disableSAX = 1;
1.45 daniel 3388: NEXT;
3389: }
3390: }
3391:
3392: MOVETO_ENDTAG(CUR_PTR);
3393: NEXT;
3394:
3395: /*
3396: * Register the DTD.
1.72 daniel 3397: if (href != NULL)
3398: if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
1.74 daniel 3399: ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
1.45 daniel 3400: */
3401:
1.119 daniel 3402: if (prefix != NULL) xmlFree(prefix);
3403: if (href != NULL) xmlFree(href);
1.45 daniel 3404: }
3405:
1.28 daniel 3406: /************************************************************************
3407: * *
3408: * The parser itself *
3409: * Relates to http://www.w3.org/TR/REC-xml *
3410: * *
3411: ************************************************************************/
1.14 veillard 3412:
1.50 daniel 3413: /**
1.97 daniel 3414: * xmlScanName:
3415: * @ctxt: an XML parser context
3416: *
3417: * Trickery: parse an XML name but without consuming the input flow
3418: * Needed for rollback cases.
3419: *
3420: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3421: * CombiningChar | Extender
3422: *
3423: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3424: *
3425: * [6] Names ::= Name (S Name)*
3426: *
3427: * Returns the Name parsed or NULL
3428: */
3429:
1.123 daniel 3430: xmlChar *
1.97 daniel 3431: xmlScanName(xmlParserCtxtPtr ctxt) {
1.123 daniel 3432: xmlChar buf[XML_MAX_NAMELEN];
1.97 daniel 3433: int len = 0;
3434:
3435: GROW;
1.152 daniel 3436: if (!IS_LETTER(RAW) && (RAW != '_') &&
3437: (RAW != ':')) {
1.97 daniel 3438: return(NULL);
3439: }
3440:
3441: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
3442: (NXT(len) == '.') || (NXT(len) == '-') ||
3443: (NXT(len) == '_') || (NXT(len) == ':') ||
3444: (IS_COMBINING(NXT(len))) ||
3445: (IS_EXTENDER(NXT(len)))) {
3446: buf[len] = NXT(len);
3447: len++;
3448: if (len >= XML_MAX_NAMELEN) {
3449: fprintf(stderr,
3450: "xmlScanName: reached XML_MAX_NAMELEN limit\n");
3451: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
3452: (NXT(len) == '.') || (NXT(len) == '-') ||
3453: (NXT(len) == '_') || (NXT(len) == ':') ||
3454: (IS_COMBINING(NXT(len))) ||
3455: (IS_EXTENDER(NXT(len))))
3456: len++;
3457: break;
3458: }
3459: }
3460: return(xmlStrndup(buf, len));
3461: }
3462:
3463: /**
1.50 daniel 3464: * xmlParseName:
3465: * @ctxt: an XML parser context
3466: *
3467: * parse an XML name.
1.22 daniel 3468: *
3469: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3470: * CombiningChar | Extender
3471: *
3472: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3473: *
3474: * [6] Names ::= Name (S Name)*
1.68 daniel 3475: *
3476: * Returns the Name parsed or NULL
1.1 veillard 3477: */
3478:
1.123 daniel 3479: xmlChar *
1.55 daniel 3480: xmlParseName(xmlParserCtxtPtr ctxt) {
1.160 daniel 3481: xmlChar buf[XML_MAX_NAMELEN + 5];
3482: int len = 0, l;
3483: int c;
1.1 veillard 3484:
1.91 daniel 3485: GROW;
1.160 daniel 3486: c = CUR_CHAR(l);
1.190 daniel 3487: if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3488: (!IS_LETTER(c) && (c != '_') &&
3489: (c != ':'))) {
1.91 daniel 3490: return(NULL);
3491: }
1.40 daniel 3492:
1.190 daniel 3493: while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3494: ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3495: (c == '.') || (c == '-') ||
3496: (c == '_') || (c == ':') ||
3497: (IS_COMBINING(c)) ||
3498: (IS_EXTENDER(c)))) {
1.160 daniel 3499: COPY_BUF(l,buf,len,c);
3500: NEXTL(l);
3501: c = CUR_CHAR(l);
1.91 daniel 3502: if (len >= XML_MAX_NAMELEN) {
3503: fprintf(stderr,
3504: "xmlParseName: reached XML_MAX_NAMELEN limit\n");
1.160 daniel 3505: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3506: (c == '.') || (c == '-') ||
3507: (c == '_') || (c == ':') ||
3508: (IS_COMBINING(c)) ||
3509: (IS_EXTENDER(c))) {
3510: NEXTL(l);
3511: c = CUR_CHAR(l);
1.97 daniel 3512: }
1.91 daniel 3513: break;
3514: }
3515: }
3516: return(xmlStrndup(buf, len));
1.22 daniel 3517: }
3518:
1.50 daniel 3519: /**
1.135 daniel 3520: * xmlParseStringName:
3521: * @ctxt: an XML parser context
3522: * @str: a pointer to an index in the string
3523: *
3524: * parse an XML name.
3525: *
3526: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3527: * CombiningChar | Extender
3528: *
3529: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3530: *
3531: * [6] Names ::= Name (S Name)*
3532: *
3533: * Returns the Name parsed or NULL. The str pointer
3534: * is updated to the current location in the string.
3535: */
3536:
3537: xmlChar *
3538: xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1.176 daniel 3539: xmlChar buf[XML_MAX_NAMELEN + 5];
3540: const xmlChar *cur = *str;
3541: int len = 0, l;
3542: int c;
1.135 daniel 3543:
1.176 daniel 3544: c = CUR_SCHAR(cur, l);
3545: if (!IS_LETTER(c) && (c != '_') &&
3546: (c != ':')) {
1.135 daniel 3547: return(NULL);
3548: }
3549:
1.176 daniel 3550: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3551: (c == '.') || (c == '-') ||
3552: (c == '_') || (c == ':') ||
3553: (IS_COMBINING(c)) ||
3554: (IS_EXTENDER(c))) {
3555: COPY_BUF(l,buf,len,c);
3556: cur += l;
3557: c = CUR_SCHAR(cur, l);
3558: if (len >= XML_MAX_NAMELEN) {
3559: fprintf(stderr,
3560: "xmlParseName: reached XML_MAX_NAMELEN limit\n");
3561: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3562: (c == '.') || (c == '-') ||
3563: (c == '_') || (c == ':') ||
3564: (IS_COMBINING(c)) ||
3565: (IS_EXTENDER(c))) {
3566: cur += l;
3567: c = CUR_SCHAR(cur, l);
3568: }
3569: break;
3570: }
1.135 daniel 3571: }
1.176 daniel 3572: *str = cur;
3573: return(xmlStrndup(buf, len));
1.135 daniel 3574: }
3575:
3576: /**
1.50 daniel 3577: * xmlParseNmtoken:
3578: * @ctxt: an XML parser context
3579: *
3580: * parse an XML Nmtoken.
1.22 daniel 3581: *
3582: * [7] Nmtoken ::= (NameChar)+
3583: *
3584: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1.68 daniel 3585: *
3586: * Returns the Nmtoken parsed or NULL
1.22 daniel 3587: */
3588:
1.123 daniel 3589: xmlChar *
1.55 daniel 3590: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1.123 daniel 3591: xmlChar buf[XML_MAX_NAMELEN];
1.91 daniel 3592: int len = 0;
1.160 daniel 3593: int c,l;
1.22 daniel 3594:
1.91 daniel 3595: GROW;
1.160 daniel 3596: c = CUR_CHAR(l);
3597: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3598: (c == '.') || (c == '-') ||
3599: (c == '_') || (c == ':') ||
3600: (IS_COMBINING(c)) ||
3601: (IS_EXTENDER(c))) {
3602: COPY_BUF(l,buf,len,c);
3603: NEXTL(l);
3604: c = CUR_CHAR(l);
1.91 daniel 3605: if (len >= XML_MAX_NAMELEN) {
3606: fprintf(stderr,
3607: "xmlParseNmtoken: reached XML_MAX_NAMELEN limit\n");
1.160 daniel 3608: while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3609: (c == '.') || (c == '-') ||
3610: (c == '_') || (c == ':') ||
3611: (IS_COMBINING(c)) ||
3612: (IS_EXTENDER(c))) {
3613: NEXTL(l);
3614: c = CUR_CHAR(l);
3615: }
1.91 daniel 3616: break;
3617: }
3618: }
1.168 daniel 3619: if (len == 0)
3620: return(NULL);
1.91 daniel 3621: return(xmlStrndup(buf, len));
1.1 veillard 3622: }
3623:
1.50 daniel 3624: /**
3625: * xmlParseEntityValue:
3626: * @ctxt: an XML parser context
1.78 daniel 3627: * @orig: if non-NULL store a copy of the original entity value
1.50 daniel 3628: *
3629: * parse a value for ENTITY decl.
1.24 daniel 3630: *
3631: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3632: * "'" ([^%&'] | PEReference | Reference)* "'"
1.68 daniel 3633: *
1.78 daniel 3634: * Returns the EntityValue parsed with reference substitued or NULL
1.24 daniel 3635: */
3636:
1.123 daniel 3637: xmlChar *
3638: xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1.135 daniel 3639: xmlChar *buf = NULL;
3640: int len = 0;
1.140 daniel 3641: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3642: int c, l;
1.135 daniel 3643: xmlChar stop;
1.123 daniel 3644: xmlChar *ret = NULL;
1.176 daniel 3645: const xmlChar *cur = NULL;
1.98 daniel 3646: xmlParserInputPtr input;
1.24 daniel 3647:
1.152 daniel 3648: if (RAW == '"') stop = '"';
3649: else if (RAW == '\'') stop = '\'';
1.135 daniel 3650: else {
3651: ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
3652: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3653: ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
3654: ctxt->wellFormed = 0;
1.180 daniel 3655: ctxt->disableSAX = 1;
1.135 daniel 3656: return(NULL);
3657: }
3658: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3659: if (buf == NULL) {
3660: fprintf(stderr, "malloc of %d byte failed\n", size);
3661: return(NULL);
3662: }
1.94 daniel 3663:
1.135 daniel 3664: /*
3665: * The content of the entity definition is copied in a buffer.
3666: */
1.94 daniel 3667:
1.135 daniel 3668: ctxt->instate = XML_PARSER_ENTITY_VALUE;
3669: input = ctxt->input;
3670: GROW;
3671: NEXT;
1.152 daniel 3672: c = CUR_CHAR(l);
1.135 daniel 3673: /*
3674: * NOTE: 4.4.5 Included in Literal
3675: * When a parameter entity reference appears in a literal entity
3676: * value, ... a single or double quote character in the replacement
3677: * text is always treated as a normal data character and will not
3678: * terminate the literal.
3679: * In practice it means we stop the loop only when back at parsing
3680: * the initial entity and the quote is found
3681: */
3682: while (IS_CHAR(c) && ((c != stop) || (ctxt->input != input))) {
1.152 daniel 3683: if (len + 5 >= size) {
1.135 daniel 3684: size *= 2;
1.204 veillard 3685: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 3686: if (buf == NULL) {
3687: fprintf(stderr, "realloc of %d byte failed\n", size);
3688: return(NULL);
1.94 daniel 3689: }
1.79 daniel 3690: }
1.152 daniel 3691: COPY_BUF(l,buf,len,c);
3692: NEXTL(l);
1.98 daniel 3693: /*
1.135 daniel 3694: * Pop-up of finished entities.
1.98 daniel 3695: */
1.152 daniel 3696: while ((RAW == 0) && (ctxt->inputNr > 1))
1.135 daniel 3697: xmlPopInput(ctxt);
1.152 daniel 3698:
3699: c = CUR_CHAR(l);
1.135 daniel 3700: if (c == 0) {
1.94 daniel 3701: GROW;
1.152 daniel 3702: c = CUR_CHAR(l);
1.79 daniel 3703: }
1.135 daniel 3704: }
3705: buf[len] = 0;
3706:
3707: /*
1.176 daniel 3708: * Raise problem w.r.t. '&' and '%' being used in non-entities
3709: * reference constructs. Note Charref will be handled in
3710: * xmlStringDecodeEntities()
3711: */
3712: cur = buf;
3713: while (*cur != 0) {
3714: if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3715: xmlChar *name;
3716: xmlChar tmp = *cur;
3717:
3718: cur++;
3719: name = xmlParseStringName(ctxt, &cur);
3720: if ((name == NULL) || (*cur != ';')) {
3721: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3722: ctxt->sax->error(ctxt->userData,
3723: "EntityValue: '%c' forbidden except for entities references\n",
3724: tmp);
3725: ctxt->wellFormed = 0;
1.180 daniel 3726: ctxt->disableSAX = 1;
1.176 daniel 3727: ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
3728: }
3729: if ((ctxt->inSubset == 1) && (tmp == '%')) {
3730: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3731: ctxt->sax->error(ctxt->userData,
3732: "EntityValue: PEReferences forbidden in internal subset\n",
3733: tmp);
3734: ctxt->wellFormed = 0;
1.180 daniel 3735: ctxt->disableSAX = 1;
1.176 daniel 3736: ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
3737: }
3738: if (name != NULL)
3739: xmlFree(name);
3740: }
3741: cur++;
3742: }
3743:
3744: /*
1.135 daniel 3745: * Then PEReference entities are substituted.
3746: */
3747: if (c != stop) {
3748: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.55 daniel 3749: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 3750: ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
1.59 daniel 3751: ctxt->wellFormed = 0;
1.180 daniel 3752: ctxt->disableSAX = 1;
1.170 daniel 3753: xmlFree(buf);
1.135 daniel 3754: } else {
3755: NEXT;
3756: /*
3757: * NOTE: 4.4.7 Bypassed
3758: * When a general entity reference appears in the EntityValue in
3759: * an entity declaration, it is bypassed and left as is.
1.176 daniel 3760: * so XML_SUBSTITUTE_REF is not set here.
1.135 daniel 3761: */
3762: ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3763: 0, 0, 0);
3764: if (orig != NULL)
3765: *orig = buf;
3766: else
3767: xmlFree(buf);
1.24 daniel 3768: }
3769:
3770: return(ret);
3771: }
3772:
1.50 daniel 3773: /**
3774: * xmlParseAttValue:
3775: * @ctxt: an XML parser context
3776: *
3777: * parse a value for an attribute
1.78 daniel 3778: * Note: the parser won't do substitution of entities here, this
1.113 daniel 3779: * will be handled later in xmlStringGetNodeList
1.29 daniel 3780: *
3781: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3782: * "'" ([^<&'] | Reference)* "'"
1.68 daniel 3783: *
1.129 daniel 3784: * 3.3.3 Attribute-Value Normalization:
3785: * Before the value of an attribute is passed to the application or
3786: * checked for validity, the XML processor must normalize it as follows:
3787: * - a character reference is processed by appending the referenced
3788: * character to the attribute value
3789: * - an entity reference is processed by recursively processing the
3790: * replacement text of the entity
3791: * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3792: * appending #x20 to the normalized value, except that only a single
3793: * #x20 is appended for a "#xD#xA" sequence that is part of an external
3794: * parsed entity or the literal entity value of an internal parsed entity
3795: * - other characters are processed by appending them to the normalized value
1.130 daniel 3796: * If the declared value is not CDATA, then the XML processor must further
3797: * process the normalized attribute value by discarding any leading and
3798: * trailing space (#x20) characters, and by replacing sequences of space
3799: * (#x20) characters by a single space (#x20) character.
3800: * All attributes for which no declaration has been read should be treated
3801: * by a non-validating parser as if declared CDATA.
1.129 daniel 3802: *
3803: * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
1.29 daniel 3804: */
3805:
1.123 daniel 3806: xmlChar *
1.55 daniel 3807: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.129 daniel 3808: xmlChar limit = 0;
1.198 daniel 3809: xmlChar *buf = NULL;
3810: int len = 0;
3811: int buf_size = 0;
3812: int c, l;
1.129 daniel 3813: xmlChar *current = NULL;
3814: xmlEntityPtr ent;
3815:
1.29 daniel 3816:
1.91 daniel 3817: SHRINK;
1.151 daniel 3818: if (NXT(0) == '"') {
1.96 daniel 3819: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.129 daniel 3820: limit = '"';
1.40 daniel 3821: NEXT;
1.151 daniel 3822: } else if (NXT(0) == '\'') {
1.129 daniel 3823: limit = '\'';
1.96 daniel 3824: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.40 daniel 3825: NEXT;
1.29 daniel 3826: } else {
1.123 daniel 3827: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
1.55 daniel 3828: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3829: ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
1.59 daniel 3830: ctxt->wellFormed = 0;
1.180 daniel 3831: ctxt->disableSAX = 1;
1.129 daniel 3832: return(NULL);
1.29 daniel 3833: }
3834:
1.129 daniel 3835: /*
3836: * allocate a translation buffer.
3837: */
1.198 daniel 3838: buf_size = XML_PARSER_BUFFER_SIZE;
3839: buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
3840: if (buf == NULL) {
1.129 daniel 3841: perror("xmlParseAttValue: malloc failed");
3842: return(NULL);
3843: }
3844:
3845: /*
3846: * Ok loop until we reach one of the ending char or a size limit.
3847: */
1.198 daniel 3848: c = CUR_CHAR(l);
3849: while (((NXT(0) != limit) && (c != '<')) || (ctxt->token != 0)) {
3850: if (c == 0) break;
1.205 veillard 3851: if (ctxt->token == '&') {
3852: static xmlChar buffer[6] = "&";
3853:
3854: if (len > buf_size - 10) {
3855: growBuffer(buf);
3856: }
3857: current = &buffer[0];
3858: while (*current != 0) {
3859: buf[len++] = *current++;
3860: }
3861: ctxt->token = 0;
3862: } else if ((c == '&') && (NXT(1) == '#')) {
1.129 daniel 3863: int val = xmlParseCharRef(ctxt);
1.198 daniel 3864: COPY_BUF(l,buf,len,val);
3865: NEXTL(l);
3866: } else if (c == '&') {
1.129 daniel 3867: ent = xmlParseEntityRef(ctxt);
3868: if ((ent != NULL) &&
3869: (ctxt->replaceEntities != 0)) {
1.185 daniel 3870: xmlChar *rep;
3871:
1.186 daniel 3872: if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3873: rep = xmlStringDecodeEntities(ctxt, ent->content,
1.185 daniel 3874: XML_SUBSTITUTE_REF, 0, 0, 0);
1.186 daniel 3875: if (rep != NULL) {
3876: current = rep;
3877: while (*current != 0) {
1.198 daniel 3878: buf[len++] = *current++;
3879: if (len > buf_size - 10) {
3880: growBuffer(buf);
1.186 daniel 3881: }
1.185 daniel 3882: }
1.186 daniel 3883: xmlFree(rep);
1.129 daniel 3884: }
1.186 daniel 3885: } else {
3886: if (ent->content != NULL)
1.198 daniel 3887: buf[len++] = ent->content[0];
1.129 daniel 3888: }
3889: } else if (ent != NULL) {
3890: int i = xmlStrlen(ent->name);
3891: const xmlChar *cur = ent->name;
3892:
1.186 daniel 3893: /*
3894: * This may look absurd but is needed to detect
3895: * entities problems
3896: */
1.211 veillard 3897: if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3898: (ent->content != NULL)) {
1.186 daniel 3899: xmlChar *rep;
3900: rep = xmlStringDecodeEntities(ctxt, ent->content,
3901: XML_SUBSTITUTE_REF, 0, 0, 0);
3902: if (rep != NULL)
3903: xmlFree(rep);
3904: }
3905:
3906: /*
3907: * Just output the reference
3908: */
1.198 daniel 3909: buf[len++] = '&';
3910: if (len > buf_size - i - 10) {
3911: growBuffer(buf);
1.129 daniel 3912: }
3913: for (;i > 0;i--)
1.198 daniel 3914: buf[len++] = *cur++;
3915: buf[len++] = ';';
1.129 daniel 3916: }
3917: } else {
1.198 daniel 3918: if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
3919: COPY_BUF(l,buf,len,0x20);
3920: if (len > buf_size - 10) {
3921: growBuffer(buf);
1.129 daniel 3922: }
3923: } else {
1.198 daniel 3924: COPY_BUF(l,buf,len,c);
3925: if (len > buf_size - 10) {
3926: growBuffer(buf);
1.129 daniel 3927: }
3928: }
1.198 daniel 3929: NEXTL(l);
1.129 daniel 3930: }
1.198 daniel 3931: GROW;
3932: c = CUR_CHAR(l);
1.129 daniel 3933: }
1.198 daniel 3934: buf[len++] = 0;
1.152 daniel 3935: if (RAW == '<') {
1.129 daniel 3936: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3937: ctxt->sax->error(ctxt->userData,
3938: "Unescaped '<' not allowed in attributes values\n");
3939: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
3940: ctxt->wellFormed = 0;
1.180 daniel 3941: ctxt->disableSAX = 1;
1.152 daniel 3942: } else if (RAW != limit) {
1.129 daniel 3943: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3944: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
3945: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
3946: ctxt->wellFormed = 0;
1.180 daniel 3947: ctxt->disableSAX = 1;
1.129 daniel 3948: } else
3949: NEXT;
1.198 daniel 3950: return(buf);
1.29 daniel 3951: }
3952:
1.50 daniel 3953: /**
3954: * xmlParseSystemLiteral:
3955: * @ctxt: an XML parser context
3956: *
3957: * parse an XML Literal
1.21 daniel 3958: *
1.22 daniel 3959: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.68 daniel 3960: *
3961: * Returns the SystemLiteral parsed or NULL
1.21 daniel 3962: */
3963:
1.123 daniel 3964: xmlChar *
1.55 daniel 3965: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 3966: xmlChar *buf = NULL;
3967: int len = 0;
1.140 daniel 3968: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 3969: int cur, l;
1.135 daniel 3970: xmlChar stop;
1.168 daniel 3971: int state = ctxt->instate;
1.21 daniel 3972:
1.91 daniel 3973: SHRINK;
1.152 daniel 3974: if (RAW == '"') {
1.40 daniel 3975: NEXT;
1.135 daniel 3976: stop = '"';
1.152 daniel 3977: } else if (RAW == '\'') {
1.40 daniel 3978: NEXT;
1.135 daniel 3979: stop = '\'';
1.21 daniel 3980: } else {
1.55 daniel 3981: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3982: ctxt->sax->error(ctxt->userData,
3983: "SystemLiteral \" or ' expected\n");
1.123 daniel 3984: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.59 daniel 3985: ctxt->wellFormed = 0;
1.180 daniel 3986: ctxt->disableSAX = 1;
1.135 daniel 3987: return(NULL);
1.21 daniel 3988: }
3989:
1.135 daniel 3990: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3991: if (buf == NULL) {
3992: fprintf(stderr, "malloc of %d byte failed\n", size);
3993: return(NULL);
3994: }
1.168 daniel 3995: ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
1.152 daniel 3996: cur = CUR_CHAR(l);
1.135 daniel 3997: while ((IS_CHAR(cur)) && (cur != stop)) {
1.152 daniel 3998: if (len + 5 >= size) {
1.135 daniel 3999: size *= 2;
1.204 veillard 4000: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 4001: if (buf == NULL) {
4002: fprintf(stderr, "realloc of %d byte failed\n", size);
1.204 veillard 4003: ctxt->instate = (xmlParserInputState) state;
1.135 daniel 4004: return(NULL);
4005: }
4006: }
1.152 daniel 4007: COPY_BUF(l,buf,len,cur);
4008: NEXTL(l);
4009: cur = CUR_CHAR(l);
1.135 daniel 4010: if (cur == 0) {
4011: GROW;
4012: SHRINK;
1.152 daniel 4013: cur = CUR_CHAR(l);
1.135 daniel 4014: }
4015: }
4016: buf[len] = 0;
1.204 veillard 4017: ctxt->instate = (xmlParserInputState) state;
1.135 daniel 4018: if (!IS_CHAR(cur)) {
4019: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4020: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
4021: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
4022: ctxt->wellFormed = 0;
1.180 daniel 4023: ctxt->disableSAX = 1;
1.135 daniel 4024: } else {
4025: NEXT;
4026: }
4027: return(buf);
1.21 daniel 4028: }
4029:
1.50 daniel 4030: /**
4031: * xmlParsePubidLiteral:
4032: * @ctxt: an XML parser context
1.21 daniel 4033: *
1.50 daniel 4034: * parse an XML public literal
1.68 daniel 4035: *
4036: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4037: *
4038: * Returns the PubidLiteral parsed or NULL.
1.21 daniel 4039: */
4040:
1.123 daniel 4041: xmlChar *
1.55 daniel 4042: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 4043: xmlChar *buf = NULL;
4044: int len = 0;
1.140 daniel 4045: int size = XML_PARSER_BUFFER_SIZE;
1.135 daniel 4046: xmlChar cur;
4047: xmlChar stop;
1.125 daniel 4048:
1.91 daniel 4049: SHRINK;
1.152 daniel 4050: if (RAW == '"') {
1.40 daniel 4051: NEXT;
1.135 daniel 4052: stop = '"';
1.152 daniel 4053: } else if (RAW == '\'') {
1.40 daniel 4054: NEXT;
1.135 daniel 4055: stop = '\'';
1.21 daniel 4056: } else {
1.55 daniel 4057: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4058: ctxt->sax->error(ctxt->userData,
4059: "SystemLiteral \" or ' expected\n");
1.123 daniel 4060: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.59 daniel 4061: ctxt->wellFormed = 0;
1.180 daniel 4062: ctxt->disableSAX = 1;
1.135 daniel 4063: return(NULL);
4064: }
4065: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4066: if (buf == NULL) {
4067: fprintf(stderr, "malloc of %d byte failed\n", size);
4068: return(NULL);
4069: }
4070: cur = CUR;
4071: while ((IS_PUBIDCHAR(cur)) && (cur != stop)) {
4072: if (len + 1 >= size) {
4073: size *= 2;
1.204 veillard 4074: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 4075: if (buf == NULL) {
4076: fprintf(stderr, "realloc of %d byte failed\n", size);
4077: return(NULL);
4078: }
4079: }
4080: buf[len++] = cur;
4081: NEXT;
4082: cur = CUR;
4083: if (cur == 0) {
4084: GROW;
4085: SHRINK;
4086: cur = CUR;
4087: }
4088: }
4089: buf[len] = 0;
4090: if (cur != stop) {
4091: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4092: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
4093: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
4094: ctxt->wellFormed = 0;
1.180 daniel 4095: ctxt->disableSAX = 1;
1.135 daniel 4096: } else {
4097: NEXT;
1.21 daniel 4098: }
1.135 daniel 4099: return(buf);
1.21 daniel 4100: }
4101:
1.50 daniel 4102: /**
4103: * xmlParseCharData:
4104: * @ctxt: an XML parser context
4105: * @cdata: int indicating whether we are within a CDATA section
4106: *
4107: * parse a CharData section.
4108: * if we are within a CDATA section ']]>' marks an end of section.
1.27 daniel 4109: *
1.151 daniel 4110: * The right angle bracket (>) may be represented using the string ">",
4111: * and must, for compatibility, be escaped using ">" or a character
4112: * reference when it appears in the string "]]>" in content, when that
4113: * string is not marking the end of a CDATA section.
4114: *
1.27 daniel 4115: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4116: */
4117:
1.55 daniel 4118: void
4119: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1.152 daniel 4120: xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
1.91 daniel 4121: int nbchar = 0;
1.152 daniel 4122: int cur, l;
1.27 daniel 4123:
1.91 daniel 4124: SHRINK;
1.152 daniel 4125: cur = CUR_CHAR(l);
1.190 daniel 4126: while (((cur != '<') || (ctxt->token == '<')) &&
4127: ((cur != '&') || (ctxt->token == '&')) &&
4128: (IS_CHAR(cur))) {
1.97 daniel 4129: if ((cur == ']') && (NXT(1) == ']') &&
1.59 daniel 4130: (NXT(2) == '>')) {
4131: if (cdata) break;
4132: else {
4133: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.151 daniel 4134: ctxt->sax->error(ctxt->userData,
1.59 daniel 4135: "Sequence ']]>' not allowed in content\n");
1.123 daniel 4136: ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
1.151 daniel 4137: /* Should this be relaxed ??? I see a "must here */
4138: ctxt->wellFormed = 0;
1.180 daniel 4139: ctxt->disableSAX = 1;
1.59 daniel 4140: }
4141: }
1.152 daniel 4142: COPY_BUF(l,buf,nbchar,cur);
4143: if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
1.91 daniel 4144: /*
4145: * Ok the segment is to be consumed as chars.
4146: */
1.171 daniel 4147: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 4148: if (areBlanks(ctxt, buf, nbchar)) {
4149: if (ctxt->sax->ignorableWhitespace != NULL)
4150: ctxt->sax->ignorableWhitespace(ctxt->userData,
4151: buf, nbchar);
4152: } else {
4153: if (ctxt->sax->characters != NULL)
4154: ctxt->sax->characters(ctxt->userData, buf, nbchar);
4155: }
4156: }
4157: nbchar = 0;
4158: }
1.152 daniel 4159: NEXTL(l);
4160: cur = CUR_CHAR(l);
1.27 daniel 4161: }
1.91 daniel 4162: if (nbchar != 0) {
4163: /*
4164: * Ok the segment is to be consumed as chars.
4165: */
1.171 daniel 4166: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 4167: if (areBlanks(ctxt, buf, nbchar)) {
4168: if (ctxt->sax->ignorableWhitespace != NULL)
4169: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4170: } else {
4171: if (ctxt->sax->characters != NULL)
4172: ctxt->sax->characters(ctxt->userData, buf, nbchar);
4173: }
4174: }
1.45 daniel 4175: }
1.27 daniel 4176: }
4177:
1.50 daniel 4178: /**
4179: * xmlParseExternalID:
4180: * @ctxt: an XML parser context
1.123 daniel 4181: * @publicID: a xmlChar** receiving PubidLiteral
1.67 daniel 4182: * @strict: indicate whether we should restrict parsing to only
4183: * production [75], see NOTE below
1.50 daniel 4184: *
1.67 daniel 4185: * Parse an External ID or a Public ID
4186: *
4187: * NOTE: Productions [75] and [83] interract badly since [75] can generate
4188: * 'PUBLIC' S PubidLiteral S SystemLiteral
1.22 daniel 4189: *
4190: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4191: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1.67 daniel 4192: *
4193: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4194: *
1.68 daniel 4195: * Returns the function returns SystemLiteral and in the second
1.67 daniel 4196: * case publicID receives PubidLiteral, is strict is off
4197: * it is possible to return NULL and have publicID set.
1.22 daniel 4198: */
4199:
1.123 daniel 4200: xmlChar *
4201: xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4202: xmlChar *URI = NULL;
1.22 daniel 4203:
1.91 daniel 4204: SHRINK;
1.152 daniel 4205: if ((RAW == 'S') && (NXT(1) == 'Y') &&
1.40 daniel 4206: (NXT(2) == 'S') && (NXT(3) == 'T') &&
4207: (NXT(4) == 'E') && (NXT(5) == 'M')) {
4208: SKIP(6);
1.59 daniel 4209: if (!IS_BLANK(CUR)) {
4210: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4211: ctxt->sax->error(ctxt->userData,
1.59 daniel 4212: "Space required after 'SYSTEM'\n");
1.123 daniel 4213: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4214: ctxt->wellFormed = 0;
1.180 daniel 4215: ctxt->disableSAX = 1;
1.59 daniel 4216: }
1.42 daniel 4217: SKIP_BLANKS;
1.39 daniel 4218: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 4219: if (URI == NULL) {
1.55 daniel 4220: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4221: ctxt->sax->error(ctxt->userData,
1.39 daniel 4222: "xmlParseExternalID: SYSTEM, no URI\n");
1.123 daniel 4223: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.59 daniel 4224: ctxt->wellFormed = 0;
1.180 daniel 4225: ctxt->disableSAX = 1;
1.59 daniel 4226: }
1.152 daniel 4227: } else if ((RAW == 'P') && (NXT(1) == 'U') &&
1.40 daniel 4228: (NXT(2) == 'B') && (NXT(3) == 'L') &&
4229: (NXT(4) == 'I') && (NXT(5) == 'C')) {
4230: SKIP(6);
1.59 daniel 4231: if (!IS_BLANK(CUR)) {
4232: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4233: ctxt->sax->error(ctxt->userData,
1.59 daniel 4234: "Space required after 'PUBLIC'\n");
1.123 daniel 4235: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4236: ctxt->wellFormed = 0;
1.180 daniel 4237: ctxt->disableSAX = 1;
1.59 daniel 4238: }
1.42 daniel 4239: SKIP_BLANKS;
1.39 daniel 4240: *publicID = xmlParsePubidLiteral(ctxt);
1.59 daniel 4241: if (*publicID == NULL) {
1.55 daniel 4242: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4243: ctxt->sax->error(ctxt->userData,
1.39 daniel 4244: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.123 daniel 4245: ctxt->errNo = XML_ERR_PUBID_REQUIRED;
1.59 daniel 4246: ctxt->wellFormed = 0;
1.180 daniel 4247: ctxt->disableSAX = 1;
1.59 daniel 4248: }
1.67 daniel 4249: if (strict) {
4250: /*
4251: * We don't handle [83] so "S SystemLiteral" is required.
4252: */
4253: if (!IS_BLANK(CUR)) {
4254: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4255: ctxt->sax->error(ctxt->userData,
1.67 daniel 4256: "Space required after the Public Identifier\n");
1.123 daniel 4257: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 4258: ctxt->wellFormed = 0;
1.180 daniel 4259: ctxt->disableSAX = 1;
1.67 daniel 4260: }
4261: } else {
4262: /*
4263: * We handle [83] so we return immediately, if
4264: * "S SystemLiteral" is not detected. From a purely parsing
4265: * point of view that's a nice mess.
4266: */
1.135 daniel 4267: const xmlChar *ptr;
4268: GROW;
4269:
4270: ptr = CUR_PTR;
1.67 daniel 4271: if (!IS_BLANK(*ptr)) return(NULL);
4272:
4273: while (IS_BLANK(*ptr)) ptr++;
1.173 daniel 4274: if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
1.59 daniel 4275: }
1.42 daniel 4276: SKIP_BLANKS;
1.39 daniel 4277: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 4278: if (URI == NULL) {
1.55 daniel 4279: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4280: ctxt->sax->error(ctxt->userData,
1.39 daniel 4281: "xmlParseExternalID: PUBLIC, no URI\n");
1.123 daniel 4282: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.59 daniel 4283: ctxt->wellFormed = 0;
1.180 daniel 4284: ctxt->disableSAX = 1;
1.59 daniel 4285: }
1.22 daniel 4286: }
1.39 daniel 4287: return(URI);
1.22 daniel 4288: }
4289:
1.50 daniel 4290: /**
4291: * xmlParseComment:
1.69 daniel 4292: * @ctxt: an XML parser context
1.50 daniel 4293: *
1.3 veillard 4294: * Skip an XML (SGML) comment <!-- .... -->
1.38 daniel 4295: * The spec says that "For compatibility, the string "--" (double-hyphen)
4296: * must not occur within comments. "
1.22 daniel 4297: *
4298: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 4299: */
1.72 daniel 4300: void
1.114 daniel 4301: xmlParseComment(xmlParserCtxtPtr ctxt) {
1.135 daniel 4302: xmlChar *buf = NULL;
1.195 daniel 4303: int len;
1.140 daniel 4304: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 4305: int q, ql;
4306: int r, rl;
4307: int cur, l;
1.140 daniel 4308: xmlParserInputState state;
1.187 daniel 4309: xmlParserInputPtr input = ctxt->input;
1.3 veillard 4310:
4311: /*
1.22 daniel 4312: * Check that there is a comment right here.
1.3 veillard 4313: */
1.152 daniel 4314: if ((RAW != '<') || (NXT(1) != '!') ||
1.72 daniel 4315: (NXT(2) != '-') || (NXT(3) != '-')) return;
1.3 veillard 4316:
1.140 daniel 4317: state = ctxt->instate;
1.97 daniel 4318: ctxt->instate = XML_PARSER_COMMENT;
1.91 daniel 4319: SHRINK;
1.40 daniel 4320: SKIP(4);
1.135 daniel 4321: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4322: if (buf == NULL) {
4323: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 4324: ctxt->instate = state;
1.135 daniel 4325: return;
4326: }
1.152 daniel 4327: q = CUR_CHAR(ql);
4328: NEXTL(ql);
4329: r = CUR_CHAR(rl);
4330: NEXTL(rl);
4331: cur = CUR_CHAR(l);
1.195 daniel 4332: len = 0;
1.135 daniel 4333: while (IS_CHAR(cur) &&
4334: ((cur != '>') ||
4335: (r != '-') || (q != '-'))) {
1.195 daniel 4336: if ((r == '-') && (q == '-') && (len > 1)) {
1.55 daniel 4337: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4338: ctxt->sax->error(ctxt->userData,
1.38 daniel 4339: "Comment must not contain '--' (double-hyphen)`\n");
1.123 daniel 4340: ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
1.59 daniel 4341: ctxt->wellFormed = 0;
1.180 daniel 4342: ctxt->disableSAX = 1;
1.59 daniel 4343: }
1.152 daniel 4344: if (len + 5 >= size) {
1.135 daniel 4345: size *= 2;
1.204 veillard 4346: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 4347: if (buf == NULL) {
4348: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 4349: ctxt->instate = state;
1.135 daniel 4350: return;
4351: }
4352: }
1.152 daniel 4353: COPY_BUF(ql,buf,len,q);
1.135 daniel 4354: q = r;
1.152 daniel 4355: ql = rl;
1.135 daniel 4356: r = cur;
1.152 daniel 4357: rl = l;
4358: NEXTL(l);
4359: cur = CUR_CHAR(l);
1.135 daniel 4360: if (cur == 0) {
4361: SHRINK;
4362: GROW;
1.152 daniel 4363: cur = CUR_CHAR(l);
1.135 daniel 4364: }
1.3 veillard 4365: }
1.135 daniel 4366: buf[len] = 0;
4367: if (!IS_CHAR(cur)) {
1.55 daniel 4368: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4369: ctxt->sax->error(ctxt->userData,
1.135 daniel 4370: "Comment not terminated \n<!--%.50s\n", buf);
1.123 daniel 4371: ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
1.59 daniel 4372: ctxt->wellFormed = 0;
1.180 daniel 4373: ctxt->disableSAX = 1;
1.178 daniel 4374: xmlFree(buf);
1.3 veillard 4375: } else {
1.187 daniel 4376: if (input != ctxt->input) {
4377: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4378: ctxt->sax->error(ctxt->userData,
4379: "Comment doesn't start and stop in the same entity\n");
4380: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4381: ctxt->wellFormed = 0;
4382: ctxt->disableSAX = 1;
4383: }
1.40 daniel 4384: NEXT;
1.171 daniel 4385: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4386: (!ctxt->disableSAX))
1.135 daniel 4387: ctxt->sax->comment(ctxt->userData, buf);
4388: xmlFree(buf);
1.3 veillard 4389: }
1.140 daniel 4390: ctxt->instate = state;
1.3 veillard 4391: }
4392:
1.50 daniel 4393: /**
4394: * xmlParsePITarget:
4395: * @ctxt: an XML parser context
4396: *
4397: * parse the name of a PI
1.22 daniel 4398: *
4399: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1.68 daniel 4400: *
4401: * Returns the PITarget name or NULL
1.22 daniel 4402: */
4403:
1.123 daniel 4404: xmlChar *
1.55 daniel 4405: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1.123 daniel 4406: xmlChar *name;
1.22 daniel 4407:
4408: name = xmlParseName(ctxt);
1.139 daniel 4409: if ((name != NULL) &&
1.22 daniel 4410: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 4411: ((name[1] == 'm') || (name[1] == 'M')) &&
4412: ((name[2] == 'l') || (name[2] == 'L'))) {
1.139 daniel 4413: int i;
1.177 daniel 4414: if ((name[0] == 'x') && (name[1] == 'm') &&
4415: (name[2] == 'l') && (name[3] == 0)) {
1.151 daniel 4416: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4417: ctxt->sax->error(ctxt->userData,
4418: "XML declaration allowed only at the start of the document\n");
4419: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
4420: ctxt->wellFormed = 0;
1.180 daniel 4421: ctxt->disableSAX = 1;
1.151 daniel 4422: return(name);
4423: } else if (name[3] == 0) {
4424: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4425: ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
4426: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
4427: ctxt->wellFormed = 0;
1.180 daniel 4428: ctxt->disableSAX = 1;
1.151 daniel 4429: return(name);
4430: }
1.139 daniel 4431: for (i = 0;;i++) {
4432: if (xmlW3CPIs[i] == NULL) break;
4433: if (!xmlStrcmp(name, (const xmlChar *)xmlW3CPIs[i]))
4434: return(name);
4435: }
4436: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
4437: ctxt->sax->warning(ctxt->userData,
1.122 daniel 4438: "xmlParsePItarget: invalid name prefix 'xml'\n");
1.123 daniel 4439: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
1.122 daniel 4440: }
1.22 daniel 4441: }
4442: return(name);
4443: }
4444:
1.50 daniel 4445: /**
4446: * xmlParsePI:
4447: * @ctxt: an XML parser context
4448: *
4449: * parse an XML Processing Instruction.
1.22 daniel 4450: *
4451: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.68 daniel 4452: *
1.69 daniel 4453: * The processing is transfered to SAX once parsed.
1.3 veillard 4454: */
4455:
1.55 daniel 4456: void
4457: xmlParsePI(xmlParserCtxtPtr ctxt) {
1.135 daniel 4458: xmlChar *buf = NULL;
4459: int len = 0;
1.140 daniel 4460: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 4461: int cur, l;
1.123 daniel 4462: xmlChar *target;
1.140 daniel 4463: xmlParserInputState state;
1.22 daniel 4464:
1.152 daniel 4465: if ((RAW == '<') && (NXT(1) == '?')) {
1.187 daniel 4466: xmlParserInputPtr input = ctxt->input;
1.140 daniel 4467: state = ctxt->instate;
4468: ctxt->instate = XML_PARSER_PI;
1.3 veillard 4469: /*
4470: * this is a Processing Instruction.
4471: */
1.40 daniel 4472: SKIP(2);
1.91 daniel 4473: SHRINK;
1.3 veillard 4474:
4475: /*
1.22 daniel 4476: * Parse the target name and check for special support like
4477: * namespace.
1.3 veillard 4478: */
1.22 daniel 4479: target = xmlParsePITarget(ctxt);
4480: if (target != NULL) {
1.156 daniel 4481: if ((RAW == '?') && (NXT(1) == '>')) {
1.187 daniel 4482: if (input != ctxt->input) {
4483: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4484: ctxt->sax->error(ctxt->userData,
4485: "PI declaration doesn't start and stop in the same entity\n");
4486: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4487: ctxt->wellFormed = 0;
4488: ctxt->disableSAX = 1;
4489: }
1.156 daniel 4490: SKIP(2);
4491:
4492: /*
4493: * SAX: PI detected.
4494: */
1.171 daniel 4495: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.156 daniel 4496: (ctxt->sax->processingInstruction != NULL))
4497: ctxt->sax->processingInstruction(ctxt->userData,
4498: target, NULL);
4499: ctxt->instate = state;
1.170 daniel 4500: xmlFree(target);
1.156 daniel 4501: return;
4502: }
1.135 daniel 4503: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4504: if (buf == NULL) {
4505: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 4506: ctxt->instate = state;
1.135 daniel 4507: return;
4508: }
4509: cur = CUR;
4510: if (!IS_BLANK(cur)) {
1.114 daniel 4511: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4512: ctxt->sax->error(ctxt->userData,
4513: "xmlParsePI: PI %s space expected\n", target);
1.123 daniel 4514: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.114 daniel 4515: ctxt->wellFormed = 0;
1.180 daniel 4516: ctxt->disableSAX = 1;
1.114 daniel 4517: }
4518: SKIP_BLANKS;
1.152 daniel 4519: cur = CUR_CHAR(l);
1.135 daniel 4520: while (IS_CHAR(cur) &&
4521: ((cur != '?') || (NXT(1) != '>'))) {
1.152 daniel 4522: if (len + 5 >= size) {
1.135 daniel 4523: size *= 2;
1.204 veillard 4524: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 4525: if (buf == NULL) {
4526: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 4527: ctxt->instate = state;
1.135 daniel 4528: return;
4529: }
4530: }
1.152 daniel 4531: COPY_BUF(l,buf,len,cur);
4532: NEXTL(l);
4533: cur = CUR_CHAR(l);
1.135 daniel 4534: if (cur == 0) {
4535: SHRINK;
4536: GROW;
1.152 daniel 4537: cur = CUR_CHAR(l);
1.135 daniel 4538: }
4539: }
4540: buf[len] = 0;
1.152 daniel 4541: if (cur != '?') {
1.72 daniel 4542: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4543: ctxt->sax->error(ctxt->userData,
1.72 daniel 4544: "xmlParsePI: PI %s never end ...\n", target);
1.123 daniel 4545: ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
1.72 daniel 4546: ctxt->wellFormed = 0;
1.180 daniel 4547: ctxt->disableSAX = 1;
1.22 daniel 4548: } else {
1.187 daniel 4549: if (input != ctxt->input) {
4550: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4551: ctxt->sax->error(ctxt->userData,
4552: "PI declaration doesn't start and stop in the same entity\n");
4553: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4554: ctxt->wellFormed = 0;
4555: ctxt->disableSAX = 1;
4556: }
1.72 daniel 4557: SKIP(2);
1.44 daniel 4558:
1.72 daniel 4559: /*
4560: * SAX: PI detected.
4561: */
1.171 daniel 4562: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.72 daniel 4563: (ctxt->sax->processingInstruction != NULL))
1.99 daniel 4564: ctxt->sax->processingInstruction(ctxt->userData,
1.135 daniel 4565: target, buf);
1.22 daniel 4566: }
1.135 daniel 4567: xmlFree(buf);
1.119 daniel 4568: xmlFree(target);
1.3 veillard 4569: } else {
1.55 daniel 4570: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.99 daniel 4571: ctxt->sax->error(ctxt->userData,
4572: "xmlParsePI : no target name\n");
1.123 daniel 4573: ctxt->errNo = XML_ERR_PI_NOT_STARTED;
1.59 daniel 4574: ctxt->wellFormed = 0;
1.180 daniel 4575: ctxt->disableSAX = 1;
1.22 daniel 4576: }
1.140 daniel 4577: ctxt->instate = state;
1.22 daniel 4578: }
4579: }
4580:
1.50 daniel 4581: /**
4582: * xmlParseNotationDecl:
4583: * @ctxt: an XML parser context
4584: *
4585: * parse a notation declaration
1.22 daniel 4586: *
4587: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4588: *
4589: * Hence there is actually 3 choices:
4590: * 'PUBLIC' S PubidLiteral
4591: * 'PUBLIC' S PubidLiteral S SystemLiteral
4592: * and 'SYSTEM' S SystemLiteral
1.50 daniel 4593: *
1.67 daniel 4594: * See the NOTE on xmlParseExternalID().
1.22 daniel 4595: */
4596:
1.55 daniel 4597: void
4598: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4599: xmlChar *name;
4600: xmlChar *Pubid;
4601: xmlChar *Systemid;
1.22 daniel 4602:
1.152 daniel 4603: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4604: (NXT(2) == 'N') && (NXT(3) == 'O') &&
4605: (NXT(4) == 'T') && (NXT(5) == 'A') &&
4606: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1.67 daniel 4607: (NXT(8) == 'O') && (NXT(9) == 'N')) {
1.187 daniel 4608: xmlParserInputPtr input = ctxt->input;
1.91 daniel 4609: SHRINK;
1.40 daniel 4610: SKIP(10);
1.67 daniel 4611: if (!IS_BLANK(CUR)) {
4612: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4613: ctxt->sax->error(ctxt->userData,
4614: "Space required after '<!NOTATION'\n");
1.123 daniel 4615: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 4616: ctxt->wellFormed = 0;
1.180 daniel 4617: ctxt->disableSAX = 1;
1.67 daniel 4618: return;
4619: }
4620: SKIP_BLANKS;
1.22 daniel 4621:
4622: name = xmlParseName(ctxt);
4623: if (name == NULL) {
1.55 daniel 4624: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4625: ctxt->sax->error(ctxt->userData,
4626: "NOTATION: Name expected here\n");
1.123 daniel 4627: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.67 daniel 4628: ctxt->wellFormed = 0;
1.180 daniel 4629: ctxt->disableSAX = 1;
1.67 daniel 4630: return;
4631: }
4632: if (!IS_BLANK(CUR)) {
4633: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4634: ctxt->sax->error(ctxt->userData,
1.67 daniel 4635: "Space required after the NOTATION name'\n");
1.123 daniel 4636: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4637: ctxt->wellFormed = 0;
1.180 daniel 4638: ctxt->disableSAX = 1;
1.22 daniel 4639: return;
4640: }
1.42 daniel 4641: SKIP_BLANKS;
1.67 daniel 4642:
1.22 daniel 4643: /*
1.67 daniel 4644: * Parse the IDs.
1.22 daniel 4645: */
1.160 daniel 4646: Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
1.67 daniel 4647: SKIP_BLANKS;
4648:
1.152 daniel 4649: if (RAW == '>') {
1.187 daniel 4650: if (input != ctxt->input) {
4651: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4652: ctxt->sax->error(ctxt->userData,
4653: "Notation declaration doesn't start and stop in the same entity\n");
4654: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4655: ctxt->wellFormed = 0;
4656: ctxt->disableSAX = 1;
4657: }
1.40 daniel 4658: NEXT;
1.171 daniel 4659: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4660: (ctxt->sax->notationDecl != NULL))
1.74 daniel 4661: ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
1.67 daniel 4662: } else {
4663: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4664: ctxt->sax->error(ctxt->userData,
1.67 daniel 4665: "'>' required to close NOTATION declaration\n");
1.123 daniel 4666: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.67 daniel 4667: ctxt->wellFormed = 0;
1.180 daniel 4668: ctxt->disableSAX = 1;
1.67 daniel 4669: }
1.119 daniel 4670: xmlFree(name);
4671: if (Systemid != NULL) xmlFree(Systemid);
4672: if (Pubid != NULL) xmlFree(Pubid);
1.22 daniel 4673: }
4674: }
4675:
1.50 daniel 4676: /**
4677: * xmlParseEntityDecl:
4678: * @ctxt: an XML parser context
4679: *
4680: * parse <!ENTITY declarations
1.22 daniel 4681: *
4682: * [70] EntityDecl ::= GEDecl | PEDecl
4683: *
4684: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4685: *
4686: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4687: *
4688: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4689: *
4690: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 4691: *
4692: * [76] NDataDecl ::= S 'NDATA' S Name
1.99 daniel 4693: *
4694: * [ VC: Notation Declared ]
1.116 daniel 4695: * The Name must match the declared name of a notation.
1.22 daniel 4696: */
4697:
1.55 daniel 4698: void
4699: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4700: xmlChar *name = NULL;
4701: xmlChar *value = NULL;
4702: xmlChar *URI = NULL, *literal = NULL;
4703: xmlChar *ndata = NULL;
1.39 daniel 4704: int isParameter = 0;
1.123 daniel 4705: xmlChar *orig = NULL;
1.22 daniel 4706:
1.94 daniel 4707: GROW;
1.152 daniel 4708: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4709: (NXT(2) == 'E') && (NXT(3) == 'N') &&
4710: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1.59 daniel 4711: (NXT(6) == 'T') && (NXT(7) == 'Y')) {
1.187 daniel 4712: xmlParserInputPtr input = ctxt->input;
1.96 daniel 4713: ctxt->instate = XML_PARSER_ENTITY_DECL;
1.91 daniel 4714: SHRINK;
1.40 daniel 4715: SKIP(8);
1.59 daniel 4716: if (!IS_BLANK(CUR)) {
4717: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4718: ctxt->sax->error(ctxt->userData,
4719: "Space required after '<!ENTITY'\n");
1.123 daniel 4720: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4721: ctxt->wellFormed = 0;
1.180 daniel 4722: ctxt->disableSAX = 1;
1.59 daniel 4723: }
4724: SKIP_BLANKS;
1.40 daniel 4725:
1.152 daniel 4726: if (RAW == '%') {
1.40 daniel 4727: NEXT;
1.59 daniel 4728: if (!IS_BLANK(CUR)) {
4729: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4730: ctxt->sax->error(ctxt->userData,
4731: "Space required after '%'\n");
1.123 daniel 4732: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4733: ctxt->wellFormed = 0;
1.180 daniel 4734: ctxt->disableSAX = 1;
1.59 daniel 4735: }
1.42 daniel 4736: SKIP_BLANKS;
1.39 daniel 4737: isParameter = 1;
1.22 daniel 4738: }
4739:
4740: name = xmlParseName(ctxt);
1.24 daniel 4741: if (name == NULL) {
1.55 daniel 4742: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4743: ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
1.123 daniel 4744: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 4745: ctxt->wellFormed = 0;
1.180 daniel 4746: ctxt->disableSAX = 1;
1.24 daniel 4747: return;
4748: }
1.59 daniel 4749: if (!IS_BLANK(CUR)) {
4750: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4751: ctxt->sax->error(ctxt->userData,
1.59 daniel 4752: "Space required after the entity name\n");
1.123 daniel 4753: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4754: ctxt->wellFormed = 0;
1.180 daniel 4755: ctxt->disableSAX = 1;
1.59 daniel 4756: }
1.42 daniel 4757: SKIP_BLANKS;
1.24 daniel 4758:
1.22 daniel 4759: /*
1.68 daniel 4760: * handle the various case of definitions...
1.22 daniel 4761: */
1.39 daniel 4762: if (isParameter) {
1.152 daniel 4763: if ((RAW == '"') || (RAW == '\''))
1.78 daniel 4764: value = xmlParseEntityValue(ctxt, &orig);
1.39 daniel 4765: if (value) {
1.171 daniel 4766: if ((ctxt->sax != NULL) &&
4767: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4768: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4769: XML_INTERNAL_PARAMETER_ENTITY,
4770: NULL, NULL, value);
4771: }
1.24 daniel 4772: else {
1.67 daniel 4773: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 4774: if ((URI == NULL) && (literal == NULL)) {
4775: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4776: ctxt->sax->error(ctxt->userData,
4777: "Entity value required\n");
4778: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
4779: ctxt->wellFormed = 0;
1.180 daniel 4780: ctxt->disableSAX = 1;
1.169 daniel 4781: }
1.39 daniel 4782: if (URI) {
1.193 daniel 4783: xmlURIPtr uri;
4784:
4785: uri = xmlParseURI((const char *) URI);
4786: if (uri == NULL) {
4787: if ((ctxt->sax != NULL) &&
4788: (!ctxt->disableSAX) &&
4789: (ctxt->sax->error != NULL))
4790: ctxt->sax->error(ctxt->userData,
4791: "Invalid URI: %s\n", URI);
4792: ctxt->wellFormed = 0;
4793: ctxt->errNo = XML_ERR_INVALID_URI;
4794: } else {
4795: if (uri->fragment != NULL) {
4796: if ((ctxt->sax != NULL) &&
4797: (!ctxt->disableSAX) &&
4798: (ctxt->sax->error != NULL))
4799: ctxt->sax->error(ctxt->userData,
4800: "Fragment not allowed: %s\n", URI);
4801: ctxt->wellFormed = 0;
4802: ctxt->errNo = XML_ERR_URI_FRAGMENT;
4803: } else {
4804: if ((ctxt->sax != NULL) &&
4805: (!ctxt->disableSAX) &&
4806: (ctxt->sax->entityDecl != NULL))
4807: ctxt->sax->entityDecl(ctxt->userData, name,
4808: XML_EXTERNAL_PARAMETER_ENTITY,
4809: literal, URI, NULL);
4810: }
4811: xmlFreeURI(uri);
4812: }
1.39 daniel 4813: }
1.24 daniel 4814: }
4815: } else {
1.152 daniel 4816: if ((RAW == '"') || (RAW == '\'')) {
1.78 daniel 4817: value = xmlParseEntityValue(ctxt, &orig);
1.171 daniel 4818: if ((ctxt->sax != NULL) &&
4819: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4820: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4821: XML_INTERNAL_GENERAL_ENTITY,
4822: NULL, NULL, value);
4823: } else {
1.67 daniel 4824: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 4825: if ((URI == NULL) && (literal == NULL)) {
4826: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4827: ctxt->sax->error(ctxt->userData,
4828: "Entity value required\n");
4829: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
4830: ctxt->wellFormed = 0;
1.180 daniel 4831: ctxt->disableSAX = 1;
1.169 daniel 4832: }
1.193 daniel 4833: if (URI) {
4834: xmlURIPtr uri;
4835:
4836: uri = xmlParseURI((const char *)URI);
4837: if (uri == NULL) {
4838: if ((ctxt->sax != NULL) &&
4839: (!ctxt->disableSAX) &&
4840: (ctxt->sax->error != NULL))
4841: ctxt->sax->error(ctxt->userData,
4842: "Invalid URI: %s\n", URI);
4843: ctxt->wellFormed = 0;
4844: ctxt->errNo = XML_ERR_INVALID_URI;
4845: } else {
4846: if (uri->fragment != NULL) {
4847: if ((ctxt->sax != NULL) &&
4848: (!ctxt->disableSAX) &&
4849: (ctxt->sax->error != NULL))
4850: ctxt->sax->error(ctxt->userData,
4851: "Fragment not allowed: %s\n", URI);
4852: ctxt->wellFormed = 0;
4853: ctxt->errNo = XML_ERR_URI_FRAGMENT;
4854: }
4855: xmlFreeURI(uri);
4856: }
4857: }
1.152 daniel 4858: if ((RAW != '>') && (!IS_BLANK(CUR))) {
1.59 daniel 4859: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4860: ctxt->sax->error(ctxt->userData,
1.59 daniel 4861: "Space required before 'NDATA'\n");
1.123 daniel 4862: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4863: ctxt->wellFormed = 0;
1.180 daniel 4864: ctxt->disableSAX = 1;
1.59 daniel 4865: }
1.42 daniel 4866: SKIP_BLANKS;
1.152 daniel 4867: if ((RAW == 'N') && (NXT(1) == 'D') &&
1.40 daniel 4868: (NXT(2) == 'A') && (NXT(3) == 'T') &&
4869: (NXT(4) == 'A')) {
4870: SKIP(5);
1.59 daniel 4871: if (!IS_BLANK(CUR)) {
4872: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4873: ctxt->sax->error(ctxt->userData,
1.59 daniel 4874: "Space required after 'NDATA'\n");
1.123 daniel 4875: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4876: ctxt->wellFormed = 0;
1.180 daniel 4877: ctxt->disableSAX = 1;
1.59 daniel 4878: }
1.42 daniel 4879: SKIP_BLANKS;
1.24 daniel 4880: ndata = xmlParseName(ctxt);
1.171 daniel 4881: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.116 daniel 4882: (ctxt->sax->unparsedEntityDecl != NULL))
4883: ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
1.39 daniel 4884: literal, URI, ndata);
4885: } else {
1.171 daniel 4886: if ((ctxt->sax != NULL) &&
4887: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 4888: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 4889: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4890: literal, URI, NULL);
1.24 daniel 4891: }
4892: }
4893: }
1.42 daniel 4894: SKIP_BLANKS;
1.152 daniel 4895: if (RAW != '>') {
1.55 daniel 4896: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4897: ctxt->sax->error(ctxt->userData,
1.31 daniel 4898: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.123 daniel 4899: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.59 daniel 4900: ctxt->wellFormed = 0;
1.180 daniel 4901: ctxt->disableSAX = 1;
1.187 daniel 4902: } else {
4903: if (input != ctxt->input) {
4904: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4905: ctxt->sax->error(ctxt->userData,
4906: "Entity declaration doesn't start and stop in the same entity\n");
4907: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4908: ctxt->wellFormed = 0;
4909: ctxt->disableSAX = 1;
4910: }
1.40 daniel 4911: NEXT;
1.187 daniel 4912: }
1.78 daniel 4913: if (orig != NULL) {
4914: /*
1.98 daniel 4915: * Ugly mechanism to save the raw entity value.
1.78 daniel 4916: */
4917: xmlEntityPtr cur = NULL;
4918:
1.98 daniel 4919: if (isParameter) {
4920: if ((ctxt->sax != NULL) &&
4921: (ctxt->sax->getParameterEntity != NULL))
1.120 daniel 4922: cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.98 daniel 4923: } else {
4924: if ((ctxt->sax != NULL) &&
4925: (ctxt->sax->getEntity != NULL))
1.120 daniel 4926: cur = ctxt->sax->getEntity(ctxt->userData, name);
1.98 daniel 4927: }
4928: if (cur != NULL) {
4929: if (cur->orig != NULL)
1.119 daniel 4930: xmlFree(orig);
1.98 daniel 4931: else
4932: cur->orig = orig;
4933: } else
1.119 daniel 4934: xmlFree(orig);
1.78 daniel 4935: }
1.119 daniel 4936: if (name != NULL) xmlFree(name);
4937: if (value != NULL) xmlFree(value);
4938: if (URI != NULL) xmlFree(URI);
4939: if (literal != NULL) xmlFree(literal);
4940: if (ndata != NULL) xmlFree(ndata);
1.22 daniel 4941: }
4942: }
4943:
1.50 daniel 4944: /**
1.59 daniel 4945: * xmlParseDefaultDecl:
4946: * @ctxt: an XML parser context
4947: * @value: Receive a possible fixed default value for the attribute
4948: *
4949: * Parse an attribute default declaration
4950: *
4951: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4952: *
1.99 daniel 4953: * [ VC: Required Attribute ]
1.117 daniel 4954: * if the default declaration is the keyword #REQUIRED, then the
4955: * attribute must be specified for all elements of the type in the
4956: * attribute-list declaration.
1.99 daniel 4957: *
4958: * [ VC: Attribute Default Legal ]
1.102 daniel 4959: * The declared default value must meet the lexical constraints of
4960: * the declared attribute type c.f. xmlValidateAttributeDecl()
1.99 daniel 4961: *
4962: * [ VC: Fixed Attribute Default ]
1.117 daniel 4963: * if an attribute has a default value declared with the #FIXED
4964: * keyword, instances of that attribute must match the default value.
1.99 daniel 4965: *
4966: * [ WFC: No < in Attribute Values ]
4967: * handled in xmlParseAttValue()
4968: *
1.59 daniel 4969: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4970: * or XML_ATTRIBUTE_FIXED.
4971: */
4972:
4973: int
1.123 daniel 4974: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
1.59 daniel 4975: int val;
1.123 daniel 4976: xmlChar *ret;
1.59 daniel 4977:
4978: *value = NULL;
1.152 daniel 4979: if ((RAW == '#') && (NXT(1) == 'R') &&
1.59 daniel 4980: (NXT(2) == 'E') && (NXT(3) == 'Q') &&
4981: (NXT(4) == 'U') && (NXT(5) == 'I') &&
4982: (NXT(6) == 'R') && (NXT(7) == 'E') &&
4983: (NXT(8) == 'D')) {
4984: SKIP(9);
4985: return(XML_ATTRIBUTE_REQUIRED);
4986: }
1.152 daniel 4987: if ((RAW == '#') && (NXT(1) == 'I') &&
1.59 daniel 4988: (NXT(2) == 'M') && (NXT(3) == 'P') &&
4989: (NXT(4) == 'L') && (NXT(5) == 'I') &&
4990: (NXT(6) == 'E') && (NXT(7) == 'D')) {
4991: SKIP(8);
4992: return(XML_ATTRIBUTE_IMPLIED);
4993: }
4994: val = XML_ATTRIBUTE_NONE;
1.152 daniel 4995: if ((RAW == '#') && (NXT(1) == 'F') &&
1.59 daniel 4996: (NXT(2) == 'I') && (NXT(3) == 'X') &&
4997: (NXT(4) == 'E') && (NXT(5) == 'D')) {
4998: SKIP(6);
4999: val = XML_ATTRIBUTE_FIXED;
5000: if (!IS_BLANK(CUR)) {
5001: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5002: ctxt->sax->error(ctxt->userData,
5003: "Space required after '#FIXED'\n");
1.123 daniel 5004: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5005: ctxt->wellFormed = 0;
1.180 daniel 5006: ctxt->disableSAX = 1;
1.59 daniel 5007: }
5008: SKIP_BLANKS;
5009: }
5010: ret = xmlParseAttValue(ctxt);
1.96 daniel 5011: ctxt->instate = XML_PARSER_DTD;
1.59 daniel 5012: if (ret == NULL) {
5013: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5014: ctxt->sax->error(ctxt->userData,
1.59 daniel 5015: "Attribute default value declaration error\n");
5016: ctxt->wellFormed = 0;
1.180 daniel 5017: ctxt->disableSAX = 1;
1.59 daniel 5018: } else
5019: *value = ret;
5020: return(val);
5021: }
5022:
5023: /**
1.66 daniel 5024: * xmlParseNotationType:
5025: * @ctxt: an XML parser context
5026: *
5027: * parse an Notation attribute type.
5028: *
1.99 daniel 5029: * Note: the leading 'NOTATION' S part has already being parsed...
5030: *
1.66 daniel 5031: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5032: *
1.99 daniel 5033: * [ VC: Notation Attributes ]
1.117 daniel 5034: * Values of this type must match one of the notation names included
1.99 daniel 5035: * in the declaration; all notation names in the declaration must be declared.
1.66 daniel 5036: *
5037: * Returns: the notation attribute tree built while parsing
5038: */
5039:
5040: xmlEnumerationPtr
5041: xmlParseNotationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 5042: xmlChar *name;
1.66 daniel 5043: xmlEnumerationPtr ret = NULL, last = NULL, cur;
5044:
1.152 daniel 5045: if (RAW != '(') {
1.66 daniel 5046: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5047: ctxt->sax->error(ctxt->userData,
5048: "'(' required to start 'NOTATION'\n");
1.123 daniel 5049: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.66 daniel 5050: ctxt->wellFormed = 0;
1.180 daniel 5051: ctxt->disableSAX = 1;
1.66 daniel 5052: return(NULL);
5053: }
1.91 daniel 5054: SHRINK;
1.66 daniel 5055: do {
5056: NEXT;
5057: SKIP_BLANKS;
5058: name = xmlParseName(ctxt);
5059: if (name == NULL) {
5060: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5061: ctxt->sax->error(ctxt->userData,
1.66 daniel 5062: "Name expected in NOTATION declaration\n");
1.123 daniel 5063: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.66 daniel 5064: ctxt->wellFormed = 0;
1.180 daniel 5065: ctxt->disableSAX = 1;
1.66 daniel 5066: return(ret);
5067: }
5068: cur = xmlCreateEnumeration(name);
1.119 daniel 5069: xmlFree(name);
1.66 daniel 5070: if (cur == NULL) return(ret);
5071: if (last == NULL) ret = last = cur;
5072: else {
5073: last->next = cur;
5074: last = cur;
5075: }
5076: SKIP_BLANKS;
1.152 daniel 5077: } while (RAW == '|');
5078: if (RAW != ')') {
1.66 daniel 5079: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5080: ctxt->sax->error(ctxt->userData,
1.66 daniel 5081: "')' required to finish NOTATION declaration\n");
1.123 daniel 5082: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.66 daniel 5083: ctxt->wellFormed = 0;
1.180 daniel 5084: ctxt->disableSAX = 1;
1.170 daniel 5085: if ((last != NULL) && (last != ret))
5086: xmlFreeEnumeration(last);
1.66 daniel 5087: return(ret);
5088: }
5089: NEXT;
5090: return(ret);
5091: }
5092:
5093: /**
5094: * xmlParseEnumerationType:
5095: * @ctxt: an XML parser context
5096: *
5097: * parse an Enumeration attribute type.
5098: *
5099: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5100: *
1.99 daniel 5101: * [ VC: Enumeration ]
1.117 daniel 5102: * Values of this type must match one of the Nmtoken tokens in
1.99 daniel 5103: * the declaration
5104: *
1.66 daniel 5105: * Returns: the enumeration attribute tree built while parsing
5106: */
5107:
5108: xmlEnumerationPtr
5109: xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 5110: xmlChar *name;
1.66 daniel 5111: xmlEnumerationPtr ret = NULL, last = NULL, cur;
5112:
1.152 daniel 5113: if (RAW != '(') {
1.66 daniel 5114: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5115: ctxt->sax->error(ctxt->userData,
1.66 daniel 5116: "'(' required to start ATTLIST enumeration\n");
1.123 daniel 5117: ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
1.66 daniel 5118: ctxt->wellFormed = 0;
1.180 daniel 5119: ctxt->disableSAX = 1;
1.66 daniel 5120: return(NULL);
5121: }
1.91 daniel 5122: SHRINK;
1.66 daniel 5123: do {
5124: NEXT;
5125: SKIP_BLANKS;
5126: name = xmlParseNmtoken(ctxt);
5127: if (name == NULL) {
5128: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5129: ctxt->sax->error(ctxt->userData,
1.66 daniel 5130: "NmToken expected in ATTLIST enumeration\n");
1.123 daniel 5131: ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
1.66 daniel 5132: ctxt->wellFormed = 0;
1.180 daniel 5133: ctxt->disableSAX = 1;
1.66 daniel 5134: return(ret);
5135: }
5136: cur = xmlCreateEnumeration(name);
1.119 daniel 5137: xmlFree(name);
1.66 daniel 5138: if (cur == NULL) return(ret);
5139: if (last == NULL) ret = last = cur;
5140: else {
5141: last->next = cur;
5142: last = cur;
5143: }
5144: SKIP_BLANKS;
1.152 daniel 5145: } while (RAW == '|');
5146: if (RAW != ')') {
1.66 daniel 5147: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5148: ctxt->sax->error(ctxt->userData,
1.66 daniel 5149: "')' required to finish ATTLIST enumeration\n");
1.123 daniel 5150: ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
1.66 daniel 5151: ctxt->wellFormed = 0;
1.180 daniel 5152: ctxt->disableSAX = 1;
1.66 daniel 5153: return(ret);
5154: }
5155: NEXT;
5156: return(ret);
5157: }
5158:
5159: /**
1.50 daniel 5160: * xmlParseEnumeratedType:
5161: * @ctxt: an XML parser context
1.66 daniel 5162: * @tree: the enumeration tree built while parsing
1.50 daniel 5163: *
1.66 daniel 5164: * parse an Enumerated attribute type.
1.22 daniel 5165: *
5166: * [57] EnumeratedType ::= NotationType | Enumeration
5167: *
5168: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5169: *
1.50 daniel 5170: *
1.66 daniel 5171: * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
1.22 daniel 5172: */
5173:
1.66 daniel 5174: int
5175: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.152 daniel 5176: if ((RAW == 'N') && (NXT(1) == 'O') &&
1.66 daniel 5177: (NXT(2) == 'T') && (NXT(3) == 'A') &&
5178: (NXT(4) == 'T') && (NXT(5) == 'I') &&
5179: (NXT(6) == 'O') && (NXT(7) == 'N')) {
5180: SKIP(8);
5181: if (!IS_BLANK(CUR)) {
5182: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5183: ctxt->sax->error(ctxt->userData,
5184: "Space required after 'NOTATION'\n");
1.123 daniel 5185: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.66 daniel 5186: ctxt->wellFormed = 0;
1.180 daniel 5187: ctxt->disableSAX = 1;
1.66 daniel 5188: return(0);
5189: }
5190: SKIP_BLANKS;
5191: *tree = xmlParseNotationType(ctxt);
5192: if (*tree == NULL) return(0);
5193: return(XML_ATTRIBUTE_NOTATION);
5194: }
5195: *tree = xmlParseEnumerationType(ctxt);
5196: if (*tree == NULL) return(0);
5197: return(XML_ATTRIBUTE_ENUMERATION);
1.22 daniel 5198: }
5199:
1.50 daniel 5200: /**
5201: * xmlParseAttributeType:
5202: * @ctxt: an XML parser context
1.66 daniel 5203: * @tree: the enumeration tree built while parsing
1.50 daniel 5204: *
1.59 daniel 5205: * parse the Attribute list def for an element
1.22 daniel 5206: *
5207: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5208: *
5209: * [55] StringType ::= 'CDATA'
5210: *
5211: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5212: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1.50 daniel 5213: *
1.102 daniel 5214: * Validity constraints for attribute values syntax are checked in
5215: * xmlValidateAttributeValue()
5216: *
1.99 daniel 5217: * [ VC: ID ]
1.117 daniel 5218: * Values of type ID must match the Name production. A name must not
1.99 daniel 5219: * appear more than once in an XML document as a value of this type;
5220: * i.e., ID values must uniquely identify the elements which bear them.
5221: *
5222: * [ VC: One ID per Element Type ]
1.117 daniel 5223: * No element type may have more than one ID attribute specified.
1.99 daniel 5224: *
5225: * [ VC: ID Attribute Default ]
1.117 daniel 5226: * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
1.99 daniel 5227: *
5228: * [ VC: IDREF ]
1.102 daniel 5229: * Values of type IDREF must match the Name production, and values
1.140 daniel 5230: * of type IDREFS must match Names; each IDREF Name must match the value
1.117 daniel 5231: * of an ID attribute on some element in the XML document; i.e. IDREF
1.99 daniel 5232: * values must match the value of some ID attribute.
5233: *
5234: * [ VC: Entity Name ]
1.102 daniel 5235: * Values of type ENTITY must match the Name production, values
1.140 daniel 5236: * of type ENTITIES must match Names; each Entity Name must match the
1.117 daniel 5237: * name of an unparsed entity declared in the DTD.
1.99 daniel 5238: *
5239: * [ VC: Name Token ]
1.102 daniel 5240: * Values of type NMTOKEN must match the Nmtoken production; values
1.99 daniel 5241: * of type NMTOKENS must match Nmtokens.
5242: *
1.69 daniel 5243: * Returns the attribute type
1.22 daniel 5244: */
1.59 daniel 5245: int
1.66 daniel 5246: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.91 daniel 5247: SHRINK;
1.152 daniel 5248: if ((RAW == 'C') && (NXT(1) == 'D') &&
1.40 daniel 5249: (NXT(2) == 'A') && (NXT(3) == 'T') &&
5250: (NXT(4) == 'A')) {
5251: SKIP(5);
1.66 daniel 5252: return(XML_ATTRIBUTE_CDATA);
1.152 daniel 5253: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.40 daniel 5254: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.97 daniel 5255: (NXT(4) == 'F') && (NXT(5) == 'S')) {
5256: SKIP(6);
5257: return(XML_ATTRIBUTE_IDREFS);
1.152 daniel 5258: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.97 daniel 5259: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.40 daniel 5260: (NXT(4) == 'F')) {
5261: SKIP(5);
1.59 daniel 5262: return(XML_ATTRIBUTE_IDREF);
1.152 daniel 5263: } else if ((RAW == 'I') && (NXT(1) == 'D')) {
1.66 daniel 5264: SKIP(2);
5265: return(XML_ATTRIBUTE_ID);
1.152 daniel 5266: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 5267: (NXT(2) == 'T') && (NXT(3) == 'I') &&
5268: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
5269: SKIP(6);
1.59 daniel 5270: return(XML_ATTRIBUTE_ENTITY);
1.152 daniel 5271: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 5272: (NXT(2) == 'T') && (NXT(3) == 'I') &&
5273: (NXT(4) == 'T') && (NXT(5) == 'I') &&
5274: (NXT(6) == 'E') && (NXT(7) == 'S')) {
5275: SKIP(8);
1.59 daniel 5276: return(XML_ATTRIBUTE_ENTITIES);
1.152 daniel 5277: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.40 daniel 5278: (NXT(2) == 'T') && (NXT(3) == 'O') &&
5279: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.66 daniel 5280: (NXT(6) == 'N') && (NXT(7) == 'S')) {
5281: SKIP(8);
5282: return(XML_ATTRIBUTE_NMTOKENS);
1.152 daniel 5283: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.66 daniel 5284: (NXT(2) == 'T') && (NXT(3) == 'O') &&
5285: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.40 daniel 5286: (NXT(6) == 'N')) {
5287: SKIP(7);
1.59 daniel 5288: return(XML_ATTRIBUTE_NMTOKEN);
1.22 daniel 5289: }
1.66 daniel 5290: return(xmlParseEnumeratedType(ctxt, tree));
1.22 daniel 5291: }
5292:
1.50 daniel 5293: /**
5294: * xmlParseAttributeListDecl:
5295: * @ctxt: an XML parser context
5296: *
5297: * : parse the Attribute list def for an element
1.22 daniel 5298: *
5299: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5300: *
5301: * [53] AttDef ::= S Name S AttType S DefaultDecl
1.50 daniel 5302: *
1.22 daniel 5303: */
1.55 daniel 5304: void
5305: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 5306: xmlChar *elemName;
5307: xmlChar *attrName;
1.103 daniel 5308: xmlEnumerationPtr tree;
1.22 daniel 5309:
1.152 daniel 5310: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 5311: (NXT(2) == 'A') && (NXT(3) == 'T') &&
5312: (NXT(4) == 'T') && (NXT(5) == 'L') &&
5313: (NXT(6) == 'I') && (NXT(7) == 'S') &&
1.59 daniel 5314: (NXT(8) == 'T')) {
1.187 daniel 5315: xmlParserInputPtr input = ctxt->input;
5316:
1.40 daniel 5317: SKIP(9);
1.59 daniel 5318: if (!IS_BLANK(CUR)) {
5319: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5320: ctxt->sax->error(ctxt->userData,
5321: "Space required after '<!ATTLIST'\n");
1.123 daniel 5322: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5323: ctxt->wellFormed = 0;
1.180 daniel 5324: ctxt->disableSAX = 1;
1.59 daniel 5325: }
1.42 daniel 5326: SKIP_BLANKS;
1.59 daniel 5327: elemName = xmlParseName(ctxt);
5328: if (elemName == NULL) {
1.55 daniel 5329: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5330: ctxt->sax->error(ctxt->userData,
5331: "ATTLIST: no name for Element\n");
1.123 daniel 5332: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5333: ctxt->wellFormed = 0;
1.180 daniel 5334: ctxt->disableSAX = 1;
1.22 daniel 5335: return;
5336: }
1.42 daniel 5337: SKIP_BLANKS;
1.152 daniel 5338: while (RAW != '>') {
1.123 daniel 5339: const xmlChar *check = CUR_PTR;
1.59 daniel 5340: int type;
5341: int def;
1.123 daniel 5342: xmlChar *defaultValue = NULL;
1.59 daniel 5343:
1.103 daniel 5344: tree = NULL;
1.59 daniel 5345: attrName = xmlParseName(ctxt);
5346: if (attrName == NULL) {
5347: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5348: ctxt->sax->error(ctxt->userData,
5349: "ATTLIST: no name for Attribute\n");
1.123 daniel 5350: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5351: ctxt->wellFormed = 0;
1.180 daniel 5352: ctxt->disableSAX = 1;
1.59 daniel 5353: break;
5354: }
1.97 daniel 5355: GROW;
1.59 daniel 5356: if (!IS_BLANK(CUR)) {
5357: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5358: ctxt->sax->error(ctxt->userData,
1.59 daniel 5359: "Space required after the attribute name\n");
1.123 daniel 5360: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5361: ctxt->wellFormed = 0;
1.180 daniel 5362: ctxt->disableSAX = 1;
1.170 daniel 5363: if (attrName != NULL)
5364: xmlFree(attrName);
5365: if (defaultValue != NULL)
5366: xmlFree(defaultValue);
1.59 daniel 5367: break;
5368: }
5369: SKIP_BLANKS;
5370:
1.66 daniel 5371: type = xmlParseAttributeType(ctxt, &tree);
1.170 daniel 5372: if (type <= 0) {
5373: if (attrName != NULL)
5374: xmlFree(attrName);
5375: if (defaultValue != NULL)
5376: xmlFree(defaultValue);
5377: break;
5378: }
1.22 daniel 5379:
1.97 daniel 5380: GROW;
1.59 daniel 5381: if (!IS_BLANK(CUR)) {
5382: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5383: ctxt->sax->error(ctxt->userData,
1.59 daniel 5384: "Space required after the attribute type\n");
1.123 daniel 5385: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5386: ctxt->wellFormed = 0;
1.180 daniel 5387: ctxt->disableSAX = 1;
1.170 daniel 5388: if (attrName != NULL)
5389: xmlFree(attrName);
5390: if (defaultValue != NULL)
5391: xmlFree(defaultValue);
5392: if (tree != NULL)
5393: xmlFreeEnumeration(tree);
1.59 daniel 5394: break;
5395: }
1.42 daniel 5396: SKIP_BLANKS;
1.59 daniel 5397:
5398: def = xmlParseDefaultDecl(ctxt, &defaultValue);
1.170 daniel 5399: if (def <= 0) {
5400: if (attrName != NULL)
5401: xmlFree(attrName);
5402: if (defaultValue != NULL)
5403: xmlFree(defaultValue);
5404: if (tree != NULL)
5405: xmlFreeEnumeration(tree);
5406: break;
5407: }
1.59 daniel 5408:
1.97 daniel 5409: GROW;
1.152 daniel 5410: if (RAW != '>') {
1.59 daniel 5411: if (!IS_BLANK(CUR)) {
5412: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5413: ctxt->sax->error(ctxt->userData,
1.59 daniel 5414: "Space required after the attribute default value\n");
1.123 daniel 5415: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5416: ctxt->wellFormed = 0;
1.180 daniel 5417: ctxt->disableSAX = 1;
1.170 daniel 5418: if (attrName != NULL)
5419: xmlFree(attrName);
5420: if (defaultValue != NULL)
5421: xmlFree(defaultValue);
5422: if (tree != NULL)
5423: xmlFreeEnumeration(tree);
1.59 daniel 5424: break;
5425: }
5426: SKIP_BLANKS;
5427: }
1.40 daniel 5428: if (check == CUR_PTR) {
1.55 daniel 5429: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5430: ctxt->sax->error(ctxt->userData,
1.59 daniel 5431: "xmlParseAttributeListDecl: detected internal error\n");
1.123 daniel 5432: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.170 daniel 5433: if (attrName != NULL)
5434: xmlFree(attrName);
5435: if (defaultValue != NULL)
5436: xmlFree(defaultValue);
5437: if (tree != NULL)
5438: xmlFreeEnumeration(tree);
1.22 daniel 5439: break;
5440: }
1.171 daniel 5441: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5442: (ctxt->sax->attributeDecl != NULL))
1.74 daniel 5443: ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
1.66 daniel 5444: type, def, defaultValue, tree);
1.59 daniel 5445: if (attrName != NULL)
1.119 daniel 5446: xmlFree(attrName);
1.59 daniel 5447: if (defaultValue != NULL)
1.119 daniel 5448: xmlFree(defaultValue);
1.97 daniel 5449: GROW;
1.22 daniel 5450: }
1.187 daniel 5451: if (RAW == '>') {
5452: if (input != ctxt->input) {
5453: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5454: ctxt->sax->error(ctxt->userData,
5455: "Attribute list declaration doesn't start and stop in the same entity\n");
5456: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5457: ctxt->wellFormed = 0;
5458: ctxt->disableSAX = 1;
5459: }
1.40 daniel 5460: NEXT;
1.187 daniel 5461: }
1.22 daniel 5462:
1.119 daniel 5463: xmlFree(elemName);
1.22 daniel 5464: }
5465: }
5466:
1.50 daniel 5467: /**
1.61 daniel 5468: * xmlParseElementMixedContentDecl:
5469: * @ctxt: an XML parser context
5470: *
5471: * parse the declaration for a Mixed Element content
5472: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5473: *
5474: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5475: * '(' S? '#PCDATA' S? ')'
5476: *
1.99 daniel 5477: * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5478: *
5479: * [ VC: No Duplicate Types ]
1.117 daniel 5480: * The same name must not appear more than once in a single
5481: * mixed-content declaration.
1.99 daniel 5482: *
1.61 daniel 5483: * returns: the list of the xmlElementContentPtr describing the element choices
5484: */
5485: xmlElementContentPtr
1.62 daniel 5486: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
1.64 daniel 5487: xmlElementContentPtr ret = NULL, cur = NULL, n;
1.123 daniel 5488: xmlChar *elem = NULL;
1.61 daniel 5489:
1.97 daniel 5490: GROW;
1.152 daniel 5491: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 5492: (NXT(2) == 'C') && (NXT(3) == 'D') &&
5493: (NXT(4) == 'A') && (NXT(5) == 'T') &&
5494: (NXT(6) == 'A')) {
5495: SKIP(7);
5496: SKIP_BLANKS;
1.91 daniel 5497: SHRINK;
1.152 daniel 5498: if (RAW == ')') {
1.187 daniel 5499: ctxt->entity = ctxt->input;
1.63 daniel 5500: NEXT;
5501: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
1.152 daniel 5502: if (RAW == '*') {
1.136 daniel 5503: ret->ocur = XML_ELEMENT_CONTENT_MULT;
5504: NEXT;
5505: }
1.63 daniel 5506: return(ret);
5507: }
1.152 daniel 5508: if ((RAW == '(') || (RAW == '|')) {
1.61 daniel 5509: ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
5510: if (ret == NULL) return(NULL);
1.99 daniel 5511: }
1.152 daniel 5512: while (RAW == '|') {
1.64 daniel 5513: NEXT;
1.61 daniel 5514: if (elem == NULL) {
5515: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5516: if (ret == NULL) return(NULL);
5517: ret->c1 = cur;
1.64 daniel 5518: cur = ret;
1.61 daniel 5519: } else {
1.64 daniel 5520: n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5521: if (n == NULL) return(NULL);
5522: n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
5523: cur->c2 = n;
5524: cur = n;
1.119 daniel 5525: xmlFree(elem);
1.61 daniel 5526: }
5527: SKIP_BLANKS;
5528: elem = xmlParseName(ctxt);
5529: if (elem == NULL) {
5530: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5531: ctxt->sax->error(ctxt->userData,
1.61 daniel 5532: "xmlParseElementMixedContentDecl : Name expected\n");
1.123 daniel 5533: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.61 daniel 5534: ctxt->wellFormed = 0;
1.180 daniel 5535: ctxt->disableSAX = 1;
1.61 daniel 5536: xmlFreeElementContent(cur);
5537: return(NULL);
5538: }
5539: SKIP_BLANKS;
1.97 daniel 5540: GROW;
1.61 daniel 5541: }
1.152 daniel 5542: if ((RAW == ')') && (NXT(1) == '*')) {
1.66 daniel 5543: if (elem != NULL) {
1.61 daniel 5544: cur->c2 = xmlNewElementContent(elem,
5545: XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 5546: xmlFree(elem);
1.66 daniel 5547: }
1.65 daniel 5548: ret->ocur = XML_ELEMENT_CONTENT_MULT;
1.187 daniel 5549: ctxt->entity = ctxt->input;
1.64 daniel 5550: SKIP(2);
1.61 daniel 5551: } else {
1.119 daniel 5552: if (elem != NULL) xmlFree(elem);
1.61 daniel 5553: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5554: ctxt->sax->error(ctxt->userData,
1.63 daniel 5555: "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
1.123 daniel 5556: ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
1.61 daniel 5557: ctxt->wellFormed = 0;
1.180 daniel 5558: ctxt->disableSAX = 1;
1.61 daniel 5559: xmlFreeElementContent(ret);
5560: return(NULL);
5561: }
5562:
5563: } else {
5564: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5565: ctxt->sax->error(ctxt->userData,
1.61 daniel 5566: "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
1.123 daniel 5567: ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
1.61 daniel 5568: ctxt->wellFormed = 0;
1.180 daniel 5569: ctxt->disableSAX = 1;
1.61 daniel 5570: }
5571: return(ret);
5572: }
5573:
5574: /**
5575: * xmlParseElementChildrenContentDecl:
1.50 daniel 5576: * @ctxt: an XML parser context
5577: *
1.61 daniel 5578: * parse the declaration for a Mixed Element content
5579: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
1.22 daniel 5580: *
1.61 daniel 5581: *
1.22 daniel 5582: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5583: *
5584: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5585: *
5586: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5587: *
5588: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5589: *
1.99 daniel 5590: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5591: * TODO Parameter-entity replacement text must be properly nested
5592: * with parenthetized groups. That is to say, if either of the
5593: * opening or closing parentheses in a choice, seq, or Mixed
5594: * construct is contained in the replacement text for a parameter
5595: * entity, both must be contained in the same replacement text. For
5596: * interoperability, if a parameter-entity reference appears in a
5597: * choice, seq, or Mixed construct, its replacement text should not
5598: * be empty, and neither the first nor last non-blank character of
5599: * the replacement text should be a connector (| or ,).
5600: *
1.62 daniel 5601: * returns: the tree of xmlElementContentPtr describing the element
1.61 daniel 5602: * hierarchy.
5603: */
5604: xmlElementContentPtr
1.62 daniel 5605: xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
1.63 daniel 5606: xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
1.123 daniel 5607: xmlChar *elem;
5608: xmlChar type = 0;
1.62 daniel 5609:
5610: SKIP_BLANKS;
1.94 daniel 5611: GROW;
1.152 daniel 5612: if (RAW == '(') {
1.63 daniel 5613: /* Recurse on first child */
1.62 daniel 5614: NEXT;
5615: SKIP_BLANKS;
5616: cur = ret = xmlParseElementChildrenContentDecl(ctxt);
5617: SKIP_BLANKS;
1.101 daniel 5618: GROW;
1.62 daniel 5619: } else {
5620: elem = xmlParseName(ctxt);
5621: if (elem == NULL) {
5622: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5623: ctxt->sax->error(ctxt->userData,
1.62 daniel 5624: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.123 daniel 5625: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 5626: ctxt->wellFormed = 0;
1.180 daniel 5627: ctxt->disableSAX = 1;
1.62 daniel 5628: return(NULL);
5629: }
5630: cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.101 daniel 5631: GROW;
1.152 daniel 5632: if (RAW == '?') {
1.104 daniel 5633: cur->ocur = XML_ELEMENT_CONTENT_OPT;
1.62 daniel 5634: NEXT;
1.152 daniel 5635: } else if (RAW == '*') {
1.104 daniel 5636: cur->ocur = XML_ELEMENT_CONTENT_MULT;
1.62 daniel 5637: NEXT;
1.152 daniel 5638: } else if (RAW == '+') {
1.104 daniel 5639: cur->ocur = XML_ELEMENT_CONTENT_PLUS;
1.62 daniel 5640: NEXT;
5641: } else {
1.104 daniel 5642: cur->ocur = XML_ELEMENT_CONTENT_ONCE;
1.62 daniel 5643: }
1.119 daniel 5644: xmlFree(elem);
1.101 daniel 5645: GROW;
1.62 daniel 5646: }
5647: SKIP_BLANKS;
1.91 daniel 5648: SHRINK;
1.152 daniel 5649: while (RAW != ')') {
1.63 daniel 5650: /*
5651: * Each loop we parse one separator and one element.
5652: */
1.152 daniel 5653: if (RAW == ',') {
1.62 daniel 5654: if (type == 0) type = CUR;
5655:
5656: /*
5657: * Detect "Name | Name , Name" error
5658: */
5659: else if (type != CUR) {
5660: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5661: ctxt->sax->error(ctxt->userData,
1.62 daniel 5662: "xmlParseElementChildrenContentDecl : '%c' expected\n",
5663: type);
1.123 daniel 5664: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 5665: ctxt->wellFormed = 0;
1.180 daniel 5666: ctxt->disableSAX = 1;
1.170 daniel 5667: if ((op != NULL) && (op != ret))
5668: xmlFreeElementContent(op);
1.211 veillard 5669: if ((last != NULL) && (last != ret) &&
5670: (last != ret->c1) && (last != ret->c2))
1.170 daniel 5671: xmlFreeElementContent(last);
5672: if (ret != NULL)
5673: xmlFreeElementContent(ret);
1.62 daniel 5674: return(NULL);
5675: }
1.64 daniel 5676: NEXT;
1.62 daniel 5677:
1.63 daniel 5678: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
5679: if (op == NULL) {
5680: xmlFreeElementContent(ret);
5681: return(NULL);
5682: }
5683: if (last == NULL) {
5684: op->c1 = ret;
1.65 daniel 5685: ret = cur = op;
1.63 daniel 5686: } else {
5687: cur->c2 = op;
5688: op->c1 = last;
5689: cur =op;
1.65 daniel 5690: last = NULL;
1.63 daniel 5691: }
1.152 daniel 5692: } else if (RAW == '|') {
1.62 daniel 5693: if (type == 0) type = CUR;
5694:
5695: /*
1.63 daniel 5696: * Detect "Name , Name | Name" error
1.62 daniel 5697: */
5698: else if (type != CUR) {
5699: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5700: ctxt->sax->error(ctxt->userData,
1.62 daniel 5701: "xmlParseElementChildrenContentDecl : '%c' expected\n",
5702: type);
1.123 daniel 5703: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 5704: ctxt->wellFormed = 0;
1.180 daniel 5705: ctxt->disableSAX = 1;
1.211 veillard 5706: if ((op != NULL) && (op != ret) && (op != last))
1.170 daniel 5707: xmlFreeElementContent(op);
1.211 veillard 5708: if ((last != NULL) && (last != ret) &&
5709: (last != ret->c1) && (last != ret->c2))
1.170 daniel 5710: xmlFreeElementContent(last);
5711: if (ret != NULL)
5712: xmlFreeElementContent(ret);
1.62 daniel 5713: return(NULL);
5714: }
1.64 daniel 5715: NEXT;
1.62 daniel 5716:
1.63 daniel 5717: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5718: if (op == NULL) {
1.170 daniel 5719: if ((op != NULL) && (op != ret))
5720: xmlFreeElementContent(op);
1.211 veillard 5721: if ((last != NULL) && (last != ret) &&
5722: (last != ret->c1) && (last != ret->c2))
1.170 daniel 5723: xmlFreeElementContent(last);
5724: if (ret != NULL)
5725: xmlFreeElementContent(ret);
1.63 daniel 5726: return(NULL);
5727: }
5728: if (last == NULL) {
5729: op->c1 = ret;
1.65 daniel 5730: ret = cur = op;
1.63 daniel 5731: } else {
5732: cur->c2 = op;
5733: op->c1 = last;
5734: cur =op;
1.65 daniel 5735: last = NULL;
1.63 daniel 5736: }
1.62 daniel 5737: } else {
5738: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5739: ctxt->sax->error(ctxt->userData,
1.62 daniel 5740: "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
5741: ctxt->wellFormed = 0;
1.180 daniel 5742: ctxt->disableSAX = 1;
1.123 daniel 5743: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
1.170 daniel 5744: if ((op != NULL) && (op != ret))
5745: xmlFreeElementContent(op);
1.211 veillard 5746: if ((last != NULL) && (last != ret) &&
5747: (last != ret->c1) && (last != ret->c2))
1.170 daniel 5748: xmlFreeElementContent(last);
5749: if (ret != NULL)
5750: xmlFreeElementContent(ret);
1.62 daniel 5751: return(NULL);
5752: }
1.101 daniel 5753: GROW;
1.62 daniel 5754: SKIP_BLANKS;
1.101 daniel 5755: GROW;
1.152 daniel 5756: if (RAW == '(') {
1.63 daniel 5757: /* Recurse on second child */
1.62 daniel 5758: NEXT;
5759: SKIP_BLANKS;
1.65 daniel 5760: last = xmlParseElementChildrenContentDecl(ctxt);
1.62 daniel 5761: SKIP_BLANKS;
5762: } else {
5763: elem = xmlParseName(ctxt);
5764: if (elem == NULL) {
5765: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5766: ctxt->sax->error(ctxt->userData,
1.122 daniel 5767: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.123 daniel 5768: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 5769: ctxt->wellFormed = 0;
1.180 daniel 5770: ctxt->disableSAX = 1;
1.170 daniel 5771: if ((op != NULL) && (op != ret))
5772: xmlFreeElementContent(op);
1.211 veillard 5773: if ((last != NULL) && (last != ret) &&
5774: (last != ret->c1) && (last != ret->c2))
1.170 daniel 5775: xmlFreeElementContent(last);
5776: if (ret != NULL)
5777: xmlFreeElementContent(ret);
1.62 daniel 5778: return(NULL);
5779: }
1.65 daniel 5780: last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 5781: xmlFree(elem);
1.152 daniel 5782: if (RAW == '?') {
1.105 daniel 5783: last->ocur = XML_ELEMENT_CONTENT_OPT;
5784: NEXT;
1.152 daniel 5785: } else if (RAW == '*') {
1.105 daniel 5786: last->ocur = XML_ELEMENT_CONTENT_MULT;
5787: NEXT;
1.152 daniel 5788: } else if (RAW == '+') {
1.105 daniel 5789: last->ocur = XML_ELEMENT_CONTENT_PLUS;
5790: NEXT;
5791: } else {
5792: last->ocur = XML_ELEMENT_CONTENT_ONCE;
5793: }
1.63 daniel 5794: }
5795: SKIP_BLANKS;
1.97 daniel 5796: GROW;
1.64 daniel 5797: }
1.65 daniel 5798: if ((cur != NULL) && (last != NULL)) {
5799: cur->c2 = last;
1.62 daniel 5800: }
1.187 daniel 5801: ctxt->entity = ctxt->input;
1.62 daniel 5802: NEXT;
1.152 daniel 5803: if (RAW == '?') {
1.62 daniel 5804: ret->ocur = XML_ELEMENT_CONTENT_OPT;
5805: NEXT;
1.152 daniel 5806: } else if (RAW == '*') {
1.62 daniel 5807: ret->ocur = XML_ELEMENT_CONTENT_MULT;
5808: NEXT;
1.152 daniel 5809: } else if (RAW == '+') {
1.62 daniel 5810: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
5811: NEXT;
5812: }
5813: return(ret);
1.61 daniel 5814: }
5815:
5816: /**
5817: * xmlParseElementContentDecl:
5818: * @ctxt: an XML parser context
5819: * @name: the name of the element being defined.
5820: * @result: the Element Content pointer will be stored here if any
1.22 daniel 5821: *
1.61 daniel 5822: * parse the declaration for an Element content either Mixed or Children,
5823: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5824: *
5825: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1.50 daniel 5826: *
1.61 daniel 5827: * returns: the type of element content XML_ELEMENT_TYPE_xxx
1.22 daniel 5828: */
5829:
1.61 daniel 5830: int
1.123 daniel 5831: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
1.61 daniel 5832: xmlElementContentPtr *result) {
5833:
5834: xmlElementContentPtr tree = NULL;
1.187 daniel 5835: xmlParserInputPtr input = ctxt->input;
1.61 daniel 5836: int res;
5837:
5838: *result = NULL;
5839:
1.152 daniel 5840: if (RAW != '(') {
1.61 daniel 5841: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5842: ctxt->sax->error(ctxt->userData,
1.61 daniel 5843: "xmlParseElementContentDecl : '(' expected\n");
1.123 daniel 5844: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.61 daniel 5845: ctxt->wellFormed = 0;
1.180 daniel 5846: ctxt->disableSAX = 1;
1.61 daniel 5847: return(-1);
5848: }
5849: NEXT;
1.97 daniel 5850: GROW;
1.61 daniel 5851: SKIP_BLANKS;
1.152 daniel 5852: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 5853: (NXT(2) == 'C') && (NXT(3) == 'D') &&
5854: (NXT(4) == 'A') && (NXT(5) == 'T') &&
5855: (NXT(6) == 'A')) {
1.62 daniel 5856: tree = xmlParseElementMixedContentDecl(ctxt);
1.61 daniel 5857: res = XML_ELEMENT_TYPE_MIXED;
5858: } else {
1.62 daniel 5859: tree = xmlParseElementChildrenContentDecl(ctxt);
1.61 daniel 5860: res = XML_ELEMENT_TYPE_ELEMENT;
5861: }
1.187 daniel 5862: if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
5863: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5864: ctxt->sax->error(ctxt->userData,
5865: "Element content declaration doesn't start and stop in the same entity\n");
5866: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5867: ctxt->wellFormed = 0;
5868: ctxt->disableSAX = 1;
5869: }
1.61 daniel 5870: SKIP_BLANKS;
1.63 daniel 5871: /****************************
1.152 daniel 5872: if (RAW != ')') {
1.61 daniel 5873: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5874: ctxt->sax->error(ctxt->userData,
1.61 daniel 5875: "xmlParseElementContentDecl : ')' expected\n");
5876: ctxt->wellFormed = 0;
1.180 daniel 5877: ctxt->disableSAX = 1;
1.61 daniel 5878: return(-1);
5879: }
1.63 daniel 5880: ****************************/
5881: *result = tree;
1.61 daniel 5882: return(res);
1.22 daniel 5883: }
5884:
1.50 daniel 5885: /**
5886: * xmlParseElementDecl:
5887: * @ctxt: an XML parser context
5888: *
5889: * parse an Element declaration.
1.22 daniel 5890: *
5891: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5892: *
1.99 daniel 5893: * [ VC: Unique Element Type Declaration ]
1.117 daniel 5894: * No element type may be declared more than once
1.69 daniel 5895: *
5896: * Returns the type of the element, or -1 in case of error
1.22 daniel 5897: */
1.59 daniel 5898: int
1.55 daniel 5899: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 5900: xmlChar *name;
1.59 daniel 5901: int ret = -1;
1.61 daniel 5902: xmlElementContentPtr content = NULL;
1.22 daniel 5903:
1.97 daniel 5904: GROW;
1.152 daniel 5905: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 5906: (NXT(2) == 'E') && (NXT(3) == 'L') &&
5907: (NXT(4) == 'E') && (NXT(5) == 'M') &&
5908: (NXT(6) == 'E') && (NXT(7) == 'N') &&
1.59 daniel 5909: (NXT(8) == 'T')) {
1.187 daniel 5910: xmlParserInputPtr input = ctxt->input;
5911:
1.40 daniel 5912: SKIP(9);
1.59 daniel 5913: if (!IS_BLANK(CUR)) {
5914: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5915: ctxt->sax->error(ctxt->userData,
1.59 daniel 5916: "Space required after 'ELEMENT'\n");
1.123 daniel 5917: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5918: ctxt->wellFormed = 0;
1.180 daniel 5919: ctxt->disableSAX = 1;
1.59 daniel 5920: }
1.42 daniel 5921: SKIP_BLANKS;
1.22 daniel 5922: name = xmlParseName(ctxt);
5923: if (name == NULL) {
1.55 daniel 5924: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5925: ctxt->sax->error(ctxt->userData,
1.59 daniel 5926: "xmlParseElementDecl: no name for Element\n");
1.123 daniel 5927: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5928: ctxt->wellFormed = 0;
1.180 daniel 5929: ctxt->disableSAX = 1;
1.59 daniel 5930: return(-1);
5931: }
5932: if (!IS_BLANK(CUR)) {
5933: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5934: ctxt->sax->error(ctxt->userData,
1.59 daniel 5935: "Space required after the element name\n");
1.123 daniel 5936: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 5937: ctxt->wellFormed = 0;
1.180 daniel 5938: ctxt->disableSAX = 1;
1.22 daniel 5939: }
1.42 daniel 5940: SKIP_BLANKS;
1.152 daniel 5941: if ((RAW == 'E') && (NXT(1) == 'M') &&
1.40 daniel 5942: (NXT(2) == 'P') && (NXT(3) == 'T') &&
5943: (NXT(4) == 'Y')) {
5944: SKIP(5);
1.22 daniel 5945: /*
5946: * Element must always be empty.
5947: */
1.59 daniel 5948: ret = XML_ELEMENT_TYPE_EMPTY;
1.152 daniel 5949: } else if ((RAW == 'A') && (NXT(1) == 'N') &&
1.40 daniel 5950: (NXT(2) == 'Y')) {
5951: SKIP(3);
1.22 daniel 5952: /*
5953: * Element is a generic container.
5954: */
1.59 daniel 5955: ret = XML_ELEMENT_TYPE_ANY;
1.152 daniel 5956: } else if (RAW == '(') {
1.61 daniel 5957: ret = xmlParseElementContentDecl(ctxt, name, &content);
1.22 daniel 5958: } else {
1.98 daniel 5959: /*
5960: * [ WFC: PEs in Internal Subset ] error handling.
5961: */
1.152 daniel 5962: if ((RAW == '%') && (ctxt->external == 0) &&
1.98 daniel 5963: (ctxt->inputNr == 1)) {
5964: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5965: ctxt->sax->error(ctxt->userData,
5966: "PEReference: forbidden within markup decl in internal subset\n");
1.123 daniel 5967: ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
1.98 daniel 5968: } else {
5969: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5970: ctxt->sax->error(ctxt->userData,
5971: "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
1.123 daniel 5972: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.98 daniel 5973: }
1.61 daniel 5974: ctxt->wellFormed = 0;
1.180 daniel 5975: ctxt->disableSAX = 1;
1.119 daniel 5976: if (name != NULL) xmlFree(name);
1.61 daniel 5977: return(-1);
1.22 daniel 5978: }
1.142 daniel 5979:
5980: SKIP_BLANKS;
5981: /*
5982: * Pop-up of finished entities.
5983: */
1.152 daniel 5984: while ((RAW == 0) && (ctxt->inputNr > 1))
1.142 daniel 5985: xmlPopInput(ctxt);
1.42 daniel 5986: SKIP_BLANKS;
1.142 daniel 5987:
1.152 daniel 5988: if (RAW != '>') {
1.55 daniel 5989: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5990: ctxt->sax->error(ctxt->userData,
1.31 daniel 5991: "xmlParseElementDecl: expected '>' at the end\n");
1.123 daniel 5992: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.59 daniel 5993: ctxt->wellFormed = 0;
1.180 daniel 5994: ctxt->disableSAX = 1;
1.61 daniel 5995: } else {
1.187 daniel 5996: if (input != ctxt->input) {
5997: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5998: ctxt->sax->error(ctxt->userData,
5999: "Element declaration doesn't start and stop in the same entity\n");
6000: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
6001: ctxt->wellFormed = 0;
6002: ctxt->disableSAX = 1;
6003: }
6004:
1.40 daniel 6005: NEXT;
1.171 daniel 6006: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6007: (ctxt->sax->elementDecl != NULL))
1.76 daniel 6008: ctxt->sax->elementDecl(ctxt->userData, name, ret,
6009: content);
1.61 daniel 6010: }
1.84 daniel 6011: if (content != NULL) {
6012: xmlFreeElementContent(content);
6013: }
1.61 daniel 6014: if (name != NULL) {
1.119 daniel 6015: xmlFree(name);
1.61 daniel 6016: }
1.22 daniel 6017: }
1.59 daniel 6018: return(ret);
1.22 daniel 6019: }
6020:
1.50 daniel 6021: /**
6022: * xmlParseMarkupDecl:
6023: * @ctxt: an XML parser context
6024: *
6025: * parse Markup declarations
1.22 daniel 6026: *
6027: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6028: * NotationDecl | PI | Comment
6029: *
1.98 daniel 6030: * [ VC: Proper Declaration/PE Nesting ]
6031: * TODO Parameter-entity replacement text must be properly nested with
6032: * markup declarations. That is to say, if either the first character
6033: * or the last character of a markup declaration (markupdecl above) is
6034: * contained in the replacement text for a parameter-entity reference,
6035: * both must be contained in the same replacement text.
6036: *
6037: * [ WFC: PEs in Internal Subset ]
6038: * In the internal DTD subset, parameter-entity references can occur
6039: * only where markup declarations can occur, not within markup declarations.
6040: * (This does not apply to references that occur in external parameter
6041: * entities or to the external subset.)
1.22 daniel 6042: */
1.55 daniel 6043: void
6044: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1.94 daniel 6045: GROW;
1.22 daniel 6046: xmlParseElementDecl(ctxt);
6047: xmlParseAttributeListDecl(ctxt);
6048: xmlParseEntityDecl(ctxt);
6049: xmlParseNotationDecl(ctxt);
6050: xmlParsePI(ctxt);
1.114 daniel 6051: xmlParseComment(ctxt);
1.98 daniel 6052: /*
6053: * This is only for internal subset. On external entities,
6054: * the replacement is done before parsing stage
6055: */
6056: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6057: xmlParsePEReference(ctxt);
1.97 daniel 6058: ctxt->instate = XML_PARSER_DTD;
1.22 daniel 6059: }
6060:
1.50 daniel 6061: /**
1.76 daniel 6062: * xmlParseTextDecl:
6063: * @ctxt: an XML parser context
6064: *
6065: * parse an XML declaration header for external entities
6066: *
6067: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
1.176 daniel 6068: *
6069: * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
1.76 daniel 6070: */
6071:
1.172 daniel 6072: void
1.76 daniel 6073: xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 6074: xmlChar *version;
1.76 daniel 6075:
6076: /*
6077: * We know that '<?xml' is here.
6078: */
1.193 daniel 6079: if ((RAW == '<') && (NXT(1) == '?') &&
6080: (NXT(2) == 'x') && (NXT(3) == 'm') &&
6081: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6082: SKIP(5);
6083: } else {
6084: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6085: ctxt->sax->error(ctxt->userData,
6086: "Text declaration '<?xml' required\n");
6087: ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
6088: ctxt->wellFormed = 0;
6089: ctxt->disableSAX = 1;
6090:
6091: return;
6092: }
1.76 daniel 6093:
6094: if (!IS_BLANK(CUR)) {
6095: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6096: ctxt->sax->error(ctxt->userData,
6097: "Space needed after '<?xml'\n");
1.123 daniel 6098: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 6099: ctxt->wellFormed = 0;
1.180 daniel 6100: ctxt->disableSAX = 1;
1.76 daniel 6101: }
6102: SKIP_BLANKS;
6103:
6104: /*
6105: * We may have the VersionInfo here.
6106: */
6107: version = xmlParseVersionInfo(ctxt);
6108: if (version == NULL)
6109: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.172 daniel 6110: ctxt->input->version = version;
1.76 daniel 6111:
6112: /*
6113: * We must have the encoding declaration
6114: */
6115: if (!IS_BLANK(CUR)) {
6116: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6117: ctxt->sax->error(ctxt->userData, "Space needed here\n");
1.123 daniel 6118: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 6119: ctxt->wellFormed = 0;
1.180 daniel 6120: ctxt->disableSAX = 1;
1.76 daniel 6121: }
1.195 daniel 6122: xmlParseEncodingDecl(ctxt);
1.193 daniel 6123: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6124: /*
6125: * The XML REC instructs us to stop parsing right here
6126: */
6127: return;
6128: }
1.76 daniel 6129:
6130: SKIP_BLANKS;
1.152 daniel 6131: if ((RAW == '?') && (NXT(1) == '>')) {
1.76 daniel 6132: SKIP(2);
1.152 daniel 6133: } else if (RAW == '>') {
1.76 daniel 6134: /* Deprecated old WD ... */
6135: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6136: ctxt->sax->error(ctxt->userData,
6137: "XML declaration must end-up with '?>'\n");
1.123 daniel 6138: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 6139: ctxt->wellFormed = 0;
1.180 daniel 6140: ctxt->disableSAX = 1;
1.76 daniel 6141: NEXT;
6142: } else {
6143: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6144: ctxt->sax->error(ctxt->userData,
6145: "parsing XML declaration: '?>' expected\n");
1.123 daniel 6146: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 6147: ctxt->wellFormed = 0;
1.180 daniel 6148: ctxt->disableSAX = 1;
1.76 daniel 6149: MOVETO_ENDTAG(CUR_PTR);
6150: NEXT;
6151: }
6152: }
6153:
6154: /*
6155: * xmlParseConditionalSections
6156: * @ctxt: an XML parser context
6157: *
6158: * TODO : Conditionnal section are not yet supported !
6159: *
6160: * [61] conditionalSect ::= includeSect | ignoreSect
6161: * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6162: * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6163: * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6164: * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6165: */
6166:
6167: void
6168: xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
1.165 daniel 6169: SKIP(3);
6170: SKIP_BLANKS;
1.168 daniel 6171: if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
6172: (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
6173: (NXT(6) == 'E')) {
1.165 daniel 6174: SKIP(7);
1.168 daniel 6175: SKIP_BLANKS;
6176: if (RAW != '[') {
6177: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6178: ctxt->sax->error(ctxt->userData,
6179: "XML conditional section '[' expected\n");
6180: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
6181: ctxt->wellFormed = 0;
1.180 daniel 6182: ctxt->disableSAX = 1;
1.168 daniel 6183: } else {
6184: NEXT;
6185: }
1.165 daniel 6186: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6187: (NXT(2) != '>'))) {
6188: const xmlChar *check = CUR_PTR;
6189: int cons = ctxt->input->consumed;
6190: int tok = ctxt->token;
6191:
6192: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6193: xmlParseConditionalSections(ctxt);
6194: } else if (IS_BLANK(CUR)) {
6195: NEXT;
6196: } else if (RAW == '%') {
6197: xmlParsePEReference(ctxt);
6198: } else
6199: xmlParseMarkupDecl(ctxt);
6200:
6201: /*
6202: * Pop-up of finished entities.
6203: */
6204: while ((RAW == 0) && (ctxt->inputNr > 1))
6205: xmlPopInput(ctxt);
6206:
6207: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
6208: (tok == ctxt->token)) {
6209: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6210: ctxt->sax->error(ctxt->userData,
6211: "Content error in the external subset\n");
6212: ctxt->wellFormed = 0;
1.180 daniel 6213: ctxt->disableSAX = 1;
1.165 daniel 6214: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
6215: break;
6216: }
6217: }
1.168 daniel 6218: } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
6219: (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
1.171 daniel 6220: int state;
6221:
1.168 daniel 6222: SKIP(6);
6223: SKIP_BLANKS;
6224: if (RAW != '[') {
6225: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6226: ctxt->sax->error(ctxt->userData,
6227: "XML conditional section '[' expected\n");
6228: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
6229: ctxt->wellFormed = 0;
1.180 daniel 6230: ctxt->disableSAX = 1;
1.168 daniel 6231: } else {
6232: NEXT;
6233: }
1.171 daniel 6234:
1.143 daniel 6235: /*
1.171 daniel 6236: * Parse up to the end of the conditionnal section
6237: * But disable SAX event generating DTD building in the meantime
1.143 daniel 6238: */
1.171 daniel 6239: state = ctxt->disableSAX;
1.165 daniel 6240: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6241: (NXT(2) != '>'))) {
1.171 daniel 6242: const xmlChar *check = CUR_PTR;
6243: int cons = ctxt->input->consumed;
6244: int tok = ctxt->token;
6245:
6246: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6247: xmlParseConditionalSections(ctxt);
6248: } else if (IS_BLANK(CUR)) {
6249: NEXT;
6250: } else if (RAW == '%') {
6251: xmlParsePEReference(ctxt);
6252: } else
6253: xmlParseMarkupDecl(ctxt);
6254:
1.165 daniel 6255: /*
6256: * Pop-up of finished entities.
6257: */
6258: while ((RAW == 0) && (ctxt->inputNr > 1))
6259: xmlPopInput(ctxt);
1.143 daniel 6260:
1.171 daniel 6261: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
6262: (tok == ctxt->token)) {
6263: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6264: ctxt->sax->error(ctxt->userData,
6265: "Content error in the external subset\n");
6266: ctxt->wellFormed = 0;
1.180 daniel 6267: ctxt->disableSAX = 1;
1.171 daniel 6268: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
6269: break;
6270: }
1.165 daniel 6271: }
1.171 daniel 6272: ctxt->disableSAX = state;
1.168 daniel 6273: } else {
6274: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6275: ctxt->sax->error(ctxt->userData,
6276: "XML conditional section INCLUDE or IGNORE keyword expected\n");
6277: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
6278: ctxt->wellFormed = 0;
1.180 daniel 6279: ctxt->disableSAX = 1;
1.143 daniel 6280: }
6281:
1.152 daniel 6282: if (RAW == 0)
1.143 daniel 6283: SHRINK;
6284:
1.152 daniel 6285: if (RAW == 0) {
1.76 daniel 6286: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6287: ctxt->sax->error(ctxt->userData,
6288: "XML conditional section not closed\n");
1.123 daniel 6289: ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
1.76 daniel 6290: ctxt->wellFormed = 0;
1.180 daniel 6291: ctxt->disableSAX = 1;
1.143 daniel 6292: } else {
6293: SKIP(3);
1.76 daniel 6294: }
6295: }
6296:
6297: /**
1.124 daniel 6298: * xmlParseExternalSubset:
1.76 daniel 6299: * @ctxt: an XML parser context
1.124 daniel 6300: * @ExternalID: the external identifier
6301: * @SystemID: the system identifier (or URL)
1.76 daniel 6302: *
6303: * parse Markup declarations from an external subset
6304: *
6305: * [30] extSubset ::= textDecl? extSubsetDecl
6306: *
6307: * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6308: */
6309: void
1.123 daniel 6310: xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6311: const xmlChar *SystemID) {
1.132 daniel 6312: GROW;
1.152 daniel 6313: if ((RAW == '<') && (NXT(1) == '?') &&
1.76 daniel 6314: (NXT(2) == 'x') && (NXT(3) == 'm') &&
6315: (NXT(4) == 'l')) {
1.172 daniel 6316: xmlParseTextDecl(ctxt);
1.193 daniel 6317: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6318: /*
6319: * The XML REC instructs us to stop parsing right here
6320: */
6321: ctxt->instate = XML_PARSER_EOF;
6322: return;
6323: }
1.76 daniel 6324: }
1.79 daniel 6325: if (ctxt->myDoc == NULL) {
1.116 daniel 6326: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
1.79 daniel 6327: }
6328: if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6329: xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6330:
1.96 daniel 6331: ctxt->instate = XML_PARSER_DTD;
1.101 daniel 6332: ctxt->external = 1;
1.152 daniel 6333: while (((RAW == '<') && (NXT(1) == '?')) ||
6334: ((RAW == '<') && (NXT(1) == '!')) ||
1.164 daniel 6335: IS_BLANK(CUR)) {
1.123 daniel 6336: const xmlChar *check = CUR_PTR;
1.115 daniel 6337: int cons = ctxt->input->consumed;
1.164 daniel 6338: int tok = ctxt->token;
1.115 daniel 6339:
1.152 daniel 6340: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
1.76 daniel 6341: xmlParseConditionalSections(ctxt);
6342: } else if (IS_BLANK(CUR)) {
6343: NEXT;
1.152 daniel 6344: } else if (RAW == '%') {
1.76 daniel 6345: xmlParsePEReference(ctxt);
6346: } else
6347: xmlParseMarkupDecl(ctxt);
1.77 daniel 6348:
6349: /*
6350: * Pop-up of finished entities.
6351: */
1.166 daniel 6352: while ((RAW == 0) && (ctxt->inputNr > 1))
1.77 daniel 6353: xmlPopInput(ctxt);
6354:
1.164 daniel 6355: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
6356: (tok == ctxt->token)) {
1.115 daniel 6357: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6358: ctxt->sax->error(ctxt->userData,
6359: "Content error in the external subset\n");
6360: ctxt->wellFormed = 0;
1.180 daniel 6361: ctxt->disableSAX = 1;
1.123 daniel 6362: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.115 daniel 6363: break;
6364: }
1.76 daniel 6365: }
6366:
1.152 daniel 6367: if (RAW != 0) {
1.76 daniel 6368: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6369: ctxt->sax->error(ctxt->userData,
6370: "Extra content at the end of the document\n");
1.123 daniel 6371: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.76 daniel 6372: ctxt->wellFormed = 0;
1.180 daniel 6373: ctxt->disableSAX = 1;
1.76 daniel 6374: }
6375:
6376: }
6377:
6378: /**
1.77 daniel 6379: * xmlParseReference:
6380: * @ctxt: an XML parser context
6381: *
6382: * parse and handle entity references in content, depending on the SAX
6383: * interface, this may end-up in a call to character() if this is a
1.79 daniel 6384: * CharRef, a predefined entity, if there is no reference() callback.
6385: * or if the parser was asked to switch to that mode.
1.77 daniel 6386: *
6387: * [67] Reference ::= EntityRef | CharRef
6388: */
6389: void
6390: xmlParseReference(xmlParserCtxtPtr ctxt) {
6391: xmlEntityPtr ent;
1.123 daniel 6392: xmlChar *val;
1.152 daniel 6393: if (RAW != '&') return;
1.77 daniel 6394:
1.113 daniel 6395: if (ctxt->inputNr > 1) {
1.123 daniel 6396: xmlChar cur[2] = { '&' , 0 } ;
1.113 daniel 6397:
1.171 daniel 6398: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6399: (!ctxt->disableSAX))
1.113 daniel 6400: ctxt->sax->characters(ctxt->userData, cur, 1);
6401: if (ctxt->token == '&')
6402: ctxt->token = 0;
6403: else {
6404: SKIP(1);
6405: }
6406: return;
6407: }
1.77 daniel 6408: if (NXT(1) == '#') {
1.152 daniel 6409: int i = 0;
1.153 daniel 6410: xmlChar out[10];
6411: int hex = NXT(2);
1.77 daniel 6412: int val = xmlParseCharRef(ctxt);
1.152 daniel 6413:
1.198 daniel 6414: if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
1.153 daniel 6415: /*
6416: * So we are using non-UTF-8 buffers
6417: * Check that the char fit on 8bits, if not
6418: * generate a CharRef.
6419: */
6420: if (val <= 0xFF) {
6421: out[0] = val;
6422: out[1] = 0;
1.171 daniel 6423: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6424: (!ctxt->disableSAX))
1.153 daniel 6425: ctxt->sax->characters(ctxt->userData, out, 1);
6426: } else {
6427: if ((hex == 'x') || (hex == 'X'))
6428: sprintf((char *)out, "#x%X", val);
6429: else
6430: sprintf((char *)out, "#%d", val);
1.171 daniel 6431: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6432: (!ctxt->disableSAX))
1.153 daniel 6433: ctxt->sax->reference(ctxt->userData, out);
6434: }
6435: } else {
6436: /*
6437: * Just encode the value in UTF-8
6438: */
6439: COPY_BUF(0 ,out, i, val);
6440: out[i] = 0;
1.171 daniel 6441: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6442: (!ctxt->disableSAX))
1.153 daniel 6443: ctxt->sax->characters(ctxt->userData, out, i);
6444: }
1.77 daniel 6445: } else {
6446: ent = xmlParseEntityRef(ctxt);
6447: if (ent == NULL) return;
6448: if ((ent->name != NULL) &&
1.159 daniel 6449: (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
1.180 daniel 6450: xmlNodePtr list = NULL;
6451: int ret;
6452:
6453:
6454: /*
6455: * The first reference to the entity trigger a parsing phase
6456: * where the ent->children is filled with the result from
6457: * the parsing.
6458: */
6459: if (ent->children == NULL) {
6460: xmlChar *value;
6461: value = ent->content;
6462:
6463: /*
6464: * Check that this entity is well formed
6465: */
6466: if ((value != NULL) &&
6467: (value[1] == 0) && (value[0] == '<') &&
6468: (!xmlStrcmp(ent->name, BAD_CAST "lt"))) {
6469: /*
6470: * TODO: get definite answer on this !!!
6471: * Lots of entity decls are used to declare a single
6472: * char
6473: * <!ENTITY lt "<">
6474: * Which seems to be valid since
6475: * 2.4: The ampersand character (&) and the left angle
6476: * bracket (<) may appear in their literal form only
6477: * when used ... They are also legal within the literal
6478: * entity value of an internal entity declaration;i
6479: * see "4.3.2 Well-Formed Parsed Entities".
6480: * IMHO 2.4 and 4.3.2 are directly in contradiction.
6481: * Looking at the OASIS test suite and James Clark
6482: * tests, this is broken. However the XML REC uses
6483: * it. Is the XML REC not well-formed ????
6484: * This is a hack to avoid this problem
6485: */
6486: list = xmlNewDocText(ctxt->myDoc, value);
6487: if (list != NULL) {
6488: if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6489: (ent->children == NULL)) {
6490: ent->children = list;
6491: ent->last = list;
6492: list->parent = (xmlNodePtr) ent;
6493: } else {
6494: xmlFreeNodeList(list);
6495: }
6496: } else if (list != NULL) {
6497: xmlFreeNodeList(list);
6498: }
1.181 daniel 6499: } else {
1.180 daniel 6500: /*
6501: * 4.3.2: An internal general parsed entity is well-formed
6502: * if its replacement text matches the production labeled
6503: * content.
6504: */
1.185 daniel 6505: if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6506: ctxt->depth++;
1.180 daniel 6507: ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
1.185 daniel 6508: ctxt->sax, NULL, ctxt->depth,
6509: value, &list);
6510: ctxt->depth--;
6511: } else if (ent->etype ==
6512: XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6513: ctxt->depth++;
1.180 daniel 6514: ret = xmlParseExternalEntity(ctxt->myDoc,
1.185 daniel 6515: ctxt->sax, NULL, ctxt->depth,
6516: ent->SystemID, ent->ExternalID, &list);
6517: ctxt->depth--;
6518: } else {
1.180 daniel 6519: ret = -1;
6520: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6521: ctxt->sax->error(ctxt->userData,
6522: "Internal: invalid entity type\n");
6523: }
1.185 daniel 6524: if (ret == XML_ERR_ENTITY_LOOP) {
6525: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6526: ctxt->sax->error(ctxt->userData,
6527: "Detected entity reference loop\n");
6528: ctxt->wellFormed = 0;
6529: ctxt->disableSAX = 1;
6530: ctxt->errNo = XML_ERR_ENTITY_LOOP;
6531: } else if ((ret == 0) && (list != NULL)) {
1.180 daniel 6532: if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6533: (ent->children == NULL)) {
6534: ent->children = list;
6535: while (list != NULL) {
6536: list->parent = (xmlNodePtr) ent;
6537: if (list->next == NULL)
6538: ent->last = list;
6539: list = list->next;
6540: }
6541: } else {
6542: xmlFreeNodeList(list);
6543: }
6544: } else if (ret > 0) {
6545: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6546: ctxt->sax->error(ctxt->userData,
6547: "Entity value required\n");
6548: ctxt->errNo = ret;
6549: ctxt->wellFormed = 0;
6550: ctxt->disableSAX = 1;
6551: } else if (list != NULL) {
6552: xmlFreeNodeList(list);
6553: }
6554: }
6555: }
1.113 daniel 6556: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
1.171 daniel 6557: (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
1.113 daniel 6558: /*
6559: * Create a node.
6560: */
6561: ctxt->sax->reference(ctxt->userData, ent->name);
6562: return;
6563: } else if (ctxt->replaceEntities) {
6564: xmlParserInputPtr input;
1.79 daniel 6565:
1.113 daniel 6566: input = xmlNewEntityInputStream(ctxt, ent);
6567: xmlPushInput(ctxt, input);
1.167 daniel 6568: if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
6569: (RAW == '<') && (NXT(1) == '?') &&
6570: (NXT(2) == 'x') && (NXT(3) == 'm') &&
6571: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 6572: xmlParseTextDecl(ctxt);
1.193 daniel 6573: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6574: /*
6575: * The XML REC instructs us to stop parsing right here
6576: */
6577: ctxt->instate = XML_PARSER_EOF;
6578: return;
6579: }
1.199 daniel 6580: if (input->standalone == 1) {
1.167 daniel 6581: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6582: ctxt->sax->error(ctxt->userData,
6583: "external parsed entities cannot be standalone\n");
6584: ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
6585: ctxt->wellFormed = 0;
1.180 daniel 6586: ctxt->disableSAX = 1;
1.167 daniel 6587: }
6588: }
1.179 daniel 6589: /*
6590: * !!! TODO: build the tree under the entity first
6591: * 1234
6592: */
1.113 daniel 6593: return;
6594: }
1.77 daniel 6595: }
6596: val = ent->content;
6597: if (val == NULL) return;
6598: /*
6599: * inline the entity.
6600: */
1.171 daniel 6601: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6602: (!ctxt->disableSAX))
1.77 daniel 6603: ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6604: }
1.24 daniel 6605: }
6606:
1.50 daniel 6607: /**
6608: * xmlParseEntityRef:
6609: * @ctxt: an XML parser context
6610: *
6611: * parse ENTITY references declarations
1.24 daniel 6612: *
6613: * [68] EntityRef ::= '&' Name ';'
1.68 daniel 6614: *
1.98 daniel 6615: * [ WFC: Entity Declared ]
6616: * In a document without any DTD, a document with only an internal DTD
6617: * subset which contains no parameter entity references, or a document
6618: * with "standalone='yes'", the Name given in the entity reference
6619: * must match that in an entity declaration, except that well-formed
6620: * documents need not declare any of the following entities: amp, lt,
6621: * gt, apos, quot. The declaration of a parameter entity must precede
6622: * any reference to it. Similarly, the declaration of a general entity
6623: * must precede any reference to it which appears in a default value in an
6624: * attribute-list declaration. Note that if entities are declared in the
6625: * external subset or in external parameter entities, a non-validating
6626: * processor is not obligated to read and process their declarations;
6627: * for such documents, the rule that an entity must be declared is a
6628: * well-formedness constraint only if standalone='yes'.
6629: *
6630: * [ WFC: Parsed Entity ]
6631: * An entity reference must not contain the name of an unparsed entity
6632: *
1.77 daniel 6633: * Returns the xmlEntityPtr if found, or NULL otherwise.
1.24 daniel 6634: */
1.77 daniel 6635: xmlEntityPtr
1.55 daniel 6636: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1.123 daniel 6637: xmlChar *name;
1.72 daniel 6638: xmlEntityPtr ent = NULL;
1.24 daniel 6639:
1.91 daniel 6640: GROW;
1.111 daniel 6641:
1.152 daniel 6642: if (RAW == '&') {
1.40 daniel 6643: NEXT;
1.24 daniel 6644: name = xmlParseName(ctxt);
6645: if (name == NULL) {
1.55 daniel 6646: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 6647: ctxt->sax->error(ctxt->userData,
6648: "xmlParseEntityRef: no name\n");
1.123 daniel 6649: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 6650: ctxt->wellFormed = 0;
1.180 daniel 6651: ctxt->disableSAX = 1;
1.24 daniel 6652: } else {
1.152 daniel 6653: if (RAW == ';') {
1.40 daniel 6654: NEXT;
1.24 daniel 6655: /*
1.77 daniel 6656: * Ask first SAX for entity resolution, otherwise try the
6657: * predefined set.
6658: */
6659: if (ctxt->sax != NULL) {
6660: if (ctxt->sax->getEntity != NULL)
6661: ent = ctxt->sax->getEntity(ctxt->userData, name);
6662: if (ent == NULL)
6663: ent = xmlGetPredefinedEntity(name);
6664: }
6665: /*
1.98 daniel 6666: * [ WFC: Entity Declared ]
6667: * In a document without any DTD, a document with only an
6668: * internal DTD subset which contains no parameter entity
6669: * references, or a document with "standalone='yes'", the
6670: * Name given in the entity reference must match that in an
6671: * entity declaration, except that well-formed documents
6672: * need not declare any of the following entities: amp, lt,
6673: * gt, apos, quot.
6674: * The declaration of a parameter entity must precede any
6675: * reference to it.
6676: * Similarly, the declaration of a general entity must
6677: * precede any reference to it which appears in a default
6678: * value in an attribute-list declaration. Note that if
6679: * entities are declared in the external subset or in
6680: * external parameter entities, a non-validating processor
6681: * is not obligated to read and process their declarations;
6682: * for such documents, the rule that an entity must be
6683: * declared is a well-formedness constraint only if
6684: * standalone='yes'.
1.59 daniel 6685: */
1.77 daniel 6686: if (ent == NULL) {
1.98 daniel 6687: if ((ctxt->standalone == 1) ||
6688: ((ctxt->hasExternalSubset == 0) &&
6689: (ctxt->hasPErefs == 0))) {
6690: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 6691: ctxt->sax->error(ctxt->userData,
6692: "Entity '%s' not defined\n", name);
1.123 daniel 6693: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.77 daniel 6694: ctxt->wellFormed = 0;
1.180 daniel 6695: ctxt->disableSAX = 1;
1.77 daniel 6696: } else {
1.98 daniel 6697: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6698: ctxt->sax->warning(ctxt->userData,
6699: "Entity '%s' not defined\n", name);
1.123 daniel 6700: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
1.59 daniel 6701: }
1.77 daniel 6702: }
1.59 daniel 6703:
6704: /*
1.98 daniel 6705: * [ WFC: Parsed Entity ]
6706: * An entity reference must not contain the name of an
6707: * unparsed entity
6708: */
1.159 daniel 6709: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.98 daniel 6710: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6711: ctxt->sax->error(ctxt->userData,
6712: "Entity reference to unparsed entity %s\n", name);
1.123 daniel 6713: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 6714: ctxt->wellFormed = 0;
1.180 daniel 6715: ctxt->disableSAX = 1;
1.98 daniel 6716: }
6717:
6718: /*
6719: * [ WFC: No External Entity References ]
6720: * Attribute values cannot contain direct or indirect
6721: * entity references to external entities.
6722: */
6723: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 6724: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.98 daniel 6725: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6726: ctxt->sax->error(ctxt->userData,
6727: "Attribute references external entity '%s'\n", name);
1.123 daniel 6728: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
1.98 daniel 6729: ctxt->wellFormed = 0;
1.180 daniel 6730: ctxt->disableSAX = 1;
1.98 daniel 6731: }
6732: /*
6733: * [ WFC: No < in Attribute Values ]
6734: * The replacement text of any entity referred to directly or
6735: * indirectly in an attribute value (other than "<") must
6736: * not contain a <.
1.59 daniel 6737: */
1.98 daniel 6738: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.116 daniel 6739: (ent != NULL) &&
6740: (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
1.98 daniel 6741: (ent->content != NULL) &&
6742: (xmlStrchr(ent->content, '<'))) {
6743: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6744: ctxt->sax->error(ctxt->userData,
6745: "'<' in entity '%s' is not allowed in attributes values\n", name);
1.123 daniel 6746: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.98 daniel 6747: ctxt->wellFormed = 0;
1.180 daniel 6748: ctxt->disableSAX = 1;
1.98 daniel 6749: }
6750:
6751: /*
6752: * Internal check, no parameter entities here ...
6753: */
6754: else {
1.159 daniel 6755: switch (ent->etype) {
1.59 daniel 6756: case XML_INTERNAL_PARAMETER_ENTITY:
6757: case XML_EXTERNAL_PARAMETER_ENTITY:
6758: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6759: ctxt->sax->error(ctxt->userData,
1.59 daniel 6760: "Attempt to reference the parameter entity '%s'\n", name);
1.123 daniel 6761: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
1.59 daniel 6762: ctxt->wellFormed = 0;
1.180 daniel 6763: ctxt->disableSAX = 1;
6764: break;
6765: default:
1.59 daniel 6766: break;
6767: }
6768: }
6769:
6770: /*
1.98 daniel 6771: * [ WFC: No Recursion ]
1.117 daniel 6772: * TODO A parsed entity must not contain a recursive reference
6773: * to itself, either directly or indirectly.
1.59 daniel 6774: */
1.77 daniel 6775:
1.24 daniel 6776: } else {
1.55 daniel 6777: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6778: ctxt->sax->error(ctxt->userData,
1.59 daniel 6779: "xmlParseEntityRef: expecting ';'\n");
1.123 daniel 6780: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.59 daniel 6781: ctxt->wellFormed = 0;
1.180 daniel 6782: ctxt->disableSAX = 1;
1.24 daniel 6783: }
1.119 daniel 6784: xmlFree(name);
1.24 daniel 6785: }
6786: }
1.77 daniel 6787: return(ent);
1.24 daniel 6788: }
1.135 daniel 6789: /**
6790: * xmlParseStringEntityRef:
6791: * @ctxt: an XML parser context
6792: * @str: a pointer to an index in the string
6793: *
6794: * parse ENTITY references declarations, but this version parses it from
6795: * a string value.
6796: *
6797: * [68] EntityRef ::= '&' Name ';'
6798: *
6799: * [ WFC: Entity Declared ]
6800: * In a document without any DTD, a document with only an internal DTD
6801: * subset which contains no parameter entity references, or a document
6802: * with "standalone='yes'", the Name given in the entity reference
6803: * must match that in an entity declaration, except that well-formed
6804: * documents need not declare any of the following entities: amp, lt,
6805: * gt, apos, quot. The declaration of a parameter entity must precede
6806: * any reference to it. Similarly, the declaration of a general entity
6807: * must precede any reference to it which appears in a default value in an
6808: * attribute-list declaration. Note that if entities are declared in the
6809: * external subset or in external parameter entities, a non-validating
6810: * processor is not obligated to read and process their declarations;
6811: * for such documents, the rule that an entity must be declared is a
6812: * well-formedness constraint only if standalone='yes'.
6813: *
6814: * [ WFC: Parsed Entity ]
6815: * An entity reference must not contain the name of an unparsed entity
6816: *
6817: * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6818: * is updated to the current location in the string.
6819: */
6820: xmlEntityPtr
6821: xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6822: xmlChar *name;
6823: const xmlChar *ptr;
6824: xmlChar cur;
6825: xmlEntityPtr ent = NULL;
6826:
1.156 daniel 6827: if ((str == NULL) || (*str == NULL))
6828: return(NULL);
1.135 daniel 6829: ptr = *str;
6830: cur = *ptr;
6831: if (cur == '&') {
6832: ptr++;
6833: cur = *ptr;
6834: name = xmlParseStringName(ctxt, &ptr);
6835: if (name == NULL) {
6836: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6837: ctxt->sax->error(ctxt->userData,
6838: "xmlParseEntityRef: no name\n");
6839: ctxt->errNo = XML_ERR_NAME_REQUIRED;
6840: ctxt->wellFormed = 0;
1.180 daniel 6841: ctxt->disableSAX = 1;
1.135 daniel 6842: } else {
1.185 daniel 6843: if (*ptr == ';') {
6844: ptr++;
1.135 daniel 6845: /*
6846: * Ask first SAX for entity resolution, otherwise try the
6847: * predefined set.
6848: */
6849: if (ctxt->sax != NULL) {
6850: if (ctxt->sax->getEntity != NULL)
6851: ent = ctxt->sax->getEntity(ctxt->userData, name);
6852: if (ent == NULL)
6853: ent = xmlGetPredefinedEntity(name);
6854: }
6855: /*
6856: * [ WFC: Entity Declared ]
6857: * In a document without any DTD, a document with only an
6858: * internal DTD subset which contains no parameter entity
6859: * references, or a document with "standalone='yes'", the
6860: * Name given in the entity reference must match that in an
6861: * entity declaration, except that well-formed documents
6862: * need not declare any of the following entities: amp, lt,
6863: * gt, apos, quot.
6864: * The declaration of a parameter entity must precede any
6865: * reference to it.
6866: * Similarly, the declaration of a general entity must
6867: * precede any reference to it which appears in a default
6868: * value in an attribute-list declaration. Note that if
6869: * entities are declared in the external subset or in
6870: * external parameter entities, a non-validating processor
6871: * is not obligated to read and process their declarations;
6872: * for such documents, the rule that an entity must be
6873: * declared is a well-formedness constraint only if
6874: * standalone='yes'.
6875: */
6876: if (ent == NULL) {
6877: if ((ctxt->standalone == 1) ||
6878: ((ctxt->hasExternalSubset == 0) &&
6879: (ctxt->hasPErefs == 0))) {
6880: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6881: ctxt->sax->error(ctxt->userData,
6882: "Entity '%s' not defined\n", name);
6883: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6884: ctxt->wellFormed = 0;
1.180 daniel 6885: ctxt->disableSAX = 1;
1.135 daniel 6886: } else {
6887: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6888: ctxt->sax->warning(ctxt->userData,
6889: "Entity '%s' not defined\n", name);
6890: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
6891: }
6892: }
6893:
6894: /*
6895: * [ WFC: Parsed Entity ]
6896: * An entity reference must not contain the name of an
6897: * unparsed entity
6898: */
1.159 daniel 6899: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.135 daniel 6900: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6901: ctxt->sax->error(ctxt->userData,
6902: "Entity reference to unparsed entity %s\n", name);
6903: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
6904: ctxt->wellFormed = 0;
1.180 daniel 6905: ctxt->disableSAX = 1;
1.135 daniel 6906: }
6907:
6908: /*
6909: * [ WFC: No External Entity References ]
6910: * Attribute values cannot contain direct or indirect
6911: * entity references to external entities.
6912: */
6913: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 6914: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.135 daniel 6915: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6916: ctxt->sax->error(ctxt->userData,
6917: "Attribute references external entity '%s'\n", name);
6918: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
6919: ctxt->wellFormed = 0;
1.180 daniel 6920: ctxt->disableSAX = 1;
1.135 daniel 6921: }
6922: /*
6923: * [ WFC: No < in Attribute Values ]
6924: * The replacement text of any entity referred to directly or
6925: * indirectly in an attribute value (other than "<") must
6926: * not contain a <.
6927: */
6928: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6929: (ent != NULL) &&
6930: (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
6931: (ent->content != NULL) &&
6932: (xmlStrchr(ent->content, '<'))) {
6933: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6934: ctxt->sax->error(ctxt->userData,
6935: "'<' in entity '%s' is not allowed in attributes values\n", name);
6936: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
6937: ctxt->wellFormed = 0;
1.180 daniel 6938: ctxt->disableSAX = 1;
1.135 daniel 6939: }
6940:
6941: /*
6942: * Internal check, no parameter entities here ...
6943: */
6944: else {
1.159 daniel 6945: switch (ent->etype) {
1.135 daniel 6946: case XML_INTERNAL_PARAMETER_ENTITY:
6947: case XML_EXTERNAL_PARAMETER_ENTITY:
6948: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6949: ctxt->sax->error(ctxt->userData,
6950: "Attempt to reference the parameter entity '%s'\n", name);
6951: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
6952: ctxt->wellFormed = 0;
1.180 daniel 6953: ctxt->disableSAX = 1;
6954: break;
6955: default:
1.135 daniel 6956: break;
6957: }
6958: }
6959:
6960: /*
6961: * [ WFC: No Recursion ]
6962: * TODO A parsed entity must not contain a recursive reference
6963: * to itself, either directly or indirectly.
6964: */
6965:
6966: } else {
6967: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6968: ctxt->sax->error(ctxt->userData,
6969: "xmlParseEntityRef: expecting ';'\n");
6970: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6971: ctxt->wellFormed = 0;
1.180 daniel 6972: ctxt->disableSAX = 1;
1.135 daniel 6973: }
6974: xmlFree(name);
6975: }
6976: }
1.185 daniel 6977: *str = ptr;
1.135 daniel 6978: return(ent);
6979: }
1.24 daniel 6980:
1.50 daniel 6981: /**
6982: * xmlParsePEReference:
6983: * @ctxt: an XML parser context
6984: *
6985: * parse PEReference declarations
1.77 daniel 6986: * The entity content is handled directly by pushing it's content as
6987: * a new input stream.
1.22 daniel 6988: *
6989: * [69] PEReference ::= '%' Name ';'
1.68 daniel 6990: *
1.98 daniel 6991: * [ WFC: No Recursion ]
6992: * TODO A parsed entity must not contain a recursive
6993: * reference to itself, either directly or indirectly.
6994: *
6995: * [ WFC: Entity Declared ]
6996: * In a document without any DTD, a document with only an internal DTD
6997: * subset which contains no parameter entity references, or a document
6998: * with "standalone='yes'", ... ... The declaration of a parameter
6999: * entity must precede any reference to it...
7000: *
7001: * [ VC: Entity Declared ]
7002: * In a document with an external subset or external parameter entities
7003: * with "standalone='no'", ... ... The declaration of a parameter entity
7004: * must precede any reference to it...
7005: *
7006: * [ WFC: In DTD ]
7007: * Parameter-entity references may only appear in the DTD.
7008: * NOTE: misleading but this is handled.
1.22 daniel 7009: */
1.77 daniel 7010: void
1.55 daniel 7011: xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 7012: xmlChar *name;
1.72 daniel 7013: xmlEntityPtr entity = NULL;
1.50 daniel 7014: xmlParserInputPtr input;
1.22 daniel 7015:
1.152 daniel 7016: if (RAW == '%') {
1.40 daniel 7017: NEXT;
1.22 daniel 7018: name = xmlParseName(ctxt);
7019: if (name == NULL) {
1.55 daniel 7020: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7021: ctxt->sax->error(ctxt->userData,
7022: "xmlParsePEReference: no name\n");
1.123 daniel 7023: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 7024: ctxt->wellFormed = 0;
1.180 daniel 7025: ctxt->disableSAX = 1;
1.22 daniel 7026: } else {
1.152 daniel 7027: if (RAW == ';') {
1.40 daniel 7028: NEXT;
1.98 daniel 7029: if ((ctxt->sax != NULL) &&
7030: (ctxt->sax->getParameterEntity != NULL))
7031: entity = ctxt->sax->getParameterEntity(ctxt->userData,
7032: name);
1.45 daniel 7033: if (entity == NULL) {
1.98 daniel 7034: /*
7035: * [ WFC: Entity Declared ]
7036: * In a document without any DTD, a document with only an
7037: * internal DTD subset which contains no parameter entity
7038: * references, or a document with "standalone='yes'", ...
7039: * ... The declaration of a parameter entity must precede
7040: * any reference to it...
7041: */
7042: if ((ctxt->standalone == 1) ||
7043: ((ctxt->hasExternalSubset == 0) &&
7044: (ctxt->hasPErefs == 0))) {
7045: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7046: ctxt->sax->error(ctxt->userData,
7047: "PEReference: %%%s; not found\n", name);
1.123 daniel 7048: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.98 daniel 7049: ctxt->wellFormed = 0;
1.180 daniel 7050: ctxt->disableSAX = 1;
1.98 daniel 7051: } else {
7052: /*
7053: * [ VC: Entity Declared ]
7054: * In a document with an external subset or external
7055: * parameter entities with "standalone='no'", ...
7056: * ... The declaration of a parameter entity must precede
7057: * any reference to it...
7058: */
7059: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7060: ctxt->sax->warning(ctxt->userData,
7061: "PEReference: %%%s; not found\n", name);
7062: ctxt->valid = 0;
7063: }
1.50 daniel 7064: } else {
1.98 daniel 7065: /*
7066: * Internal checking in case the entity quest barfed
7067: */
1.159 daniel 7068: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7069: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.98 daniel 7070: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7071: ctxt->sax->warning(ctxt->userData,
7072: "Internal: %%%s; is not a parameter entity\n", name);
7073: } else {
1.164 daniel 7074: /*
7075: * TODO !!!
7076: * handle the extra spaces added before and after
7077: * c.f. http://www.w3.org/TR/REC-xml#as-PE
7078: */
1.98 daniel 7079: input = xmlNewEntityInputStream(ctxt, entity);
7080: xmlPushInput(ctxt, input);
1.164 daniel 7081: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7082: (RAW == '<') && (NXT(1) == '?') &&
7083: (NXT(2) == 'x') && (NXT(3) == 'm') &&
7084: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 7085: xmlParseTextDecl(ctxt);
1.193 daniel 7086: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7087: /*
7088: * The XML REC instructs us to stop parsing
7089: * right here
7090: */
7091: ctxt->instate = XML_PARSER_EOF;
7092: xmlFree(name);
7093: return;
7094: }
1.164 daniel 7095: }
7096: if (ctxt->token == 0)
7097: ctxt->token = ' ';
1.98 daniel 7098: }
1.45 daniel 7099: }
1.98 daniel 7100: ctxt->hasPErefs = 1;
1.22 daniel 7101: } else {
1.55 daniel 7102: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7103: ctxt->sax->error(ctxt->userData,
1.59 daniel 7104: "xmlParsePEReference: expecting ';'\n");
1.123 daniel 7105: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.59 daniel 7106: ctxt->wellFormed = 0;
1.180 daniel 7107: ctxt->disableSAX = 1;
1.22 daniel 7108: }
1.119 daniel 7109: xmlFree(name);
1.3 veillard 7110: }
7111: }
7112: }
7113:
1.50 daniel 7114: /**
1.135 daniel 7115: * xmlParseStringPEReference:
7116: * @ctxt: an XML parser context
7117: * @str: a pointer to an index in the string
7118: *
7119: * parse PEReference declarations
7120: *
7121: * [69] PEReference ::= '%' Name ';'
7122: *
7123: * [ WFC: No Recursion ]
7124: * TODO A parsed entity must not contain a recursive
7125: * reference to itself, either directly or indirectly.
7126: *
7127: * [ WFC: Entity Declared ]
7128: * In a document without any DTD, a document with only an internal DTD
7129: * subset which contains no parameter entity references, or a document
7130: * with "standalone='yes'", ... ... The declaration of a parameter
7131: * entity must precede any reference to it...
7132: *
7133: * [ VC: Entity Declared ]
7134: * In a document with an external subset or external parameter entities
7135: * with "standalone='no'", ... ... The declaration of a parameter entity
7136: * must precede any reference to it...
7137: *
7138: * [ WFC: In DTD ]
7139: * Parameter-entity references may only appear in the DTD.
7140: * NOTE: misleading but this is handled.
7141: *
7142: * Returns the string of the entity content.
7143: * str is updated to the current value of the index
7144: */
7145: xmlEntityPtr
7146: xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7147: const xmlChar *ptr;
7148: xmlChar cur;
7149: xmlChar *name;
7150: xmlEntityPtr entity = NULL;
7151:
7152: if ((str == NULL) || (*str == NULL)) return(NULL);
7153: ptr = *str;
7154: cur = *ptr;
7155: if (cur == '%') {
7156: ptr++;
7157: cur = *ptr;
7158: name = xmlParseStringName(ctxt, &ptr);
7159: if (name == NULL) {
7160: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7161: ctxt->sax->error(ctxt->userData,
7162: "xmlParseStringPEReference: no name\n");
7163: ctxt->errNo = XML_ERR_NAME_REQUIRED;
7164: ctxt->wellFormed = 0;
1.180 daniel 7165: ctxt->disableSAX = 1;
1.135 daniel 7166: } else {
7167: cur = *ptr;
7168: if (cur == ';') {
7169: ptr++;
7170: cur = *ptr;
7171: if ((ctxt->sax != NULL) &&
7172: (ctxt->sax->getParameterEntity != NULL))
7173: entity = ctxt->sax->getParameterEntity(ctxt->userData,
7174: name);
7175: if (entity == NULL) {
7176: /*
7177: * [ WFC: Entity Declared ]
7178: * In a document without any DTD, a document with only an
7179: * internal DTD subset which contains no parameter entity
7180: * references, or a document with "standalone='yes'", ...
7181: * ... The declaration of a parameter entity must precede
7182: * any reference to it...
7183: */
7184: if ((ctxt->standalone == 1) ||
7185: ((ctxt->hasExternalSubset == 0) &&
7186: (ctxt->hasPErefs == 0))) {
7187: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7188: ctxt->sax->error(ctxt->userData,
7189: "PEReference: %%%s; not found\n", name);
7190: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
7191: ctxt->wellFormed = 0;
1.180 daniel 7192: ctxt->disableSAX = 1;
1.135 daniel 7193: } else {
7194: /*
7195: * [ VC: Entity Declared ]
7196: * In a document with an external subset or external
7197: * parameter entities with "standalone='no'", ...
7198: * ... The declaration of a parameter entity must
7199: * precede any reference to it...
7200: */
7201: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7202: ctxt->sax->warning(ctxt->userData,
7203: "PEReference: %%%s; not found\n", name);
7204: ctxt->valid = 0;
7205: }
7206: } else {
7207: /*
7208: * Internal checking in case the entity quest barfed
7209: */
1.159 daniel 7210: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7211: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.135 daniel 7212: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7213: ctxt->sax->warning(ctxt->userData,
7214: "Internal: %%%s; is not a parameter entity\n", name);
7215: }
7216: }
7217: ctxt->hasPErefs = 1;
7218: } else {
7219: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7220: ctxt->sax->error(ctxt->userData,
7221: "xmlParseStringPEReference: expecting ';'\n");
7222: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
7223: ctxt->wellFormed = 0;
1.180 daniel 7224: ctxt->disableSAX = 1;
1.135 daniel 7225: }
7226: xmlFree(name);
7227: }
7228: }
7229: *str = ptr;
7230: return(entity);
7231: }
7232:
7233: /**
1.181 daniel 7234: * xmlParseDocTypeDecl:
1.50 daniel 7235: * @ctxt: an XML parser context
7236: *
7237: * parse a DOCTYPE declaration
1.21 daniel 7238: *
1.22 daniel 7239: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7240: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.98 daniel 7241: *
7242: * [ VC: Root Element Type ]
1.99 daniel 7243: * The Name in the document type declaration must match the element
1.98 daniel 7244: * type of the root element.
1.21 daniel 7245: */
7246:
1.55 daniel 7247: void
7248: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.165 daniel 7249: xmlChar *name = NULL;
1.123 daniel 7250: xmlChar *ExternalID = NULL;
7251: xmlChar *URI = NULL;
1.21 daniel 7252:
7253: /*
7254: * We know that '<!DOCTYPE' has been detected.
7255: */
1.40 daniel 7256: SKIP(9);
1.21 daniel 7257:
1.42 daniel 7258: SKIP_BLANKS;
1.21 daniel 7259:
7260: /*
7261: * Parse the DOCTYPE name.
7262: */
7263: name = xmlParseName(ctxt);
7264: if (name == NULL) {
1.55 daniel 7265: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7266: ctxt->sax->error(ctxt->userData,
7267: "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.59 daniel 7268: ctxt->wellFormed = 0;
1.180 daniel 7269: ctxt->disableSAX = 1;
1.123 daniel 7270: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.21 daniel 7271: }
1.165 daniel 7272: ctxt->intSubName = name;
1.21 daniel 7273:
1.42 daniel 7274: SKIP_BLANKS;
1.21 daniel 7275:
7276: /*
1.22 daniel 7277: * Check for SystemID and ExternalID
7278: */
1.67 daniel 7279: URI = xmlParseExternalID(ctxt, &ExternalID, 1);
1.98 daniel 7280:
7281: if ((URI != NULL) || (ExternalID != NULL)) {
7282: ctxt->hasExternalSubset = 1;
7283: }
1.165 daniel 7284: ctxt->extSubURI = URI;
7285: ctxt->extSubSystem = ExternalID;
1.98 daniel 7286:
1.42 daniel 7287: SKIP_BLANKS;
1.36 daniel 7288:
1.76 daniel 7289: /*
1.165 daniel 7290: * Create and update the internal subset.
1.76 daniel 7291: */
1.171 daniel 7292: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7293: (!ctxt->disableSAX))
1.74 daniel 7294: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
1.22 daniel 7295:
7296: /*
1.140 daniel 7297: * Is there any internal subset declarations ?
7298: * they are handled separately in xmlParseInternalSubset()
7299: */
1.152 daniel 7300: if (RAW == '[')
1.140 daniel 7301: return;
7302:
7303: /*
7304: * We should be at the end of the DOCTYPE declaration.
7305: */
1.152 daniel 7306: if (RAW != '>') {
1.140 daniel 7307: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7308: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
7309: ctxt->wellFormed = 0;
1.180 daniel 7310: ctxt->disableSAX = 1;
1.140 daniel 7311: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
7312: }
7313: NEXT;
7314: }
7315:
7316: /**
1.181 daniel 7317: * xmlParseInternalsubset:
1.140 daniel 7318: * @ctxt: an XML parser context
7319: *
7320: * parse the internal subset declaration
7321: *
7322: * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7323: */
7324:
7325: void
7326: xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7327: /*
1.22 daniel 7328: * Is there any DTD definition ?
7329: */
1.152 daniel 7330: if (RAW == '[') {
1.96 daniel 7331: ctxt->instate = XML_PARSER_DTD;
1.40 daniel 7332: NEXT;
1.22 daniel 7333: /*
7334: * Parse the succession of Markup declarations and
7335: * PEReferences.
7336: * Subsequence (markupdecl | PEReference | S)*
7337: */
1.152 daniel 7338: while (RAW != ']') {
1.123 daniel 7339: const xmlChar *check = CUR_PTR;
1.115 daniel 7340: int cons = ctxt->input->consumed;
1.22 daniel 7341:
1.42 daniel 7342: SKIP_BLANKS;
1.22 daniel 7343: xmlParseMarkupDecl(ctxt);
1.50 daniel 7344: xmlParsePEReference(ctxt);
1.22 daniel 7345:
1.115 daniel 7346: /*
7347: * Pop-up of finished entities.
7348: */
1.152 daniel 7349: while ((RAW == 0) && (ctxt->inputNr > 1))
1.115 daniel 7350: xmlPopInput(ctxt);
7351:
1.118 daniel 7352: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
1.55 daniel 7353: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7354: ctxt->sax->error(ctxt->userData,
1.140 daniel 7355: "xmlParseInternalSubset: error detected in Markup declaration\n");
1.59 daniel 7356: ctxt->wellFormed = 0;
1.180 daniel 7357: ctxt->disableSAX = 1;
1.123 daniel 7358: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.22 daniel 7359: break;
7360: }
7361: }
1.209 veillard 7362: if (RAW == ']') {
7363: NEXT;
7364: SKIP_BLANKS;
7365: }
1.22 daniel 7366: }
7367:
7368: /*
7369: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 7370: */
1.152 daniel 7371: if (RAW != '>') {
1.55 daniel 7372: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7373: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
1.59 daniel 7374: ctxt->wellFormed = 0;
1.180 daniel 7375: ctxt->disableSAX = 1;
1.123 daniel 7376: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
1.21 daniel 7377: }
1.40 daniel 7378: NEXT;
1.21 daniel 7379: }
7380:
1.50 daniel 7381: /**
7382: * xmlParseAttribute:
7383: * @ctxt: an XML parser context
1.123 daniel 7384: * @value: a xmlChar ** used to store the value of the attribute
1.50 daniel 7385: *
7386: * parse an attribute
1.3 veillard 7387: *
1.22 daniel 7388: * [41] Attribute ::= Name Eq AttValue
7389: *
1.98 daniel 7390: * [ WFC: No External Entity References ]
7391: * Attribute values cannot contain direct or indirect entity references
7392: * to external entities.
7393: *
7394: * [ WFC: No < in Attribute Values ]
7395: * The replacement text of any entity referred to directly or indirectly in
7396: * an attribute value (other than "<") must not contain a <.
7397: *
7398: * [ VC: Attribute Value Type ]
1.117 daniel 7399: * The attribute must have been declared; the value must be of the type
1.99 daniel 7400: * declared for it.
1.98 daniel 7401: *
1.22 daniel 7402: * [25] Eq ::= S? '=' S?
7403: *
1.29 daniel 7404: * With namespace:
7405: *
7406: * [NS 11] Attribute ::= QName Eq AttValue
1.43 daniel 7407: *
7408: * Also the case QName == xmlns:??? is handled independently as a namespace
7409: * definition.
1.69 daniel 7410: *
1.72 daniel 7411: * Returns the attribute name, and the value in *value.
1.3 veillard 7412: */
7413:
1.123 daniel 7414: xmlChar *
7415: xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
7416: xmlChar *name, *val;
1.3 veillard 7417:
1.72 daniel 7418: *value = NULL;
7419: name = xmlParseName(ctxt);
1.22 daniel 7420: if (name == NULL) {
1.55 daniel 7421: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7422: ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
1.59 daniel 7423: ctxt->wellFormed = 0;
1.180 daniel 7424: ctxt->disableSAX = 1;
1.123 daniel 7425: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.52 daniel 7426: return(NULL);
1.3 veillard 7427: }
7428:
7429: /*
1.29 daniel 7430: * read the value
1.3 veillard 7431: */
1.42 daniel 7432: SKIP_BLANKS;
1.152 daniel 7433: if (RAW == '=') {
1.40 daniel 7434: NEXT;
1.42 daniel 7435: SKIP_BLANKS;
1.72 daniel 7436: val = xmlParseAttValue(ctxt);
1.96 daniel 7437: ctxt->instate = XML_PARSER_CONTENT;
1.29 daniel 7438: } else {
1.55 daniel 7439: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7440: ctxt->sax->error(ctxt->userData,
1.59 daniel 7441: "Specification mandate value for attribute %s\n", name);
1.123 daniel 7442: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
1.59 daniel 7443: ctxt->wellFormed = 0;
1.180 daniel 7444: ctxt->disableSAX = 1;
1.170 daniel 7445: xmlFree(name);
1.52 daniel 7446: return(NULL);
1.43 daniel 7447: }
7448:
1.172 daniel 7449: /*
7450: * Check that xml:lang conforms to the specification
7451: */
7452: if (!xmlStrcmp(name, BAD_CAST "xml:lang")) {
7453: if (!xmlCheckLanguageID(val)) {
7454: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7455: ctxt->sax->error(ctxt->userData,
7456: "Invalid value for xml:lang : %s\n", val);
7457: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
7458: ctxt->wellFormed = 0;
1.180 daniel 7459: ctxt->disableSAX = 1;
1.172 daniel 7460: }
7461: }
7462:
1.176 daniel 7463: /*
7464: * Check that xml:space conforms to the specification
7465: */
7466: if (!xmlStrcmp(name, BAD_CAST "xml:space")) {
7467: if (!xmlStrcmp(val, BAD_CAST "default"))
7468: *(ctxt->space) = 0;
7469: else if (!xmlStrcmp(val, BAD_CAST "preserve"))
7470: *(ctxt->space) = 1;
7471: else {
7472: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7473: ctxt->sax->error(ctxt->userData,
7474: "Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
7475: val);
7476: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
7477: ctxt->wellFormed = 0;
1.180 daniel 7478: ctxt->disableSAX = 1;
1.176 daniel 7479: }
7480: }
7481:
1.72 daniel 7482: *value = val;
7483: return(name);
1.3 veillard 7484: }
7485:
1.50 daniel 7486: /**
7487: * xmlParseStartTag:
7488: * @ctxt: an XML parser context
7489: *
7490: * parse a start of tag either for rule element or
7491: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 7492: *
7493: * [40] STag ::= '<' Name (S Attribute)* S? '>'
7494: *
1.98 daniel 7495: * [ WFC: Unique Att Spec ]
7496: * No attribute name may appear more than once in the same start-tag or
7497: * empty-element tag.
7498: *
1.29 daniel 7499: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7500: *
1.98 daniel 7501: * [ WFC: Unique Att Spec ]
7502: * No attribute name may appear more than once in the same start-tag or
7503: * empty-element tag.
7504: *
1.29 daniel 7505: * With namespace:
7506: *
7507: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7508: *
7509: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.83 daniel 7510: *
1.192 daniel 7511: * Returns the element name parsed
1.2 veillard 7512: */
7513:
1.123 daniel 7514: xmlChar *
1.69 daniel 7515: xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 7516: xmlChar *name;
7517: xmlChar *attname;
7518: xmlChar *attvalue;
7519: const xmlChar **atts = NULL;
1.72 daniel 7520: int nbatts = 0;
7521: int maxatts = 0;
7522: int i;
1.2 veillard 7523:
1.152 daniel 7524: if (RAW != '<') return(NULL);
1.40 daniel 7525: NEXT;
1.3 veillard 7526:
1.72 daniel 7527: name = xmlParseName(ctxt);
1.59 daniel 7528: if (name == NULL) {
7529: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7530: ctxt->sax->error(ctxt->userData,
1.59 daniel 7531: "xmlParseStartTag: invalid element name\n");
1.123 daniel 7532: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 7533: ctxt->wellFormed = 0;
1.180 daniel 7534: ctxt->disableSAX = 1;
1.83 daniel 7535: return(NULL);
1.50 daniel 7536: }
7537:
7538: /*
1.3 veillard 7539: * Now parse the attributes, it ends up with the ending
7540: *
7541: * (S Attribute)* S?
7542: */
1.42 daniel 7543: SKIP_BLANKS;
1.91 daniel 7544: GROW;
1.168 daniel 7545:
1.153 daniel 7546: while ((IS_CHAR(RAW)) &&
1.152 daniel 7547: (RAW != '>') &&
7548: ((RAW != '/') || (NXT(1) != '>'))) {
1.123 daniel 7549: const xmlChar *q = CUR_PTR;
1.91 daniel 7550: int cons = ctxt->input->consumed;
1.29 daniel 7551:
1.72 daniel 7552: attname = xmlParseAttribute(ctxt, &attvalue);
7553: if ((attname != NULL) && (attvalue != NULL)) {
7554: /*
1.98 daniel 7555: * [ WFC: Unique Att Spec ]
7556: * No attribute name may appear more than once in the same
7557: * start-tag or empty-element tag.
1.72 daniel 7558: */
7559: for (i = 0; i < nbatts;i += 2) {
7560: if (!xmlStrcmp(atts[i], attname)) {
7561: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 7562: ctxt->sax->error(ctxt->userData,
7563: "Attribute %s redefined\n",
7564: attname);
1.72 daniel 7565: ctxt->wellFormed = 0;
1.180 daniel 7566: ctxt->disableSAX = 1;
1.123 daniel 7567: ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
1.119 daniel 7568: xmlFree(attname);
7569: xmlFree(attvalue);
1.98 daniel 7570: goto failed;
1.72 daniel 7571: }
7572: }
7573:
7574: /*
7575: * Add the pair to atts
7576: */
7577: if (atts == NULL) {
7578: maxatts = 10;
1.123 daniel 7579: atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
1.72 daniel 7580: if (atts == NULL) {
1.86 daniel 7581: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 7582: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 7583: return(NULL);
1.72 daniel 7584: }
1.127 daniel 7585: } else if (nbatts + 4 > maxatts) {
1.72 daniel 7586: maxatts *= 2;
1.123 daniel 7587: atts = (const xmlChar **) xmlRealloc(atts,
7588: maxatts * sizeof(xmlChar *));
1.72 daniel 7589: if (atts == NULL) {
1.86 daniel 7590: fprintf(stderr, "realloc of %ld byte failed\n",
1.123 daniel 7591: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 7592: return(NULL);
1.72 daniel 7593: }
7594: }
7595: atts[nbatts++] = attname;
7596: atts[nbatts++] = attvalue;
7597: atts[nbatts] = NULL;
7598: atts[nbatts + 1] = NULL;
1.176 daniel 7599: } else {
7600: if (attname != NULL)
7601: xmlFree(attname);
7602: if (attvalue != NULL)
7603: xmlFree(attvalue);
1.72 daniel 7604: }
7605:
1.116 daniel 7606: failed:
1.168 daniel 7607:
7608: if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7609: break;
7610: if (!IS_BLANK(RAW)) {
7611: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7612: ctxt->sax->error(ctxt->userData,
7613: "attributes construct error\n");
7614: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7615: ctxt->wellFormed = 0;
1.180 daniel 7616: ctxt->disableSAX = 1;
1.168 daniel 7617: }
1.42 daniel 7618: SKIP_BLANKS;
1.91 daniel 7619: if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
1.55 daniel 7620: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7621: ctxt->sax->error(ctxt->userData,
1.31 daniel 7622: "xmlParseStartTag: problem parsing attributes\n");
1.123 daniel 7623: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 7624: ctxt->wellFormed = 0;
1.180 daniel 7625: ctxt->disableSAX = 1;
1.29 daniel 7626: break;
1.3 veillard 7627: }
1.91 daniel 7628: GROW;
1.3 veillard 7629: }
7630:
1.43 daniel 7631: /*
1.72 daniel 7632: * SAX: Start of Element !
1.43 daniel 7633: */
1.171 daniel 7634: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
7635: (!ctxt->disableSAX))
1.74 daniel 7636: ctxt->sax->startElement(ctxt->userData, name, atts);
1.43 daniel 7637:
1.72 daniel 7638: if (atts != NULL) {
1.123 daniel 7639: for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
1.119 daniel 7640: xmlFree(atts);
1.72 daniel 7641: }
1.83 daniel 7642: return(name);
1.3 veillard 7643: }
7644:
1.50 daniel 7645: /**
7646: * xmlParseEndTag:
7647: * @ctxt: an XML parser context
7648: *
7649: * parse an end of tag
1.27 daniel 7650: *
7651: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 7652: *
7653: * With namespace
7654: *
1.72 daniel 7655: * [NS 9] ETag ::= '</' QName S? '>'
1.7 veillard 7656: */
7657:
1.55 daniel 7658: void
1.140 daniel 7659: xmlParseEndTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 7660: xmlChar *name;
1.140 daniel 7661: xmlChar *oldname;
1.7 veillard 7662:
1.91 daniel 7663: GROW;
1.152 daniel 7664: if ((RAW != '<') || (NXT(1) != '/')) {
1.55 daniel 7665: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7666: ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
1.59 daniel 7667: ctxt->wellFormed = 0;
1.180 daniel 7668: ctxt->disableSAX = 1;
1.123 daniel 7669: ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
1.27 daniel 7670: return;
7671: }
1.40 daniel 7672: SKIP(2);
1.7 veillard 7673:
1.72 daniel 7674: name = xmlParseName(ctxt);
1.7 veillard 7675:
7676: /*
7677: * We should definitely be at the ending "S? '>'" part
7678: */
1.91 daniel 7679: GROW;
1.42 daniel 7680: SKIP_BLANKS;
1.153 daniel 7681: if ((!IS_CHAR(RAW)) || (RAW != '>')) {
1.55 daniel 7682: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7683: ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
1.123 daniel 7684: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.59 daniel 7685: ctxt->wellFormed = 0;
1.180 daniel 7686: ctxt->disableSAX = 1;
1.7 veillard 7687: } else
1.40 daniel 7688: NEXT;
1.7 veillard 7689:
1.72 daniel 7690: /*
1.98 daniel 7691: * [ WFC: Element Type Match ]
7692: * The Name in an element's end-tag must match the element type in the
7693: * start-tag.
7694: *
1.83 daniel 7695: */
1.147 daniel 7696: if ((name == NULL) || (ctxt->name == NULL) ||
7697: (xmlStrcmp(name, ctxt->name))) {
7698: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
7699: if ((name != NULL) && (ctxt->name != NULL)) {
7700: ctxt->sax->error(ctxt->userData,
7701: "Opening and ending tag mismatch: %s and %s\n",
7702: ctxt->name, name);
7703: } else if (ctxt->name != NULL) {
7704: ctxt->sax->error(ctxt->userData,
7705: "Ending tag eror for: %s\n", ctxt->name);
7706: } else {
7707: ctxt->sax->error(ctxt->userData,
7708: "Ending tag error: internal error ???\n");
7709: }
1.122 daniel 7710:
1.147 daniel 7711: }
1.123 daniel 7712: ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
1.83 daniel 7713: ctxt->wellFormed = 0;
1.180 daniel 7714: ctxt->disableSAX = 1;
1.83 daniel 7715: }
7716:
7717: /*
1.72 daniel 7718: * SAX: End of Tag
7719: */
1.171 daniel 7720: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7721: (!ctxt->disableSAX))
1.74 daniel 7722: ctxt->sax->endElement(ctxt->userData, name);
1.72 daniel 7723:
7724: if (name != NULL)
1.119 daniel 7725: xmlFree(name);
1.140 daniel 7726: oldname = namePop(ctxt);
1.176 daniel 7727: spacePop(ctxt);
1.140 daniel 7728: if (oldname != NULL) {
7729: #ifdef DEBUG_STACK
7730: fprintf(stderr,"Close: popped %s\n", oldname);
7731: #endif
7732: xmlFree(oldname);
7733: }
1.7 veillard 7734: return;
7735: }
7736:
1.50 daniel 7737: /**
7738: * xmlParseCDSect:
7739: * @ctxt: an XML parser context
7740: *
7741: * Parse escaped pure raw content.
1.29 daniel 7742: *
7743: * [18] CDSect ::= CDStart CData CDEnd
7744: *
7745: * [19] CDStart ::= '<![CDATA['
7746: *
7747: * [20] Data ::= (Char* - (Char* ']]>' Char*))
7748: *
7749: * [21] CDEnd ::= ']]>'
1.3 veillard 7750: */
1.55 daniel 7751: void
7752: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.135 daniel 7753: xmlChar *buf = NULL;
7754: int len = 0;
1.140 daniel 7755: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 7756: int r, rl;
7757: int s, sl;
7758: int cur, l;
1.3 veillard 7759:
1.106 daniel 7760: if ((NXT(0) == '<') && (NXT(1) == '!') &&
1.40 daniel 7761: (NXT(2) == '[') && (NXT(3) == 'C') &&
7762: (NXT(4) == 'D') && (NXT(5) == 'A') &&
7763: (NXT(6) == 'T') && (NXT(7) == 'A') &&
7764: (NXT(8) == '[')) {
7765: SKIP(9);
1.29 daniel 7766: } else
1.45 daniel 7767: return;
1.109 daniel 7768:
7769: ctxt->instate = XML_PARSER_CDATA_SECTION;
1.152 daniel 7770: r = CUR_CHAR(rl);
7771: if (!IS_CHAR(r)) {
1.55 daniel 7772: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7773: ctxt->sax->error(ctxt->userData,
1.135 daniel 7774: "CData section not finished\n");
1.59 daniel 7775: ctxt->wellFormed = 0;
1.180 daniel 7776: ctxt->disableSAX = 1;
1.123 daniel 7777: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.109 daniel 7778: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 7779: return;
1.3 veillard 7780: }
1.152 daniel 7781: NEXTL(rl);
7782: s = CUR_CHAR(sl);
7783: if (!IS_CHAR(s)) {
1.55 daniel 7784: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7785: ctxt->sax->error(ctxt->userData,
1.135 daniel 7786: "CData section not finished\n");
1.123 daniel 7787: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.59 daniel 7788: ctxt->wellFormed = 0;
1.180 daniel 7789: ctxt->disableSAX = 1;
1.109 daniel 7790: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 7791: return;
1.3 veillard 7792: }
1.152 daniel 7793: NEXTL(sl);
7794: cur = CUR_CHAR(l);
1.135 daniel 7795: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7796: if (buf == NULL) {
7797: fprintf(stderr, "malloc of %d byte failed\n", size);
7798: return;
7799: }
1.108 veillard 7800: while (IS_CHAR(cur) &&
1.110 daniel 7801: ((r != ']') || (s != ']') || (cur != '>'))) {
1.152 daniel 7802: if (len + 5 >= size) {
1.135 daniel 7803: size *= 2;
1.204 veillard 7804: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 7805: if (buf == NULL) {
7806: fprintf(stderr, "realloc of %d byte failed\n", size);
7807: return;
7808: }
7809: }
1.152 daniel 7810: COPY_BUF(rl,buf,len,r);
1.110 daniel 7811: r = s;
1.152 daniel 7812: rl = sl;
1.110 daniel 7813: s = cur;
1.152 daniel 7814: sl = l;
7815: NEXTL(l);
7816: cur = CUR_CHAR(l);
1.3 veillard 7817: }
1.135 daniel 7818: buf[len] = 0;
1.109 daniel 7819: ctxt->instate = XML_PARSER_CONTENT;
1.152 daniel 7820: if (cur != '>') {
1.55 daniel 7821: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7822: ctxt->sax->error(ctxt->userData,
1.135 daniel 7823: "CData section not finished\n%.50s\n", buf);
1.123 daniel 7824: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.59 daniel 7825: ctxt->wellFormed = 0;
1.180 daniel 7826: ctxt->disableSAX = 1;
1.135 daniel 7827: xmlFree(buf);
1.45 daniel 7828: return;
1.3 veillard 7829: }
1.152 daniel 7830: NEXTL(l);
1.16 daniel 7831:
1.45 daniel 7832: /*
1.135 daniel 7833: * Ok the buffer is to be consumed as cdata.
1.45 daniel 7834: */
1.171 daniel 7835: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.107 daniel 7836: if (ctxt->sax->cdataBlock != NULL)
1.135 daniel 7837: ctxt->sax->cdataBlock(ctxt->userData, buf, len);
1.45 daniel 7838: }
1.135 daniel 7839: xmlFree(buf);
1.2 veillard 7840: }
7841:
1.50 daniel 7842: /**
7843: * xmlParseContent:
7844: * @ctxt: an XML parser context
7845: *
7846: * Parse a content:
1.2 veillard 7847: *
1.27 daniel 7848: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 7849: */
7850:
1.55 daniel 7851: void
7852: xmlParseContent(xmlParserCtxtPtr ctxt) {
1.97 daniel 7853: GROW;
1.176 daniel 7854: while (((RAW != 0) || (ctxt->token != 0)) &&
7855: ((RAW != '<') || (NXT(1) != '/'))) {
1.123 daniel 7856: const xmlChar *test = CUR_PTR;
1.91 daniel 7857: int cons = ctxt->input->consumed;
1.123 daniel 7858: xmlChar tok = ctxt->token;
1.27 daniel 7859:
7860: /*
1.152 daniel 7861: * Handle possible processed charrefs.
7862: */
7863: if (ctxt->token != 0) {
7864: xmlParseCharData(ctxt, 0);
7865: }
7866: /*
1.27 daniel 7867: * First case : a Processing Instruction.
7868: */
1.152 daniel 7869: else if ((RAW == '<') && (NXT(1) == '?')) {
1.27 daniel 7870: xmlParsePI(ctxt);
7871: }
1.72 daniel 7872:
1.27 daniel 7873: /*
7874: * Second case : a CDSection
7875: */
1.152 daniel 7876: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7877: (NXT(2) == '[') && (NXT(3) == 'C') &&
7878: (NXT(4) == 'D') && (NXT(5) == 'A') &&
7879: (NXT(6) == 'T') && (NXT(7) == 'A') &&
7880: (NXT(8) == '[')) {
1.45 daniel 7881: xmlParseCDSect(ctxt);
1.27 daniel 7882: }
1.72 daniel 7883:
1.27 daniel 7884: /*
7885: * Third case : a comment
7886: */
1.152 daniel 7887: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7888: (NXT(2) == '-') && (NXT(3) == '-')) {
1.114 daniel 7889: xmlParseComment(ctxt);
1.97 daniel 7890: ctxt->instate = XML_PARSER_CONTENT;
1.27 daniel 7891: }
1.72 daniel 7892:
1.27 daniel 7893: /*
7894: * Fourth case : a sub-element.
7895: */
1.152 daniel 7896: else if (RAW == '<') {
1.72 daniel 7897: xmlParseElement(ctxt);
1.45 daniel 7898: }
1.72 daniel 7899:
1.45 daniel 7900: /*
1.50 daniel 7901: * Fifth case : a reference. If if has not been resolved,
7902: * parsing returns it's Name, create the node
1.45 daniel 7903: */
1.97 daniel 7904:
1.152 daniel 7905: else if (RAW == '&') {
1.77 daniel 7906: xmlParseReference(ctxt);
1.27 daniel 7907: }
1.72 daniel 7908:
1.27 daniel 7909: /*
7910: * Last case, text. Note that References are handled directly.
7911: */
7912: else {
1.45 daniel 7913: xmlParseCharData(ctxt, 0);
1.3 veillard 7914: }
1.14 veillard 7915:
1.91 daniel 7916: GROW;
1.14 veillard 7917: /*
1.45 daniel 7918: * Pop-up of finished entities.
1.14 veillard 7919: */
1.152 daniel 7920: while ((RAW == 0) && (ctxt->inputNr > 1))
1.69 daniel 7921: xmlPopInput(ctxt);
1.135 daniel 7922: SHRINK;
1.45 daniel 7923:
1.113 daniel 7924: if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
7925: (tok == ctxt->token)) {
1.55 daniel 7926: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7927: ctxt->sax->error(ctxt->userData,
1.59 daniel 7928: "detected an error in element content\n");
1.123 daniel 7929: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.59 daniel 7930: ctxt->wellFormed = 0;
1.180 daniel 7931: ctxt->disableSAX = 1;
1.29 daniel 7932: break;
7933: }
1.3 veillard 7934: }
1.2 veillard 7935: }
7936:
1.50 daniel 7937: /**
7938: * xmlParseElement:
7939: * @ctxt: an XML parser context
7940: *
7941: * parse an XML element, this is highly recursive
1.26 daniel 7942: *
7943: * [39] element ::= EmptyElemTag | STag content ETag
7944: *
1.98 daniel 7945: * [ WFC: Element Type Match ]
7946: * The Name in an element's end-tag must match the element type in the
7947: * start-tag.
7948: *
7949: * [ VC: Element Valid ]
1.117 daniel 7950: * An element is valid if there is a declaration matching elementdecl
1.99 daniel 7951: * where the Name matches the element type and one of the following holds:
7952: * - The declaration matches EMPTY and the element has no content.
7953: * - The declaration matches children and the sequence of child elements
7954: * belongs to the language generated by the regular expression in the
7955: * content model, with optional white space (characters matching the
7956: * nonterminal S) between each pair of child elements.
7957: * - The declaration matches Mixed and the content consists of character
7958: * data and child elements whose types match names in the content model.
7959: * - The declaration matches ANY, and the types of any child elements have
7960: * been declared.
1.2 veillard 7961: */
1.26 daniel 7962:
1.72 daniel 7963: void
1.69 daniel 7964: xmlParseElement(xmlParserCtxtPtr ctxt) {
1.123 daniel 7965: const xmlChar *openTag = CUR_PTR;
7966: xmlChar *name;
1.140 daniel 7967: xmlChar *oldname;
1.32 daniel 7968: xmlParserNodeInfo node_info;
1.118 daniel 7969: xmlNodePtr ret;
1.2 veillard 7970:
1.32 daniel 7971: /* Capture start position */
1.118 daniel 7972: if (ctxt->record_info) {
7973: node_info.begin_pos = ctxt->input->consumed +
7974: (CUR_PTR - ctxt->input->base);
7975: node_info.begin_line = ctxt->input->line;
7976: }
1.32 daniel 7977:
1.176 daniel 7978: if (ctxt->spaceNr == 0)
7979: spacePush(ctxt, -1);
7980: else
7981: spacePush(ctxt, *ctxt->space);
7982:
1.83 daniel 7983: name = xmlParseStartTag(ctxt);
7984: if (name == NULL) {
1.176 daniel 7985: spacePop(ctxt);
1.83 daniel 7986: return;
7987: }
1.140 daniel 7988: namePush(ctxt, name);
1.118 daniel 7989: ret = ctxt->node;
1.2 veillard 7990:
7991: /*
1.99 daniel 7992: * [ VC: Root Element Type ]
7993: * The Name in the document type declaration must match the element
7994: * type of the root element.
7995: */
1.105 daniel 7996: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 7997: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.102 daniel 7998: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
1.99 daniel 7999:
8000: /*
1.2 veillard 8001: * Check for an Empty Element.
8002: */
1.152 daniel 8003: if ((RAW == '/') && (NXT(1) == '>')) {
1.40 daniel 8004: SKIP(2);
1.171 daniel 8005: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8006: (!ctxt->disableSAX))
1.83 daniel 8007: ctxt->sax->endElement(ctxt->userData, name);
1.140 daniel 8008: oldname = namePop(ctxt);
1.176 daniel 8009: spacePop(ctxt);
1.140 daniel 8010: if (oldname != NULL) {
8011: #ifdef DEBUG_STACK
8012: fprintf(stderr,"Close: popped %s\n", oldname);
8013: #endif
8014: xmlFree(oldname);
1.211 veillard 8015: }
8016: if ( ret != NULL && ctxt->record_info ) {
8017: node_info.end_pos = ctxt->input->consumed +
8018: (CUR_PTR - ctxt->input->base);
8019: node_info.end_line = ctxt->input->line;
8020: node_info.node = ret;
8021: xmlParserAddNodeInfo(ctxt, &node_info);
1.140 daniel 8022: }
1.72 daniel 8023: return;
1.2 veillard 8024: }
1.152 daniel 8025: if (RAW == '>') {
1.91 daniel 8026: NEXT;
8027: } else {
1.55 daniel 8028: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8029: ctxt->sax->error(ctxt->userData,
8030: "Couldn't find end of Start Tag\n%.30s\n",
1.57 daniel 8031: openTag);
1.59 daniel 8032: ctxt->wellFormed = 0;
1.180 daniel 8033: ctxt->disableSAX = 1;
1.123 daniel 8034: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.45 daniel 8035:
8036: /*
8037: * end of parsing of this node.
8038: */
8039: nodePop(ctxt);
1.140 daniel 8040: oldname = namePop(ctxt);
1.176 daniel 8041: spacePop(ctxt);
1.140 daniel 8042: if (oldname != NULL) {
8043: #ifdef DEBUG_STACK
8044: fprintf(stderr,"Close: popped %s\n", oldname);
8045: #endif
8046: xmlFree(oldname);
8047: }
1.118 daniel 8048:
8049: /*
8050: * Capture end position and add node
8051: */
8052: if ( ret != NULL && ctxt->record_info ) {
8053: node_info.end_pos = ctxt->input->consumed +
8054: (CUR_PTR - ctxt->input->base);
8055: node_info.end_line = ctxt->input->line;
8056: node_info.node = ret;
8057: xmlParserAddNodeInfo(ctxt, &node_info);
8058: }
1.72 daniel 8059: return;
1.2 veillard 8060: }
8061:
8062: /*
8063: * Parse the content of the element:
8064: */
1.45 daniel 8065: xmlParseContent(ctxt);
1.153 daniel 8066: if (!IS_CHAR(RAW)) {
1.55 daniel 8067: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8068: ctxt->sax->error(ctxt->userData,
1.57 daniel 8069: "Premature end of data in tag %.30s\n", openTag);
1.59 daniel 8070: ctxt->wellFormed = 0;
1.180 daniel 8071: ctxt->disableSAX = 1;
1.123 daniel 8072: ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
1.45 daniel 8073:
8074: /*
8075: * end of parsing of this node.
8076: */
8077: nodePop(ctxt);
1.140 daniel 8078: oldname = namePop(ctxt);
1.176 daniel 8079: spacePop(ctxt);
1.140 daniel 8080: if (oldname != NULL) {
8081: #ifdef DEBUG_STACK
8082: fprintf(stderr,"Close: popped %s\n", oldname);
8083: #endif
8084: xmlFree(oldname);
8085: }
1.72 daniel 8086: return;
1.2 veillard 8087: }
8088:
8089: /*
1.27 daniel 8090: * parse the end of tag: '</' should be here.
1.2 veillard 8091: */
1.140 daniel 8092: xmlParseEndTag(ctxt);
1.118 daniel 8093:
8094: /*
8095: * Capture end position and add node
8096: */
8097: if ( ret != NULL && ctxt->record_info ) {
8098: node_info.end_pos = ctxt->input->consumed +
8099: (CUR_PTR - ctxt->input->base);
8100: node_info.end_line = ctxt->input->line;
8101: node_info.node = ret;
8102: xmlParserAddNodeInfo(ctxt, &node_info);
8103: }
1.2 veillard 8104: }
8105:
1.50 daniel 8106: /**
8107: * xmlParseVersionNum:
8108: * @ctxt: an XML parser context
8109: *
8110: * parse the XML version value.
1.29 daniel 8111: *
8112: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
1.68 daniel 8113: *
8114: * Returns the string giving the XML version number, or NULL
1.29 daniel 8115: */
1.123 daniel 8116: xmlChar *
1.55 daniel 8117: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.135 daniel 8118: xmlChar *buf = NULL;
8119: int len = 0;
8120: int size = 10;
8121: xmlChar cur;
1.29 daniel 8122:
1.135 daniel 8123: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
8124: if (buf == NULL) {
8125: fprintf(stderr, "malloc of %d byte failed\n", size);
8126: return(NULL);
8127: }
8128: cur = CUR;
1.152 daniel 8129: while (((cur >= 'a') && (cur <= 'z')) ||
8130: ((cur >= 'A') && (cur <= 'Z')) ||
8131: ((cur >= '0') && (cur <= '9')) ||
8132: (cur == '_') || (cur == '.') ||
8133: (cur == ':') || (cur == '-')) {
1.135 daniel 8134: if (len + 1 >= size) {
8135: size *= 2;
1.204 veillard 8136: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 8137: if (buf == NULL) {
8138: fprintf(stderr, "realloc of %d byte failed\n", size);
8139: return(NULL);
8140: }
8141: }
8142: buf[len++] = cur;
8143: NEXT;
8144: cur=CUR;
8145: }
8146: buf[len] = 0;
8147: return(buf);
1.29 daniel 8148: }
8149:
1.50 daniel 8150: /**
8151: * xmlParseVersionInfo:
8152: * @ctxt: an XML parser context
8153: *
8154: * parse the XML version.
1.29 daniel 8155: *
8156: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8157: *
8158: * [25] Eq ::= S? '=' S?
1.50 daniel 8159: *
1.68 daniel 8160: * Returns the version string, e.g. "1.0"
1.29 daniel 8161: */
8162:
1.123 daniel 8163: xmlChar *
1.55 daniel 8164: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
1.123 daniel 8165: xmlChar *version = NULL;
8166: const xmlChar *q;
1.29 daniel 8167:
1.152 daniel 8168: if ((RAW == 'v') && (NXT(1) == 'e') &&
1.40 daniel 8169: (NXT(2) == 'r') && (NXT(3) == 's') &&
8170: (NXT(4) == 'i') && (NXT(5) == 'o') &&
8171: (NXT(6) == 'n')) {
8172: SKIP(7);
1.42 daniel 8173: SKIP_BLANKS;
1.152 daniel 8174: if (RAW != '=') {
1.55 daniel 8175: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8176: ctxt->sax->error(ctxt->userData,
8177: "xmlParseVersionInfo : expected '='\n");
1.59 daniel 8178: ctxt->wellFormed = 0;
1.180 daniel 8179: ctxt->disableSAX = 1;
1.123 daniel 8180: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.31 daniel 8181: return(NULL);
8182: }
1.40 daniel 8183: NEXT;
1.42 daniel 8184: SKIP_BLANKS;
1.152 daniel 8185: if (RAW == '"') {
1.40 daniel 8186: NEXT;
8187: q = CUR_PTR;
1.29 daniel 8188: version = xmlParseVersionNum(ctxt);
1.152 daniel 8189: if (RAW != '"') {
1.55 daniel 8190: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8191: ctxt->sax->error(ctxt->userData,
8192: "String not closed\n%.50s\n", q);
1.59 daniel 8193: ctxt->wellFormed = 0;
1.180 daniel 8194: ctxt->disableSAX = 1;
1.123 daniel 8195: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8196: } else
1.40 daniel 8197: NEXT;
1.152 daniel 8198: } else if (RAW == '\''){
1.40 daniel 8199: NEXT;
8200: q = CUR_PTR;
1.29 daniel 8201: version = xmlParseVersionNum(ctxt);
1.152 daniel 8202: if (RAW != '\'') {
1.55 daniel 8203: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8204: ctxt->sax->error(ctxt->userData,
8205: "String not closed\n%.50s\n", q);
1.123 daniel 8206: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.59 daniel 8207: ctxt->wellFormed = 0;
1.180 daniel 8208: ctxt->disableSAX = 1;
1.55 daniel 8209: } else
1.40 daniel 8210: NEXT;
1.31 daniel 8211: } else {
1.55 daniel 8212: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8213: ctxt->sax->error(ctxt->userData,
1.59 daniel 8214: "xmlParseVersionInfo : expected ' or \"\n");
1.122 daniel 8215: ctxt->wellFormed = 0;
1.180 daniel 8216: ctxt->disableSAX = 1;
1.123 daniel 8217: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.29 daniel 8218: }
8219: }
8220: return(version);
8221: }
8222:
1.50 daniel 8223: /**
8224: * xmlParseEncName:
8225: * @ctxt: an XML parser context
8226: *
8227: * parse the XML encoding name
1.29 daniel 8228: *
8229: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1.50 daniel 8230: *
1.68 daniel 8231: * Returns the encoding name value or NULL
1.29 daniel 8232: */
1.123 daniel 8233: xmlChar *
1.55 daniel 8234: xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.135 daniel 8235: xmlChar *buf = NULL;
8236: int len = 0;
8237: int size = 10;
8238: xmlChar cur;
1.29 daniel 8239:
1.135 daniel 8240: cur = CUR;
8241: if (((cur >= 'a') && (cur <= 'z')) ||
8242: ((cur >= 'A') && (cur <= 'Z'))) {
8243: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
8244: if (buf == NULL) {
8245: fprintf(stderr, "malloc of %d byte failed\n", size);
8246: return(NULL);
8247: }
8248:
8249: buf[len++] = cur;
1.40 daniel 8250: NEXT;
1.135 daniel 8251: cur = CUR;
1.152 daniel 8252: while (((cur >= 'a') && (cur <= 'z')) ||
8253: ((cur >= 'A') && (cur <= 'Z')) ||
8254: ((cur >= '0') && (cur <= '9')) ||
8255: (cur == '.') || (cur == '_') ||
8256: (cur == '-')) {
1.135 daniel 8257: if (len + 1 >= size) {
8258: size *= 2;
1.204 veillard 8259: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 8260: if (buf == NULL) {
8261: fprintf(stderr, "realloc of %d byte failed\n", size);
8262: return(NULL);
8263: }
8264: }
8265: buf[len++] = cur;
8266: NEXT;
8267: cur = CUR;
8268: if (cur == 0) {
8269: SHRINK;
8270: GROW;
8271: cur = CUR;
8272: }
8273: }
8274: buf[len] = 0;
1.29 daniel 8275: } else {
1.55 daniel 8276: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8277: ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
1.59 daniel 8278: ctxt->wellFormed = 0;
1.180 daniel 8279: ctxt->disableSAX = 1;
1.123 daniel 8280: ctxt->errNo = XML_ERR_ENCODING_NAME;
1.29 daniel 8281: }
1.135 daniel 8282: return(buf);
1.29 daniel 8283: }
8284:
1.50 daniel 8285: /**
8286: * xmlParseEncodingDecl:
8287: * @ctxt: an XML parser context
8288: *
8289: * parse the XML encoding declaration
1.29 daniel 8290: *
8291: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
1.50 daniel 8292: *
8293: * TODO: this should setup the conversion filters.
8294: *
1.68 daniel 8295: * Returns the encoding value or NULL
1.29 daniel 8296: */
8297:
1.123 daniel 8298: xmlChar *
1.55 daniel 8299: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 8300: xmlChar *encoding = NULL;
8301: const xmlChar *q;
1.29 daniel 8302:
1.42 daniel 8303: SKIP_BLANKS;
1.152 daniel 8304: if ((RAW == 'e') && (NXT(1) == 'n') &&
1.40 daniel 8305: (NXT(2) == 'c') && (NXT(3) == 'o') &&
8306: (NXT(4) == 'd') && (NXT(5) == 'i') &&
8307: (NXT(6) == 'n') && (NXT(7) == 'g')) {
8308: SKIP(8);
1.42 daniel 8309: SKIP_BLANKS;
1.152 daniel 8310: if (RAW != '=') {
1.55 daniel 8311: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8312: ctxt->sax->error(ctxt->userData,
8313: "xmlParseEncodingDecl : expected '='\n");
1.59 daniel 8314: ctxt->wellFormed = 0;
1.180 daniel 8315: ctxt->disableSAX = 1;
1.123 daniel 8316: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.31 daniel 8317: return(NULL);
8318: }
1.40 daniel 8319: NEXT;
1.42 daniel 8320: SKIP_BLANKS;
1.152 daniel 8321: if (RAW == '"') {
1.40 daniel 8322: NEXT;
8323: q = CUR_PTR;
1.29 daniel 8324: encoding = xmlParseEncName(ctxt);
1.152 daniel 8325: if (RAW != '"') {
1.55 daniel 8326: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8327: ctxt->sax->error(ctxt->userData,
8328: "String not closed\n%.50s\n", q);
1.59 daniel 8329: ctxt->wellFormed = 0;
1.180 daniel 8330: ctxt->disableSAX = 1;
1.123 daniel 8331: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8332: } else
1.40 daniel 8333: NEXT;
1.152 daniel 8334: } else if (RAW == '\''){
1.40 daniel 8335: NEXT;
8336: q = CUR_PTR;
1.29 daniel 8337: encoding = xmlParseEncName(ctxt);
1.152 daniel 8338: if (RAW != '\'') {
1.55 daniel 8339: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8340: ctxt->sax->error(ctxt->userData,
8341: "String not closed\n%.50s\n", q);
1.59 daniel 8342: ctxt->wellFormed = 0;
1.180 daniel 8343: ctxt->disableSAX = 1;
1.123 daniel 8344: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8345: } else
1.40 daniel 8346: NEXT;
1.152 daniel 8347: } else if (RAW == '"'){
1.55 daniel 8348: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8349: ctxt->sax->error(ctxt->userData,
1.59 daniel 8350: "xmlParseEncodingDecl : expected ' or \"\n");
8351: ctxt->wellFormed = 0;
1.180 daniel 8352: ctxt->disableSAX = 1;
1.123 daniel 8353: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.29 daniel 8354: }
1.193 daniel 8355: if (encoding != NULL) {
8356: xmlCharEncoding enc;
8357: xmlCharEncodingHandlerPtr handler;
8358:
1.195 daniel 8359: if (ctxt->input->encoding != NULL)
8360: xmlFree((xmlChar *) ctxt->input->encoding);
8361: ctxt->input->encoding = encoding;
8362:
1.193 daniel 8363: enc = xmlParseCharEncoding((const char *) encoding);
8364: /*
8365: * registered set of known encodings
8366: */
8367: if (enc != XML_CHAR_ENCODING_ERROR) {
8368: xmlSwitchEncoding(ctxt, enc);
8369: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8370: xmlFree(encoding);
8371: return(NULL);
8372: }
8373: } else {
8374: /*
8375: * fallback for unknown encodings
8376: */
8377: handler = xmlFindCharEncodingHandler((const char *) encoding);
8378: if (handler != NULL) {
8379: xmlSwitchToEncoding(ctxt, handler);
8380: } else {
8381: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.208 veillard 8382: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8383: ctxt->sax->error(ctxt->userData,
8384: "Unsupported encoding %s\n", encoding);
1.193 daniel 8385: return(NULL);
8386: }
8387: }
8388: }
1.29 daniel 8389: }
8390: return(encoding);
8391: }
8392:
1.50 daniel 8393: /**
8394: * xmlParseSDDecl:
8395: * @ctxt: an XML parser context
8396: *
8397: * parse the XML standalone declaration
1.29 daniel 8398: *
8399: * [32] SDDecl ::= S 'standalone' Eq
8400: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
1.99 daniel 8401: *
8402: * [ VC: Standalone Document Declaration ]
8403: * TODO The standalone document declaration must have the value "no"
8404: * if any external markup declarations contain declarations of:
8405: * - attributes with default values, if elements to which these
8406: * attributes apply appear in the document without specifications
8407: * of values for these attributes, or
8408: * - entities (other than amp, lt, gt, apos, quot), if references
8409: * to those entities appear in the document, or
8410: * - attributes with values subject to normalization, where the
8411: * attribute appears in the document with a value which will change
8412: * as a result of normalization, or
8413: * - element types with element content, if white space occurs directly
8414: * within any instance of those types.
1.68 daniel 8415: *
8416: * Returns 1 if standalone, 0 otherwise
1.29 daniel 8417: */
8418:
1.55 daniel 8419: int
8420: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 8421: int standalone = -1;
8422:
1.42 daniel 8423: SKIP_BLANKS;
1.152 daniel 8424: if ((RAW == 's') && (NXT(1) == 't') &&
1.40 daniel 8425: (NXT(2) == 'a') && (NXT(3) == 'n') &&
8426: (NXT(4) == 'd') && (NXT(5) == 'a') &&
8427: (NXT(6) == 'l') && (NXT(7) == 'o') &&
8428: (NXT(8) == 'n') && (NXT(9) == 'e')) {
8429: SKIP(10);
1.81 daniel 8430: SKIP_BLANKS;
1.152 daniel 8431: if (RAW != '=') {
1.55 daniel 8432: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8433: ctxt->sax->error(ctxt->userData,
1.59 daniel 8434: "XML standalone declaration : expected '='\n");
1.123 daniel 8435: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.59 daniel 8436: ctxt->wellFormed = 0;
1.180 daniel 8437: ctxt->disableSAX = 1;
1.32 daniel 8438: return(standalone);
8439: }
1.40 daniel 8440: NEXT;
1.42 daniel 8441: SKIP_BLANKS;
1.152 daniel 8442: if (RAW == '\''){
1.40 daniel 8443: NEXT;
1.152 daniel 8444: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 8445: standalone = 0;
1.40 daniel 8446: SKIP(2);
1.152 daniel 8447: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 8448: (NXT(2) == 's')) {
1.29 daniel 8449: standalone = 1;
1.40 daniel 8450: SKIP(3);
1.29 daniel 8451: } else {
1.55 daniel 8452: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8453: ctxt->sax->error(ctxt->userData,
8454: "standalone accepts only 'yes' or 'no'\n");
1.123 daniel 8455: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.59 daniel 8456: ctxt->wellFormed = 0;
1.180 daniel 8457: ctxt->disableSAX = 1;
1.29 daniel 8458: }
1.152 daniel 8459: if (RAW != '\'') {
1.55 daniel 8460: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8461: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.123 daniel 8462: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.59 daniel 8463: ctxt->wellFormed = 0;
1.180 daniel 8464: ctxt->disableSAX = 1;
1.55 daniel 8465: } else
1.40 daniel 8466: NEXT;
1.152 daniel 8467: } else if (RAW == '"'){
1.40 daniel 8468: NEXT;
1.152 daniel 8469: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 8470: standalone = 0;
1.40 daniel 8471: SKIP(2);
1.152 daniel 8472: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 8473: (NXT(2) == 's')) {
1.29 daniel 8474: standalone = 1;
1.40 daniel 8475: SKIP(3);
1.29 daniel 8476: } else {
1.55 daniel 8477: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8478: ctxt->sax->error(ctxt->userData,
1.59 daniel 8479: "standalone accepts only 'yes' or 'no'\n");
1.123 daniel 8480: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.59 daniel 8481: ctxt->wellFormed = 0;
1.180 daniel 8482: ctxt->disableSAX = 1;
1.29 daniel 8483: }
1.152 daniel 8484: if (RAW != '"') {
1.55 daniel 8485: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8486: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 8487: ctxt->wellFormed = 0;
1.180 daniel 8488: ctxt->disableSAX = 1;
1.123 daniel 8489: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 8490: } else
1.40 daniel 8491: NEXT;
1.37 daniel 8492: } else {
1.55 daniel 8493: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8494: ctxt->sax->error(ctxt->userData,
8495: "Standalone value not found\n");
1.59 daniel 8496: ctxt->wellFormed = 0;
1.180 daniel 8497: ctxt->disableSAX = 1;
1.123 daniel 8498: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.37 daniel 8499: }
1.29 daniel 8500: }
8501: return(standalone);
8502: }
8503:
1.50 daniel 8504: /**
8505: * xmlParseXMLDecl:
8506: * @ctxt: an XML parser context
8507: *
8508: * parse an XML declaration header
1.29 daniel 8509: *
8510: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 8511: */
8512:
1.55 daniel 8513: void
8514: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 8515: xmlChar *version;
1.1 veillard 8516:
8517: /*
1.19 daniel 8518: * We know that '<?xml' is here.
1.1 veillard 8519: */
1.40 daniel 8520: SKIP(5);
1.1 veillard 8521:
1.153 daniel 8522: if (!IS_BLANK(RAW)) {
1.59 daniel 8523: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8524: ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
1.123 daniel 8525: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 8526: ctxt->wellFormed = 0;
1.180 daniel 8527: ctxt->disableSAX = 1;
1.59 daniel 8528: }
1.42 daniel 8529: SKIP_BLANKS;
1.1 veillard 8530:
8531: /*
1.29 daniel 8532: * We should have the VersionInfo here.
1.1 veillard 8533: */
1.29 daniel 8534: version = xmlParseVersionInfo(ctxt);
8535: if (version == NULL)
1.45 daniel 8536: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.72 daniel 8537: ctxt->version = xmlStrdup(version);
1.119 daniel 8538: xmlFree(version);
1.29 daniel 8539:
8540: /*
8541: * We may have the encoding declaration
8542: */
1.153 daniel 8543: if (!IS_BLANK(RAW)) {
1.152 daniel 8544: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 8545: SKIP(2);
8546: return;
8547: }
8548: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8549: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.123 daniel 8550: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 8551: ctxt->wellFormed = 0;
1.180 daniel 8552: ctxt->disableSAX = 1;
1.59 daniel 8553: }
1.195 daniel 8554: xmlParseEncodingDecl(ctxt);
1.193 daniel 8555: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8556: /*
8557: * The XML REC instructs us to stop parsing right here
8558: */
8559: return;
8560: }
1.1 veillard 8561:
8562: /*
1.29 daniel 8563: * We may have the standalone status.
1.1 veillard 8564: */
1.164 daniel 8565: if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
1.152 daniel 8566: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 8567: SKIP(2);
8568: return;
8569: }
8570: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8571: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 8572: ctxt->wellFormed = 0;
1.180 daniel 8573: ctxt->disableSAX = 1;
1.123 daniel 8574: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 8575: }
8576: SKIP_BLANKS;
1.167 daniel 8577: ctxt->input->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 8578:
1.42 daniel 8579: SKIP_BLANKS;
1.152 daniel 8580: if ((RAW == '?') && (NXT(1) == '>')) {
1.40 daniel 8581: SKIP(2);
1.152 daniel 8582: } else if (RAW == '>') {
1.31 daniel 8583: /* Deprecated old WD ... */
1.55 daniel 8584: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8585: ctxt->sax->error(ctxt->userData,
8586: "XML declaration must end-up with '?>'\n");
1.59 daniel 8587: ctxt->wellFormed = 0;
1.180 daniel 8588: ctxt->disableSAX = 1;
1.123 daniel 8589: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.40 daniel 8590: NEXT;
1.29 daniel 8591: } else {
1.55 daniel 8592: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 8593: ctxt->sax->error(ctxt->userData,
8594: "parsing XML declaration: '?>' expected\n");
1.59 daniel 8595: ctxt->wellFormed = 0;
1.180 daniel 8596: ctxt->disableSAX = 1;
1.123 daniel 8597: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.40 daniel 8598: MOVETO_ENDTAG(CUR_PTR);
8599: NEXT;
1.29 daniel 8600: }
1.1 veillard 8601: }
8602:
1.50 daniel 8603: /**
8604: * xmlParseMisc:
8605: * @ctxt: an XML parser context
8606: *
8607: * parse an XML Misc* optionnal field.
1.21 daniel 8608: *
1.22 daniel 8609: * [27] Misc ::= Comment | PI | S
1.1 veillard 8610: */
8611:
1.55 daniel 8612: void
8613: xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.152 daniel 8614: while (((RAW == '<') && (NXT(1) == '?')) ||
8615: ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 8616: (NXT(2) == '-') && (NXT(3) == '-')) ||
8617: IS_BLANK(CUR)) {
1.152 daniel 8618: if ((RAW == '<') && (NXT(1) == '?')) {
1.16 daniel 8619: xmlParsePI(ctxt);
1.40 daniel 8620: } else if (IS_BLANK(CUR)) {
8621: NEXT;
1.1 veillard 8622: } else
1.114 daniel 8623: xmlParseComment(ctxt);
1.1 veillard 8624: }
8625: }
8626:
1.50 daniel 8627: /**
1.181 daniel 8628: * xmlParseDocument:
1.50 daniel 8629: * @ctxt: an XML parser context
8630: *
8631: * parse an XML document (and build a tree if using the standard SAX
8632: * interface).
1.21 daniel 8633: *
1.22 daniel 8634: * [1] document ::= prolog element Misc*
1.29 daniel 8635: *
8636: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.50 daniel 8637: *
1.68 daniel 8638: * Returns 0, -1 in case of error. the parser context is augmented
1.50 daniel 8639: * as a result of the parsing.
1.1 veillard 8640: */
8641:
1.55 daniel 8642: int
8643: xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.156 daniel 8644: xmlChar start[4];
8645: xmlCharEncoding enc;
8646:
1.45 daniel 8647: xmlDefaultSAXHandlerInit();
8648:
1.91 daniel 8649: GROW;
8650:
1.14 veillard 8651: /*
1.44 daniel 8652: * SAX: beginning of the document processing.
8653: */
1.72 daniel 8654: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
1.74 daniel 8655: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
1.44 daniel 8656:
1.156 daniel 8657: /*
8658: * Get the 4 first bytes and decode the charset
8659: * if enc != XML_CHAR_ENCODING_NONE
8660: * plug some encoding conversion routines.
8661: */
8662: start[0] = RAW;
8663: start[1] = NXT(1);
8664: start[2] = NXT(2);
8665: start[3] = NXT(3);
8666: enc = xmlDetectCharEncoding(start, 4);
8667: if (enc != XML_CHAR_ENCODING_NONE) {
8668: xmlSwitchEncoding(ctxt, enc);
8669: }
8670:
1.1 veillard 8671:
1.59 daniel 8672: if (CUR == 0) {
8673: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8674: ctxt->sax->error(ctxt->userData, "Document is empty\n");
1.123 daniel 8675: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 8676: ctxt->wellFormed = 0;
1.180 daniel 8677: ctxt->disableSAX = 1;
1.59 daniel 8678: }
1.1 veillard 8679:
8680: /*
8681: * Check for the XMLDecl in the Prolog.
8682: */
1.91 daniel 8683: GROW;
1.152 daniel 8684: if ((RAW == '<') && (NXT(1) == '?') &&
1.40 daniel 8685: (NXT(2) == 'x') && (NXT(3) == 'm') &&
1.142 daniel 8686: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.196 daniel 8687:
8688: /*
8689: * Note that we will switch encoding on the fly.
8690: */
1.19 daniel 8691: xmlParseXMLDecl(ctxt);
1.193 daniel 8692: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8693: /*
8694: * The XML REC instructs us to stop parsing right here
8695: */
8696: return(-1);
8697: }
1.167 daniel 8698: ctxt->standalone = ctxt->input->standalone;
1.42 daniel 8699: SKIP_BLANKS;
1.1 veillard 8700: } else {
1.72 daniel 8701: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.1 veillard 8702: }
1.171 daniel 8703: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
1.74 daniel 8704: ctxt->sax->startDocument(ctxt->userData);
1.1 veillard 8705:
8706: /*
8707: * The Misc part of the Prolog
8708: */
1.91 daniel 8709: GROW;
1.16 daniel 8710: xmlParseMisc(ctxt);
1.1 veillard 8711:
8712: /*
1.29 daniel 8713: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 8714: * (doctypedecl Misc*)?
8715: */
1.91 daniel 8716: GROW;
1.152 daniel 8717: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 8718: (NXT(2) == 'D') && (NXT(3) == 'O') &&
8719: (NXT(4) == 'C') && (NXT(5) == 'T') &&
8720: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
8721: (NXT(8) == 'E')) {
1.165 daniel 8722:
1.166 daniel 8723: ctxt->inSubset = 1;
1.22 daniel 8724: xmlParseDocTypeDecl(ctxt);
1.152 daniel 8725: if (RAW == '[') {
1.140 daniel 8726: ctxt->instate = XML_PARSER_DTD;
8727: xmlParseInternalSubset(ctxt);
8728: }
1.165 daniel 8729:
8730: /*
8731: * Create and update the external subset.
8732: */
1.166 daniel 8733: ctxt->inSubset = 2;
1.171 daniel 8734: if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8735: (!ctxt->disableSAX))
1.165 daniel 8736: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8737: ctxt->extSubSystem, ctxt->extSubURI);
1.166 daniel 8738: ctxt->inSubset = 0;
1.165 daniel 8739:
8740:
1.96 daniel 8741: ctxt->instate = XML_PARSER_PROLOG;
1.22 daniel 8742: xmlParseMisc(ctxt);
1.21 daniel 8743: }
8744:
8745: /*
8746: * Time to start parsing the tree itself
1.1 veillard 8747: */
1.91 daniel 8748: GROW;
1.152 daniel 8749: if (RAW != '<') {
1.59 daniel 8750: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 8751: ctxt->sax->error(ctxt->userData,
1.151 daniel 8752: "Start tag expected, '<' not found\n");
1.140 daniel 8753: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 8754: ctxt->wellFormed = 0;
1.180 daniel 8755: ctxt->disableSAX = 1;
1.140 daniel 8756: ctxt->instate = XML_PARSER_EOF;
8757: } else {
8758: ctxt->instate = XML_PARSER_CONTENT;
8759: xmlParseElement(ctxt);
8760: ctxt->instate = XML_PARSER_EPILOG;
8761:
8762:
8763: /*
8764: * The Misc part at the end
8765: */
8766: xmlParseMisc(ctxt);
8767:
1.152 daniel 8768: if (RAW != 0) {
1.140 daniel 8769: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8770: ctxt->sax->error(ctxt->userData,
8771: "Extra content at the end of the document\n");
8772: ctxt->wellFormed = 0;
1.180 daniel 8773: ctxt->disableSAX = 1;
1.140 daniel 8774: ctxt->errNo = XML_ERR_DOCUMENT_END;
8775: }
8776: ctxt->instate = XML_PARSER_EOF;
1.59 daniel 8777: }
8778:
1.44 daniel 8779: /*
8780: * SAX: end of the document processing.
8781: */
1.171 daniel 8782: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8783: (!ctxt->disableSAX))
1.74 daniel 8784: ctxt->sax->endDocument(ctxt->userData);
1.151 daniel 8785:
1.59 daniel 8786: if (! ctxt->wellFormed) return(-1);
1.16 daniel 8787: return(0);
8788: }
8789:
1.98 daniel 8790: /************************************************************************
8791: * *
1.128 daniel 8792: * Progressive parsing interfaces *
8793: * *
8794: ************************************************************************/
8795:
8796: /**
8797: * xmlParseLookupSequence:
8798: * @ctxt: an XML parser context
8799: * @first: the first char to lookup
1.140 daniel 8800: * @next: the next char to lookup or zero
8801: * @third: the next char to lookup or zero
1.128 daniel 8802: *
1.140 daniel 8803: * Try to find if a sequence (first, next, third) or just (first next) or
8804: * (first) is available in the input stream.
8805: * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8806: * to avoid rescanning sequences of bytes, it DOES change the state of the
8807: * parser, do not use liberally.
1.128 daniel 8808: *
1.140 daniel 8809: * Returns the index to the current parsing point if the full sequence
8810: * is available, -1 otherwise.
1.128 daniel 8811: */
8812: int
1.140 daniel 8813: xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8814: xmlChar next, xmlChar third) {
8815: int base, len;
8816: xmlParserInputPtr in;
8817: const xmlChar *buf;
8818:
8819: in = ctxt->input;
8820: if (in == NULL) return(-1);
8821: base = in->cur - in->base;
8822: if (base < 0) return(-1);
8823: if (ctxt->checkIndex > base)
8824: base = ctxt->checkIndex;
8825: if (in->buf == NULL) {
8826: buf = in->base;
8827: len = in->length;
8828: } else {
8829: buf = in->buf->buffer->content;
8830: len = in->buf->buffer->use;
8831: }
8832: /* take into account the sequence length */
8833: if (third) len -= 2;
8834: else if (next) len --;
8835: for (;base < len;base++) {
8836: if (buf[base] == first) {
8837: if (third != 0) {
8838: if ((buf[base + 1] != next) ||
8839: (buf[base + 2] != third)) continue;
8840: } else if (next != 0) {
8841: if (buf[base + 1] != next) continue;
8842: }
8843: ctxt->checkIndex = 0;
8844: #ifdef DEBUG_PUSH
8845: if (next == 0)
8846: fprintf(stderr, "PP: lookup '%c' found at %d\n",
8847: first, base);
8848: else if (third == 0)
8849: fprintf(stderr, "PP: lookup '%c%c' found at %d\n",
8850: first, next, base);
8851: else
8852: fprintf(stderr, "PP: lookup '%c%c%c' found at %d\n",
8853: first, next, third, base);
8854: #endif
8855: return(base - (in->cur - in->base));
8856: }
8857: }
8858: ctxt->checkIndex = base;
8859: #ifdef DEBUG_PUSH
8860: if (next == 0)
8861: fprintf(stderr, "PP: lookup '%c' failed\n", first);
8862: else if (third == 0)
8863: fprintf(stderr, "PP: lookup '%c%c' failed\n", first, next);
8864: else
8865: fprintf(stderr, "PP: lookup '%c%c%c' failed\n", first, next, third);
8866: #endif
8867: return(-1);
1.128 daniel 8868: }
8869:
8870: /**
1.143 daniel 8871: * xmlParseTryOrFinish:
1.128 daniel 8872: * @ctxt: an XML parser context
1.143 daniel 8873: * @terminate: last chunk indicator
1.128 daniel 8874: *
8875: * Try to progress on parsing
8876: *
8877: * Returns zero if no parsing was possible
8878: */
8879: int
1.143 daniel 8880: xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
1.128 daniel 8881: int ret = 0;
1.140 daniel 8882: int avail;
8883: xmlChar cur, next;
8884:
8885: #ifdef DEBUG_PUSH
8886: switch (ctxt->instate) {
8887: case XML_PARSER_EOF:
8888: fprintf(stderr, "PP: try EOF\n"); break;
8889: case XML_PARSER_START:
8890: fprintf(stderr, "PP: try START\n"); break;
8891: case XML_PARSER_MISC:
8892: fprintf(stderr, "PP: try MISC\n");break;
8893: case XML_PARSER_COMMENT:
8894: fprintf(stderr, "PP: try COMMENT\n");break;
8895: case XML_PARSER_PROLOG:
8896: fprintf(stderr, "PP: try PROLOG\n");break;
8897: case XML_PARSER_START_TAG:
8898: fprintf(stderr, "PP: try START_TAG\n");break;
8899: case XML_PARSER_CONTENT:
8900: fprintf(stderr, "PP: try CONTENT\n");break;
8901: case XML_PARSER_CDATA_SECTION:
8902: fprintf(stderr, "PP: try CDATA_SECTION\n");break;
8903: case XML_PARSER_END_TAG:
8904: fprintf(stderr, "PP: try END_TAG\n");break;
8905: case XML_PARSER_ENTITY_DECL:
8906: fprintf(stderr, "PP: try ENTITY_DECL\n");break;
8907: case XML_PARSER_ENTITY_VALUE:
8908: fprintf(stderr, "PP: try ENTITY_VALUE\n");break;
8909: case XML_PARSER_ATTRIBUTE_VALUE:
8910: fprintf(stderr, "PP: try ATTRIBUTE_VALUE\n");break;
8911: case XML_PARSER_DTD:
8912: fprintf(stderr, "PP: try DTD\n");break;
8913: case XML_PARSER_EPILOG:
8914: fprintf(stderr, "PP: try EPILOG\n");break;
8915: case XML_PARSER_PI:
8916: fprintf(stderr, "PP: try PI\n");break;
8917: }
8918: #endif
1.128 daniel 8919:
8920: while (1) {
1.140 daniel 8921: /*
8922: * Pop-up of finished entities.
8923: */
1.152 daniel 8924: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 8925: xmlPopInput(ctxt);
8926:
1.184 daniel 8927: if (ctxt->input ==NULL) break;
8928: if (ctxt->input->buf == NULL)
8929: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8930: else
1.184 daniel 8931: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8932: if (avail < 1)
8933: goto done;
1.128 daniel 8934: switch (ctxt->instate) {
8935: case XML_PARSER_EOF:
1.140 daniel 8936: /*
8937: * Document parsing is done !
8938: */
8939: goto done;
8940: case XML_PARSER_START:
8941: /*
8942: * Very first chars read from the document flow.
8943: */
1.184 daniel 8944: cur = ctxt->input->cur[0];
1.140 daniel 8945: if (IS_BLANK(cur)) {
8946: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8947: ctxt->sax->setDocumentLocator(ctxt->userData,
8948: &xmlDefaultSAXLocator);
8949: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8950: ctxt->sax->error(ctxt->userData,
8951: "Extra spaces at the beginning of the document are not allowed\n");
8952: ctxt->errNo = XML_ERR_DOCUMENT_START;
8953: ctxt->wellFormed = 0;
1.180 daniel 8954: ctxt->disableSAX = 1;
1.140 daniel 8955: SKIP_BLANKS;
8956: ret++;
1.184 daniel 8957: if (ctxt->input->buf == NULL)
8958: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8959: else
1.184 daniel 8960: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 8961: }
8962: if (avail < 2)
8963: goto done;
8964:
1.184 daniel 8965: cur = ctxt->input->cur[0];
8966: next = ctxt->input->cur[1];
1.140 daniel 8967: if (cur == 0) {
8968: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8969: ctxt->sax->setDocumentLocator(ctxt->userData,
8970: &xmlDefaultSAXLocator);
8971: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8972: ctxt->sax->error(ctxt->userData, "Document is empty\n");
8973: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8974: ctxt->wellFormed = 0;
1.180 daniel 8975: ctxt->disableSAX = 1;
1.140 daniel 8976: ctxt->instate = XML_PARSER_EOF;
8977: #ifdef DEBUG_PUSH
8978: fprintf(stderr, "PP: entering EOF\n");
8979: #endif
8980: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8981: ctxt->sax->endDocument(ctxt->userData);
8982: goto done;
8983: }
8984: if ((cur == '<') && (next == '?')) {
8985: /* PI or XML decl */
8986: if (avail < 5) return(ret);
1.143 daniel 8987: if ((!terminate) &&
8988: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 8989: return(ret);
8990: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8991: ctxt->sax->setDocumentLocator(ctxt->userData,
8992: &xmlDefaultSAXLocator);
1.184 daniel 8993: if ((ctxt->input->cur[2] == 'x') &&
8994: (ctxt->input->cur[3] == 'm') &&
8995: (ctxt->input->cur[4] == 'l') &&
8996: (IS_BLANK(ctxt->input->cur[5]))) {
1.140 daniel 8997: ret += 5;
8998: #ifdef DEBUG_PUSH
8999: fprintf(stderr, "PP: Parsing XML Decl\n");
9000: #endif
9001: xmlParseXMLDecl(ctxt);
1.193 daniel 9002: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9003: /*
9004: * The XML REC instructs us to stop parsing right
9005: * here
9006: */
9007: ctxt->instate = XML_PARSER_EOF;
9008: return(0);
9009: }
1.167 daniel 9010: ctxt->standalone = ctxt->input->standalone;
1.164 daniel 9011: if ((ctxt->encoding == NULL) &&
9012: (ctxt->input->encoding != NULL))
9013: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1.171 daniel 9014: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9015: (!ctxt->disableSAX))
1.140 daniel 9016: ctxt->sax->startDocument(ctxt->userData);
9017: ctxt->instate = XML_PARSER_MISC;
9018: #ifdef DEBUG_PUSH
9019: fprintf(stderr, "PP: entering MISC\n");
9020: #endif
9021: } else {
9022: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 daniel 9023: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9024: (!ctxt->disableSAX))
1.140 daniel 9025: ctxt->sax->startDocument(ctxt->userData);
9026: ctxt->instate = XML_PARSER_MISC;
9027: #ifdef DEBUG_PUSH
9028: fprintf(stderr, "PP: entering MISC\n");
9029: #endif
9030: }
9031: } else {
9032: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9033: ctxt->sax->setDocumentLocator(ctxt->userData,
9034: &xmlDefaultSAXLocator);
9035: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 daniel 9036: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9037: (!ctxt->disableSAX))
1.140 daniel 9038: ctxt->sax->startDocument(ctxt->userData);
9039: ctxt->instate = XML_PARSER_MISC;
9040: #ifdef DEBUG_PUSH
9041: fprintf(stderr, "PP: entering MISC\n");
9042: #endif
9043: }
9044: break;
9045: case XML_PARSER_MISC:
9046: SKIP_BLANKS;
1.184 daniel 9047: if (ctxt->input->buf == NULL)
9048: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9049: else
1.184 daniel 9050: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9051: if (avail < 2)
9052: goto done;
1.184 daniel 9053: cur = ctxt->input->cur[0];
9054: next = ctxt->input->cur[1];
1.140 daniel 9055: if ((cur == '<') && (next == '?')) {
1.143 daniel 9056: if ((!terminate) &&
9057: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 9058: goto done;
9059: #ifdef DEBUG_PUSH
9060: fprintf(stderr, "PP: Parsing PI\n");
9061: #endif
9062: xmlParsePI(ctxt);
9063: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 9064: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 9065: if ((!terminate) &&
9066: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 9067: goto done;
9068: #ifdef DEBUG_PUSH
9069: fprintf(stderr, "PP: Parsing Comment\n");
9070: #endif
9071: xmlParseComment(ctxt);
9072: ctxt->instate = XML_PARSER_MISC;
9073: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 9074: (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
9075: (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
9076: (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
9077: (ctxt->input->cur[8] == 'E')) {
1.143 daniel 9078: if ((!terminate) &&
9079: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 9080: goto done;
9081: #ifdef DEBUG_PUSH
9082: fprintf(stderr, "PP: Parsing internal subset\n");
9083: #endif
1.166 daniel 9084: ctxt->inSubset = 1;
1.140 daniel 9085: xmlParseDocTypeDecl(ctxt);
1.152 daniel 9086: if (RAW == '[') {
1.140 daniel 9087: ctxt->instate = XML_PARSER_DTD;
9088: #ifdef DEBUG_PUSH
9089: fprintf(stderr, "PP: entering DTD\n");
9090: #endif
9091: } else {
1.166 daniel 9092: /*
9093: * Create and update the external subset.
9094: */
9095: ctxt->inSubset = 2;
1.171 daniel 9096: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 9097: (ctxt->sax->externalSubset != NULL))
9098: ctxt->sax->externalSubset(ctxt->userData,
9099: ctxt->intSubName, ctxt->extSubSystem,
9100: ctxt->extSubURI);
9101: ctxt->inSubset = 0;
1.140 daniel 9102: ctxt->instate = XML_PARSER_PROLOG;
9103: #ifdef DEBUG_PUSH
9104: fprintf(stderr, "PP: entering PROLOG\n");
9105: #endif
9106: }
9107: } else if ((cur == '<') && (next == '!') &&
9108: (avail < 9)) {
9109: goto done;
9110: } else {
9111: ctxt->instate = XML_PARSER_START_TAG;
9112: #ifdef DEBUG_PUSH
9113: fprintf(stderr, "PP: entering START_TAG\n");
9114: #endif
9115: }
9116: break;
1.128 daniel 9117: case XML_PARSER_PROLOG:
1.140 daniel 9118: SKIP_BLANKS;
1.184 daniel 9119: if (ctxt->input->buf == NULL)
9120: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9121: else
1.184 daniel 9122: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9123: if (avail < 2)
9124: goto done;
1.184 daniel 9125: cur = ctxt->input->cur[0];
9126: next = ctxt->input->cur[1];
1.140 daniel 9127: if ((cur == '<') && (next == '?')) {
1.143 daniel 9128: if ((!terminate) &&
9129: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 9130: goto done;
9131: #ifdef DEBUG_PUSH
9132: fprintf(stderr, "PP: Parsing PI\n");
9133: #endif
9134: xmlParsePI(ctxt);
9135: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 9136: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 9137: if ((!terminate) &&
9138: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 9139: goto done;
9140: #ifdef DEBUG_PUSH
9141: fprintf(stderr, "PP: Parsing Comment\n");
9142: #endif
9143: xmlParseComment(ctxt);
9144: ctxt->instate = XML_PARSER_PROLOG;
9145: } else if ((cur == '<') && (next == '!') &&
9146: (avail < 4)) {
9147: goto done;
9148: } else {
9149: ctxt->instate = XML_PARSER_START_TAG;
9150: #ifdef DEBUG_PUSH
9151: fprintf(stderr, "PP: entering START_TAG\n");
9152: #endif
9153: }
9154: break;
9155: case XML_PARSER_EPILOG:
9156: SKIP_BLANKS;
1.184 daniel 9157: if (ctxt->input->buf == NULL)
9158: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9159: else
1.184 daniel 9160: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 9161: if (avail < 2)
9162: goto done;
1.184 daniel 9163: cur = ctxt->input->cur[0];
9164: next = ctxt->input->cur[1];
1.140 daniel 9165: if ((cur == '<') && (next == '?')) {
1.143 daniel 9166: if ((!terminate) &&
9167: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 9168: goto done;
9169: #ifdef DEBUG_PUSH
9170: fprintf(stderr, "PP: Parsing PI\n");
9171: #endif
9172: xmlParsePI(ctxt);
9173: ctxt->instate = XML_PARSER_EPILOG;
9174: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 9175: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 9176: if ((!terminate) &&
9177: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 9178: goto done;
9179: #ifdef DEBUG_PUSH
9180: fprintf(stderr, "PP: Parsing Comment\n");
9181: #endif
9182: xmlParseComment(ctxt);
9183: ctxt->instate = XML_PARSER_EPILOG;
9184: } else if ((cur == '<') && (next == '!') &&
9185: (avail < 4)) {
9186: goto done;
9187: } else {
9188: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9189: ctxt->sax->error(ctxt->userData,
9190: "Extra content at the end of the document\n");
9191: ctxt->wellFormed = 0;
1.180 daniel 9192: ctxt->disableSAX = 1;
1.140 daniel 9193: ctxt->errNo = XML_ERR_DOCUMENT_END;
9194: ctxt->instate = XML_PARSER_EOF;
9195: #ifdef DEBUG_PUSH
9196: fprintf(stderr, "PP: entering EOF\n");
9197: #endif
1.171 daniel 9198: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9199: (!ctxt->disableSAX))
1.140 daniel 9200: ctxt->sax->endDocument(ctxt->userData);
9201: goto done;
9202: }
9203: break;
9204: case XML_PARSER_START_TAG: {
9205: xmlChar *name, *oldname;
9206:
1.184 daniel 9207: if ((avail < 2) && (ctxt->inputNr == 1))
1.140 daniel 9208: goto done;
1.184 daniel 9209: cur = ctxt->input->cur[0];
1.140 daniel 9210: if (cur != '<') {
9211: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9212: ctxt->sax->error(ctxt->userData,
9213: "Start tag expect, '<' not found\n");
9214: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
9215: ctxt->wellFormed = 0;
1.180 daniel 9216: ctxt->disableSAX = 1;
1.140 daniel 9217: ctxt->instate = XML_PARSER_EOF;
9218: #ifdef DEBUG_PUSH
9219: fprintf(stderr, "PP: entering EOF\n");
9220: #endif
1.171 daniel 9221: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9222: (!ctxt->disableSAX))
1.140 daniel 9223: ctxt->sax->endDocument(ctxt->userData);
9224: goto done;
9225: }
1.143 daniel 9226: if ((!terminate) &&
9227: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 9228: goto done;
1.176 daniel 9229: if (ctxt->spaceNr == 0)
9230: spacePush(ctxt, -1);
9231: else
9232: spacePush(ctxt, *ctxt->space);
1.140 daniel 9233: name = xmlParseStartTag(ctxt);
9234: if (name == NULL) {
1.176 daniel 9235: spacePop(ctxt);
1.140 daniel 9236: ctxt->instate = XML_PARSER_EOF;
9237: #ifdef DEBUG_PUSH
9238: fprintf(stderr, "PP: entering EOF\n");
9239: #endif
1.171 daniel 9240: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9241: (!ctxt->disableSAX))
1.140 daniel 9242: ctxt->sax->endDocument(ctxt->userData);
9243: goto done;
9244: }
9245: namePush(ctxt, xmlStrdup(name));
9246:
9247: /*
9248: * [ VC: Root Element Type ]
9249: * The Name in the document type declaration must match
9250: * the element type of the root element.
9251: */
9252: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 9253: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.140 daniel 9254: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9255:
9256: /*
9257: * Check for an Empty Element.
9258: */
1.152 daniel 9259: if ((RAW == '/') && (NXT(1) == '>')) {
1.140 daniel 9260: SKIP(2);
1.171 daniel 9261: if ((ctxt->sax != NULL) &&
9262: (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
1.140 daniel 9263: ctxt->sax->endElement(ctxt->userData, name);
9264: xmlFree(name);
9265: oldname = namePop(ctxt);
1.176 daniel 9266: spacePop(ctxt);
1.140 daniel 9267: if (oldname != NULL) {
9268: #ifdef DEBUG_STACK
9269: fprintf(stderr,"Close: popped %s\n", oldname);
9270: #endif
9271: xmlFree(oldname);
9272: }
9273: if (ctxt->name == NULL) {
9274: ctxt->instate = XML_PARSER_EPILOG;
9275: #ifdef DEBUG_PUSH
9276: fprintf(stderr, "PP: entering EPILOG\n");
9277: #endif
9278: } else {
9279: ctxt->instate = XML_PARSER_CONTENT;
9280: #ifdef DEBUG_PUSH
9281: fprintf(stderr, "PP: entering CONTENT\n");
9282: #endif
9283: }
9284: break;
9285: }
1.152 daniel 9286: if (RAW == '>') {
1.140 daniel 9287: NEXT;
9288: } else {
9289: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9290: ctxt->sax->error(ctxt->userData,
9291: "Couldn't find end of Start Tag %s\n",
9292: name);
9293: ctxt->wellFormed = 0;
1.180 daniel 9294: ctxt->disableSAX = 1;
1.140 daniel 9295: ctxt->errNo = XML_ERR_GT_REQUIRED;
9296:
9297: /*
9298: * end of parsing of this node.
9299: */
9300: nodePop(ctxt);
9301: oldname = namePop(ctxt);
1.176 daniel 9302: spacePop(ctxt);
1.140 daniel 9303: if (oldname != NULL) {
9304: #ifdef DEBUG_STACK
9305: fprintf(stderr,"Close: popped %s\n", oldname);
9306: #endif
9307: xmlFree(oldname);
9308: }
9309: }
9310: xmlFree(name);
9311: ctxt->instate = XML_PARSER_CONTENT;
9312: #ifdef DEBUG_PUSH
9313: fprintf(stderr, "PP: entering CONTENT\n");
9314: #endif
9315: break;
9316: }
1.128 daniel 9317: case XML_PARSER_CONTENT:
1.140 daniel 9318: /*
9319: * Handle preparsed entities and charRef
9320: */
9321: if (ctxt->token != 0) {
9322: xmlChar cur[2] = { 0 , 0 } ;
9323:
9324: cur[0] = (xmlChar) ctxt->token;
1.171 daniel 9325: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9326: (ctxt->sax->characters != NULL))
1.140 daniel 9327: ctxt->sax->characters(ctxt->userData, cur, 1);
9328: ctxt->token = 0;
9329: }
1.184 daniel 9330: if ((avail < 2) && (ctxt->inputNr == 1))
1.140 daniel 9331: goto done;
1.184 daniel 9332: cur = ctxt->input->cur[0];
9333: next = ctxt->input->cur[1];
1.140 daniel 9334: if ((cur == '<') && (next == '?')) {
1.143 daniel 9335: if ((!terminate) &&
9336: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 9337: goto done;
9338: #ifdef DEBUG_PUSH
9339: fprintf(stderr, "PP: Parsing PI\n");
9340: #endif
9341: xmlParsePI(ctxt);
9342: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 9343: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 9344: if ((!terminate) &&
9345: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 9346: goto done;
9347: #ifdef DEBUG_PUSH
9348: fprintf(stderr, "PP: Parsing Comment\n");
9349: #endif
9350: xmlParseComment(ctxt);
9351: ctxt->instate = XML_PARSER_CONTENT;
1.184 daniel 9352: } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9353: (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
9354: (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
9355: (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
9356: (ctxt->input->cur[8] == '[')) {
1.140 daniel 9357: SKIP(9);
9358: ctxt->instate = XML_PARSER_CDATA_SECTION;
9359: #ifdef DEBUG_PUSH
9360: fprintf(stderr, "PP: entering CDATA_SECTION\n");
9361: #endif
9362: break;
9363: } else if ((cur == '<') && (next == '!') &&
9364: (avail < 9)) {
9365: goto done;
9366: } else if ((cur == '<') && (next == '/')) {
9367: ctxt->instate = XML_PARSER_END_TAG;
9368: #ifdef DEBUG_PUSH
9369: fprintf(stderr, "PP: entering END_TAG\n");
9370: #endif
9371: break;
9372: } else if (cur == '<') {
9373: ctxt->instate = XML_PARSER_START_TAG;
9374: #ifdef DEBUG_PUSH
9375: fprintf(stderr, "PP: entering START_TAG\n");
9376: #endif
9377: break;
9378: } else if (cur == '&') {
1.143 daniel 9379: if ((!terminate) &&
9380: (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
1.140 daniel 9381: goto done;
9382: #ifdef DEBUG_PUSH
9383: fprintf(stderr, "PP: Parsing Reference\n");
9384: #endif
9385: /* TODO: check generation of subtrees if noent !!! */
9386: xmlParseReference(ctxt);
9387: } else {
1.156 daniel 9388: /* TODO Avoid the extra copy, handle directly !!! */
1.140 daniel 9389: /*
1.181 daniel 9390: * Goal of the following test is:
1.140 daniel 9391: * - minimize calls to the SAX 'character' callback
9392: * when they are mergeable
9393: * - handle an problem for isBlank when we only parse
9394: * a sequence of blank chars and the next one is
9395: * not available to check against '<' presence.
9396: * - tries to homogenize the differences in SAX
9397: * callbacks beween the push and pull versions
9398: * of the parser.
9399: */
9400: if ((ctxt->inputNr == 1) &&
9401: (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
1.143 daniel 9402: if ((!terminate) &&
9403: (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
1.140 daniel 9404: goto done;
9405: }
9406: ctxt->checkIndex = 0;
9407: #ifdef DEBUG_PUSH
9408: fprintf(stderr, "PP: Parsing char data\n");
9409: #endif
9410: xmlParseCharData(ctxt, 0);
9411: }
9412: /*
9413: * Pop-up of finished entities.
9414: */
1.152 daniel 9415: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 9416: xmlPopInput(ctxt);
9417: break;
9418: case XML_PARSER_CDATA_SECTION: {
9419: /*
9420: * The Push mode need to have the SAX callback for
9421: * cdataBlock merge back contiguous callbacks.
9422: */
9423: int base;
9424:
9425: base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9426: if (base < 0) {
9427: if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
1.171 daniel 9428: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.140 daniel 9429: if (ctxt->sax->cdataBlock != NULL)
1.184 daniel 9430: ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
1.140 daniel 9431: XML_PARSER_BIG_BUFFER_SIZE);
9432: }
9433: SKIP(XML_PARSER_BIG_BUFFER_SIZE);
9434: ctxt->checkIndex = 0;
9435: }
9436: goto done;
9437: } else {
1.171 daniel 9438: if ((ctxt->sax != NULL) && (base > 0) &&
9439: (!ctxt->disableSAX)) {
1.140 daniel 9440: if (ctxt->sax->cdataBlock != NULL)
9441: ctxt->sax->cdataBlock(ctxt->userData,
1.184 daniel 9442: ctxt->input->cur, base);
1.140 daniel 9443: }
9444: SKIP(base + 3);
9445: ctxt->checkIndex = 0;
9446: ctxt->instate = XML_PARSER_CONTENT;
9447: #ifdef DEBUG_PUSH
9448: fprintf(stderr, "PP: entering CONTENT\n");
9449: #endif
9450: }
9451: break;
9452: }
1.141 daniel 9453: case XML_PARSER_END_TAG:
1.140 daniel 9454: if (avail < 2)
9455: goto done;
1.143 daniel 9456: if ((!terminate) &&
9457: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 9458: goto done;
9459: xmlParseEndTag(ctxt);
9460: if (ctxt->name == NULL) {
9461: ctxt->instate = XML_PARSER_EPILOG;
9462: #ifdef DEBUG_PUSH
9463: fprintf(stderr, "PP: entering EPILOG\n");
9464: #endif
9465: } else {
9466: ctxt->instate = XML_PARSER_CONTENT;
9467: #ifdef DEBUG_PUSH
9468: fprintf(stderr, "PP: entering CONTENT\n");
9469: #endif
9470: }
9471: break;
9472: case XML_PARSER_DTD: {
9473: /*
9474: * Sorry but progressive parsing of the internal subset
9475: * is not expected to be supported. We first check that
9476: * the full content of the internal subset is available and
9477: * the parsing is launched only at that point.
9478: * Internal subset ends up with "']' S? '>'" in an unescaped
9479: * section and not in a ']]>' sequence which are conditional
9480: * sections (whoever argued to keep that crap in XML deserve
9481: * a place in hell !).
9482: */
9483: int base, i;
9484: xmlChar *buf;
9485: xmlChar quote = 0;
9486:
1.184 daniel 9487: base = ctxt->input->cur - ctxt->input->base;
1.140 daniel 9488: if (base < 0) return(0);
9489: if (ctxt->checkIndex > base)
9490: base = ctxt->checkIndex;
1.184 daniel 9491: buf = ctxt->input->buf->buffer->content;
1.202 daniel 9492: for (;(unsigned int) base < ctxt->input->buf->buffer->use;
9493: base++) {
1.140 daniel 9494: if (quote != 0) {
9495: if (buf[base] == quote)
9496: quote = 0;
9497: continue;
9498: }
9499: if (buf[base] == '"') {
9500: quote = '"';
9501: continue;
9502: }
9503: if (buf[base] == '\'') {
9504: quote = '\'';
9505: continue;
9506: }
9507: if (buf[base] == ']') {
1.202 daniel 9508: if ((unsigned int) base +1 >=
9509: ctxt->input->buf->buffer->use)
1.140 daniel 9510: break;
9511: if (buf[base + 1] == ']') {
9512: /* conditional crap, skip both ']' ! */
9513: base++;
9514: continue;
9515: }
1.202 daniel 9516: for (i = 0;
9517: (unsigned int) base + i < ctxt->input->buf->buffer->use;
9518: i++) {
1.140 daniel 9519: if (buf[base + i] == '>')
9520: goto found_end_int_subset;
9521: }
9522: break;
9523: }
9524: }
9525: /*
9526: * We didn't found the end of the Internal subset
9527: */
9528: if (quote == 0)
9529: ctxt->checkIndex = base;
9530: #ifdef DEBUG_PUSH
9531: if (next == 0)
9532: fprintf(stderr, "PP: lookup of int subset end filed\n");
9533: #endif
9534: goto done;
9535:
9536: found_end_int_subset:
9537: xmlParseInternalSubset(ctxt);
1.166 daniel 9538: ctxt->inSubset = 2;
1.171 daniel 9539: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 9540: (ctxt->sax->externalSubset != NULL))
9541: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9542: ctxt->extSubSystem, ctxt->extSubURI);
9543: ctxt->inSubset = 0;
1.140 daniel 9544: ctxt->instate = XML_PARSER_PROLOG;
9545: ctxt->checkIndex = 0;
9546: #ifdef DEBUG_PUSH
9547: fprintf(stderr, "PP: entering PROLOG\n");
9548: #endif
9549: break;
9550: }
9551: case XML_PARSER_COMMENT:
9552: fprintf(stderr, "PP: internal error, state == COMMENT\n");
9553: ctxt->instate = XML_PARSER_CONTENT;
9554: #ifdef DEBUG_PUSH
9555: fprintf(stderr, "PP: entering CONTENT\n");
9556: #endif
9557: break;
9558: case XML_PARSER_PI:
9559: fprintf(stderr, "PP: internal error, state == PI\n");
9560: ctxt->instate = XML_PARSER_CONTENT;
9561: #ifdef DEBUG_PUSH
9562: fprintf(stderr, "PP: entering CONTENT\n");
9563: #endif
9564: break;
1.128 daniel 9565: case XML_PARSER_ENTITY_DECL:
1.140 daniel 9566: fprintf(stderr, "PP: internal error, state == ENTITY_DECL\n");
9567: ctxt->instate = XML_PARSER_DTD;
9568: #ifdef DEBUG_PUSH
9569: fprintf(stderr, "PP: entering DTD\n");
9570: #endif
9571: break;
1.128 daniel 9572: case XML_PARSER_ENTITY_VALUE:
1.140 daniel 9573: fprintf(stderr, "PP: internal error, state == ENTITY_VALUE\n");
9574: ctxt->instate = XML_PARSER_CONTENT;
9575: #ifdef DEBUG_PUSH
9576: fprintf(stderr, "PP: entering DTD\n");
9577: #endif
9578: break;
1.128 daniel 9579: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 9580: fprintf(stderr, "PP: internal error, state == ATTRIBUTE_VALUE\n");
1.168 daniel 9581: ctxt->instate = XML_PARSER_START_TAG;
9582: #ifdef DEBUG_PUSH
9583: fprintf(stderr, "PP: entering START_TAG\n");
9584: #endif
9585: break;
9586: case XML_PARSER_SYSTEM_LITERAL:
9587: fprintf(stderr, "PP: internal error, state == SYSTEM_LITERAL\n");
1.140 daniel 9588: ctxt->instate = XML_PARSER_START_TAG;
9589: #ifdef DEBUG_PUSH
9590: fprintf(stderr, "PP: entering START_TAG\n");
9591: #endif
9592: break;
1.128 daniel 9593: }
9594: }
1.140 daniel 9595: done:
9596: #ifdef DEBUG_PUSH
9597: fprintf(stderr, "PP: done %d\n", ret);
9598: #endif
1.128 daniel 9599: return(ret);
9600: }
9601:
9602: /**
1.143 daniel 9603: * xmlParseTry:
9604: * @ctxt: an XML parser context
9605: *
9606: * Try to progress on parsing
9607: *
9608: * Returns zero if no parsing was possible
9609: */
9610: int
9611: xmlParseTry(xmlParserCtxtPtr ctxt) {
9612: return(xmlParseTryOrFinish(ctxt, 0));
9613: }
9614:
9615: /**
1.128 daniel 9616: * xmlParseChunk:
9617: * @ctxt: an XML parser context
9618: * @chunk: an char array
9619: * @size: the size in byte of the chunk
9620: * @terminate: last chunk indicator
9621: *
9622: * Parse a Chunk of memory
9623: *
9624: * Returns zero if no error, the xmlParserErrors otherwise.
9625: */
1.140 daniel 9626: int
1.128 daniel 9627: xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
9628: int terminate) {
1.132 daniel 9629: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
1.140 daniel 9630: (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
9631: int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9632: int cur = ctxt->input->cur - ctxt->input->base;
9633:
1.132 daniel 9634: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
1.140 daniel 9635: ctxt->input->base = ctxt->input->buf->buffer->content + base;
9636: ctxt->input->cur = ctxt->input->base + cur;
9637: #ifdef DEBUG_PUSH
9638: fprintf(stderr, "PP: pushed %d\n", size);
9639: #endif
9640:
1.150 daniel 9641: if ((terminate) || (ctxt->input->buf->buffer->use > 80))
9642: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 9643: } else if (ctxt->instate != XML_PARSER_EOF)
1.143 daniel 9644: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 9645: if (terminate) {
1.151 daniel 9646: /*
9647: * Check for termination
9648: */
1.140 daniel 9649: if ((ctxt->instate != XML_PARSER_EOF) &&
9650: (ctxt->instate != XML_PARSER_EPILOG)) {
9651: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9652: ctxt->sax->error(ctxt->userData,
9653: "Extra content at the end of the document\n");
9654: ctxt->wellFormed = 0;
1.180 daniel 9655: ctxt->disableSAX = 1;
1.140 daniel 9656: ctxt->errNo = XML_ERR_DOCUMENT_END;
9657: }
9658: if (ctxt->instate != XML_PARSER_EOF) {
1.171 daniel 9659: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9660: (!ctxt->disableSAX))
1.140 daniel 9661: ctxt->sax->endDocument(ctxt->userData);
9662: }
9663: ctxt->instate = XML_PARSER_EOF;
1.128 daniel 9664: }
9665: return((xmlParserErrors) ctxt->errNo);
9666: }
9667:
9668: /************************************************************************
9669: * *
1.98 daniel 9670: * I/O front end functions to the parser *
9671: * *
9672: ************************************************************************/
1.201 daniel 9673:
9674: /**
9675: * xmlCreatePushParserCtxt:
9676: * @ctxt: an XML parser context
9677: *
9678: * Blocks further parser processing
9679: */
9680: void
9681: xmlStopParser(xmlParserCtxtPtr ctxt) {
9682: ctxt->instate = XML_PARSER_EOF;
9683: if (ctxt->input != NULL)
9684: ctxt->input->cur = BAD_CAST"";
9685: }
1.98 daniel 9686:
1.50 daniel 9687: /**
1.181 daniel 9688: * xmlCreatePushParserCtxt:
1.140 daniel 9689: * @sax: a SAX handler
9690: * @user_data: The user data returned on SAX callbacks
9691: * @chunk: a pointer to an array of chars
9692: * @size: number of chars in the array
9693: * @filename: an optional file name or URI
9694: *
9695: * Create a parser context for using the XML parser in push mode
9696: * To allow content encoding detection, @size should be >= 4
9697: * The value of @filename is used for fetching external entities
9698: * and error/warning reports.
9699: *
9700: * Returns the new parser context or NULL
9701: */
9702: xmlParserCtxtPtr
9703: xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9704: const char *chunk, int size, const char *filename) {
9705: xmlParserCtxtPtr ctxt;
9706: xmlParserInputPtr inputStream;
9707: xmlParserInputBufferPtr buf;
9708: xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9709:
9710: /*
1.156 daniel 9711: * plug some encoding conversion routines
1.140 daniel 9712: */
9713: if ((chunk != NULL) && (size >= 4))
1.156 daniel 9714: enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
1.140 daniel 9715:
9716: buf = xmlAllocParserInputBuffer(enc);
9717: if (buf == NULL) return(NULL);
9718:
9719: ctxt = xmlNewParserCtxt();
9720: if (ctxt == NULL) {
9721: xmlFree(buf);
9722: return(NULL);
9723: }
9724: if (sax != NULL) {
9725: if (ctxt->sax != &xmlDefaultSAXHandler)
9726: xmlFree(ctxt->sax);
9727: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9728: if (ctxt->sax == NULL) {
9729: xmlFree(buf);
9730: xmlFree(ctxt);
9731: return(NULL);
9732: }
9733: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9734: if (user_data != NULL)
9735: ctxt->userData = user_data;
9736: }
9737: if (filename == NULL) {
9738: ctxt->directory = NULL;
9739: } else {
9740: ctxt->directory = xmlParserGetDirectory(filename);
9741: }
9742:
9743: inputStream = xmlNewInputStream(ctxt);
9744: if (inputStream == NULL) {
9745: xmlFreeParserCtxt(ctxt);
9746: return(NULL);
9747: }
9748:
9749: if (filename == NULL)
9750: inputStream->filename = NULL;
9751: else
9752: inputStream->filename = xmlMemStrdup(filename);
9753: inputStream->buf = buf;
9754: inputStream->base = inputStream->buf->buffer->content;
9755: inputStream->cur = inputStream->buf->buffer->content;
1.156 daniel 9756: if (enc != XML_CHAR_ENCODING_NONE) {
9757: xmlSwitchEncoding(ctxt, enc);
9758: }
1.140 daniel 9759:
9760: inputPush(ctxt, inputStream);
9761:
9762: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9763: (ctxt->input->buf != NULL)) {
9764: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9765: #ifdef DEBUG_PUSH
9766: fprintf(stderr, "PP: pushed %d\n", size);
9767: #endif
9768: }
1.190 daniel 9769:
9770: return(ctxt);
9771: }
9772:
9773: /**
9774: * xmlCreateIOParserCtxt:
9775: * @sax: a SAX handler
9776: * @user_data: The user data returned on SAX callbacks
9777: * @ioread: an I/O read function
9778: * @ioclose: an I/O close function
9779: * @ioctx: an I/O handler
9780: * @enc: the charset encoding if known
9781: *
9782: * Create a parser context for using the XML parser with an existing
9783: * I/O stream
9784: *
9785: * Returns the new parser context or NULL
9786: */
9787: xmlParserCtxtPtr
9788: xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9789: xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
9790: void *ioctx, xmlCharEncoding enc) {
9791: xmlParserCtxtPtr ctxt;
9792: xmlParserInputPtr inputStream;
9793: xmlParserInputBufferPtr buf;
9794:
9795: buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
9796: if (buf == NULL) return(NULL);
9797:
9798: ctxt = xmlNewParserCtxt();
9799: if (ctxt == NULL) {
9800: xmlFree(buf);
9801: return(NULL);
9802: }
9803: if (sax != NULL) {
9804: if (ctxt->sax != &xmlDefaultSAXHandler)
9805: xmlFree(ctxt->sax);
9806: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9807: if (ctxt->sax == NULL) {
9808: xmlFree(buf);
9809: xmlFree(ctxt);
9810: return(NULL);
9811: }
9812: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9813: if (user_data != NULL)
9814: ctxt->userData = user_data;
9815: }
9816:
9817: inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9818: if (inputStream == NULL) {
9819: xmlFreeParserCtxt(ctxt);
9820: return(NULL);
9821: }
9822: inputPush(ctxt, inputStream);
1.140 daniel 9823:
9824: return(ctxt);
9825: }
9826:
9827: /**
1.181 daniel 9828: * xmlCreateDocParserCtxt:
1.123 daniel 9829: * @cur: a pointer to an array of xmlChar
1.50 daniel 9830: *
1.192 daniel 9831: * Creates a parser context for an XML in-memory document.
1.69 daniel 9832: *
9833: * Returns the new parser context or NULL
1.16 daniel 9834: */
1.69 daniel 9835: xmlParserCtxtPtr
1.123 daniel 9836: xmlCreateDocParserCtxt(xmlChar *cur) {
1.16 daniel 9837: xmlParserCtxtPtr ctxt;
1.40 daniel 9838: xmlParserInputPtr input;
1.16 daniel 9839:
1.97 daniel 9840: ctxt = xmlNewParserCtxt();
1.16 daniel 9841: if (ctxt == NULL) {
9842: return(NULL);
9843: }
1.96 daniel 9844: input = xmlNewInputStream(ctxt);
1.40 daniel 9845: if (input == NULL) {
1.97 daniel 9846: xmlFreeParserCtxt(ctxt);
1.40 daniel 9847: return(NULL);
9848: }
9849:
9850: input->base = cur;
9851: input->cur = cur;
9852:
9853: inputPush(ctxt, input);
1.69 daniel 9854: return(ctxt);
9855: }
9856:
9857: /**
1.181 daniel 9858: * xmlSAXParseDoc:
1.69 daniel 9859: * @sax: the SAX handler block
1.123 daniel 9860: * @cur: a pointer to an array of xmlChar
1.69 daniel 9861: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9862: * documents
9863: *
9864: * parse an XML in-memory document and build a tree.
9865: * It use the given SAX function block to handle the parsing callback.
9866: * If sax is NULL, fallback to the default DOM tree building routines.
9867: *
9868: * Returns the resulting document tree
9869: */
9870:
9871: xmlDocPtr
1.123 daniel 9872: xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
1.69 daniel 9873: xmlDocPtr ret;
9874: xmlParserCtxtPtr ctxt;
9875:
9876: if (cur == NULL) return(NULL);
1.16 daniel 9877:
9878:
1.69 daniel 9879: ctxt = xmlCreateDocParserCtxt(cur);
9880: if (ctxt == NULL) return(NULL);
1.74 daniel 9881: if (sax != NULL) {
9882: ctxt->sax = sax;
9883: ctxt->userData = NULL;
9884: }
1.69 daniel 9885:
1.16 daniel 9886: xmlParseDocument(ctxt);
1.72 daniel 9887: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 9888: else {
9889: ret = NULL;
1.72 daniel 9890: xmlFreeDoc(ctxt->myDoc);
9891: ctxt->myDoc = NULL;
1.59 daniel 9892: }
1.86 daniel 9893: if (sax != NULL)
9894: ctxt->sax = NULL;
1.69 daniel 9895: xmlFreeParserCtxt(ctxt);
1.16 daniel 9896:
1.1 veillard 9897: return(ret);
9898: }
9899:
1.50 daniel 9900: /**
1.181 daniel 9901: * xmlParseDoc:
1.123 daniel 9902: * @cur: a pointer to an array of xmlChar
1.55 daniel 9903: *
9904: * parse an XML in-memory document and build a tree.
9905: *
1.68 daniel 9906: * Returns the resulting document tree
1.55 daniel 9907: */
9908:
1.69 daniel 9909: xmlDocPtr
1.123 daniel 9910: xmlParseDoc(xmlChar *cur) {
1.59 daniel 9911: return(xmlSAXParseDoc(NULL, cur, 0));
1.76 daniel 9912: }
9913:
9914: /**
1.181 daniel 9915: * xmlSAXParseDTD:
1.76 daniel 9916: * @sax: the SAX handler block
9917: * @ExternalID: a NAME* containing the External ID of the DTD
9918: * @SystemID: a NAME* containing the URL to the DTD
9919: *
9920: * Load and parse an external subset.
9921: *
9922: * Returns the resulting xmlDtdPtr or NULL in case of error.
9923: */
9924:
9925: xmlDtdPtr
1.123 daniel 9926: xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9927: const xmlChar *SystemID) {
1.76 daniel 9928: xmlDtdPtr ret = NULL;
9929: xmlParserCtxtPtr ctxt;
1.83 daniel 9930: xmlParserInputPtr input = NULL;
1.76 daniel 9931: xmlCharEncoding enc;
9932:
9933: if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9934:
1.97 daniel 9935: ctxt = xmlNewParserCtxt();
1.76 daniel 9936: if (ctxt == NULL) {
9937: return(NULL);
9938: }
9939:
9940: /*
9941: * Set-up the SAX context
9942: */
9943: if (ctxt == NULL) return(NULL);
9944: if (sax != NULL) {
1.93 veillard 9945: if (ctxt->sax != NULL)
1.119 daniel 9946: xmlFree(ctxt->sax);
1.76 daniel 9947: ctxt->sax = sax;
9948: ctxt->userData = NULL;
9949: }
9950:
9951: /*
9952: * Ask the Entity resolver to load the damn thing
9953: */
9954:
9955: if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9956: input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9957: if (input == NULL) {
1.86 daniel 9958: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 9959: xmlFreeParserCtxt(ctxt);
9960: return(NULL);
9961: }
9962:
9963: /*
1.156 daniel 9964: * plug some encoding conversion routines here.
1.76 daniel 9965: */
9966: xmlPushInput(ctxt, input);
1.156 daniel 9967: enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
1.76 daniel 9968: xmlSwitchEncoding(ctxt, enc);
9969:
1.95 veillard 9970: if (input->filename == NULL)
1.156 daniel 9971: input->filename = (char *) xmlStrdup(SystemID);
1.76 daniel 9972: input->line = 1;
9973: input->col = 1;
9974: input->base = ctxt->input->cur;
9975: input->cur = ctxt->input->cur;
9976: input->free = NULL;
9977:
9978: /*
9979: * let's parse that entity knowing it's an external subset.
9980: */
1.191 daniel 9981: ctxt->inSubset = 2;
9982: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9983: ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9984: ExternalID, SystemID);
1.79 daniel 9985: xmlParseExternalSubset(ctxt, ExternalID, SystemID);
1.76 daniel 9986:
9987: if (ctxt->myDoc != NULL) {
9988: if (ctxt->wellFormed) {
1.191 daniel 9989: ret = ctxt->myDoc->extSubset;
9990: ctxt->myDoc->extSubset = NULL;
1.76 daniel 9991: } else {
9992: ret = NULL;
9993: }
9994: xmlFreeDoc(ctxt->myDoc);
9995: ctxt->myDoc = NULL;
9996: }
1.86 daniel 9997: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 9998: xmlFreeParserCtxt(ctxt);
9999:
10000: return(ret);
10001: }
10002:
10003: /**
1.181 daniel 10004: * xmlParseDTD:
1.76 daniel 10005: * @ExternalID: a NAME* containing the External ID of the DTD
10006: * @SystemID: a NAME* containing the URL to the DTD
10007: *
10008: * Load and parse an external subset.
10009: *
10010: * Returns the resulting xmlDtdPtr or NULL in case of error.
10011: */
10012:
10013: xmlDtdPtr
1.123 daniel 10014: xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
1.76 daniel 10015: return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
1.59 daniel 10016: }
10017:
10018: /**
1.181 daniel 10019: * xmlSAXParseBalancedChunk:
1.144 daniel 10020: * @ctx: an XML parser context (possibly NULL)
10021: * @sax: the SAX handler bloc (possibly NULL)
10022: * @user_data: The user data returned on SAX callbacks (possibly NULL)
10023: * @input: a parser input stream
10024: * @enc: the encoding
10025: *
10026: * Parse a well-balanced chunk of an XML document
10027: * The user has to provide SAX callback block whose routines will be
10028: * called by the parser
10029: * The allowed sequence for the Well Balanced Chunk is the one defined by
10030: * the content production in the XML grammar:
10031: *
10032: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10033: *
1.176 daniel 10034: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
1.144 daniel 10035: * the error code otherwise
10036: */
10037:
10038: int
10039: xmlSAXParseBalancedChunk(xmlParserCtxtPtr ctx, xmlSAXHandlerPtr sax,
10040: void *user_data, xmlParserInputPtr input,
10041: xmlCharEncoding enc) {
10042: xmlParserCtxtPtr ctxt;
10043: int ret;
10044:
10045: if (input == NULL) return(-1);
10046:
10047: if (ctx != NULL)
10048: ctxt = ctx;
10049: else {
10050: ctxt = xmlNewParserCtxt();
10051: if (ctxt == NULL)
10052: return(-1);
10053: if (sax == NULL)
10054: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10055: }
10056:
10057: /*
10058: * Set-up the SAX context
10059: */
10060: if (sax != NULL) {
10061: if (ctxt->sax != NULL)
10062: xmlFree(ctxt->sax);
10063: ctxt->sax = sax;
10064: ctxt->userData = user_data;
10065: }
10066:
10067: /*
10068: * plug some encoding conversion routines here.
10069: */
10070: xmlPushInput(ctxt, input);
10071: if (enc != XML_CHAR_ENCODING_NONE)
10072: xmlSwitchEncoding(ctxt, enc);
10073:
10074: /*
10075: * let's parse that entity knowing it's an external subset.
10076: */
10077: xmlParseContent(ctxt);
10078: ret = ctxt->errNo;
10079:
10080: if (ctx == NULL) {
10081: if (sax != NULL)
10082: ctxt->sax = NULL;
10083: else
10084: xmlFreeDoc(ctxt->myDoc);
10085: xmlFreeParserCtxt(ctxt);
10086: }
10087: return(ret);
10088: }
10089:
10090: /**
1.213 ! veillard 10091: * xmlParseCtxtExternalEntity:
! 10092: * @ctx: the existing parsing context
! 10093: * @URL: the URL for the entity to load
! 10094: * @ID: the System ID for the entity to load
! 10095: * @list: the return value for the set of parsed nodes
! 10096: *
! 10097: * Parse an external general entity within an existing parsing context
! 10098: * An external general parsed entity is well-formed if it matches the
! 10099: * production labeled extParsedEnt.
! 10100: *
! 10101: * [78] extParsedEnt ::= TextDecl? content
! 10102: *
! 10103: * Returns 0 if the entity is well formed, -1 in case of args problem and
! 10104: * the parser error code otherwise
! 10105: */
! 10106:
! 10107: int
! 10108: xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
! 10109: const xmlChar *ID, xmlNodePtr *list) {
! 10110: xmlParserCtxtPtr ctxt;
! 10111: xmlDocPtr newDoc;
! 10112: xmlSAXHandlerPtr oldsax = NULL;
! 10113: int ret = 0;
! 10114:
! 10115: if (ctx->depth > 40) {
! 10116: return(XML_ERR_ENTITY_LOOP);
! 10117: }
! 10118:
! 10119: if (list != NULL)
! 10120: *list = NULL;
! 10121: if ((URL == NULL) && (ID == NULL))
! 10122: return(-1);
! 10123: if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
! 10124: return(-1);
! 10125:
! 10126:
! 10127: ctxt = xmlCreateEntityParserCtxt(URL, ID, ctx->myDoc->URL);
! 10128: if (ctxt == NULL) return(-1);
! 10129: ctxt->userData = ctxt;
! 10130: oldsax = ctxt->sax;
! 10131: ctxt->sax = ctx->sax;
! 10132: newDoc = xmlNewDoc(BAD_CAST "1.0");
! 10133: if (newDoc == NULL) {
! 10134: xmlFreeParserCtxt(ctxt);
! 10135: return(-1);
! 10136: }
! 10137: if (ctx->myDoc != NULL) {
! 10138: newDoc->intSubset = ctx->myDoc->intSubset;
! 10139: newDoc->extSubset = ctx->myDoc->extSubset;
! 10140: }
! 10141: if (ctx->myDoc->URL != NULL) {
! 10142: newDoc->URL = xmlStrdup(ctx->myDoc->URL);
! 10143: }
! 10144: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
! 10145: if (newDoc->children == NULL) {
! 10146: ctxt->sax = oldsax;
! 10147: xmlFreeParserCtxt(ctxt);
! 10148: newDoc->intSubset = NULL;
! 10149: newDoc->extSubset = NULL;
! 10150: xmlFreeDoc(newDoc);
! 10151: return(-1);
! 10152: }
! 10153: nodePush(ctxt, newDoc->children);
! 10154: if (ctx->myDoc == NULL) {
! 10155: ctxt->myDoc = newDoc;
! 10156: } else {
! 10157: ctxt->myDoc = ctx->myDoc;
! 10158: newDoc->children->doc = ctx->myDoc;
! 10159: }
! 10160:
! 10161: /*
! 10162: * Parse a possible text declaration first
! 10163: */
! 10164: GROW;
! 10165: if ((RAW == '<') && (NXT(1) == '?') &&
! 10166: (NXT(2) == 'x') && (NXT(3) == 'm') &&
! 10167: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
! 10168: xmlParseTextDecl(ctxt);
! 10169: }
! 10170:
! 10171: /*
! 10172: * Doing validity checking on chunk doesn't make sense
! 10173: */
! 10174: ctxt->instate = XML_PARSER_CONTENT;
! 10175: ctxt->validate = ctx->validate;
! 10176: ctxt->depth = ctx->depth + 1;
! 10177: ctxt->replaceEntities = ctx->replaceEntities;
! 10178: if (ctxt->validate) {
! 10179: ctxt->vctxt.error = ctx->vctxt.error;
! 10180: ctxt->vctxt.warning = ctx->vctxt.warning;
! 10181: /* Allocate the Node stack */
! 10182: ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
! 10183: ctxt->vctxt.nodeNr = 0;
! 10184: ctxt->vctxt.nodeMax = 4;
! 10185: ctxt->vctxt.node = NULL;
! 10186: } else {
! 10187: ctxt->vctxt.error = NULL;
! 10188: ctxt->vctxt.warning = NULL;
! 10189: }
! 10190:
! 10191: xmlParseContent(ctxt);
! 10192:
! 10193: if ((RAW == '<') && (NXT(1) == '/')) {
! 10194: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 10195: ctxt->sax->error(ctxt->userData,
! 10196: "chunk is not well balanced\n");
! 10197: ctxt->wellFormed = 0;
! 10198: ctxt->disableSAX = 1;
! 10199: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
! 10200: } else if (RAW != 0) {
! 10201: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 10202: ctxt->sax->error(ctxt->userData,
! 10203: "extra content at the end of well balanced chunk\n");
! 10204: ctxt->wellFormed = 0;
! 10205: ctxt->disableSAX = 1;
! 10206: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
! 10207: }
! 10208: if (ctxt->node != newDoc->children) {
! 10209: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 10210: ctxt->sax->error(ctxt->userData,
! 10211: "chunk is not well balanced\n");
! 10212: ctxt->wellFormed = 0;
! 10213: ctxt->disableSAX = 1;
! 10214: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
! 10215: }
! 10216:
! 10217: if (!ctxt->wellFormed) {
! 10218: if (ctxt->errNo == 0)
! 10219: ret = 1;
! 10220: else
! 10221: ret = ctxt->errNo;
! 10222: } else {
! 10223: if (list != NULL) {
! 10224: xmlNodePtr cur;
! 10225:
! 10226: /*
! 10227: * Return the newly created nodeset after unlinking it from
! 10228: * they pseudo parent.
! 10229: */
! 10230: cur = newDoc->children->children;
! 10231: *list = cur;
! 10232: while (cur != NULL) {
! 10233: cur->parent = NULL;
! 10234: cur = cur->next;
! 10235: }
! 10236: newDoc->children->children = NULL;
! 10237: }
! 10238: ret = 0;
! 10239: }
! 10240: ctxt->sax = oldsax;
! 10241: xmlFreeParserCtxt(ctxt);
! 10242: newDoc->intSubset = NULL;
! 10243: newDoc->extSubset = NULL;
! 10244: xmlFreeDoc(newDoc);
! 10245:
! 10246: return(ret);
! 10247: }
! 10248:
! 10249: /**
1.181 daniel 10250: * xmlParseExternalEntity:
10251: * @doc: the document the chunk pertains to
10252: * @sax: the SAX handler bloc (possibly NULL)
10253: * @user_data: The user data returned on SAX callbacks (possibly NULL)
1.185 daniel 10254: * @depth: Used for loop detection, use 0
1.181 daniel 10255: * @URL: the URL for the entity to load
10256: * @ID: the System ID for the entity to load
10257: * @list: the return value for the set of parsed nodes
10258: *
10259: * Parse an external general entity
10260: * An external general parsed entity is well-formed if it matches the
10261: * production labeled extParsedEnt.
10262: *
10263: * [78] extParsedEnt ::= TextDecl? content
10264: *
10265: * Returns 0 if the entity is well formed, -1 in case of args problem and
10266: * the parser error code otherwise
10267: */
10268:
10269: int
10270: xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
1.185 daniel 10271: int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
1.181 daniel 10272: xmlParserCtxtPtr ctxt;
10273: xmlDocPtr newDoc;
10274: xmlSAXHandlerPtr oldsax = NULL;
10275: int ret = 0;
10276:
1.185 daniel 10277: if (depth > 40) {
10278: return(XML_ERR_ENTITY_LOOP);
10279: }
10280:
10281:
1.181 daniel 10282:
10283: if (list != NULL)
10284: *list = NULL;
10285: if ((URL == NULL) && (ID == NULL))
1.213 ! veillard 10286: return(-1);
! 10287: if (doc == NULL) /* @@ relax but check for dereferences */
1.181 daniel 10288: return(-1);
10289:
10290:
10291: ctxt = xmlCreateEntityParserCtxt(URL, ID, doc->URL);
10292: if (ctxt == NULL) return(-1);
10293: ctxt->userData = ctxt;
10294: if (sax != NULL) {
10295: oldsax = ctxt->sax;
10296: ctxt->sax = sax;
10297: if (user_data != NULL)
10298: ctxt->userData = user_data;
10299: }
10300: newDoc = xmlNewDoc(BAD_CAST "1.0");
10301: if (newDoc == NULL) {
10302: xmlFreeParserCtxt(ctxt);
10303: return(-1);
10304: }
10305: if (doc != NULL) {
10306: newDoc->intSubset = doc->intSubset;
10307: newDoc->extSubset = doc->extSubset;
10308: }
10309: if (doc->URL != NULL) {
10310: newDoc->URL = xmlStrdup(doc->URL);
10311: }
10312: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10313: if (newDoc->children == NULL) {
10314: if (sax != NULL)
10315: ctxt->sax = oldsax;
10316: xmlFreeParserCtxt(ctxt);
10317: newDoc->intSubset = NULL;
10318: newDoc->extSubset = NULL;
10319: xmlFreeDoc(newDoc);
10320: return(-1);
10321: }
10322: nodePush(ctxt, newDoc->children);
10323: if (doc == NULL) {
10324: ctxt->myDoc = newDoc;
10325: } else {
10326: ctxt->myDoc = doc;
10327: newDoc->children->doc = doc;
10328: }
10329:
10330: /*
10331: * Parse a possible text declaration first
10332: */
10333: GROW;
10334: if ((RAW == '<') && (NXT(1) == '?') &&
10335: (NXT(2) == 'x') && (NXT(3) == 'm') &&
10336: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
10337: xmlParseTextDecl(ctxt);
10338: }
10339:
10340: /*
10341: * Doing validity checking on chunk doesn't make sense
10342: */
10343: ctxt->instate = XML_PARSER_CONTENT;
10344: ctxt->validate = 0;
1.185 daniel 10345: ctxt->depth = depth;
1.181 daniel 10346:
10347: xmlParseContent(ctxt);
10348:
10349: if ((RAW == '<') && (NXT(1) == '/')) {
10350: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10351: ctxt->sax->error(ctxt->userData,
10352: "chunk is not well balanced\n");
10353: ctxt->wellFormed = 0;
10354: ctxt->disableSAX = 1;
10355: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10356: } else if (RAW != 0) {
10357: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10358: ctxt->sax->error(ctxt->userData,
10359: "extra content at the end of well balanced chunk\n");
10360: ctxt->wellFormed = 0;
10361: ctxt->disableSAX = 1;
10362: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10363: }
10364: if (ctxt->node != newDoc->children) {
10365: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10366: ctxt->sax->error(ctxt->userData,
10367: "chunk is not well balanced\n");
10368: ctxt->wellFormed = 0;
10369: ctxt->disableSAX = 1;
10370: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10371: }
10372:
10373: if (!ctxt->wellFormed) {
10374: if (ctxt->errNo == 0)
10375: ret = 1;
10376: else
10377: ret = ctxt->errNo;
10378: } else {
10379: if (list != NULL) {
10380: xmlNodePtr cur;
10381:
10382: /*
10383: * Return the newly created nodeset after unlinking it from
10384: * they pseudo parent.
10385: */
10386: cur = newDoc->children->children;
10387: *list = cur;
10388: while (cur != NULL) {
10389: cur->parent = NULL;
10390: cur = cur->next;
10391: }
10392: newDoc->children->children = NULL;
10393: }
10394: ret = 0;
10395: }
10396: if (sax != NULL)
10397: ctxt->sax = oldsax;
10398: xmlFreeParserCtxt(ctxt);
10399: newDoc->intSubset = NULL;
10400: newDoc->extSubset = NULL;
10401: xmlFreeDoc(newDoc);
10402:
10403: return(ret);
10404: }
10405:
10406: /**
10407: * xmlParseBalancedChunk:
1.176 daniel 10408: * @doc: the document the chunk pertains to
10409: * @sax: the SAX handler bloc (possibly NULL)
10410: * @user_data: The user data returned on SAX callbacks (possibly NULL)
1.185 daniel 10411: * @depth: Used for loop detection, use 0
1.176 daniel 10412: * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10413: * @list: the return value for the set of parsed nodes
10414: *
10415: * Parse a well-balanced chunk of an XML document
10416: * called by the parser
10417: * The allowed sequence for the Well Balanced Chunk is the one defined by
10418: * the content production in the XML grammar:
1.144 daniel 10419: *
1.175 daniel 10420: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10421: *
1.176 daniel 10422: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10423: * the parser error code otherwise
1.144 daniel 10424: */
10425:
1.175 daniel 10426: int
10427: xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
1.185 daniel 10428: void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
1.176 daniel 10429: xmlParserCtxtPtr ctxt;
1.175 daniel 10430: xmlDocPtr newDoc;
1.181 daniel 10431: xmlSAXHandlerPtr oldsax = NULL;
1.175 daniel 10432: int size;
1.176 daniel 10433: int ret = 0;
1.175 daniel 10434:
1.185 daniel 10435: if (depth > 40) {
10436: return(XML_ERR_ENTITY_LOOP);
10437: }
10438:
1.175 daniel 10439:
1.176 daniel 10440: if (list != NULL)
10441: *list = NULL;
10442: if (string == NULL)
10443: return(-1);
10444:
10445: size = xmlStrlen(string);
10446:
1.183 daniel 10447: ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
1.176 daniel 10448: if (ctxt == NULL) return(-1);
10449: ctxt->userData = ctxt;
1.175 daniel 10450: if (sax != NULL) {
1.176 daniel 10451: oldsax = ctxt->sax;
10452: ctxt->sax = sax;
10453: if (user_data != NULL)
10454: ctxt->userData = user_data;
1.175 daniel 10455: }
10456: newDoc = xmlNewDoc(BAD_CAST "1.0");
1.176 daniel 10457: if (newDoc == NULL) {
10458: xmlFreeParserCtxt(ctxt);
10459: return(-1);
10460: }
1.175 daniel 10461: if (doc != NULL) {
10462: newDoc->intSubset = doc->intSubset;
10463: newDoc->extSubset = doc->extSubset;
10464: }
1.176 daniel 10465: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10466: if (newDoc->children == NULL) {
10467: if (sax != NULL)
10468: ctxt->sax = oldsax;
10469: xmlFreeParserCtxt(ctxt);
10470: newDoc->intSubset = NULL;
10471: newDoc->extSubset = NULL;
10472: xmlFreeDoc(newDoc);
10473: return(-1);
10474: }
10475: nodePush(ctxt, newDoc->children);
10476: if (doc == NULL) {
10477: ctxt->myDoc = newDoc;
10478: } else {
10479: ctxt->myDoc = doc;
10480: newDoc->children->doc = doc;
10481: }
10482: ctxt->instate = XML_PARSER_CONTENT;
1.185 daniel 10483: ctxt->depth = depth;
1.176 daniel 10484:
10485: /*
10486: * Doing validity checking on chunk doesn't make sense
10487: */
10488: ctxt->validate = 0;
10489:
1.175 daniel 10490: xmlParseContent(ctxt);
1.176 daniel 10491:
10492: if ((RAW == '<') && (NXT(1) == '/')) {
10493: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10494: ctxt->sax->error(ctxt->userData,
10495: "chunk is not well balanced\n");
10496: ctxt->wellFormed = 0;
1.180 daniel 10497: ctxt->disableSAX = 1;
1.176 daniel 10498: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10499: } else if (RAW != 0) {
10500: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10501: ctxt->sax->error(ctxt->userData,
10502: "extra content at the end of well balanced chunk\n");
10503: ctxt->wellFormed = 0;
1.180 daniel 10504: ctxt->disableSAX = 1;
1.176 daniel 10505: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10506: }
10507: if (ctxt->node != newDoc->children) {
10508: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10509: ctxt->sax->error(ctxt->userData,
10510: "chunk is not well balanced\n");
10511: ctxt->wellFormed = 0;
1.180 daniel 10512: ctxt->disableSAX = 1;
1.176 daniel 10513: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10514: }
1.175 daniel 10515:
1.176 daniel 10516: if (!ctxt->wellFormed) {
10517: if (ctxt->errNo == 0)
10518: ret = 1;
10519: else
10520: ret = ctxt->errNo;
10521: } else {
10522: if (list != NULL) {
10523: xmlNodePtr cur;
1.175 daniel 10524:
1.176 daniel 10525: /*
10526: * Return the newly created nodeset after unlinking it from
10527: * they pseudo parent.
10528: */
10529: cur = newDoc->children->children;
10530: *list = cur;
10531: while (cur != NULL) {
10532: cur->parent = NULL;
10533: cur = cur->next;
10534: }
10535: newDoc->children->children = NULL;
10536: }
10537: ret = 0;
1.175 daniel 10538: }
1.176 daniel 10539: if (sax != NULL)
10540: ctxt->sax = oldsax;
1.175 daniel 10541: xmlFreeParserCtxt(ctxt);
10542: newDoc->intSubset = NULL;
10543: newDoc->extSubset = NULL;
1.176 daniel 10544: xmlFreeDoc(newDoc);
1.175 daniel 10545:
1.176 daniel 10546: return(ret);
1.144 daniel 10547: }
10548:
10549: /**
1.181 daniel 10550: * xmlParseBalancedChunkFile:
1.144 daniel 10551: * @doc: the document the chunk pertains to
10552: *
10553: * Parse a well-balanced chunk of an XML document contained in a file
10554: *
10555: * Returns the resulting list of nodes resulting from the parsing,
10556: * they are not added to @node
10557: */
10558:
10559: xmlNodePtr
10560: xmlParseBalancedChunkFile(xmlDocPtr doc, xmlNodePtr node) {
1.156 daniel 10561: /* TODO !!! */
10562: return(NULL);
1.144 daniel 10563: }
10564:
10565: /**
1.181 daniel 10566: * xmlRecoverDoc:
1.123 daniel 10567: * @cur: a pointer to an array of xmlChar
1.59 daniel 10568: *
10569: * parse an XML in-memory document and build a tree.
10570: * In the case the document is not Well Formed, a tree is built anyway
10571: *
1.68 daniel 10572: * Returns the resulting document tree
1.59 daniel 10573: */
10574:
1.69 daniel 10575: xmlDocPtr
1.123 daniel 10576: xmlRecoverDoc(xmlChar *cur) {
1.59 daniel 10577: return(xmlSAXParseDoc(NULL, cur, 1));
1.55 daniel 10578: }
10579:
10580: /**
1.181 daniel 10581: * xmlCreateEntityParserCtxt:
10582: * @URL: the entity URL
10583: * @ID: the entity PUBLIC ID
10584: * @base: a posible base for the target URI
10585: *
10586: * Create a parser context for an external entity
10587: * Automatic support for ZLIB/Compress compressed document is provided
10588: * by default if found at compile-time.
10589: *
10590: * Returns the new parser context or NULL
10591: */
10592: xmlParserCtxtPtr
10593: xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
10594: const xmlChar *base) {
10595: xmlParserCtxtPtr ctxt;
10596: xmlParserInputPtr inputStream;
10597: char *directory = NULL;
1.210 veillard 10598: xmlChar *uri;
10599:
1.181 daniel 10600: ctxt = xmlNewParserCtxt();
10601: if (ctxt == NULL) {
10602: return(NULL);
10603: }
10604:
1.210 veillard 10605: uri = xmlBuildURI(URL, base);
10606:
10607: if (uri == NULL) {
10608: inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
10609: if (inputStream == NULL) {
10610: xmlFreeParserCtxt(ctxt);
10611: return(NULL);
10612: }
10613:
10614: inputPush(ctxt, inputStream);
10615:
10616: if ((ctxt->directory == NULL) && (directory == NULL))
10617: directory = xmlParserGetDirectory((char *)URL);
10618: if ((ctxt->directory == NULL) && (directory != NULL))
10619: ctxt->directory = directory;
10620: } else {
10621: inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
10622: if (inputStream == NULL) {
10623: xmlFreeParserCtxt(ctxt);
10624: return(NULL);
10625: }
1.181 daniel 10626:
1.210 veillard 10627: inputPush(ctxt, inputStream);
1.181 daniel 10628:
1.210 veillard 10629: if ((ctxt->directory == NULL) && (directory == NULL))
10630: directory = xmlParserGetDirectory((char *)uri);
10631: if ((ctxt->directory == NULL) && (directory != NULL))
10632: ctxt->directory = directory;
10633: xmlFree(uri);
10634: }
1.181 daniel 10635:
10636: return(ctxt);
10637: }
10638:
10639: /**
10640: * xmlCreateFileParserCtxt:
1.50 daniel 10641: * @filename: the filename
10642: *
1.69 daniel 10643: * Create a parser context for a file content.
10644: * Automatic support for ZLIB/Compress compressed document is provided
10645: * by default if found at compile-time.
1.50 daniel 10646: *
1.69 daniel 10647: * Returns the new parser context or NULL
1.9 httpng 10648: */
1.69 daniel 10649: xmlParserCtxtPtr
10650: xmlCreateFileParserCtxt(const char *filename)
10651: {
10652: xmlParserCtxtPtr ctxt;
1.40 daniel 10653: xmlParserInputPtr inputStream;
1.91 daniel 10654: xmlParserInputBufferPtr buf;
1.111 daniel 10655: char *directory = NULL;
1.9 httpng 10656:
1.91 daniel 10657: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
10658: if (buf == NULL) return(NULL);
1.9 httpng 10659:
1.97 daniel 10660: ctxt = xmlNewParserCtxt();
1.16 daniel 10661: if (ctxt == NULL) {
10662: return(NULL);
10663: }
1.97 daniel 10664:
1.96 daniel 10665: inputStream = xmlNewInputStream(ctxt);
1.40 daniel 10666: if (inputStream == NULL) {
1.97 daniel 10667: xmlFreeParserCtxt(ctxt);
1.40 daniel 10668: return(NULL);
10669: }
10670:
1.119 daniel 10671: inputStream->filename = xmlMemStrdup(filename);
1.91 daniel 10672: inputStream->buf = buf;
10673: inputStream->base = inputStream->buf->buffer->content;
10674: inputStream->cur = inputStream->buf->buffer->content;
1.16 daniel 10675:
1.40 daniel 10676: inputPush(ctxt, inputStream);
1.110 daniel 10677: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 10678: directory = xmlParserGetDirectory(filename);
10679: if ((ctxt->directory == NULL) && (directory != NULL))
1.110 daniel 10680: ctxt->directory = directory;
1.106 daniel 10681:
1.69 daniel 10682: return(ctxt);
10683: }
10684:
10685: /**
1.181 daniel 10686: * xmlSAXParseFile:
1.69 daniel 10687: * @sax: the SAX handler block
10688: * @filename: the filename
10689: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10690: * documents
10691: *
10692: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10693: * compressed document is provided by default if found at compile-time.
10694: * It use the given SAX function block to handle the parsing callback.
10695: * If sax is NULL, fallback to the default DOM tree building routines.
10696: *
10697: * Returns the resulting document tree
10698: */
10699:
1.79 daniel 10700: xmlDocPtr
10701: xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
1.69 daniel 10702: int recovery) {
10703: xmlDocPtr ret;
10704: xmlParserCtxtPtr ctxt;
1.111 daniel 10705: char *directory = NULL;
1.69 daniel 10706:
10707: ctxt = xmlCreateFileParserCtxt(filename);
10708: if (ctxt == NULL) return(NULL);
1.74 daniel 10709: if (sax != NULL) {
1.93 veillard 10710: if (ctxt->sax != NULL)
1.119 daniel 10711: xmlFree(ctxt->sax);
1.74 daniel 10712: ctxt->sax = sax;
10713: ctxt->userData = NULL;
10714: }
1.106 daniel 10715:
1.110 daniel 10716: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 10717: directory = xmlParserGetDirectory(filename);
10718: if ((ctxt->directory == NULL) && (directory != NULL))
1.156 daniel 10719: ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
1.16 daniel 10720:
10721: xmlParseDocument(ctxt);
1.40 daniel 10722:
1.72 daniel 10723: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 10724: else {
10725: ret = NULL;
1.72 daniel 10726: xmlFreeDoc(ctxt->myDoc);
10727: ctxt->myDoc = NULL;
1.59 daniel 10728: }
1.86 daniel 10729: if (sax != NULL)
10730: ctxt->sax = NULL;
1.69 daniel 10731: xmlFreeParserCtxt(ctxt);
1.20 daniel 10732:
10733: return(ret);
10734: }
10735:
1.55 daniel 10736: /**
1.181 daniel 10737: * xmlParseFile:
1.55 daniel 10738: * @filename: the filename
10739: *
10740: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10741: * compressed document is provided by default if found at compile-time.
10742: *
1.68 daniel 10743: * Returns the resulting document tree
1.55 daniel 10744: */
10745:
1.79 daniel 10746: xmlDocPtr
10747: xmlParseFile(const char *filename) {
1.59 daniel 10748: return(xmlSAXParseFile(NULL, filename, 0));
10749: }
10750:
10751: /**
1.181 daniel 10752: * xmlRecoverFile:
1.59 daniel 10753: * @filename: the filename
10754: *
10755: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10756: * compressed document is provided by default if found at compile-time.
10757: * In the case the document is not Well Formed, a tree is built anyway
10758: *
1.68 daniel 10759: * Returns the resulting document tree
1.59 daniel 10760: */
10761:
1.79 daniel 10762: xmlDocPtr
10763: xmlRecoverFile(const char *filename) {
1.59 daniel 10764: return(xmlSAXParseFile(NULL, filename, 1));
1.55 daniel 10765: }
1.32 daniel 10766:
1.50 daniel 10767: /**
1.181 daniel 10768: * xmlCreateMemoryParserCtxt:
10769: * @buffer: a pointer to a zero terminated char array
10770: * @size: the size of the array (without the trailing 0)
1.50 daniel 10771: *
1.69 daniel 10772: * Create a parser context for an XML in-memory document.
1.50 daniel 10773: *
1.69 daniel 10774: * Returns the new parser context or NULL
1.20 daniel 10775: */
1.69 daniel 10776: xmlParserCtxtPtr
10777: xmlCreateMemoryParserCtxt(char *buffer, int size) {
1.20 daniel 10778: xmlParserCtxtPtr ctxt;
1.40 daniel 10779: xmlParserInputPtr input;
1.209 veillard 10780: xmlParserInputBufferPtr buf;
1.40 daniel 10781:
1.179 daniel 10782: if (buffer[size] != 0)
1.181 daniel 10783: return(NULL);
1.40 daniel 10784:
1.97 daniel 10785: ctxt = xmlNewParserCtxt();
1.181 daniel 10786: if (ctxt == NULL)
1.20 daniel 10787: return(NULL);
1.97 daniel 10788:
1.209 veillard 10789: buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
10790: if (buf == NULL) return(NULL);
10791:
1.96 daniel 10792: input = xmlNewInputStream(ctxt);
1.40 daniel 10793: if (input == NULL) {
1.97 daniel 10794: xmlFreeParserCtxt(ctxt);
1.40 daniel 10795: return(NULL);
10796: }
1.20 daniel 10797:
1.40 daniel 10798: input->filename = NULL;
1.209 veillard 10799: input->buf = buf;
10800: input->base = input->buf->buffer->content;
10801: input->cur = input->buf->buffer->content;
1.20 daniel 10802:
1.40 daniel 10803: inputPush(ctxt, input);
1.69 daniel 10804: return(ctxt);
10805: }
10806:
10807: /**
1.181 daniel 10808: * xmlSAXParseMemory:
1.69 daniel 10809: * @sax: the SAX handler block
10810: * @buffer: an pointer to a char array
1.127 daniel 10811: * @size: the size of the array
10812: * @recovery: work in recovery mode, i.e. tries to read not Well Formed
1.69 daniel 10813: * documents
10814: *
10815: * parse an XML in-memory block and use the given SAX function block
10816: * to handle the parsing callback. If sax is NULL, fallback to the default
10817: * DOM tree building routines.
10818: *
10819: * Returns the resulting document tree
10820: */
10821: xmlDocPtr
10822: xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
10823: xmlDocPtr ret;
10824: xmlParserCtxtPtr ctxt;
10825:
10826: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10827: if (ctxt == NULL) return(NULL);
1.74 daniel 10828: if (sax != NULL) {
10829: ctxt->sax = sax;
10830: ctxt->userData = NULL;
10831: }
1.20 daniel 10832:
10833: xmlParseDocument(ctxt);
1.40 daniel 10834:
1.72 daniel 10835: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 10836: else {
10837: ret = NULL;
1.72 daniel 10838: xmlFreeDoc(ctxt->myDoc);
10839: ctxt->myDoc = NULL;
1.59 daniel 10840: }
1.86 daniel 10841: if (sax != NULL)
10842: ctxt->sax = NULL;
1.69 daniel 10843: xmlFreeParserCtxt(ctxt);
1.16 daniel 10844:
1.9 httpng 10845: return(ret);
1.17 daniel 10846: }
10847:
1.55 daniel 10848: /**
1.181 daniel 10849: * xmlParseMemory:
1.68 daniel 10850: * @buffer: an pointer to a char array
1.55 daniel 10851: * @size: the size of the array
10852: *
10853: * parse an XML in-memory block and build a tree.
10854: *
1.68 daniel 10855: * Returns the resulting document tree
1.55 daniel 10856: */
10857:
10858: xmlDocPtr xmlParseMemory(char *buffer, int size) {
1.59 daniel 10859: return(xmlSAXParseMemory(NULL, buffer, size, 0));
10860: }
10861:
10862: /**
1.181 daniel 10863: * xmlRecoverMemory:
1.68 daniel 10864: * @buffer: an pointer to a char array
1.59 daniel 10865: * @size: the size of the array
10866: *
10867: * parse an XML in-memory block and build a tree.
10868: * In the case the document is not Well Formed, a tree is built anyway
10869: *
1.68 daniel 10870: * Returns the resulting document tree
1.59 daniel 10871: */
10872:
10873: xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
10874: return(xmlSAXParseMemory(NULL, buffer, size, 1));
1.17 daniel 10875: }
10876:
10877:
1.50 daniel 10878: /**
10879: * xmlSetupParserForBuffer:
10880: * @ctxt: an XML parser context
1.123 daniel 10881: * @buffer: a xmlChar * buffer
1.50 daniel 10882: * @filename: a file name
10883: *
1.19 daniel 10884: * Setup the parser context to parse a new buffer; Clears any prior
10885: * contents from the parser context. The buffer parameter must not be
10886: * NULL, but the filename parameter can be
10887: */
1.55 daniel 10888: void
1.123 daniel 10889: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
1.17 daniel 10890: const char* filename)
10891: {
1.96 daniel 10892: xmlParserInputPtr input;
1.40 daniel 10893:
1.96 daniel 10894: input = xmlNewInputStream(ctxt);
10895: if (input == NULL) {
10896: perror("malloc");
1.119 daniel 10897: xmlFree(ctxt);
1.145 daniel 10898: return;
1.96 daniel 10899: }
10900:
10901: xmlClearParserCtxt(ctxt);
10902: if (filename != NULL)
1.119 daniel 10903: input->filename = xmlMemStrdup(filename);
1.96 daniel 10904: input->base = buffer;
10905: input->cur = buffer;
10906: inputPush(ctxt, input);
1.17 daniel 10907: }
10908:
1.123 daniel 10909: /**
10910: * xmlSAXUserParseFile:
10911: * @sax: a SAX handler
10912: * @user_data: The user data returned on SAX callbacks
10913: * @filename: a file name
10914: *
10915: * parse an XML file and call the given SAX handler routines.
10916: * Automatic support for ZLIB/Compress compressed document is provided
10917: *
10918: * Returns 0 in case of success or a error number otherwise
10919: */
1.131 daniel 10920: int
10921: xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10922: const char *filename) {
1.123 daniel 10923: int ret = 0;
10924: xmlParserCtxtPtr ctxt;
10925:
10926: ctxt = xmlCreateFileParserCtxt(filename);
10927: if (ctxt == NULL) return -1;
1.134 daniel 10928: if (ctxt->sax != &xmlDefaultSAXHandler)
10929: xmlFree(ctxt->sax);
1.123 daniel 10930: ctxt->sax = sax;
1.140 daniel 10931: if (user_data != NULL)
10932: ctxt->userData = user_data;
1.123 daniel 10933:
10934: xmlParseDocument(ctxt);
10935:
10936: if (ctxt->wellFormed)
10937: ret = 0;
10938: else {
10939: if (ctxt->errNo != 0)
10940: ret = ctxt->errNo;
10941: else
10942: ret = -1;
10943: }
10944: if (sax != NULL)
10945: ctxt->sax = NULL;
10946: xmlFreeParserCtxt(ctxt);
10947:
10948: return ret;
10949: }
10950:
10951: /**
10952: * xmlSAXUserParseMemory:
10953: * @sax: a SAX handler
10954: * @user_data: The user data returned on SAX callbacks
10955: * @buffer: an in-memory XML document input
1.127 daniel 10956: * @size: the length of the XML document in bytes
1.123 daniel 10957: *
10958: * A better SAX parsing routine.
10959: * parse an XML in-memory buffer and call the given SAX handler routines.
10960: *
10961: * Returns 0 in case of success or a error number otherwise
10962: */
10963: int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
10964: char *buffer, int size) {
10965: int ret = 0;
10966: xmlParserCtxtPtr ctxt;
10967:
10968: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10969: if (ctxt == NULL) return -1;
10970: ctxt->sax = sax;
10971: ctxt->userData = user_data;
10972:
10973: xmlParseDocument(ctxt);
10974:
10975: if (ctxt->wellFormed)
10976: ret = 0;
10977: else {
10978: if (ctxt->errNo != 0)
10979: ret = ctxt->errNo;
10980: else
10981: ret = -1;
10982: }
10983: if (sax != NULL)
10984: ctxt->sax = NULL;
10985: xmlFreeParserCtxt(ctxt);
10986:
10987: return ret;
10988: }
10989:
1.32 daniel 10990:
1.98 daniel 10991: /************************************************************************
10992: * *
1.127 daniel 10993: * Miscellaneous *
1.98 daniel 10994: * *
10995: ************************************************************************/
10996:
/**
 * xmlCleanupParser:
 *
 * Cleanup hook for the XML parser library: runs the cleanup routines of
 * the character encoding handlers and of the predefined entities.
 * It is meant to reclaim parsing related global memory only; it does not
 * deallocate any document related memory, and calling it should not
 * prevent reusing the parser afterwards.
 */
void
xmlCleanupParser(void)
{
    xmlCleanupCharEncodingHandlers();
    xmlCleanupPredefinedEntities();
}
1.98 daniel 11011:
1.50 daniel 11012: /**
11013: * xmlParserFindNodeInfo:
11014: * @ctxt: an XML parser context
11015: * @node: an XML node within the tree
11016: *
11017: * Find the parser node info struct for a given node
11018: *
1.68 daniel 11019: * Returns an xmlParserNodeInfo block pointer or NULL
1.32 daniel 11020: */
11021: const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
11022: const xmlNode* node)
11023: {
11024: unsigned long pos;
11025:
11026: /* Find position where node should be at */
11027: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
11028: if ( ctx->node_seq.buffer[pos].node == node )
11029: return &ctx->node_seq.buffer[pos];
11030: else
11031: return NULL;
11032: }
11033:
11034:
1.50 daniel 11035: /**
1.181 daniel 11036: * xmlInitNodeInfoSeq:
1.50 daniel 11037: * @seq: a node info sequence pointer
11038: *
11039: * -- Initialize (set to initial state) node info sequence
1.32 daniel 11040: */
1.55 daniel 11041: void
11042: xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 11043: {
11044: seq->length = 0;
11045: seq->maximum = 0;
11046: seq->buffer = NULL;
11047: }
11048:
1.50 daniel 11049: /**
1.181 daniel 11050: * xmlClearNodeInfoSeq:
1.50 daniel 11051: * @seq: a node info sequence pointer
11052: *
11053: * -- Clear (release memory and reinitialize) node
1.32 daniel 11054: * info sequence
11055: */
1.55 daniel 11056: void
11057: xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 11058: {
11059: if ( seq->buffer != NULL )
1.119 daniel 11060: xmlFree(seq->buffer);
1.32 daniel 11061: xmlInitNodeInfoSeq(seq);
11062: }
11063:
11064:
1.50 daniel 11065: /**
11066: * xmlParserFindNodeInfoIndex:
11067: * @seq: a node info sequence pointer
11068: * @node: an XML node pointer
11069: *
11070: *
1.32 daniel 11071: * xmlParserFindNodeInfoIndex : Find the index that the info record for
11072: * the given node is or should be at in a sorted sequence
1.68 daniel 11073: *
11074: * Returns a long indicating the position of the record
1.32 daniel 11075: */
11076: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
11077: const xmlNode* node)
11078: {
11079: unsigned long upper, lower, middle;
11080: int found = 0;
11081:
11082: /* Do a binary search for the key */
11083: lower = 1;
11084: upper = seq->length;
11085: middle = 0;
11086: while ( lower <= upper && !found) {
11087: middle = lower + (upper - lower) / 2;
11088: if ( node == seq->buffer[middle - 1].node )
11089: found = 1;
11090: else if ( node < seq->buffer[middle - 1].node )
11091: upper = middle - 1;
11092: else
11093: lower = middle + 1;
11094: }
11095:
11096: /* Return position */
11097: if ( middle == 0 || seq->buffer[middle - 1].node < node )
11098: return middle;
11099: else
11100: return middle - 1;
11101: }
11102:
11103:
1.50 daniel 11104: /**
11105: * xmlParserAddNodeInfo:
11106: * @ctxt: an XML parser context
1.68 daniel 11107: * @info: a node info sequence pointer
1.50 daniel 11108: *
11109: * Insert node info record into the sorted sequence
1.32 daniel 11110: */
1.55 daniel 11111: void
11112: xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
1.68 daniel 11113: const xmlParserNodeInfo* info)
1.32 daniel 11114: {
11115: unsigned long pos;
11116: static unsigned int block_size = 5;
11117:
11118: /* Find pos and check to see if node is already in the sequence */
1.55 daniel 11119: pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
11120: if ( pos < ctxt->node_seq.length
11121: && ctxt->node_seq.buffer[pos].node == info->node ) {
11122: ctxt->node_seq.buffer[pos] = *info;
1.32 daniel 11123: }
11124:
11125: /* Otherwise, we need to add new node to buffer */
11126: else {
11127: /* Expand buffer by 5 if needed */
1.55 daniel 11128: if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
1.32 daniel 11129: xmlParserNodeInfo* tmp_buffer;
1.55 daniel 11130: unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
11131: *(ctxt->node_seq.maximum + block_size));
1.32 daniel 11132:
1.55 daniel 11133: if ( ctxt->node_seq.buffer == NULL )
1.119 daniel 11134: tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
1.32 daniel 11135: else
1.119 daniel 11136: tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
1.32 daniel 11137:
11138: if ( tmp_buffer == NULL ) {
1.55 daniel 11139: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 11140: ctxt->sax->error(ctxt->userData, "Out of memory\n");
1.123 daniel 11141: ctxt->errNo = XML_ERR_NO_MEMORY;
1.32 daniel 11142: return;
11143: }
1.55 daniel 11144: ctxt->node_seq.buffer = tmp_buffer;
11145: ctxt->node_seq.maximum += block_size;
1.32 daniel 11146: }
11147:
11148: /* If position is not at end, move elements out of the way */
1.55 daniel 11149: if ( pos != ctxt->node_seq.length ) {
1.32 daniel 11150: unsigned long i;
11151:
1.55 daniel 11152: for ( i = ctxt->node_seq.length; i > pos; i-- )
11153: ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
1.32 daniel 11154: }
11155:
11156: /* Copy element and increase length */
1.55 daniel 11157: ctxt->node_seq.buffer[pos] = *info;
11158: ctxt->node_seq.length++;
1.32 daniel 11159: }
11160: }
1.77 daniel 11161:
1.98 daniel 11162:
11163: /**
1.181 daniel 11164: * xmlSubstituteEntitiesDefault:
1.98 daniel 11165: * @val: int 0 or 1
11166: *
11167: * Set and return the previous value for default entity support.
11168: * Initially the parser always keep entity references instead of substituting
11169: * entity values in the output. This function has to be used to change the
11170: * default parser behaviour
11171: * SAX::subtituteEntities() has to be used for changing that on a file by
11172: * file basis.
11173: *
11174: * Returns the last value for 0 for no substitution, 1 for substitution.
11175: */
11176:
11177: int
11178: xmlSubstituteEntitiesDefault(int val) {
11179: int old = xmlSubstituteEntitiesDefaultValue;
11180:
11181: xmlSubstituteEntitiesDefaultValue = val;
1.180 daniel 11182: return(old);
11183: }
11184:
11185: /**
11186: * xmlKeepBlanksDefault:
11187: * @val: int 0 or 1
11188: *
11189: * Set and return the previous value for default blanks text nodes support.
11190: * The 1.x version of the parser used an heuristic to try to detect
11191: * ignorable white spaces. As a result the SAX callback was generating
11192: * ignorableWhitespace() callbacks instead of characters() one, and when
11193: * using the DOM output text nodes containing those blanks were not generated.
11194: * The 2.x and later version will switch to the XML standard way and
11195: * ignorableWhitespace() are only generated when running the parser in
11196: * validating mode and when the current element doesn't allow CDATA or
11197: * mixed content.
11198: * This function is provided as a way to force the standard behaviour
11199: * on 1.X libs and to switch back to the old mode for compatibility when
11200: * running 1.X client code on 2.X . Upgrade of 1.X code should be done
11201: * by using xmlIsBlankNode() commodity function to detect the "empty"
11202: * nodes generated.
11203: * This value also affect autogeneration of indentation when saving code
11204: * if blanks sections are kept, indentation is not generated.
11205: *
11206: * Returns the last value for 0 for no substitution, 1 for substitution.
11207: */
11208:
11209: int
11210: xmlKeepBlanksDefault(int val) {
11211: int old = xmlKeepBlanksDefaultValue;
11212:
11213: xmlKeepBlanksDefaultValue = val;
11214: xmlIndentTreeOutput = !val;
1.98 daniel 11215: return(old);
11216: }
1.77 daniel 11217:
Webmaster