Annotation of XML/parser.c, revision 1.232
1.1 veillard 1: /*
1.229 veillard 2: * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3: * implemented on top of the SAX interfaces
1.15 veillard 4: *
1.222 veillard 5: * References:
6: * The XML specification:
7: * http://www.w3.org/TR/REC-xml
8: * Original 1.0 version:
9: * http://www.w3.org/TR/1998/REC-xml-19980210
10: * XML second edition working draft
11: * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12: *
1.229 veillard 13: * Okay this is a big file, the parser core is around 7000 lines, then it
14: * is followed by the progressive parser top routines, then the various
15: * high level APIs to call the parser and a few miscellaneous functions.
16: * A number of helper functions and deprecated ones have been moved to
17: * parserInternals.c to reduce this file size.
18: * As much as possible the functions are associated with their relative
19: * production in the XML specification. A few productions defining the
20: * different ranges of character are actually implemented either in
21: * parserInternals.h or parserInternals.c
22: * The DOM tree build is realized from the default SAX callbacks in
23: * the module SAX.c.
24: * The routines doing the validation checks are in valid.c and called either
25: * from the SAX callbacks or as standalone functions using a preparsed
26: * document.
27: *
1.15 veillard 28: * See Copyright for the status of this software.
29: *
1.60 daniel 30: * Daniel.Veillard@w3.org
1.1 veillard 31: */
32:
1.26 daniel 33: #ifdef WIN32
1.138 daniel 34: #include "win32config.h"
1.226 veillard 35: #define XML_DIR_SEP '\\'
1.26 daniel 36: #else
1.121 daniel 37: #include "config.h"
1.226 veillard 38: #define XML_DIR_SEP '/'
1.26 daniel 39: #endif
1.121 daniel 40:
1.1 veillard 41: #include <stdio.h>
1.204 veillard 42: #include <string.h>
1.121 daniel 43: #ifdef HAVE_CTYPE_H
1.1 veillard 44: #include <ctype.h>
1.121 daniel 45: #endif
46: #ifdef HAVE_STDLIB_H
1.50 daniel 47: #include <stdlib.h>
1.121 daniel 48: #endif
49: #ifdef HAVE_SYS_STAT_H
1.9 httpng 50: #include <sys/stat.h>
1.121 daniel 51: #endif
1.9 httpng 52: #ifdef HAVE_FCNTL_H
53: #include <fcntl.h>
54: #endif
1.10 httpng 55: #ifdef HAVE_UNISTD_H
56: #include <unistd.h>
57: #endif
1.20 daniel 58: #ifdef HAVE_ZLIB_H
59: #include <zlib.h>
60: #endif
1.1 veillard 61:
1.188 daniel 62: #include <libxml/xmlmemory.h>
63: #include <libxml/tree.h>
64: #include <libxml/parser.h>
65: #include <libxml/entities.h>
66: #include <libxml/encoding.h>
67: #include <libxml/valid.h>
68: #include <libxml/parserInternals.h>
69: #include <libxml/xmlIO.h>
1.193 daniel 70: #include <libxml/uri.h>
1.122 daniel 71: #include "xml-error.h"
1.1 veillard 72:
1.140 daniel 73: #define XML_PARSER_BIG_BUFFER_SIZE 1000 /* initial size, in xmlChars, of the output buffer allocated by xmlStringDecodeEntities */
74: #define XML_PARSER_BUFFER_SIZE 100 /* headroom kept free at the end of such a buffer: it is grown once fewer than this many xmlChars remain */
75:
1.229 veillard 76: /*
77: * Various global defaults for parsing.
 * Only xmlParserDebugEntities is read in this part of the file: when it is
 * non-zero, entity input pushes/pops and entity reference decoding are
 * traced on stderr.
78: */
1.160 daniel 79: int xmlGetWarningsDefaultValue = 1;
1.220 veillard 80: int xmlParserDebugEntities = 0;
1.229 veillard 81: int xmlSubstituteEntitiesDefaultValue = 0;
82: int xmlDoValidityCheckingDefaultValue = 0;
83: int xmlPedanticParserDefaultValue = 0;
84: int xmlKeepBlanksDefaultValue = 1;
1.86 daniel 85:
1.139 daniel 86: /*
87: * List of XML prefixed PI allowed by W3C specs.
 * The array is terminated by a NULL entry.
88: */
89:
90: const char *xmlW3CPIs[] = {
91: "xml-stylesheet",
92: NULL
93: };
1.91 daniel 94:
1.229 veillard 95: /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
1.151 daniel 96: void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
97: xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
98: const xmlChar **str);
1.91 daniel 99:
100:
1.45 daniel 101: /************************************************************************
102: * *
103: * Parser stacks related functions and macros *
104: * *
105: ************************************************************************/
1.79 daniel 106:
1.135 daniel 107: xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
108: const xmlChar ** str);
1.79 daniel 109:
1.1 veillard 110: /*
1.40 daniel 111: * Generic function for accessing stacks in the Parser Context
1.1 veillard 112: */
113:
1.140 daniel 114: #define PUSH_AND_POP(scope, type, name) \
115: scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
1.31 daniel 116: if (ctxt->name##Nr >= ctxt->name##Max) { \
117: ctxt->name##Max *= 2; \
1.204 veillard 118: ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
1.40 daniel 119: ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
120: if (ctxt->name##Tab == NULL) { \
1.31 daniel 121: fprintf(stderr, "realloc failed !\n"); \
1.145 daniel 122: return(0); \
1.31 daniel 123: } \
124: } \
1.40 daniel 125: ctxt->name##Tab[ctxt->name##Nr] = value; \
126: ctxt->name = value; \
127: return(ctxt->name##Nr++); \
1.31 daniel 128: } \
1.140 daniel 129: scope type name##Pop(xmlParserCtxtPtr ctxt) { \
1.69 daniel 130: type ret; \
1.40 daniel 131: if (ctxt->name##Nr <= 0) return(0); \
132: ctxt->name##Nr--; \
1.50 daniel 133: if (ctxt->name##Nr > 0) \
134: ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
135: else \
136: ctxt->name = NULL; \
1.69 daniel 137: ret = ctxt->name##Tab[ctxt->name##Nr]; \
138: ctxt->name##Tab[ctxt->name##Nr] = 0; \
139: return(ret); \
1.31 daniel 140: } \
141:
1.229 veillard 142: /*
143: * Those macros actually generate the functions
144: */
1.140 daniel 145: PUSH_AND_POP(extern, xmlParserInputPtr, input)
146: PUSH_AND_POP(extern, xmlNodePtr, node)
147: PUSH_AND_POP(extern, xmlChar*, name)
1.40 daniel 148:
1.176 daniel 149: int spacePush(xmlParserCtxtPtr ctxt, int val) {
150: if (ctxt->spaceNr >= ctxt->spaceMax) {
151: ctxt->spaceMax *= 2;
1.204 veillard 152: ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1.176 daniel 153: ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
154: if (ctxt->spaceTab == NULL) {
155: fprintf(stderr, "realloc failed !\n");
156: return(0);
157: }
158: }
159: ctxt->spaceTab[ctxt->spaceNr] = val;
160: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
161: return(ctxt->spaceNr++);
162: }
163:
164: int spacePop(xmlParserCtxtPtr ctxt) {
165: int ret;
166: if (ctxt->spaceNr <= 0) return(0);
167: ctxt->spaceNr--;
168: if (ctxt->spaceNr > 0)
169: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
170: else
171: ctxt->space = NULL;
172: ret = ctxt->spaceTab[ctxt->spaceNr];
173: ctxt->spaceTab[ctxt->spaceNr] = -1;
174: return(ret);
175: }
176:
1.55 daniel 177: /*
178: * Macros for accessing the content. Those should be used only by the parser,
179: * and not exported. They all expect a variable named ctxt to be in scope.
180: *
1.229 veillard 181: * Dirty macros, i.e. one often needs to make assumptions about the context
182: * to use them
1.55 daniel 183: *
1.123 daniel 184: * CUR_PTR returns the current pointer to the xmlChar to be parsed.
1.151 daniel 185: * To be used with extreme caution since operations consuming
186: * characters may move the input buffer to a different location !
1.123 daniel 187: * CUR returns ctxt->token when it is non-zero, otherwise the current
1.151 daniel 188: * xmlChar of the input. This should be used internally by the parser
1.55 daniel 189: * only to compare to ASCII values otherwise it would break when
190: * running with UTF-8 encoding.
1.229 veillard 191: * RAW like CUR but never yields the pending token: it evaluates to -1
192: * while ctxt->token is non-zero, else to the current input xmlChar
1.123 daniel 193: * NXT(n) returns the n'th next xmlChar. Same as CUR, it should be used only
1.55 daniel 194: * to compare on ASCII based substring.
1.123 daniel 195: * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1.55 daniel 196: * strings within the parser. It also adds n to ctxt->nbChars, hands a
 * following '%' to xmlParserHandlePEReference and pops the input
 * when it is exhausted and cannot be grown.
197: *
1.77 daniel 198: * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1.55 daniel 199: *
200: * NEXT Skip to the next character, this does the proper decoding
201: * in UTF-8 mode. It also pop-up unfinished entities on the fly.
1.229 veillard 202: * NEXTL(l) Skip l xmlChars in the input buffer, updating line/col and
 * clearing ctxt->token
203: * CUR_CHAR(l) returns the current unicode character (int), set l
204: * to the number of xmlChars used for the encoding [0-5].
205: * CUR_SCHAR same but operate on a string instead of the context
206: * COPY_BUF copy the current unicode char to the target buffer, increment
207: * the index (a direct store when l is 1, xmlCopyChar otherwise)
208: * GROW, SHRINK handling of input buffers
 *
 * NOTE: SKIP, SHRINK, GROW, NEXTL and COPY_BUF expand to several statements
 * (or a bare if/else) without a do { } while (0) wrapper, so they must not
 * be used as the unbraced body of an if, else or loop. SKIP_BLANKS, NEXT,
 * CUR_CHAR, CUR_SCHAR and COPY_BUF already end with a semicolon.
1.55 daniel 209: */
210:
1.152 daniel 211: #define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
1.97 daniel 212: #define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
1.55 daniel 213: #define NXT(val) ctxt->input->cur[(val)]
214: #define CUR_PTR ctxt->input->cur
1.154 daniel 215:
1.164 daniel 216: #define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val); \
217: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
1.229 veillard 218: /* DEPR if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); */\
1.168 daniel 219: if ((*ctxt->input->cur == 0) && \
220: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
221: xmlPopInput(ctxt)
1.164 daniel 222:
1.97 daniel 223: #define SHRINK xmlParserInputShrink(ctxt->input); \
224: if ((*ctxt->input->cur == 0) && \
225: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
226: xmlPopInput(ctxt)
227:
228: #define GROW xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
229: if ((*ctxt->input->cur == 0) && \
230: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
231: xmlPopInput(ctxt)
1.55 daniel 232:
1.155 daniel 233: #define SKIP_BLANKS xmlSkipBlankChars(ctxt);
1.154 daniel 234:
1.151 daniel 235: #define NEXT xmlNextChar(ctxt);
1.154 daniel 236:
1.153 daniel 237: #define NEXTL(l) \
238: if (*(ctxt->input->cur) == '\n') { \
239: ctxt->input->line++; ctxt->input->col = 1; \
240: } else ctxt->input->col++; \
1.154 daniel 241: ctxt->token = 0; ctxt->input->cur += l; \
242: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
1.229 veillard 243: /* DEPR if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); */
1.154 daniel 244:
1.152 daniel 245: #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l);
1.162 daniel 246: #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l);
1.154 daniel 247:
1.152 daniel 248: #define COPY_BUF(l,b,i,v) \
249: if (l == 1) b[i++] = (xmlChar) v; \
250: else i += xmlCopyChar(l,&b[i],v);
1.151 daniel 251:
252: /**
1.229 veillard 253: * xmlSkipBlankChars:
1.151 daniel 254: * @ctxt: the XML parser context
255: *
1.229 veillard 256: * skip all blanks character found at that point in the input streams.
257: * It pops up finished entities in the process if allowable at that point.
258: *
259: * Returns the number of space chars skipped
1.151 daniel 260: */
1.55 daniel 261:
1.229 veillard 262: int
263: xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
264: int cur, res = 0;
1.201 daniel 265:
1.176 daniel 266: /*
1.229 veillard 267: * It's Okay to use CUR/NEXT here since all the blanks are on
268: * the ASCII range.
269: */
270: do {
271: cur = CUR;
272: while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
273: NEXT;
274: cur = CUR;
275: res++;
1.151 daniel 276: }
1.229 veillard 277: while ((cur == 0) && (ctxt->inputNr > 1) &&
278: (ctxt->instate != XML_PARSER_COMMENT)) {
1.168 daniel 279: xmlPopInput(ctxt);
1.229 veillard 280: cur = CUR;
281: }
1.222 veillard 282: /*
283: * Need to handle support of entities branching here
284: */
1.155 daniel 285: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1.229 veillard 286: /* DEPR if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); */
1.222 veillard 287: } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1.155 daniel 288: return(res);
1.152 daniel 289: }
290:
1.97 daniel 291: /************************************************************************
292: * *
1.229 veillard 293: * Commodity functions to handle entities *
1.97 daniel 294: * *
295: ************************************************************************/
1.40 daniel 296:
1.50 daniel 297: /**
298: * xmlPopInput:
299: * @ctxt: an XML parser context
300: *
1.40 daniel 301: * xmlPopInput: the current input pointed by ctxt->input came to an end
302: * pop it and return the next char.
1.45 daniel 303: *
1.123 daniel 304: * Returns the current xmlChar in the parser context
1.40 daniel 305: */
1.123 daniel 306: xmlChar
1.55 daniel 307: xmlPopInput(xmlParserCtxtPtr ctxt) {
1.40 daniel 308: if (ctxt->inputNr == 1) return(0); /* End of main Input */
1.220 veillard 309: if (xmlParserDebugEntities)
310: fprintf(stderr, "Popping input %d\n", ctxt->inputNr);
1.69 daniel 311: xmlFreeInputStream(inputPop(ctxt));
1.97 daniel 312: if ((*ctxt->input->cur == 0) &&
313: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
314: return(xmlPopInput(ctxt));
1.40 daniel 315: return(CUR);
316: }
317:
1.50 daniel 318: /**
1.229 veillard 319: * xmlPushInput:
1.174 daniel 320: * @ctxt: an XML parser context
1.229 veillard 321: * @input: an XML parser input fragment (entity, XML fragment ...).
1.174 daniel 322: *
1.229 veillard 323: * xmlPushInput: switch to a new input stream which is stacked on top
324: * of the previous one(s).
1.174 daniel 325: */
1.229 veillard 326: void
327: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
328: if (input == NULL) return;
1.174 daniel 329:
1.229 veillard 330: if (xmlParserDebugEntities) {
331: if ((ctxt->input != NULL) && (ctxt->input->filename))
332: fprintf(stderr, "%s(%d): ", ctxt->input->filename,
333: ctxt->input->line);
334: fprintf(stderr, "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
335: }
336: inputPush(ctxt, input);
337: GROW;
1.174 daniel 338: }
1.97 daniel 339:
340: /**
341: * xmlParseCharRef:
342: * @ctxt: an XML parser context
343: *
344: * parse Reference declarations
345: *
346: * [66] CharRef ::= '&#' [0-9]+ ';' |
347: * '&#x' [0-9a-fA-F]+ ';'
348: *
1.98 daniel 349: * [ WFC: Legal Character ]
350: * Characters referred to using character references must match the
351: * production for Char.
352: *
1.135 daniel 353: * Returns the value parsed (as an int), 0 in case of error
1.77 daniel 354: */
1.97 daniel 355: int
356: xmlParseCharRef(xmlParserCtxtPtr ctxt) {
357: int val = 0;
1.222 veillard 358: int count = 0;
1.97 daniel 359:
1.111 daniel 360: if (ctxt->token != 0) {
361: val = ctxt->token;
362: ctxt->token = 0;
363: return(val);
364: }
1.222 veillard 365: /*
366: * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
367: */
1.152 daniel 368: if ((RAW == '&') && (NXT(1) == '#') &&
1.97 daniel 369: (NXT(2) == 'x')) {
370: SKIP(3);
1.222 veillard 371: GROW;
372: while (RAW != ';') { /* loop blocked by count */
373: if ((RAW >= '0') && (RAW <= '9') && (count < 20))
1.97 daniel 374: val = val * 16 + (CUR - '0');
1.222 veillard 375: else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1.97 daniel 376: val = val * 16 + (CUR - 'a') + 10;
1.222 veillard 377: else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1.97 daniel 378: val = val * 16 + (CUR - 'A') + 10;
379: else {
1.123 daniel 380: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1.97 daniel 381: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
382: ctxt->sax->error(ctxt->userData,
383: "xmlParseCharRef: invalid hexadecimal value\n");
384: ctxt->wellFormed = 0;
1.180 daniel 385: ctxt->disableSAX = 1;
1.97 daniel 386: val = 0;
387: break;
388: }
389: NEXT;
1.222 veillard 390: count++;
1.97 daniel 391: }
1.164 daniel 392: if (RAW == ';') {
393: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
394: ctxt->nbChars ++;
395: ctxt->input->cur++;
396: }
1.152 daniel 397: } else if ((RAW == '&') && (NXT(1) == '#')) {
1.97 daniel 398: SKIP(2);
1.222 veillard 399: GROW;
400: while (RAW != ';') { /* loop blocked by count */
401: if ((RAW >= '0') && (RAW <= '9') && (count < 20))
1.97 daniel 402: val = val * 10 + (CUR - '0');
403: else {
1.123 daniel 404: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1.97 daniel 405: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
406: ctxt->sax->error(ctxt->userData,
407: "xmlParseCharRef: invalid decimal value\n");
408: ctxt->wellFormed = 0;
1.180 daniel 409: ctxt->disableSAX = 1;
1.97 daniel 410: val = 0;
411: break;
412: }
413: NEXT;
1.222 veillard 414: count++;
1.97 daniel 415: }
1.164 daniel 416: if (RAW == ';') {
417: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
418: ctxt->nbChars ++;
419: ctxt->input->cur++;
420: }
1.97 daniel 421: } else {
1.123 daniel 422: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1.97 daniel 423: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.229 veillard 424: ctxt->sax->error(ctxt->userData,
425: "xmlParseCharRef: invalid value\n");
1.97 daniel 426: ctxt->wellFormed = 0;
1.180 daniel 427: ctxt->disableSAX = 1;
1.97 daniel 428: }
1.229 veillard 429:
430: /*
431: * [ WFC: Legal Character ]
432: * Characters referred to using character references must match the
433: * production for Char.
434: */
435: if (IS_CHAR(val)) {
436: return(val);
437: } else {
438: ctxt->errNo = XML_ERR_INVALID_CHAR;
1.97 daniel 439: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.229 veillard 440: ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
441: val);
1.97 daniel 442: ctxt->wellFormed = 0;
1.180 daniel 443: ctxt->disableSAX = 1;
1.97 daniel 444: }
1.229 veillard 445: return(0);
446: }
447:
448: /**
449: * xmlParseStringCharRef:
450: * @ctxt: an XML parser context
451: * @str: a pointer to an index in the string
452: *
453: * parse Reference declarations, variant parsing from a string rather
454: * than an an input flow.
455: *
456: * [66] CharRef ::= '&#' [0-9]+ ';' |
457: * '&#x' [0-9a-fA-F]+ ';'
458: *
459: * [ WFC: Legal Character ]
460: * Characters referred to using character references must match the
461: * production for Char.
462: *
463: * Returns the value parsed (as an int), 0 in case of error, str will be
464: * updated to the current value of the index
465: */
466: int
467: xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
468: const xmlChar *ptr;
469: xmlChar cur;
470: int val = 0;
1.98 daniel 471:
1.229 veillard 472: if ((str == NULL) || (*str == NULL)) return(0);
473: ptr = *str;
474: cur = *ptr;
475: if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
476: ptr += 3;
477: cur = *ptr;
478: while (cur != ';') { /* Non input consuming loop */
479: if ((cur >= '0') && (cur <= '9'))
480: val = val * 16 + (cur - '0');
481: else if ((cur >= 'a') && (cur <= 'f'))
482: val = val * 16 + (cur - 'a') + 10;
483: else if ((cur >= 'A') && (cur <= 'F'))
484: val = val * 16 + (cur - 'A') + 10;
485: else {
486: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
487: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
488: ctxt->sax->error(ctxt->userData,
489: "xmlParseStringCharRef: invalid hexadecimal value\n");
490: ctxt->wellFormed = 0;
491: ctxt->disableSAX = 1;
492: val = 0;
493: break;
494: }
495: ptr++;
496: cur = *ptr;
497: }
498: if (cur == ';')
499: ptr++;
500: } else if ((cur == '&') && (ptr[1] == '#')){
501: ptr += 2;
502: cur = *ptr;
503: while (cur != ';') { /* Non input consuming loops */
504: if ((cur >= '0') && (cur <= '9'))
505: val = val * 10 + (cur - '0');
506: else {
507: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
508: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
509: ctxt->sax->error(ctxt->userData,
510: "xmlParseStringCharRef: invalid decimal value\n");
511: ctxt->wellFormed = 0;
512: ctxt->disableSAX = 1;
513: val = 0;
514: break;
515: }
516: ptr++;
517: cur = *ptr;
518: }
519: if (cur == ';')
520: ptr++;
521: } else {
522: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1.97 daniel 523: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.229 veillard 524: ctxt->sax->error(ctxt->userData,
525: "xmlParseCharRef: invalid value\n");
1.97 daniel 526: ctxt->wellFormed = 0;
1.180 daniel 527: ctxt->disableSAX = 1;
1.229 veillard 528: return(0);
1.97 daniel 529: }
1.229 veillard 530: *str = ptr;
1.98 daniel 531:
532: /*
1.229 veillard 533: * [ WFC: Legal Character ]
534: * Characters referred to using character references must match the
535: * production for Char.
1.98 daniel 536: */
1.229 veillard 537: if (IS_CHAR(val)) {
538: return(val);
539: } else {
540: ctxt->errNo = XML_ERR_INVALID_CHAR;
1.98 daniel 541: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.229 veillard 542: ctxt->sax->error(ctxt->userData,
543: "CharRef: invalid xmlChar value %d\n", val);
1.98 daniel 544: ctxt->wellFormed = 0;
1.180 daniel 545: ctxt->disableSAX = 1;
1.98 daniel 546: }
1.229 veillard 547: return(0);
1.96 daniel 548: }
549:
550: /**
551: * xmlParserHandlePEReference:
552: * @ctxt: the parser context
553: *
554: * [69] PEReference ::= '%' Name ';'
555: *
1.98 daniel 556: * [ WFC: No Recursion ]
1.229 veillard 557: * A parsed entity must not contain a recursive
1.98 daniel 558: * reference to itself, either directly or indirectly.
559: *
560: * [ WFC: Entity Declared ]
561: * In a document without any DTD, a document with only an internal DTD
562: * subset which contains no parameter entity references, or a document
563: * with "standalone='yes'", ... ... The declaration of a parameter
564: * entity must precede any reference to it...
565: *
566: * [ VC: Entity Declared ]
567: * In a document with an external subset or external parameter entities
568: * with "standalone='no'", ... ... The declaration of a parameter entity
569: * must precede any reference to it...
570: *
571: * [ WFC: In DTD ]
572: * Parameter-entity references may only appear in the DTD.
573: * NOTE: misleading but this is handled.
574: *
575: * A PEReference may have been detected in the current input stream
1.96 daniel 576: * the handling is done accordingly to
577: * http://www.w3.org/TR/REC-xml#entproc
578: * i.e.
579: * - Included in literal in entity values
580: * - Included as Paraemeter Entity reference within DTDs
581: */
582: void
583: xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 584: xmlChar *name;
1.96 daniel 585: xmlEntityPtr entity = NULL;
586: xmlParserInputPtr input;
587:
1.126 daniel 588: if (ctxt->token != 0) {
589: return;
590: }
1.152 daniel 591: if (RAW != '%') return;
1.96 daniel 592: switch(ctxt->instate) {
1.109 daniel 593: case XML_PARSER_CDATA_SECTION:
594: return;
1.97 daniel 595: case XML_PARSER_COMMENT:
596: return;
1.140 daniel 597: case XML_PARSER_START_TAG:
598: return;
599: case XML_PARSER_END_TAG:
600: return;
1.96 daniel 601: case XML_PARSER_EOF:
1.123 daniel 602: ctxt->errNo = XML_ERR_PEREF_AT_EOF;
1.96 daniel 603: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
604: ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
605: ctxt->wellFormed = 0;
1.180 daniel 606: ctxt->disableSAX = 1;
1.96 daniel 607: return;
608: case XML_PARSER_PROLOG:
1.140 daniel 609: case XML_PARSER_START:
610: case XML_PARSER_MISC:
1.123 daniel 611: ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
1.96 daniel 612: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
613: ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
614: ctxt->wellFormed = 0;
1.180 daniel 615: ctxt->disableSAX = 1;
1.96 daniel 616: return;
1.97 daniel 617: case XML_PARSER_ENTITY_DECL:
1.96 daniel 618: case XML_PARSER_CONTENT:
619: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 620: case XML_PARSER_PI:
1.168 daniel 621: case XML_PARSER_SYSTEM_LITERAL:
1.96 daniel 622: /* we just ignore it there */
623: return;
624: case XML_PARSER_EPILOG:
1.123 daniel 625: ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
1.96 daniel 626: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.97 daniel 627: ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
1.96 daniel 628: ctxt->wellFormed = 0;
1.180 daniel 629: ctxt->disableSAX = 1;
1.96 daniel 630: return;
1.97 daniel 631: case XML_PARSER_ENTITY_VALUE:
632: /*
633: * NOTE: in the case of entity values, we don't do the
1.127 daniel 634: * substitution here since we need the literal
1.97 daniel 635: * entity value to be able to save the internal
636: * subset of the document.
1.222 veillard 637: * This will be handled by xmlStringDecodeEntities
1.97 daniel 638: */
639: return;
1.96 daniel 640: case XML_PARSER_DTD:
1.98 daniel 641: /*
642: * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
643: * In the internal DTD subset, parameter-entity references
644: * can occur only where markup declarations can occur, not
645: * within markup declarations.
646: * In that case this is handled in xmlParseMarkupDecl
647: */
648: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
649: return;
1.96 daniel 650: }
651:
652: NEXT;
653: name = xmlParseName(ctxt);
1.220 veillard 654: if (xmlParserDebugEntities)
655: fprintf(stderr, "PE Reference: %s\n", name);
1.96 daniel 656: if (name == NULL) {
1.123 daniel 657: ctxt->errNo = XML_ERR_PEREF_NO_NAME;
1.96 daniel 658: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
659: ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
660: ctxt->wellFormed = 0;
1.180 daniel 661: ctxt->disableSAX = 1;
1.96 daniel 662: } else {
1.152 daniel 663: if (RAW == ';') {
1.96 daniel 664: NEXT;
1.98 daniel 665: if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
666: entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.96 daniel 667: if (entity == NULL) {
1.98 daniel 668:
669: /*
670: * [ WFC: Entity Declared ]
671: * In a document without any DTD, a document with only an
672: * internal DTD subset which contains no parameter entity
673: * references, or a document with "standalone='yes'", ...
674: * ... The declaration of a parameter entity must precede
675: * any reference to it...
676: */
677: if ((ctxt->standalone == 1) ||
678: ((ctxt->hasExternalSubset == 0) &&
679: (ctxt->hasPErefs == 0))) {
680: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
681: ctxt->sax->error(ctxt->userData,
682: "PEReference: %%%s; not found\n", name);
683: ctxt->wellFormed = 0;
1.180 daniel 684: ctxt->disableSAX = 1;
1.98 daniel 685: } else {
686: /*
687: * [ VC: Entity Declared ]
688: * In a document with an external subset or external
689: * parameter entities with "standalone='no'", ...
690: * ... The declaration of a parameter entity must precede
691: * any reference to it...
692: */
1.220 veillard 693: if ((!ctxt->disableSAX) &&
694: (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
1.212 veillard 695: ctxt->vctxt.error(ctxt->vctxt.userData,
696: "PEReference: %%%s; not found\n", name);
1.220 veillard 697: } else if ((!ctxt->disableSAX) &&
698: (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1.98 daniel 699: ctxt->sax->warning(ctxt->userData,
700: "PEReference: %%%s; not found\n", name);
701: ctxt->valid = 0;
702: }
1.96 daniel 703: } else {
1.159 daniel 704: if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
705: (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
1.96 daniel 706: /*
1.229 veillard 707: * handle the extra spaces added before and after
1.96 daniel 708: * c.f. http://www.w3.org/TR/REC-xml#as-PE
1.229 veillard 709: * this is done independantly.
1.96 daniel 710: */
711: input = xmlNewEntityInputStream(ctxt, entity);
712: xmlPushInput(ctxt, input);
1.164 daniel 713: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
714: (RAW == '<') && (NXT(1) == '?') &&
715: (NXT(2) == 'x') && (NXT(3) == 'm') &&
716: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 717: xmlParseTextDecl(ctxt);
1.164 daniel 718: }
719: if (ctxt->token == 0)
720: ctxt->token = ' ';
1.96 daniel 721: } else {
722: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
723: ctxt->sax->error(ctxt->userData,
724: "xmlHandlePEReference: %s is not a parameter entity\n",
725: name);
726: ctxt->wellFormed = 0;
1.180 daniel 727: ctxt->disableSAX = 1;
1.96 daniel 728: }
729: }
730: } else {
1.123 daniel 731: ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
1.96 daniel 732: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
733: ctxt->sax->error(ctxt->userData,
734: "xmlHandlePEReference: expecting ';'\n");
735: ctxt->wellFormed = 0;
1.180 daniel 736: ctxt->disableSAX = 1;
1.96 daniel 737: }
1.119 daniel 738: xmlFree(name);
1.97 daniel 739: }
740: }
741:
/*
 * Macro used to grow the current buffer.
 *
 * Doubles the allocation of @buffer, whose size in xmlChars is tracked in
 * the companion variable <buffer>_size. If the reallocation fails, the old
 * buffer is released, @buffer is set to NULL and the ENCLOSING FUNCTION
 * returns NULL; <buffer>_size is only updated on success.
 * The expansion is a braced block, so it can be followed by a ';'.
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_new = (xmlChar *)					\
	xmlRealloc(buffer, 2 * buffer##_size * sizeof(xmlChar));	\
    if (buffer##_new == NULL) {						\
	perror("realloc failed");					\
	/* the old block is still allocated: do not leak it */		\
	xmlFree(buffer);						\
	buffer = NULL;							\
	return(NULL);							\
    }									\
    buffer = buffer##_new;						\
    buffer##_size *= 2;							\
}
1.77 daniel 754:
755: /**
1.135 daniel 756: * xmlStringDecodeEntities:
757: * @ctxt: the parser context
758: * @str: the input string
759: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
760: * @end: an end marker xmlChar, 0 if none
761: * @end2: an end marker xmlChar, 0 if none
762: * @end3: an end marker xmlChar, 0 if none
763: *
1.222 veillard 764: * Takes a entity string content and process to do the adequate subtitutions.
765: *
1.135 daniel 766: * [67] Reference ::= EntityRef | CharRef
767: *
768: * [69] PEReference ::= '%' Name ';'
769: *
770: * Returns A newly allocated string with the substitution done. The caller
771: * must deallocate it !
772: */
773: xmlChar *
774: xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
775: xmlChar end, xmlChar end2, xmlChar end3) {
776: xmlChar *buffer = NULL;
777: int buffer_size = 0;
778:
779: xmlChar *current = NULL;
780: xmlEntityPtr ent;
1.176 daniel 781: int c,l;
782: int nbchars = 0;
1.135 daniel 783:
1.211 veillard 784: if (str == NULL)
785: return(NULL);
786:
1.185 daniel 787: if (ctxt->depth > 40) {
1.230 veillard 788: ctxt->errNo = XML_ERR_ENTITY_LOOP;
1.185 daniel 789: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
790: ctxt->sax->error(ctxt->userData,
791: "Detected entity reference loop\n");
792: ctxt->wellFormed = 0;
793: ctxt->disableSAX = 1;
794: return(NULL);
795: }
796:
1.135 daniel 797: /*
798: * allocate a translation buffer.
799: */
1.140 daniel 800: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.135 daniel 801: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
802: if (buffer == NULL) {
803: perror("xmlDecodeEntities: malloc failed");
804: return(NULL);
805: }
806:
807: /*
808: * Ok loop until we reach one of the ending char or a size limit.
1.222 veillard 809: * we are operating on already parsed values.
1.135 daniel 810: */
1.176 daniel 811: c = CUR_SCHAR(str, l);
1.222 veillard 812: while ((c != 0) && (c != end) && /* non input consuming loop */
813: (c != end2) && (c != end3)) {
1.135 daniel 814:
1.176 daniel 815: if (c == 0) break;
816: if ((c == '&') && (str[1] == '#')) {
1.135 daniel 817: int val = xmlParseStringCharRef(ctxt, &str);
1.176 daniel 818: if (val != 0) {
819: COPY_BUF(0,buffer,nbchars,val);
820: }
821: } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1.220 veillard 822: if (xmlParserDebugEntities)
823: fprintf(stderr, "String decoding Entity Reference: %.30s\n",
824: str);
1.135 daniel 825: ent = xmlParseStringEntityRef(ctxt, &str);
1.222 veillard 826: if ((ent != NULL) &&
827: (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1.219 veillard 828: if (ent->content != NULL) {
829: COPY_BUF(0,buffer,nbchars,ent->content[0]);
830: } else {
831: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
832: ctxt->sax->error(ctxt->userData,
833: "internal error entity has no content\n");
834: }
835: } else if ((ent != NULL) && (ent->content != NULL)) {
1.185 daniel 836: xmlChar *rep;
837:
838: ctxt->depth++;
839: rep = xmlStringDecodeEntities(ctxt, ent->content, what,
840: 0, 0, 0);
841: ctxt->depth--;
842: if (rep != NULL) {
843: current = rep;
1.222 veillard 844: while (*current != 0) { /* non input consuming loop */
1.185 daniel 845: buffer[nbchars++] = *current++;
846: if (nbchars >
847: buffer_size - XML_PARSER_BUFFER_SIZE) {
848: growBuffer(buffer);
849: }
1.135 daniel 850: }
1.185 daniel 851: xmlFree(rep);
1.135 daniel 852: }
853: } else if (ent != NULL) {
854: int i = xmlStrlen(ent->name);
855: const xmlChar *cur = ent->name;
856:
1.176 daniel 857: buffer[nbchars++] = '&';
858: if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 859: growBuffer(buffer);
860: }
861: for (;i > 0;i--)
1.176 daniel 862: buffer[nbchars++] = *cur++;
863: buffer[nbchars++] = ';';
1.135 daniel 864: }
1.176 daniel 865: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1.220 veillard 866: if (xmlParserDebugEntities)
867: fprintf(stderr, "String decoding PE Reference: %.30s\n", str);
1.135 daniel 868: ent = xmlParseStringPEReference(ctxt, &str);
869: if (ent != NULL) {
1.185 daniel 870: xmlChar *rep;
871:
872: ctxt->depth++;
873: rep = xmlStringDecodeEntities(ctxt, ent->content, what,
874: 0, 0, 0);
875: ctxt->depth--;
876: if (rep != NULL) {
877: current = rep;
1.222 veillard 878: while (*current != 0) { /* non input consuming loop */
1.185 daniel 879: buffer[nbchars++] = *current++;
880: if (nbchars >
881: buffer_size - XML_PARSER_BUFFER_SIZE) {
882: growBuffer(buffer);
883: }
1.135 daniel 884: }
1.185 daniel 885: xmlFree(rep);
1.135 daniel 886: }
887: }
888: } else {
1.176 daniel 889: COPY_BUF(l,buffer,nbchars,c);
890: str += l;
891: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 892: growBuffer(buffer);
893: }
894: }
1.176 daniel 895: c = CUR_SCHAR(str, l);
1.135 daniel 896: }
1.229 veillard 897: buffer[nbchars++] = 0;
898: return(buffer);
1.172 daniel 899: }
900:
1.229 veillard 901:
902: /************************************************************************
903: * *
1.123 daniel 904: * Commodity functions to handle xmlChars *
1.28 daniel 905: * *
906: ************************************************************************/
907:
1.50 daniel 908: /**
909: * xmlStrndup:
1.123 daniel 910: * @cur: the input xmlChar *
1.50 daniel 911: * @len: the len of @cur
912: *
1.123 daniel 913: * a strndup for array of xmlChar's
1.68 daniel 914: *
1.123 daniel 915: * Returns a new xmlChar * or NULL
1.1 veillard 916: */
1.123 daniel 917: xmlChar *
918: xmlStrndup(const xmlChar *cur, int len) {
1.135 daniel 919: xmlChar *ret;
920:
921: if ((cur == NULL) || (len < 0)) return(NULL);
1.204 veillard 922: ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1.1 veillard 923: if (ret == NULL) {
1.86 daniel 924: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 925: (len + 1) * (long)sizeof(xmlChar));
1.1 veillard 926: return(NULL);
927: }
1.123 daniel 928: memcpy(ret, cur, len * sizeof(xmlChar));
1.1 veillard 929: ret[len] = 0;
930: return(ret);
931: }
932:
1.50 daniel 933: /**
934: * xmlStrdup:
1.123 daniel 935: * @cur: the input xmlChar *
1.50 daniel 936: *
1.152 daniel 937: * a strdup for array of xmlChar's. Since they are supposed to be
938: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
939: * a termination mark of '0'.
1.68 daniel 940: *
1.123 daniel 941: * Returns a new xmlChar * or NULL
1.1 veillard 942: */
1.123 daniel 943: xmlChar *
944: xmlStrdup(const xmlChar *cur) {
945: const xmlChar *p = cur;
1.1 veillard 946:
1.135 daniel 947: if (cur == NULL) return(NULL);
1.222 veillard 948: while (*p != 0) p++; /* non input consuming */
1.1 veillard 949: return(xmlStrndup(cur, p - cur));
950: }
951:
1.50 daniel 952: /**
953: * xmlCharStrndup:
954: * @cur: the input char *
955: * @len: the len of @cur
956: *
1.123 daniel 957: * a strndup for char's to xmlChar's
1.68 daniel 958: *
1.123 daniel 959: * Returns a new xmlChar * or NULL
1.45 daniel 960: */
961:
1.123 daniel 962: xmlChar *
1.55 daniel 963: xmlCharStrndup(const char *cur, int len) {
1.45 daniel 964: int i;
1.135 daniel 965: xmlChar *ret;
966:
967: if ((cur == NULL) || (len < 0)) return(NULL);
1.204 veillard 968: ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1.45 daniel 969: if (ret == NULL) {
1.86 daniel 970: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 971: (len + 1) * (long)sizeof(xmlChar));
1.45 daniel 972: return(NULL);
973: }
974: for (i = 0;i < len;i++)
1.123 daniel 975: ret[i] = (xmlChar) cur[i];
1.45 daniel 976: ret[len] = 0;
977: return(ret);
978: }
979:
1.50 daniel 980: /**
981: * xmlCharStrdup:
982: * @cur: the input char *
983: * @len: the len of @cur
984: *
1.123 daniel 985: * a strdup for char's to xmlChar's
1.68 daniel 986: *
1.123 daniel 987: * Returns a new xmlChar * or NULL
1.45 daniel 988: */
989:
1.123 daniel 990: xmlChar *
1.55 daniel 991: xmlCharStrdup(const char *cur) {
1.45 daniel 992: const char *p = cur;
993:
1.135 daniel 994: if (cur == NULL) return(NULL);
1.222 veillard 995: while (*p != '\0') p++; /* non input consuming */
1.45 daniel 996: return(xmlCharStrndup(cur, p - cur));
997: }
998:
1.50 daniel 999: /**
1000: * xmlStrcmp:
1.123 daniel 1001: * @str1: the first xmlChar *
1002: * @str2: the second xmlChar *
1.50 daniel 1003: *
1.123 daniel 1004: * a strcmp for xmlChar's
1.68 daniel 1005: *
1006: * Returns the integer result of the comparison
1.14 veillard 1007: */
1008:
1.55 daniel 1009: int
1.123 daniel 1010: xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1.14 veillard 1011: register int tmp;
1012:
1.229 veillard 1013: if (str1 == str2) return(0);
1.135 daniel 1014: if (str1 == NULL) return(-1);
1015: if (str2 == NULL) return(1);
1.14 veillard 1016: do {
1.232 ! veillard 1017: tmp = *str1++ - *str2;
1.14 veillard 1018: if (tmp != 0) return(tmp);
1.232 ! veillard 1019: } while (*str2++ != 0);
! 1020: return 0;
1.14 veillard 1021: }
1022:
1.50 daniel 1023: /**
1024: * xmlStrncmp:
1.123 daniel 1025: * @str1: the first xmlChar *
1026: * @str2: the second xmlChar *
1.50 daniel 1027: * @len: the max comparison length
1028: *
1.123 daniel 1029: * a strncmp for xmlChar's
1.68 daniel 1030: *
1031: * Returns the integer result of the comparison
1.14 veillard 1032: */
1033:
1.55 daniel 1034: int
1.123 daniel 1035: xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1.14 veillard 1036: register int tmp;
1037:
1038: if (len <= 0) return(0);
1.232 ! veillard 1039: if (str1 == str2) return(0);
1.135 daniel 1040: if (str1 == NULL) return(-1);
1041: if (str2 == NULL) return(1);
1.14 veillard 1042: do {
1.232 ! veillard 1043: tmp = *str1++ - *str2;
! 1044: if (tmp != 0 || --len == 0) return(tmp);
! 1045: } while (*str2++ != 0);
! 1046: return 0;
! 1047: }
! 1048:
! 1049: static xmlChar casemap[256] = {
! 1050: 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
! 1051: 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
! 1052: 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
! 1053: 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
! 1054: 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
! 1055: 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
! 1056: 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
! 1057: 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
! 1058: 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
! 1059: 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
! 1060: 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
! 1061: 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
! 1062: 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
! 1063: 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
! 1064: 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
! 1065: 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
! 1066: 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
! 1067: 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
! 1068: 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
! 1069: 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
! 1070: 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
! 1071: 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
! 1072: 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
! 1073: 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
! 1074: 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
! 1075: 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
! 1076: 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
! 1077: 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
! 1078: 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
! 1079: 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
! 1080: 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
! 1081: 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
! 1082: };
! 1083:
! 1084: /**
! 1085: * xmlStrcasecmp:
! 1086: * @str1: the first xmlChar *
! 1087: * @str2: the second xmlChar *
! 1088: *
! 1089: * a strcasecmp for xmlChar's
! 1090: *
! 1091: * Returns the integer result of the comparison
! 1092: */
! 1093:
! 1094: int
! 1095: xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
! 1096: register int tmp;
! 1097:
! 1098: if (str1 == str2) return(0);
! 1099: if (str1 == NULL) return(-1);
! 1100: if (str2 == NULL) return(1);
! 1101: do {
! 1102: tmp = casemap[*str1++] - casemap[*str2];
1.14 veillard 1103: if (tmp != 0) return(tmp);
1.232 ! veillard 1104: } while (*str2++ != 0);
! 1105: return 0;
! 1106: }
! 1107:
! 1108: /**
! 1109: * xmlStrncasecmp:
! 1110: * @str1: the first xmlChar *
! 1111: * @str2: the second xmlChar *
! 1112: * @len: the max comparison length
! 1113: *
! 1114: * a strncasecmp for xmlChar's
! 1115: *
! 1116: * Returns the integer result of the comparison
! 1117: */
! 1118:
! 1119: int
! 1120: xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
! 1121: register int tmp;
! 1122:
! 1123: if (len <= 0) return(0);
! 1124: if (str1 == str2) return(0);
! 1125: if (str1 == NULL) return(-1);
! 1126: if (str2 == NULL) return(1);
! 1127: do {
! 1128: tmp = casemap[*str1++] - casemap[*str2];
! 1129: if (tmp != 0 || --len == 0) return(tmp);
! 1130: } while (*str2++ != 0);
! 1131: return 0;
1.14 veillard 1132: }
1133:
1.50 daniel 1134: /**
1135: * xmlStrchr:
1.123 daniel 1136: * @str: the xmlChar * array
1137: * @val: the xmlChar to search
1.50 daniel 1138: *
1.123 daniel 1139: * a strchr for xmlChar's
1.68 daniel 1140: *
1.123 daniel 1141: * Returns the xmlChar * for the first occurence or NULL.
1.14 veillard 1142: */
1143:
1.123 daniel 1144: const xmlChar *
1145: xmlStrchr(const xmlChar *str, xmlChar val) {
1.135 daniel 1146: if (str == NULL) return(NULL);
1.222 veillard 1147: while (*str != 0) { /* non input consuming */
1.123 daniel 1148: if (*str == val) return((xmlChar *) str);
1.14 veillard 1149: str++;
1150: }
1151: return(NULL);
1.89 daniel 1152: }
1153:
1154: /**
1155: * xmlStrstr:
1.123 daniel 1156: * @str: the xmlChar * array (haystack)
1157: * @val: the xmlChar to search (needle)
1.89 daniel 1158: *
1.123 daniel 1159: * a strstr for xmlChar's
1.89 daniel 1160: *
1.123 daniel 1161: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 1162: */
1163:
1.123 daniel 1164: const xmlChar *
1165: xmlStrstr(const xmlChar *str, xmlChar *val) {
1.89 daniel 1166: int n;
1167:
1168: if (str == NULL) return(NULL);
1169: if (val == NULL) return(NULL);
1170: n = xmlStrlen(val);
1171:
1172: if (n == 0) return(str);
1.222 veillard 1173: while (*str != 0) { /* non input consuming */
1.89 daniel 1174: if (*str == *val) {
1.123 daniel 1175: if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1.89 daniel 1176: }
1.232 ! veillard 1177: str++;
! 1178: }
! 1179: return(NULL);
! 1180: }
! 1181:
! 1182: /**
! 1183: * xmlStrcasestr:
! 1184: * @str: the xmlChar * array (haystack)
! 1185: * @val: the xmlChar to search (needle)
! 1186: *
! 1187: * a case-ignoring strstr for xmlChar's
! 1188: *
! 1189: * Returns the xmlChar * for the first occurence or NULL.
! 1190: */
! 1191:
! 1192: const xmlChar *
! 1193: xmlStrcasestr(const xmlChar *str, xmlChar *val) {
! 1194: int n;
! 1195:
! 1196: if (str == NULL) return(NULL);
! 1197: if (val == NULL) return(NULL);
! 1198: n = xmlStrlen(val);
! 1199:
! 1200: if (n == 0) return(str);
! 1201: while (*str != 0) { /* non input consuming */
! 1202: if (casemap[*str] == casemap[*val])
! 1203: if (!xmlStrncasecmp(str, val, n)) return(str);
1.89 daniel 1204: str++;
1205: }
1206: return(NULL);
1207: }
1208:
1209: /**
1210: * xmlStrsub:
1.123 daniel 1211: * @str: the xmlChar * array (haystack)
1.89 daniel 1212: * @start: the index of the first char (zero based)
1213: * @len: the length of the substring
1214: *
1215: * Extract a substring of a given string
1216: *
1.123 daniel 1217: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 1218: */
1219:
1.123 daniel 1220: xmlChar *
1221: xmlStrsub(const xmlChar *str, int start, int len) {
1.89 daniel 1222: int i;
1223:
1224: if (str == NULL) return(NULL);
1225: if (start < 0) return(NULL);
1.90 daniel 1226: if (len < 0) return(NULL);
1.89 daniel 1227:
1228: for (i = 0;i < start;i++) {
1229: if (*str == 0) return(NULL);
1230: str++;
1231: }
1232: if (*str == 0) return(NULL);
1233: return(xmlStrndup(str, len));
1.14 veillard 1234: }
1.28 daniel 1235:
1.50 daniel 1236: /**
1237: * xmlStrlen:
1.123 daniel 1238: * @str: the xmlChar * array
1.50 daniel 1239: *
1.127 daniel 1240: * length of a xmlChar's string
1.68 daniel 1241: *
1.123 daniel 1242: * Returns the number of xmlChar contained in the ARRAY.
1.45 daniel 1243: */
1244:
1.55 daniel 1245: int
1.123 daniel 1246: xmlStrlen(const xmlChar *str) {
1.45 daniel 1247: int len = 0;
1248:
1249: if (str == NULL) return(0);
1.222 veillard 1250: while (*str != 0) { /* non input consuming */
1.45 daniel 1251: str++;
1252: len++;
1253: }
1254: return(len);
1255: }
1256:
1.50 daniel 1257: /**
1258: * xmlStrncat:
1.123 daniel 1259: * @cur: the original xmlChar * array
1260: * @add: the xmlChar * array added
1.50 daniel 1261: * @len: the length of @add
1262: *
1.123 daniel 1263: * a strncat for array of xmlChar's
1.68 daniel 1264: *
1.123 daniel 1265: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 1266: */
1267:
1.123 daniel 1268: xmlChar *
1269: xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1.45 daniel 1270: int size;
1.123 daniel 1271: xmlChar *ret;
1.45 daniel 1272:
1273: if ((add == NULL) || (len == 0))
1274: return(cur);
1275: if (cur == NULL)
1276: return(xmlStrndup(add, len));
1277:
1278: size = xmlStrlen(cur);
1.204 veillard 1279: ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1.45 daniel 1280: if (ret == NULL) {
1.86 daniel 1281: fprintf(stderr, "xmlStrncat: realloc of %ld byte failed\n",
1.123 daniel 1282: (size + len + 1) * (long)sizeof(xmlChar));
1.45 daniel 1283: return(cur);
1284: }
1.123 daniel 1285: memcpy(&ret[size], add, len * sizeof(xmlChar));
1.45 daniel 1286: ret[size + len] = 0;
1287: return(ret);
1288: }
1289:
1.50 daniel 1290: /**
1291: * xmlStrcat:
1.123 daniel 1292: * @cur: the original xmlChar * array
1293: * @add: the xmlChar * array added
1.50 daniel 1294: *
1.152 daniel 1295: * a strcat for array of xmlChar's. Since they are supposed to be
1296: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1297: * a termination mark of '0'.
1.68 daniel 1298: *
1.123 daniel 1299: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 1300: */
1.123 daniel 1301: xmlChar *
1302: xmlStrcat(xmlChar *cur, const xmlChar *add) {
1303: const xmlChar *p = add;
1.45 daniel 1304:
1305: if (add == NULL) return(cur);
1306: if (cur == NULL)
1307: return(xmlStrdup(add));
1308:
1.222 veillard 1309: while (*p != 0) p++; /* non input consuming */
1.45 daniel 1310: return(xmlStrncat(cur, add, p - add));
1311: }
1312:
1313: /************************************************************************
1314: * *
1315: * Commodity functions, cleanup needed ? *
1316: * *
1317: ************************************************************************/
1318:
1.50 daniel 1319: /**
1320: * areBlanks:
1321: * @ctxt: an XML parser context
1.123 daniel 1322: * @str: a xmlChar *
1.50 daniel 1323: * @len: the size of @str
1324: *
1.45 daniel 1325: * Is this a sequence of blank chars that one can ignore ?
1.50 daniel 1326: *
1.68 daniel 1327: * Returns 1 if ignorable 0 otherwise.
1.45 daniel 1328: */
1329:
1.123 daniel 1330: static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1.104 daniel 1331: int i, ret;
1.45 daniel 1332: xmlNodePtr lastChild;
1333:
1.176 daniel 1334: /*
1335: * Check for xml:space value.
1336: */
1337: if (*(ctxt->space) == 1)
1338: return(0);
1339:
1340: /*
1341: * Check that the string is made of blanks
1342: */
1.45 daniel 1343: for (i = 0;i < len;i++)
1344: if (!(IS_BLANK(str[i]))) return(0);
1345:
1.176 daniel 1346: /*
1347: * Look if the element is mixed content in the Dtd if available
1348: */
1.104 daniel 1349: if (ctxt->myDoc != NULL) {
1350: ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1351: if (ret == 0) return(1);
1352: if (ret == 1) return(0);
1353: }
1.176 daniel 1354:
1.104 daniel 1355: /*
1.176 daniel 1356: * Otherwise, heuristic :-\
1.104 daniel 1357: */
1.179 daniel 1358: if (ctxt->keepBlanks)
1359: return(0);
1360: if (RAW != '<') return(0);
1361: if (ctxt->node == NULL) return(0);
1362: if ((ctxt->node->children == NULL) &&
1363: (RAW == '<') && (NXT(1) == '/')) return(0);
1364:
1.45 daniel 1365: lastChild = xmlGetLastChild(ctxt->node);
1366: if (lastChild == NULL) {
1367: if (ctxt->node->content != NULL) return(0);
1368: } else if (xmlNodeIsText(lastChild))
1369: return(0);
1.157 daniel 1370: else if ((ctxt->node->children != NULL) &&
1371: (xmlNodeIsText(ctxt->node->children)))
1.104 daniel 1372: return(0);
1.45 daniel 1373: return(1);
1374: }
1375:
1376: /*
1377: * Forward definition for recusive behaviour.
1378: */
1.77 daniel 1379: void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1380: void xmlParseReference(xmlParserCtxtPtr ctxt);
1.45 daniel 1381:
1.28 daniel 1382: /************************************************************************
1383: * *
1384: * Extra stuff for namespace support *
1385: * Relates to http://www.w3.org/TR/WD-xml-names *
1386: * *
1387: ************************************************************************/
1388:
1.50 daniel 1389: /**
1.72 daniel 1390: * xmlSplitQName:
1.162 daniel 1391: * @ctxt: an XML parser context
1.72 daniel 1392: * @name: an XML parser context
1.123 daniel 1393: * @prefix: a xmlChar **
1.72 daniel 1394: *
1.206 veillard 1395: * parse an UTF8 encoded XML qualified name string
1.72 daniel 1396: *
1397: * [NS 5] QName ::= (Prefix ':')? LocalPart
1398: *
1399: * [NS 6] Prefix ::= NCName
1400: *
1401: * [NS 7] LocalPart ::= NCName
1402: *
1.127 daniel 1403: * Returns the local part, and prefix is updated
1.72 daniel 1404: * to get the Prefix if any.
1405: */
1406:
1.123 daniel 1407: xmlChar *
1.162 daniel 1408: xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1409: xmlChar buf[XML_MAX_NAMELEN + 5];
1.222 veillard 1410: xmlChar *buffer = NULL;
1.162 daniel 1411: int len = 0;
1.222 veillard 1412: int max = XML_MAX_NAMELEN;
1.123 daniel 1413: xmlChar *ret = NULL;
1414: const xmlChar *cur = name;
1.206 veillard 1415: int c;
1.72 daniel 1416:
1417: *prefix = NULL;
1.113 daniel 1418:
1419: /* xml: prefix is not really a namespace */
1420: if ((cur[0] == 'x') && (cur[1] == 'm') &&
1421: (cur[2] == 'l') && (cur[3] == ':'))
1422: return(xmlStrdup(name));
1423:
1.162 daniel 1424: /* nasty but valid */
1425: if (cur[0] == ':')
1426: return(xmlStrdup(name));
1427:
1.206 veillard 1428: c = *cur++;
1.222 veillard 1429: while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1.206 veillard 1430: buf[len++] = c;
1431: c = *cur++;
1.162 daniel 1432: }
1.222 veillard 1433: if (len >= max) {
1434: /*
1435: * Okay someone managed to make a huge name, so he's ready to pay
1436: * for the processing speed.
1437: */
1438: max = len * 2;
1439:
1440: buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1441: if (buffer == NULL) {
1442: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1443: ctxt->sax->error(ctxt->userData,
1444: "xmlSplitQName: out of memory\n");
1445: return(NULL);
1446: }
1447: memcpy(buffer, buf, len);
1448: while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1449: if (len + 10 > max) {
1450: max *= 2;
1451: buffer = (xmlChar *) xmlRealloc(buffer,
1452: max * sizeof(xmlChar));
1453: if (buffer == NULL) {
1454: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1455: ctxt->sax->error(ctxt->userData,
1456: "xmlSplitQName: out of memory\n");
1457: return(NULL);
1458: }
1459: }
1460: buffer[len++] = c;
1461: c = *cur++;
1462: }
1463: buffer[len] = 0;
1464: }
1.72 daniel 1465:
1.222 veillard 1466: if (buffer == NULL)
1467: ret = xmlStrndup(buf, len);
1468: else {
1469: ret = buffer;
1470: buffer = NULL;
1471: max = XML_MAX_NAMELEN;
1472: }
1473:
1.72 daniel 1474:
1.162 daniel 1475: if (c == ':') {
1.206 veillard 1476: c = *cur++;
1477: if (c == 0) return(ret);
1.72 daniel 1478: *prefix = ret;
1.162 daniel 1479: len = 0;
1.72 daniel 1480:
1.222 veillard 1481: while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1.206 veillard 1482: buf[len++] = c;
1483: c = *cur++;
1.162 daniel 1484: }
1.222 veillard 1485: if (len >= max) {
1486: /*
1487: * Okay someone managed to make a huge name, so he's ready to pay
1488: * for the processing speed.
1489: */
1.229 veillard 1490: max = len * 2;
1491:
1492: buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1493: if (buffer == NULL) {
1.55 daniel 1494: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 1495: ctxt->sax->error(ctxt->userData,
1.229 veillard 1496: "xmlSplitQName: out of memory\n");
1497: return(NULL);
1498: }
1499: memcpy(buffer, buf, len);
1500: while (c != 0) { /* tested bigname2.xml */
1501: if (len + 10 > max) {
1502: max *= 2;
1503: buffer = (xmlChar *) xmlRealloc(buffer,
1504: max * sizeof(xmlChar));
1505: if (buffer == NULL) {
1506: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1507: ctxt->sax->error(ctxt->userData,
1508: "xmlSplitQName: out of memory\n");
1509: return(NULL);
1510: }
1511: }
1512: buffer[len++] = c;
1513: c = *cur++;
1.122 daniel 1514: }
1.229 veillard 1515: buffer[len] = 0;
1516: }
1517:
1518: if (buffer == NULL)
1519: ret = xmlStrndup(buf, len);
1520: else {
1521: ret = buffer;
1522: }
1.45 daniel 1523: }
1524:
1.229 veillard 1525: return(ret);
1.45 daniel 1526: }
1527:
1.28 daniel 1528: /************************************************************************
1529: * *
1530: * The parser itself *
1531: * Relates to http://www.w3.org/TR/REC-xml *
1532: * *
1533: ************************************************************************/
1.14 veillard 1534:
1.50 daniel 1535: /**
1536: * xmlParseName:
1537: * @ctxt: an XML parser context
1538: *
1539: * parse an XML name.
1.22 daniel 1540: *
1541: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1542: * CombiningChar | Extender
1543: *
1544: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1545: *
1546: * [6] Names ::= Name (S Name)*
1.68 daniel 1547: *
1548: * Returns the Name parsed or NULL
1.1 veillard 1549: */
1550:
1.123 daniel 1551: xmlChar *
1.55 daniel 1552: xmlParseName(xmlParserCtxtPtr ctxt) {
1.160 daniel 1553: xmlChar buf[XML_MAX_NAMELEN + 5];
1554: int len = 0, l;
1555: int c;
1.222 veillard 1556: int count = 0;
1.1 veillard 1557:
1.91 daniel 1558: GROW;
1.160 daniel 1559: c = CUR_CHAR(l);
1.190 daniel 1560: if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1561: (!IS_LETTER(c) && (c != '_') &&
1562: (c != ':'))) {
1.91 daniel 1563: return(NULL);
1564: }
1.40 daniel 1565:
1.222 veillard 1566: while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1.190 daniel 1567: ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1568: (c == '.') || (c == '-') ||
1569: (c == '_') || (c == ':') ||
1570: (IS_COMBINING(c)) ||
1571: (IS_EXTENDER(c)))) {
1.222 veillard 1572: if (count++ > 100) {
1573: count = 0;
1574: GROW;
1575: }
1.160 daniel 1576: COPY_BUF(l,buf,len,c);
1577: NEXTL(l);
1578: c = CUR_CHAR(l);
1.91 daniel 1579: if (len >= XML_MAX_NAMELEN) {
1.222 veillard 1580: /*
1581: * Okay someone managed to make a huge name, so he's ready to pay
1582: * for the processing speed.
1583: */
1584: xmlChar *buffer;
1585: int max = len * 2;
1586:
1587: buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1588: if (buffer == NULL) {
1589: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1590: ctxt->sax->error(ctxt->userData,
1591: "xmlParseName: out of memory\n");
1592: return(NULL);
1593: }
1594: memcpy(buffer, buf, len);
1595: while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1.160 daniel 1596: (c == '.') || (c == '-') ||
1597: (c == '_') || (c == ':') ||
1598: (IS_COMBINING(c)) ||
1599: (IS_EXTENDER(c))) {
1.222 veillard 1600: if (count++ > 100) {
1601: count = 0;
1602: GROW;
1603: }
1604: if (len + 10 > max) {
1605: max *= 2;
1606: buffer = (xmlChar *) xmlRealloc(buffer,
1607: max * sizeof(xmlChar));
1608: if (buffer == NULL) {
1609: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1610: ctxt->sax->error(ctxt->userData,
1611: "xmlParseName: out of memory\n");
1612: return(NULL);
1613: }
1614: }
1615: COPY_BUF(l,buffer,len,c);
1.160 daniel 1616: NEXTL(l);
1617: c = CUR_CHAR(l);
1.97 daniel 1618: }
1.222 veillard 1619: buffer[len] = 0;
1620: return(buffer);
1.91 daniel 1621: }
1622: }
1623: return(xmlStrndup(buf, len));
1.22 daniel 1624: }
1625:
1.50 daniel 1626: /**
1.135 daniel 1627: * xmlParseStringName:
1628: * @ctxt: an XML parser context
1.229 veillard 1629: * @str: a pointer to the string pointer (IN/OUT)
1.135 daniel 1630: *
1631: * parse an XML name.
1632: *
1633: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1634: * CombiningChar | Extender
1635: *
1636: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1637: *
1638: * [6] Names ::= Name (S Name)*
1639: *
1640: * Returns the Name parsed or NULL. The str pointer
1641: * is updated to the current location in the string.
1642: */
1643:
1644: xmlChar *
1645: xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1.176 daniel 1646: xmlChar buf[XML_MAX_NAMELEN + 5];
1647: const xmlChar *cur = *str;
1648: int len = 0, l;
1649: int c;
1.135 daniel 1650:
1.176 daniel 1651: c = CUR_SCHAR(cur, l);
1652: if (!IS_LETTER(c) && (c != '_') &&
1653: (c != ':')) {
1.135 daniel 1654: return(NULL);
1655: }
1656:
1.222 veillard 1657: while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1.176 daniel 1658: (c == '.') || (c == '-') ||
1659: (c == '_') || (c == ':') ||
1660: (IS_COMBINING(c)) ||
1661: (IS_EXTENDER(c))) {
1662: COPY_BUF(l,buf,len,c);
1663: cur += l;
1664: c = CUR_SCHAR(cur, l);
1.222 veillard 1665: if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1666: /*
1667: * Okay someone managed to make a huge name, so he's ready to pay
1668: * for the processing speed.
1669: */
1670: xmlChar *buffer;
1671: int max = len * 2;
1672:
1673: buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1674: if (buffer == NULL) {
1675: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1676: ctxt->sax->error(ctxt->userData,
1677: "xmlParseStringName: out of memory\n");
1678: return(NULL);
1679: }
1680: memcpy(buffer, buf, len);
1681: while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1.176 daniel 1682: (c == '.') || (c == '-') ||
1683: (c == '_') || (c == ':') ||
1684: (IS_COMBINING(c)) ||
1685: (IS_EXTENDER(c))) {
1.222 veillard 1686: if (len + 10 > max) {
1687: max *= 2;
1688: buffer = (xmlChar *) xmlRealloc(buffer,
1689: max * sizeof(xmlChar));
1690: if (buffer == NULL) {
1691: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1692: ctxt->sax->error(ctxt->userData,
1693: "xmlParseStringName: out of memory\n");
1694: return(NULL);
1695: }
1696: }
1697: COPY_BUF(l,buffer,len,c);
1.176 daniel 1698: cur += l;
1699: c = CUR_SCHAR(cur, l);
1700: }
1.222 veillard 1701: buffer[len] = 0;
1702: *str = cur;
1703: return(buffer);
1.176 daniel 1704: }
1.135 daniel 1705: }
1.176 daniel 1706: *str = cur;
1707: return(xmlStrndup(buf, len));
1.135 daniel 1708: }
1709:
1710: /**
1.50 daniel 1711: * xmlParseNmtoken:
1712: * @ctxt: an XML parser context
1713: *
1714: * parse an XML Nmtoken.
1.22 daniel 1715: *
1716: * [7] Nmtoken ::= (NameChar)+
1717: *
1718: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1.68 daniel 1719: *
1720: * Returns the Nmtoken parsed or NULL
1.22 daniel 1721: */
1722:
1.123 daniel 1723: xmlChar *
1.55 daniel 1724: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1.222 veillard 1725: xmlChar buf[XML_MAX_NAMELEN + 5];
1726: int len = 0, l;
1727: int c;
1728: int count = 0;
1.22 daniel 1729:
1.91 daniel 1730: GROW;
1.160 daniel 1731: c = CUR_CHAR(l);
1.222 veillard 1732:
1733: while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1.160 daniel 1734: (c == '.') || (c == '-') ||
1735: (c == '_') || (c == ':') ||
1736: (IS_COMBINING(c)) ||
1737: (IS_EXTENDER(c))) {
1.222 veillard 1738: if (count++ > 100) {
1739: count = 0;
1740: GROW;
1741: }
1.160 daniel 1742: COPY_BUF(l,buf,len,c);
1743: NEXTL(l);
1744: c = CUR_CHAR(l);
1.91 daniel 1745: if (len >= XML_MAX_NAMELEN) {
1.222 veillard 1746: /*
1747: * Okay someone managed to make a huge token, so he's ready to pay
1748: * for the processing speed.
1749: */
1750: xmlChar *buffer;
1751: int max = len * 2;
1752:
1753: buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1754: if (buffer == NULL) {
1755: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1756: ctxt->sax->error(ctxt->userData,
1757: "xmlParseNmtoken: out of memory\n");
1758: return(NULL);
1759: }
1760: memcpy(buffer, buf, len);
1761: while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1.160 daniel 1762: (c == '.') || (c == '-') ||
1763: (c == '_') || (c == ':') ||
1764: (IS_COMBINING(c)) ||
1765: (IS_EXTENDER(c))) {
1.222 veillard 1766: if (count++ > 100) {
1767: count = 0;
1768: GROW;
1769: }
1770: if (len + 10 > max) {
1771: max *= 2;
1772: buffer = (xmlChar *) xmlRealloc(buffer,
1773: max * sizeof(xmlChar));
1774: if (buffer == NULL) {
1775: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1776: ctxt->sax->error(ctxt->userData,
1777: "xmlParseName: out of memory\n");
1778: return(NULL);
1779: }
1780: }
1781: COPY_BUF(l,buffer,len,c);
1.160 daniel 1782: NEXTL(l);
1783: c = CUR_CHAR(l);
1784: }
1.222 veillard 1785: buffer[len] = 0;
1786: return(buffer);
1.91 daniel 1787: }
1788: }
1.168 daniel 1789: if (len == 0)
1790: return(NULL);
1.91 daniel 1791: return(xmlStrndup(buf, len));
1.1 veillard 1792: }
1793:
1.50 daniel 1794: /**
1795: * xmlParseEntityValue:
1796: * @ctxt: an XML parser context
1.78 daniel 1797: * @orig: if non-NULL store a copy of the original entity value
1.50 daniel 1798: *
1.229 veillard 1799: * parse a value for ENTITY declarations
1.24 daniel 1800: *
1801: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1802: * "'" ([^%&'] | PEReference | Reference)* "'"
1.68 daniel 1803: *
1.78 daniel 1804: * Returns the EntityValue parsed with reference substituted or NULL
1.24 daniel 1805: */
1806:
1.123 daniel 1807: xmlChar *
1808: xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1.135 daniel 1809: xmlChar *buf = NULL;
1810: int len = 0;
1.140 daniel 1811: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 1812: int c, l;
1.135 daniel 1813: xmlChar stop;
1.123 daniel 1814: xmlChar *ret = NULL;
1.176 daniel 1815: const xmlChar *cur = NULL;
1.98 daniel 1816: xmlParserInputPtr input;
1.24 daniel 1817:
1.152 daniel 1818: if (RAW == '"') stop = '"';
1819: else if (RAW == '\'') stop = '\'';
1.135 daniel 1820: else {
1821: ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
1822: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1823: ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
1824: ctxt->wellFormed = 0;
1.180 daniel 1825: ctxt->disableSAX = 1;
1.135 daniel 1826: return(NULL);
1827: }
1828: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
1829: if (buf == NULL) {
1830: fprintf(stderr, "malloc of %d byte failed\n", size);
1831: return(NULL);
1832: }
1.94 daniel 1833:
1.135 daniel 1834: /*
1835: * The content of the entity definition is copied in a buffer.
1836: */
1.94 daniel 1837:
1.135 daniel 1838: ctxt->instate = XML_PARSER_ENTITY_VALUE;
1839: input = ctxt->input;
1840: GROW;
1841: NEXT;
1.152 daniel 1842: c = CUR_CHAR(l);
1.135 daniel 1843: /*
1844: * NOTE: 4.4.5 Included in Literal
1845: * When a parameter entity reference appears in a literal entity
1846: * value, ... a single or double quote character in the replacement
1847: * text is always treated as a normal data character and will not
1848: * terminate the literal.
1849: * In practice it means we stop the loop only when back at parsing
1850: * the initial entity and the quote is found
1851: */
1.222 veillard 1852: while ((IS_CHAR(c)) && ((c != stop) || /* checked */
1853: (ctxt->input != input))) {
1.152 daniel 1854: if (len + 5 >= size) {
1.135 daniel 1855: size *= 2;
1.204 veillard 1856: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 1857: if (buf == NULL) {
1858: fprintf(stderr, "realloc of %d byte failed\n", size);
1859: return(NULL);
1.94 daniel 1860: }
1.79 daniel 1861: }
1.152 daniel 1862: COPY_BUF(l,buf,len,c);
1863: NEXTL(l);
1.98 daniel 1864: /*
1.135 daniel 1865: * Pop-up of finished entities.
1.98 daniel 1866: */
1.222 veillard 1867: while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
1.135 daniel 1868: xmlPopInput(ctxt);
1.152 daniel 1869:
1.221 veillard 1870: GROW;
1.152 daniel 1871: c = CUR_CHAR(l);
1.135 daniel 1872: if (c == 0) {
1.94 daniel 1873: GROW;
1.152 daniel 1874: c = CUR_CHAR(l);
1.79 daniel 1875: }
1.135 daniel 1876: }
1877: buf[len] = 0;
1878:
1879: /*
1.176 daniel 1880: * Raise problem w.r.t. '&' and '%' being used in non-entities
1881: * reference constructs. Note Charref will be handled in
1882: * xmlStringDecodeEntities()
1883: */
1884: cur = buf;
1.223 veillard 1885: while (*cur != 0) { /* non input consuming */
1.176 daniel 1886: if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
1887: xmlChar *name;
1888: xmlChar tmp = *cur;
1889:
1890: cur++;
1891: name = xmlParseStringName(ctxt, &cur);
1892: if ((name == NULL) || (*cur != ';')) {
1.230 veillard 1893: ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
1.176 daniel 1894: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1895: ctxt->sax->error(ctxt->userData,
1896: "EntityValue: '%c' forbidden except for entities references\n",
1897: tmp);
1898: ctxt->wellFormed = 0;
1.180 daniel 1899: ctxt->disableSAX = 1;
1.176 daniel 1900: }
1901: if ((ctxt->inSubset == 1) && (tmp == '%')) {
1.230 veillard 1902: ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
1.176 daniel 1903: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1904: ctxt->sax->error(ctxt->userData,
1905: "EntityValue: PEReferences forbidden in internal subset\n",
1906: tmp);
1907: ctxt->wellFormed = 0;
1.180 daniel 1908: ctxt->disableSAX = 1;
1.176 daniel 1909: }
1910: if (name != NULL)
1911: xmlFree(name);
1912: }
1913: cur++;
1914: }
1915:
1916: /*
1.135 daniel 1917: * Then PEReference entities are substituted.
1918: */
1919: if (c != stop) {
1920: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.55 daniel 1921: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 1922: ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
1.59 daniel 1923: ctxt->wellFormed = 0;
1.180 daniel 1924: ctxt->disableSAX = 1;
1.170 daniel 1925: xmlFree(buf);
1.135 daniel 1926: } else {
1927: NEXT;
1928: /*
1929: * NOTE: 4.4.7 Bypassed
1930: * When a general entity reference appears in the EntityValue in
1931: * an entity declaration, it is bypassed and left as is.
1.176 daniel 1932: * so XML_SUBSTITUTE_REF is not set here.
1.135 daniel 1933: */
1934: ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
1935: 0, 0, 0);
1936: if (orig != NULL)
1937: *orig = buf;
1938: else
1939: xmlFree(buf);
1.24 daniel 1940: }
1941:
1942: return(ret);
1943: }
1944:
1.50 daniel 1945: /**
1946: * xmlParseAttValue:
1947: * @ctxt: an XML parser context
1948: *
1949: * parse a value for an attribute
1.78 daniel 1950: * Note: the parser won't do substitution of entities here, this
1.113 daniel 1951: * will be handled later in xmlStringGetNodeList
1.29 daniel 1952: *
1953: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
1954: * "'" ([^<&'] | Reference)* "'"
1.68 daniel 1955: *
1.129 daniel 1956: * 3.3.3 Attribute-Value Normalization:
1957: * Before the value of an attribute is passed to the application or
1958: * checked for validity, the XML processor must normalize it as follows:
1959: * - a character reference is processed by appending the referenced
1960: * character to the attribute value
1961: * - an entity reference is processed by recursively processing the
1962: * replacement text of the entity
1963: * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
1964: * appending #x20 to the normalized value, except that only a single
1965: * #x20 is appended for a "#xD#xA" sequence that is part of an external
1966: * parsed entity or the literal entity value of an internal parsed entity
1967: * - other characters are processed by appending them to the normalized value
1.130 daniel 1968: * If the declared value is not CDATA, then the XML processor must further
1969: * process the normalized attribute value by discarding any leading and
1970: * trailing space (#x20) characters, and by replacing sequences of space
1971: * (#x20) characters by a single space (#x20) character.
1972: * All attributes for which no declaration has been read should be treated
1973: * by a non-validating parser as if declared CDATA.
1.129 daniel 1974: *
1975: * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
1.29 daniel 1976: */
1977:
1.123 daniel 1978: xmlChar *
1.55 daniel 1979: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.129 daniel 1980: xmlChar limit = 0;
1.198 daniel 1981: xmlChar *buf = NULL;
1982: int len = 0;
1983: int buf_size = 0;
1984: int c, l;
1.129 daniel 1985: xmlChar *current = NULL;
1986: xmlEntityPtr ent;
1987:
1.29 daniel 1988:
1.91 daniel 1989: SHRINK;
1.151 daniel 1990: if (NXT(0) == '"') {
1.96 daniel 1991: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.129 daniel 1992: limit = '"';
1.40 daniel 1993: NEXT;
1.151 daniel 1994: } else if (NXT(0) == '\'') {
1.129 daniel 1995: limit = '\'';
1.96 daniel 1996: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.40 daniel 1997: NEXT;
1.29 daniel 1998: } else {
1.123 daniel 1999: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
1.55 daniel 2000: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2001: ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
1.59 daniel 2002: ctxt->wellFormed = 0;
1.180 daniel 2003: ctxt->disableSAX = 1;
1.129 daniel 2004: return(NULL);
1.29 daniel 2005: }
2006:
1.129 daniel 2007: /*
2008: * allocate a translation buffer.
2009: */
1.198 daniel 2010: buf_size = XML_PARSER_BUFFER_SIZE;
2011: buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2012: if (buf == NULL) {
1.129 daniel 2013: perror("xmlParseAttValue: malloc failed");
2014: return(NULL);
2015: }
2016:
2017: /*
2018: * Ok loop until we reach one of the ending char or a size limit.
2019: */
1.198 daniel 2020: c = CUR_CHAR(l);
1.223 veillard 2021: while (((NXT(0) != limit) && /* checked */
2022: (c != '<')) || (ctxt->token != 0)) {
1.198 daniel 2023: if (c == 0) break;
1.205 veillard 2024: if (ctxt->token == '&') {
1.229 veillard 2025: /*
2026: * The reparsing will be done in xmlStringGetNodeList()
2027: * called by the attribute() function in SAX.c
2028: */
1.205 veillard 2029: static xmlChar buffer[6] = "&";
2030:
2031: if (len > buf_size - 10) {
2032: growBuffer(buf);
2033: }
2034: current = &buffer[0];
1.223 veillard 2035: while (*current != 0) { /* non input consuming */
1.205 veillard 2036: buf[len++] = *current++;
2037: }
2038: ctxt->token = 0;
2039: } else if ((c == '&') && (NXT(1) == '#')) {
1.129 daniel 2040: int val = xmlParseCharRef(ctxt);
1.229 veillard 2041: if (val == '&') {
2042: /*
2043: * The reparsing will be done in xmlStringGetNodeList()
2044: * called by the attribute() function in SAX.c
2045: */
2046: static xmlChar buffer[6] = "&";
2047:
2048: if (len > buf_size - 10) {
2049: growBuffer(buf);
2050: }
2051: current = &buffer[0];
2052: while (*current != 0) { /* non input consuming */
2053: buf[len++] = *current++;
2054: }
2055: } else {
2056: COPY_BUF(l,buf,len,val);
2057: NEXTL(l);
2058: }
1.198 daniel 2059: } else if (c == '&') {
1.129 daniel 2060: ent = xmlParseEntityRef(ctxt);
2061: if ((ent != NULL) &&
2062: (ctxt->replaceEntities != 0)) {
1.185 daniel 2063: xmlChar *rep;
2064:
1.186 daniel 2065: if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2066: rep = xmlStringDecodeEntities(ctxt, ent->content,
1.185 daniel 2067: XML_SUBSTITUTE_REF, 0, 0, 0);
1.186 daniel 2068: if (rep != NULL) {
2069: current = rep;
1.223 veillard 2070: while (*current != 0) { /* non input consuming */
1.198 daniel 2071: buf[len++] = *current++;
2072: if (len > buf_size - 10) {
2073: growBuffer(buf);
1.186 daniel 2074: }
1.185 daniel 2075: }
1.186 daniel 2076: xmlFree(rep);
1.129 daniel 2077: }
1.186 daniel 2078: } else {
2079: if (ent->content != NULL)
1.198 daniel 2080: buf[len++] = ent->content[0];
1.129 daniel 2081: }
2082: } else if (ent != NULL) {
2083: int i = xmlStrlen(ent->name);
2084: const xmlChar *cur = ent->name;
2085:
1.186 daniel 2086: /*
2087: * This may look absurd but is needed to detect
2088: * entities problems
2089: */
1.211 veillard 2090: if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2091: (ent->content != NULL)) {
1.186 daniel 2092: xmlChar *rep;
2093: rep = xmlStringDecodeEntities(ctxt, ent->content,
2094: XML_SUBSTITUTE_REF, 0, 0, 0);
2095: if (rep != NULL)
2096: xmlFree(rep);
2097: }
2098:
2099: /*
2100: * Just output the reference
2101: */
1.198 daniel 2102: buf[len++] = '&';
2103: if (len > buf_size - i - 10) {
2104: growBuffer(buf);
1.129 daniel 2105: }
2106: for (;i > 0;i--)
1.198 daniel 2107: buf[len++] = *cur++;
2108: buf[len++] = ';';
1.129 daniel 2109: }
2110: } else {
1.198 daniel 2111: if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2112: COPY_BUF(l,buf,len,0x20);
2113: if (len > buf_size - 10) {
2114: growBuffer(buf);
1.129 daniel 2115: }
2116: } else {
1.198 daniel 2117: COPY_BUF(l,buf,len,c);
2118: if (len > buf_size - 10) {
2119: growBuffer(buf);
1.129 daniel 2120: }
2121: }
1.198 daniel 2122: NEXTL(l);
1.129 daniel 2123: }
1.198 daniel 2124: GROW;
2125: c = CUR_CHAR(l);
1.129 daniel 2126: }
1.198 daniel 2127: buf[len++] = 0;
1.152 daniel 2128: if (RAW == '<') {
1.230 veillard 2129: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.129 daniel 2130: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2131: ctxt->sax->error(ctxt->userData,
2132: "Unescaped '<' not allowed in attributes values\n");
2133: ctxt->wellFormed = 0;
1.180 daniel 2134: ctxt->disableSAX = 1;
1.152 daniel 2135: } else if (RAW != limit) {
1.230 veillard 2136: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
1.129 daniel 2137: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2138: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2139: ctxt->wellFormed = 0;
1.180 daniel 2140: ctxt->disableSAX = 1;
1.129 daniel 2141: } else
2142: NEXT;
1.198 daniel 2143: return(buf);
1.29 daniel 2144: }
2145:
1.50 daniel 2146: /**
2147: * xmlParseSystemLiteral:
2148: * @ctxt: an XML parser context
2149: *
2150: * parse an XML Literal
1.21 daniel 2151: *
1.22 daniel 2152: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.68 daniel 2153: *
2154: * Returns the SystemLiteral parsed or NULL
1.21 daniel 2155: */
2156:
1.123 daniel 2157: xmlChar *
1.55 daniel 2158: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 2159: xmlChar *buf = NULL;
2160: int len = 0;
1.140 daniel 2161: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 2162: int cur, l;
1.135 daniel 2163: xmlChar stop;
1.168 daniel 2164: int state = ctxt->instate;
1.223 veillard 2165: int count = 0;
1.21 daniel 2166:
1.91 daniel 2167: SHRINK;
1.152 daniel 2168: if (RAW == '"') {
1.40 daniel 2169: NEXT;
1.135 daniel 2170: stop = '"';
1.152 daniel 2171: } else if (RAW == '\'') {
1.40 daniel 2172: NEXT;
1.135 daniel 2173: stop = '\'';
1.21 daniel 2174: } else {
1.230 veillard 2175: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.55 daniel 2176: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2177: ctxt->sax->error(ctxt->userData,
2178: "SystemLiteral \" or ' expected\n");
1.59 daniel 2179: ctxt->wellFormed = 0;
1.180 daniel 2180: ctxt->disableSAX = 1;
1.135 daniel 2181: return(NULL);
1.21 daniel 2182: }
2183:
1.135 daniel 2184: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2185: if (buf == NULL) {
2186: fprintf(stderr, "malloc of %d byte failed\n", size);
2187: return(NULL);
2188: }
1.168 daniel 2189: ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
1.152 daniel 2190: cur = CUR_CHAR(l);
1.223 veillard 2191: while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
1.152 daniel 2192: if (len + 5 >= size) {
1.135 daniel 2193: size *= 2;
1.204 veillard 2194: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 2195: if (buf == NULL) {
2196: fprintf(stderr, "realloc of %d byte failed\n", size);
1.204 veillard 2197: ctxt->instate = (xmlParserInputState) state;
1.135 daniel 2198: return(NULL);
2199: }
2200: }
1.223 veillard 2201: count++;
2202: if (count > 50) {
2203: GROW;
2204: count = 0;
2205: }
1.152 daniel 2206: COPY_BUF(l,buf,len,cur);
2207: NEXTL(l);
2208: cur = CUR_CHAR(l);
1.135 daniel 2209: if (cur == 0) {
2210: GROW;
2211: SHRINK;
1.152 daniel 2212: cur = CUR_CHAR(l);
1.135 daniel 2213: }
2214: }
2215: buf[len] = 0;
1.204 veillard 2216: ctxt->instate = (xmlParserInputState) state;
1.135 daniel 2217: if (!IS_CHAR(cur)) {
1.230 veillard 2218: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
1.135 daniel 2219: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2220: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2221: ctxt->wellFormed = 0;
1.180 daniel 2222: ctxt->disableSAX = 1;
1.135 daniel 2223: } else {
2224: NEXT;
2225: }
2226: return(buf);
1.21 daniel 2227: }
2228:
1.50 daniel 2229: /**
2230: * xmlParsePubidLiteral:
2231: * @ctxt: an XML parser context
1.21 daniel 2232: *
1.50 daniel 2233: * parse an XML public literal
1.68 daniel 2234: *
2235: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2236: *
2237: * Returns the PubidLiteral parsed or NULL.
1.21 daniel 2238: */
2239:
1.123 daniel 2240: xmlChar *
1.55 daniel 2241: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 2242: xmlChar *buf = NULL;
2243: int len = 0;
1.140 daniel 2244: int size = XML_PARSER_BUFFER_SIZE;
1.135 daniel 2245: xmlChar cur;
2246: xmlChar stop;
1.223 veillard 2247: int count = 0;
1.125 daniel 2248:
1.91 daniel 2249: SHRINK;
1.152 daniel 2250: if (RAW == '"') {
1.40 daniel 2251: NEXT;
1.135 daniel 2252: stop = '"';
1.152 daniel 2253: } else if (RAW == '\'') {
1.40 daniel 2254: NEXT;
1.135 daniel 2255: stop = '\'';
1.21 daniel 2256: } else {
1.230 veillard 2257: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.55 daniel 2258: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2259: ctxt->sax->error(ctxt->userData,
2260: "SystemLiteral \" or ' expected\n");
1.59 daniel 2261: ctxt->wellFormed = 0;
1.180 daniel 2262: ctxt->disableSAX = 1;
1.135 daniel 2263: return(NULL);
2264: }
2265: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2266: if (buf == NULL) {
2267: fprintf(stderr, "malloc of %d byte failed\n", size);
2268: return(NULL);
2269: }
2270: cur = CUR;
1.223 veillard 2271: while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
1.135 daniel 2272: if (len + 1 >= size) {
2273: size *= 2;
1.204 veillard 2274: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 2275: if (buf == NULL) {
2276: fprintf(stderr, "realloc of %d byte failed\n", size);
2277: return(NULL);
2278: }
2279: }
2280: buf[len++] = cur;
1.223 veillard 2281: count++;
2282: if (count > 50) {
2283: GROW;
2284: count = 0;
2285: }
1.135 daniel 2286: NEXT;
2287: cur = CUR;
2288: if (cur == 0) {
2289: GROW;
2290: SHRINK;
2291: cur = CUR;
2292: }
2293: }
2294: buf[len] = 0;
2295: if (cur != stop) {
1.230 veillard 2296: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
1.135 daniel 2297: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2298: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2299: ctxt->wellFormed = 0;
1.180 daniel 2300: ctxt->disableSAX = 1;
1.135 daniel 2301: } else {
2302: NEXT;
1.21 daniel 2303: }
1.135 daniel 2304: return(buf);
1.21 daniel 2305: }
2306:
1.50 daniel 2307: /**
2308: * xmlParseCharData:
2309: * @ctxt: an XML parser context
2310: * @cdata: int indicating whether we are within a CDATA section
2311: *
2312: * parse a CharData section.
2313: * if we are within a CDATA section ']]>' marks an end of section.
1.27 daniel 2314: *
1.151 daniel 2315: * The right angle bracket (>) may be represented using the string ">",
2316: * and must, for compatibility, be escaped using ">" or a character
2317: * reference when it appears in the string "]]>" in content, when that
2318: * string is not marking the end of a CDATA section.
2319: *
1.27 daniel 2320: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2321: */
2322:
1.55 daniel 2323: void
2324: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1.152 daniel 2325: xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
1.91 daniel 2326: int nbchar = 0;
1.152 daniel 2327: int cur, l;
1.223 veillard 2328: int count = 0;
1.27 daniel 2329:
1.91 daniel 2330: SHRINK;
1.223 veillard 2331: GROW;
1.152 daniel 2332: cur = CUR_CHAR(l);
1.223 veillard 2333: while (((cur != '<') || (ctxt->token == '<')) && /* checked */
1.190 daniel 2334: ((cur != '&') || (ctxt->token == '&')) &&
1.229 veillard 2335: (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
1.97 daniel 2336: if ((cur == ']') && (NXT(1) == ']') &&
1.59 daniel 2337: (NXT(2) == '>')) {
2338: if (cdata) break;
2339: else {
1.230 veillard 2340: ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
1.59 daniel 2341: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.151 daniel 2342: ctxt->sax->error(ctxt->userData,
1.59 daniel 2343: "Sequence ']]>' not allowed in content\n");
1.151 daniel 2344: /* Should this be relaxed ??? I see a "must here */
2345: ctxt->wellFormed = 0;
1.180 daniel 2346: ctxt->disableSAX = 1;
1.59 daniel 2347: }
2348: }
1.152 daniel 2349: COPY_BUF(l,buf,nbchar,cur);
2350: if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
1.91 daniel 2351: /*
2352: * Ok the segment is to be consumed as chars.
2353: */
1.171 daniel 2354: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 2355: if (areBlanks(ctxt, buf, nbchar)) {
2356: if (ctxt->sax->ignorableWhitespace != NULL)
2357: ctxt->sax->ignorableWhitespace(ctxt->userData,
2358: buf, nbchar);
2359: } else {
2360: if (ctxt->sax->characters != NULL)
2361: ctxt->sax->characters(ctxt->userData, buf, nbchar);
2362: }
2363: }
2364: nbchar = 0;
2365: }
1.223 veillard 2366: count++;
2367: if (count > 50) {
2368: GROW;
2369: count = 0;
2370: }
1.152 daniel 2371: NEXTL(l);
2372: cur = CUR_CHAR(l);
1.27 daniel 2373: }
1.91 daniel 2374: if (nbchar != 0) {
2375: /*
2376: * Ok the segment is to be consumed as chars.
2377: */
1.171 daniel 2378: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 2379: if (areBlanks(ctxt, buf, nbchar)) {
2380: if (ctxt->sax->ignorableWhitespace != NULL)
2381: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2382: } else {
2383: if (ctxt->sax->characters != NULL)
2384: ctxt->sax->characters(ctxt->userData, buf, nbchar);
2385: }
2386: }
1.45 daniel 2387: }
1.27 daniel 2388: }
2389:
1.50 daniel 2390: /**
2391: * xmlParseExternalID:
2392: * @ctxt: an XML parser context
1.123 daniel 2393: * @publicID: a xmlChar** receiving PubidLiteral
1.67 daniel 2394: * @strict: indicate whether we should restrict parsing to only
2395: * production [75], see NOTE below
1.50 daniel 2396: *
1.67 daniel 2397: * Parse an External ID or a Public ID
2398: *
2399: * NOTE: Productions [75] and [83] interract badly since [75] can generate
2400: * 'PUBLIC' S PubidLiteral S SystemLiteral
1.22 daniel 2401: *
2402: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2403: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1.67 daniel 2404: *
2405: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2406: *
1.68 daniel 2407: * Returns the function returns SystemLiteral and in the second
1.67 daniel 2408: * case publicID receives PubidLiteral, is strict is off
2409: * it is possible to return NULL and have publicID set.
1.22 daniel 2410: */
2411:
1.123 daniel 2412: xmlChar *
2413: xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2414: xmlChar *URI = NULL;
1.22 daniel 2415:
1.91 daniel 2416: SHRINK;
1.152 daniel 2417: if ((RAW == 'S') && (NXT(1) == 'Y') &&
1.40 daniel 2418: (NXT(2) == 'S') && (NXT(3) == 'T') &&
2419: (NXT(4) == 'E') && (NXT(5) == 'M')) {
2420: SKIP(6);
1.59 daniel 2421: if (!IS_BLANK(CUR)) {
1.230 veillard 2422: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 2423: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2424: ctxt->sax->error(ctxt->userData,
1.59 daniel 2425: "Space required after 'SYSTEM'\n");
2426: ctxt->wellFormed = 0;
1.180 daniel 2427: ctxt->disableSAX = 1;
1.59 daniel 2428: }
1.42 daniel 2429: SKIP_BLANKS;
1.39 daniel 2430: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 2431: if (URI == NULL) {
1.230 veillard 2432: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.55 daniel 2433: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2434: ctxt->sax->error(ctxt->userData,
1.39 daniel 2435: "xmlParseExternalID: SYSTEM, no URI\n");
1.59 daniel 2436: ctxt->wellFormed = 0;
1.180 daniel 2437: ctxt->disableSAX = 1;
1.59 daniel 2438: }
1.152 daniel 2439: } else if ((RAW == 'P') && (NXT(1) == 'U') &&
1.40 daniel 2440: (NXT(2) == 'B') && (NXT(3) == 'L') &&
2441: (NXT(4) == 'I') && (NXT(5) == 'C')) {
2442: SKIP(6);
1.59 daniel 2443: if (!IS_BLANK(CUR)) {
1.230 veillard 2444: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 2445: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2446: ctxt->sax->error(ctxt->userData,
1.59 daniel 2447: "Space required after 'PUBLIC'\n");
2448: ctxt->wellFormed = 0;
1.180 daniel 2449: ctxt->disableSAX = 1;
1.59 daniel 2450: }
1.42 daniel 2451: SKIP_BLANKS;
1.39 daniel 2452: *publicID = xmlParsePubidLiteral(ctxt);
1.59 daniel 2453: if (*publicID == NULL) {
1.230 veillard 2454: ctxt->errNo = XML_ERR_PUBID_REQUIRED;
1.55 daniel 2455: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2456: ctxt->sax->error(ctxt->userData,
1.39 daniel 2457: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.59 daniel 2458: ctxt->wellFormed = 0;
1.180 daniel 2459: ctxt->disableSAX = 1;
1.59 daniel 2460: }
1.67 daniel 2461: if (strict) {
2462: /*
2463: * We don't handle [83] so "S SystemLiteral" is required.
2464: */
2465: if (!IS_BLANK(CUR)) {
1.230 veillard 2466: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 2467: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2468: ctxt->sax->error(ctxt->userData,
1.67 daniel 2469: "Space required after the Public Identifier\n");
2470: ctxt->wellFormed = 0;
1.180 daniel 2471: ctxt->disableSAX = 1;
1.67 daniel 2472: }
2473: } else {
2474: /*
2475: * We handle [83] so we return immediately, if
2476: * "S SystemLiteral" is not detected. From a purely parsing
2477: * point of view that's a nice mess.
2478: */
1.135 daniel 2479: const xmlChar *ptr;
2480: GROW;
2481:
2482: ptr = CUR_PTR;
1.67 daniel 2483: if (!IS_BLANK(*ptr)) return(NULL);
2484:
1.223 veillard 2485: while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
1.173 daniel 2486: if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
1.59 daniel 2487: }
1.42 daniel 2488: SKIP_BLANKS;
1.39 daniel 2489: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 2490: if (URI == NULL) {
1.230 veillard 2491: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.55 daniel 2492: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2493: ctxt->sax->error(ctxt->userData,
1.39 daniel 2494: "xmlParseExternalID: PUBLIC, no URI\n");
1.59 daniel 2495: ctxt->wellFormed = 0;
1.180 daniel 2496: ctxt->disableSAX = 1;
1.59 daniel 2497: }
1.22 daniel 2498: }
1.39 daniel 2499: return(URI);
1.22 daniel 2500: }
2501:
1.50 daniel 2502: /**
2503: * xmlParseComment:
1.69 daniel 2504: * @ctxt: an XML parser context
1.50 daniel 2505: *
1.3 veillard 2506: * Skip an XML (SGML) comment <!-- .... -->
1.38 daniel 2507: * The spec says that "For compatibility, the string "--" (double-hyphen)
2508: * must not occur within comments. "
1.22 daniel 2509: *
2510: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 2511: */
1.72 daniel 2512: void
1.114 daniel 2513: xmlParseComment(xmlParserCtxtPtr ctxt) {
1.135 daniel 2514: xmlChar *buf = NULL;
1.195 daniel 2515: int len;
1.140 daniel 2516: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 2517: int q, ql;
2518: int r, rl;
2519: int cur, l;
1.140 daniel 2520: xmlParserInputState state;
1.187 daniel 2521: xmlParserInputPtr input = ctxt->input;
1.223 veillard 2522: int count = 0;
1.3 veillard 2523:
2524: /*
1.22 daniel 2525: * Check that there is a comment right here.
1.3 veillard 2526: */
1.152 daniel 2527: if ((RAW != '<') || (NXT(1) != '!') ||
1.72 daniel 2528: (NXT(2) != '-') || (NXT(3) != '-')) return;
1.3 veillard 2529:
1.140 daniel 2530: state = ctxt->instate;
1.97 daniel 2531: ctxt->instate = XML_PARSER_COMMENT;
1.91 daniel 2532: SHRINK;
1.40 daniel 2533: SKIP(4);
1.135 daniel 2534: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2535: if (buf == NULL) {
2536: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 2537: ctxt->instate = state;
1.135 daniel 2538: return;
2539: }
1.152 daniel 2540: q = CUR_CHAR(ql);
2541: NEXTL(ql);
2542: r = CUR_CHAR(rl);
2543: NEXTL(rl);
2544: cur = CUR_CHAR(l);
1.195 daniel 2545: len = 0;
1.223 veillard 2546: while (IS_CHAR(cur) && /* checked */
1.135 daniel 2547: ((cur != '>') ||
2548: (r != '-') || (q != '-'))) {
1.195 daniel 2549: if ((r == '-') && (q == '-') && (len > 1)) {
1.230 veillard 2550: ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
1.55 daniel 2551: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2552: ctxt->sax->error(ctxt->userData,
1.38 daniel 2553: "Comment must not contain '--' (double-hyphen)`\n");
1.59 daniel 2554: ctxt->wellFormed = 0;
1.180 daniel 2555: ctxt->disableSAX = 1;
1.59 daniel 2556: }
1.152 daniel 2557: if (len + 5 >= size) {
1.135 daniel 2558: size *= 2;
1.204 veillard 2559: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 2560: if (buf == NULL) {
2561: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 2562: ctxt->instate = state;
1.135 daniel 2563: return;
2564: }
2565: }
1.152 daniel 2566: COPY_BUF(ql,buf,len,q);
1.135 daniel 2567: q = r;
1.152 daniel 2568: ql = rl;
1.135 daniel 2569: r = cur;
1.152 daniel 2570: rl = l;
1.223 veillard 2571:
2572: count++;
2573: if (count > 50) {
2574: GROW;
2575: count = 0;
2576: }
1.152 daniel 2577: NEXTL(l);
2578: cur = CUR_CHAR(l);
1.135 daniel 2579: if (cur == 0) {
2580: SHRINK;
2581: GROW;
1.152 daniel 2582: cur = CUR_CHAR(l);
1.135 daniel 2583: }
1.3 veillard 2584: }
1.135 daniel 2585: buf[len] = 0;
2586: if (!IS_CHAR(cur)) {
1.230 veillard 2587: ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
1.55 daniel 2588: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2589: ctxt->sax->error(ctxt->userData,
1.135 daniel 2590: "Comment not terminated \n<!--%.50s\n", buf);
1.59 daniel 2591: ctxt->wellFormed = 0;
1.180 daniel 2592: ctxt->disableSAX = 1;
1.178 daniel 2593: xmlFree(buf);
1.3 veillard 2594: } else {
1.187 daniel 2595: if (input != ctxt->input) {
1.230 veillard 2596: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 2597: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2598: ctxt->sax->error(ctxt->userData,
2599: "Comment doesn't start and stop in the same entity\n");
2600: ctxt->wellFormed = 0;
2601: ctxt->disableSAX = 1;
2602: }
1.40 daniel 2603: NEXT;
1.171 daniel 2604: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2605: (!ctxt->disableSAX))
1.135 daniel 2606: ctxt->sax->comment(ctxt->userData, buf);
2607: xmlFree(buf);
1.3 veillard 2608: }
1.140 daniel 2609: ctxt->instate = state;
1.3 veillard 2610: }
2611:
1.50 daniel 2612: /**
2613: * xmlParsePITarget:
2614: * @ctxt: an XML parser context
2615: *
2616: * parse the name of a PI
1.22 daniel 2617: *
2618: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1.68 daniel 2619: *
2620: * Returns the PITarget name or NULL
1.22 daniel 2621: */
2622:
1.123 daniel 2623: xmlChar *
1.55 daniel 2624: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1.123 daniel 2625: xmlChar *name;
1.22 daniel 2626:
2627: name = xmlParseName(ctxt);
1.139 daniel 2628: if ((name != NULL) &&
1.22 daniel 2629: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 2630: ((name[1] == 'm') || (name[1] == 'M')) &&
2631: ((name[2] == 'l') || (name[2] == 'L'))) {
1.139 daniel 2632: int i;
1.177 daniel 2633: if ((name[0] == 'x') && (name[1] == 'm') &&
2634: (name[2] == 'l') && (name[3] == 0)) {
1.230 veillard 2635: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
1.151 daniel 2636: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2637: ctxt->sax->error(ctxt->userData,
2638: "XML declaration allowed only at the start of the document\n");
2639: ctxt->wellFormed = 0;
1.180 daniel 2640: ctxt->disableSAX = 1;
1.151 daniel 2641: return(name);
2642: } else if (name[3] == 0) {
1.230 veillard 2643: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
1.151 daniel 2644: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2645: ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2646: ctxt->wellFormed = 0;
1.180 daniel 2647: ctxt->disableSAX = 1;
1.151 daniel 2648: return(name);
2649: }
1.139 daniel 2650: for (i = 0;;i++) {
2651: if (xmlW3CPIs[i] == NULL) break;
2652: if (!xmlStrcmp(name, (const xmlChar *)xmlW3CPIs[i]))
2653: return(name);
2654: }
2655: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
1.230 veillard 2656: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
1.139 daniel 2657: ctxt->sax->warning(ctxt->userData,
1.122 daniel 2658: "xmlParsePItarget: invalid name prefix 'xml'\n");
2659: }
1.22 daniel 2660: }
2661: return(name);
2662: }
2663:
1.50 daniel 2664: /**
2665: * xmlParsePI:
2666: * @ctxt: an XML parser context
2667: *
2668: * parse an XML Processing Instruction.
1.22 daniel 2669: *
2670: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.68 daniel 2671: *
1.69 daniel 2672: * The processing is transferred to SAX once parsed.
1.3 veillard 2673: */
2674:
1.55 daniel 2675: void
2676: xmlParsePI(xmlParserCtxtPtr ctxt) {
1.135 daniel 2677: xmlChar *buf = NULL;
2678: int len = 0;
1.140 daniel 2679: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 2680: int cur, l;
1.123 daniel 2681: xmlChar *target;
1.140 daniel 2682: xmlParserInputState state;
1.223 veillard 2683: int count = 0;
1.22 daniel 2684:
1.152 daniel 2685: if ((RAW == '<') && (NXT(1) == '?')) {
1.187 daniel 2686: xmlParserInputPtr input = ctxt->input;
1.140 daniel 2687: state = ctxt->instate;
2688: ctxt->instate = XML_PARSER_PI;
1.3 veillard 2689: /*
2690: * this is a Processing Instruction.
2691: */
1.40 daniel 2692: SKIP(2);
1.91 daniel 2693: SHRINK;
1.3 veillard 2694:
2695: /*
1.22 daniel 2696: * Parse the target name and check for special support like
2697: * namespace.
1.3 veillard 2698: */
1.22 daniel 2699: target = xmlParsePITarget(ctxt);
2700: if (target != NULL) {
1.156 daniel 2701: if ((RAW == '?') && (NXT(1) == '>')) {
1.187 daniel 2702: if (input != ctxt->input) {
1.230 veillard 2703: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 2704: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2705: ctxt->sax->error(ctxt->userData,
2706: "PI declaration doesn't start and stop in the same entity\n");
2707: ctxt->wellFormed = 0;
2708: ctxt->disableSAX = 1;
2709: }
1.156 daniel 2710: SKIP(2);
2711:
2712: /*
2713: * SAX: PI detected.
2714: */
1.171 daniel 2715: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.156 daniel 2716: (ctxt->sax->processingInstruction != NULL))
2717: ctxt->sax->processingInstruction(ctxt->userData,
2718: target, NULL);
2719: ctxt->instate = state;
1.170 daniel 2720: xmlFree(target);
1.156 daniel 2721: return;
2722: }
1.135 daniel 2723: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2724: if (buf == NULL) {
2725: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 2726: ctxt->instate = state;
1.135 daniel 2727: return;
2728: }
2729: cur = CUR;
2730: if (!IS_BLANK(cur)) {
1.230 veillard 2731: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.114 daniel 2732: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2733: ctxt->sax->error(ctxt->userData,
2734: "xmlParsePI: PI %s space expected\n", target);
2735: ctxt->wellFormed = 0;
1.180 daniel 2736: ctxt->disableSAX = 1;
1.114 daniel 2737: }
2738: SKIP_BLANKS;
1.152 daniel 2739: cur = CUR_CHAR(l);
1.223 veillard 2740: while (IS_CHAR(cur) && /* checked */
1.135 daniel 2741: ((cur != '?') || (NXT(1) != '>'))) {
1.152 daniel 2742: if (len + 5 >= size) {
1.135 daniel 2743: size *= 2;
1.204 veillard 2744: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 2745: if (buf == NULL) {
2746: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 2747: ctxt->instate = state;
1.135 daniel 2748: return;
2749: }
1.223 veillard 2750: }
2751: count++;
2752: if (count > 50) {
2753: GROW;
2754: count = 0;
1.135 daniel 2755: }
1.152 daniel 2756: COPY_BUF(l,buf,len,cur);
2757: NEXTL(l);
2758: cur = CUR_CHAR(l);
1.135 daniel 2759: if (cur == 0) {
2760: SHRINK;
2761: GROW;
1.152 daniel 2762: cur = CUR_CHAR(l);
1.135 daniel 2763: }
2764: }
2765: buf[len] = 0;
1.152 daniel 2766: if (cur != '?') {
1.230 veillard 2767: ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
1.72 daniel 2768: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2769: ctxt->sax->error(ctxt->userData,
1.72 daniel 2770: "xmlParsePI: PI %s never end ...\n", target);
2771: ctxt->wellFormed = 0;
1.180 daniel 2772: ctxt->disableSAX = 1;
1.22 daniel 2773: } else {
1.187 daniel 2774: if (input != ctxt->input) {
1.230 veillard 2775: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 2776: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2777: ctxt->sax->error(ctxt->userData,
2778: "PI declaration doesn't start and stop in the same entity\n");
2779: ctxt->wellFormed = 0;
2780: ctxt->disableSAX = 1;
2781: }
1.72 daniel 2782: SKIP(2);
1.44 daniel 2783:
1.72 daniel 2784: /*
2785: * SAX: PI detected.
2786: */
1.171 daniel 2787: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.72 daniel 2788: (ctxt->sax->processingInstruction != NULL))
1.99 daniel 2789: ctxt->sax->processingInstruction(ctxt->userData,
1.135 daniel 2790: target, buf);
1.22 daniel 2791: }
1.135 daniel 2792: xmlFree(buf);
1.119 daniel 2793: xmlFree(target);
1.3 veillard 2794: } else {
1.230 veillard 2795: ctxt->errNo = XML_ERR_PI_NOT_STARTED;
1.55 daniel 2796: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.99 daniel 2797: ctxt->sax->error(ctxt->userData,
2798: "xmlParsePI : no target name\n");
1.59 daniel 2799: ctxt->wellFormed = 0;
1.180 daniel 2800: ctxt->disableSAX = 1;
1.22 daniel 2801: }
1.140 daniel 2802: ctxt->instate = state;
1.22 daniel 2803: }
2804: }
2805:
1.50 daniel 2806: /**
2807: * xmlParseNotationDecl:
2808: * @ctxt: an XML parser context
2809: *
2810: * parse a notation declaration
1.22 daniel 2811: *
2812: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
2813: *
2814: * Hence there is actually 3 choices:
2815: * 'PUBLIC' S PubidLiteral
2816: * 'PUBLIC' S PubidLiteral S SystemLiteral
2817: * and 'SYSTEM' S SystemLiteral
1.50 daniel 2818: *
1.67 daniel 2819: * See the NOTE on xmlParseExternalID().
1.22 daniel 2820: */
2821:
1.55 daniel 2822: void
2823: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 2824: xmlChar *name;
2825: xmlChar *Pubid;
2826: xmlChar *Systemid;
1.22 daniel 2827:
1.152 daniel 2828: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 2829: (NXT(2) == 'N') && (NXT(3) == 'O') &&
2830: (NXT(4) == 'T') && (NXT(5) == 'A') &&
2831: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1.67 daniel 2832: (NXT(8) == 'O') && (NXT(9) == 'N')) {
1.187 daniel 2833: xmlParserInputPtr input = ctxt->input;
1.91 daniel 2834: SHRINK;
1.40 daniel 2835: SKIP(10);
1.67 daniel 2836: if (!IS_BLANK(CUR)) {
1.230 veillard 2837: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 2838: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2839: ctxt->sax->error(ctxt->userData,
2840: "Space required after '<!NOTATION'\n");
1.67 daniel 2841: ctxt->wellFormed = 0;
1.180 daniel 2842: ctxt->disableSAX = 1;
1.67 daniel 2843: return;
2844: }
2845: SKIP_BLANKS;
1.22 daniel 2846:
2847: name = xmlParseName(ctxt);
2848: if (name == NULL) {
1.230 veillard 2849: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.55 daniel 2850: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2851: ctxt->sax->error(ctxt->userData,
2852: "NOTATION: Name expected here\n");
1.67 daniel 2853: ctxt->wellFormed = 0;
1.180 daniel 2854: ctxt->disableSAX = 1;
1.67 daniel 2855: return;
2856: }
2857: if (!IS_BLANK(CUR)) {
1.230 veillard 2858: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 2859: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2860: ctxt->sax->error(ctxt->userData,
1.67 daniel 2861: "Space required after the NOTATION name'\n");
1.59 daniel 2862: ctxt->wellFormed = 0;
1.180 daniel 2863: ctxt->disableSAX = 1;
1.22 daniel 2864: return;
2865: }
1.42 daniel 2866: SKIP_BLANKS;
1.67 daniel 2867:
1.22 daniel 2868: /*
1.67 daniel 2869: * Parse the IDs.
1.22 daniel 2870: */
1.160 daniel 2871: Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
1.67 daniel 2872: SKIP_BLANKS;
2873:
1.152 daniel 2874: if (RAW == '>') {
1.187 daniel 2875: if (input != ctxt->input) {
1.230 veillard 2876: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 2877: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2878: ctxt->sax->error(ctxt->userData,
2879: "Notation declaration doesn't start and stop in the same entity\n");
2880: ctxt->wellFormed = 0;
2881: ctxt->disableSAX = 1;
2882: }
1.40 daniel 2883: NEXT;
1.171 daniel 2884: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
2885: (ctxt->sax->notationDecl != NULL))
1.74 daniel 2886: ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
1.67 daniel 2887: } else {
1.230 veillard 2888: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.67 daniel 2889: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2890: ctxt->sax->error(ctxt->userData,
1.67 daniel 2891: "'>' required to close NOTATION declaration\n");
2892: ctxt->wellFormed = 0;
1.180 daniel 2893: ctxt->disableSAX = 1;
1.67 daniel 2894: }
1.119 daniel 2895: xmlFree(name);
2896: if (Systemid != NULL) xmlFree(Systemid);
2897: if (Pubid != NULL) xmlFree(Pubid);
1.22 daniel 2898: }
2899: }
2900:
1.50 daniel 2901: /**
2902: * xmlParseEntityDecl:
2903: * @ctxt: an XML parser context
2904: *
2905: * parse <!ENTITY declarations
1.22 daniel 2906: *
2907: * [70] EntityDecl ::= GEDecl | PEDecl
2908: *
2909: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
2910: *
2911: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
2912: *
2913: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
2914: *
2915: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 2916: *
2917: * [76] NDataDecl ::= S 'NDATA' S Name
1.99 daniel 2918: *
2919: * [ VC: Notation Declared ]
1.116 daniel 2920: * The Name must match the declared name of a notation.
1.22 daniel 2921: */
2922:
1.55 daniel 2923: void
2924: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 2925: xmlChar *name = NULL;
2926: xmlChar *value = NULL;
2927: xmlChar *URI = NULL, *literal = NULL;
2928: xmlChar *ndata = NULL;
1.39 daniel 2929: int isParameter = 0;
1.123 daniel 2930: xmlChar *orig = NULL;
1.22 daniel 2931:
1.94 daniel 2932: GROW;
1.152 daniel 2933: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 2934: (NXT(2) == 'E') && (NXT(3) == 'N') &&
2935: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1.59 daniel 2936: (NXT(6) == 'T') && (NXT(7) == 'Y')) {
1.187 daniel 2937: xmlParserInputPtr input = ctxt->input;
1.96 daniel 2938: ctxt->instate = XML_PARSER_ENTITY_DECL;
1.91 daniel 2939: SHRINK;
1.40 daniel 2940: SKIP(8);
1.59 daniel 2941: if (!IS_BLANK(CUR)) {
1.230 veillard 2942: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 2943: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2944: ctxt->sax->error(ctxt->userData,
2945: "Space required after '<!ENTITY'\n");
1.59 daniel 2946: ctxt->wellFormed = 0;
1.180 daniel 2947: ctxt->disableSAX = 1;
1.59 daniel 2948: }
2949: SKIP_BLANKS;
1.40 daniel 2950:
1.152 daniel 2951: if (RAW == '%') {
1.40 daniel 2952: NEXT;
1.59 daniel 2953: if (!IS_BLANK(CUR)) {
1.230 veillard 2954: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 2955: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2956: ctxt->sax->error(ctxt->userData,
2957: "Space required after '%'\n");
1.59 daniel 2958: ctxt->wellFormed = 0;
1.180 daniel 2959: ctxt->disableSAX = 1;
1.59 daniel 2960: }
1.42 daniel 2961: SKIP_BLANKS;
1.39 daniel 2962: isParameter = 1;
1.22 daniel 2963: }
2964:
2965: name = xmlParseName(ctxt);
1.24 daniel 2966: if (name == NULL) {
1.230 veillard 2967: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 2968: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2969: ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
1.59 daniel 2970: ctxt->wellFormed = 0;
1.180 daniel 2971: ctxt->disableSAX = 1;
1.24 daniel 2972: return;
2973: }
1.59 daniel 2974: if (!IS_BLANK(CUR)) {
1.230 veillard 2975: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 2976: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2977: ctxt->sax->error(ctxt->userData,
1.59 daniel 2978: "Space required after the entity name\n");
2979: ctxt->wellFormed = 0;
1.180 daniel 2980: ctxt->disableSAX = 1;
1.59 daniel 2981: }
1.42 daniel 2982: SKIP_BLANKS;
1.24 daniel 2983:
1.22 daniel 2984: /*
1.68 daniel 2985: * handle the various case of definitions...
1.22 daniel 2986: */
1.39 daniel 2987: if (isParameter) {
1.225 veillard 2988: if ((RAW == '"') || (RAW == '\'')) {
1.78 daniel 2989: value = xmlParseEntityValue(ctxt, &orig);
1.39 daniel 2990: if (value) {
1.171 daniel 2991: if ((ctxt->sax != NULL) &&
2992: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 2993: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 2994: XML_INTERNAL_PARAMETER_ENTITY,
2995: NULL, NULL, value);
2996: }
1.225 veillard 2997: } else {
1.67 daniel 2998: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 2999: if ((URI == NULL) && (literal == NULL)) {
1.230 veillard 3000: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
1.169 daniel 3001: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3002: ctxt->sax->error(ctxt->userData,
3003: "Entity value required\n");
3004: ctxt->wellFormed = 0;
1.180 daniel 3005: ctxt->disableSAX = 1;
1.169 daniel 3006: }
1.39 daniel 3007: if (URI) {
1.193 daniel 3008: xmlURIPtr uri;
3009:
3010: uri = xmlParseURI((const char *) URI);
3011: if (uri == NULL) {
1.230 veillard 3012: ctxt->errNo = XML_ERR_INVALID_URI;
1.193 daniel 3013: if ((ctxt->sax != NULL) &&
3014: (!ctxt->disableSAX) &&
3015: (ctxt->sax->error != NULL))
3016: ctxt->sax->error(ctxt->userData,
3017: "Invalid URI: %s\n", URI);
3018: ctxt->wellFormed = 0;
3019: } else {
3020: if (uri->fragment != NULL) {
1.230 veillard 3021: ctxt->errNo = XML_ERR_URI_FRAGMENT;
1.193 daniel 3022: if ((ctxt->sax != NULL) &&
3023: (!ctxt->disableSAX) &&
3024: (ctxt->sax->error != NULL))
3025: ctxt->sax->error(ctxt->userData,
3026: "Fragment not allowed: %s\n", URI);
3027: ctxt->wellFormed = 0;
3028: } else {
3029: if ((ctxt->sax != NULL) &&
3030: (!ctxt->disableSAX) &&
3031: (ctxt->sax->entityDecl != NULL))
3032: ctxt->sax->entityDecl(ctxt->userData, name,
3033: XML_EXTERNAL_PARAMETER_ENTITY,
3034: literal, URI, NULL);
3035: }
3036: xmlFreeURI(uri);
3037: }
1.39 daniel 3038: }
1.24 daniel 3039: }
3040: } else {
1.152 daniel 3041: if ((RAW == '"') || (RAW == '\'')) {
1.78 daniel 3042: value = xmlParseEntityValue(ctxt, &orig);
1.171 daniel 3043: if ((ctxt->sax != NULL) &&
3044: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3045: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3046: XML_INTERNAL_GENERAL_ENTITY,
3047: NULL, NULL, value);
3048: } else {
1.67 daniel 3049: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 3050: if ((URI == NULL) && (literal == NULL)) {
1.230 veillard 3051: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
1.169 daniel 3052: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3053: ctxt->sax->error(ctxt->userData,
3054: "Entity value required\n");
3055: ctxt->wellFormed = 0;
1.180 daniel 3056: ctxt->disableSAX = 1;
1.169 daniel 3057: }
1.193 daniel 3058: if (URI) {
3059: xmlURIPtr uri;
3060:
3061: uri = xmlParseURI((const char *)URI);
3062: if (uri == NULL) {
1.230 veillard 3063: ctxt->errNo = XML_ERR_INVALID_URI;
1.193 daniel 3064: if ((ctxt->sax != NULL) &&
3065: (!ctxt->disableSAX) &&
3066: (ctxt->sax->error != NULL))
3067: ctxt->sax->error(ctxt->userData,
3068: "Invalid URI: %s\n", URI);
3069: ctxt->wellFormed = 0;
3070: } else {
3071: if (uri->fragment != NULL) {
1.230 veillard 3072: ctxt->errNo = XML_ERR_URI_FRAGMENT;
1.193 daniel 3073: if ((ctxt->sax != NULL) &&
3074: (!ctxt->disableSAX) &&
3075: (ctxt->sax->error != NULL))
3076: ctxt->sax->error(ctxt->userData,
3077: "Fragment not allowed: %s\n", URI);
3078: ctxt->wellFormed = 0;
3079: }
3080: xmlFreeURI(uri);
3081: }
3082: }
1.152 daniel 3083: if ((RAW != '>') && (!IS_BLANK(CUR))) {
1.230 veillard 3084: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3085: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3086: ctxt->sax->error(ctxt->userData,
1.59 daniel 3087: "Space required before 'NDATA'\n");
3088: ctxt->wellFormed = 0;
1.180 daniel 3089: ctxt->disableSAX = 1;
1.59 daniel 3090: }
1.42 daniel 3091: SKIP_BLANKS;
1.152 daniel 3092: if ((RAW == 'N') && (NXT(1) == 'D') &&
1.40 daniel 3093: (NXT(2) == 'A') && (NXT(3) == 'T') &&
3094: (NXT(4) == 'A')) {
3095: SKIP(5);
1.59 daniel 3096: if (!IS_BLANK(CUR)) {
1.230 veillard 3097: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3098: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3099: ctxt->sax->error(ctxt->userData,
1.59 daniel 3100: "Space required after 'NDATA'\n");
3101: ctxt->wellFormed = 0;
1.180 daniel 3102: ctxt->disableSAX = 1;
1.59 daniel 3103: }
1.42 daniel 3104: SKIP_BLANKS;
1.24 daniel 3105: ndata = xmlParseName(ctxt);
1.171 daniel 3106: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.116 daniel 3107: (ctxt->sax->unparsedEntityDecl != NULL))
3108: ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
1.39 daniel 3109: literal, URI, ndata);
3110: } else {
1.171 daniel 3111: if ((ctxt->sax != NULL) &&
3112: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3113: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3114: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3115: literal, URI, NULL);
1.24 daniel 3116: }
3117: }
3118: }
1.42 daniel 3119: SKIP_BLANKS;
1.152 daniel 3120: if (RAW != '>') {
1.230 veillard 3121: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.55 daniel 3122: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3123: ctxt->sax->error(ctxt->userData,
1.31 daniel 3124: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.59 daniel 3125: ctxt->wellFormed = 0;
1.180 daniel 3126: ctxt->disableSAX = 1;
1.187 daniel 3127: } else {
3128: if (input != ctxt->input) {
1.230 veillard 3129: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 3130: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3131: ctxt->sax->error(ctxt->userData,
3132: "Entity declaration doesn't start and stop in the same entity\n");
3133: ctxt->wellFormed = 0;
3134: ctxt->disableSAX = 1;
3135: }
1.40 daniel 3136: NEXT;
1.187 daniel 3137: }
1.78 daniel 3138: if (orig != NULL) {
3139: /*
1.98 daniel 3140: * Ugly mechanism to save the raw entity value.
1.78 daniel 3141: */
3142: xmlEntityPtr cur = NULL;
3143:
1.98 daniel 3144: if (isParameter) {
3145: if ((ctxt->sax != NULL) &&
3146: (ctxt->sax->getParameterEntity != NULL))
1.120 daniel 3147: cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.98 daniel 3148: } else {
3149: if ((ctxt->sax != NULL) &&
3150: (ctxt->sax->getEntity != NULL))
1.120 daniel 3151: cur = ctxt->sax->getEntity(ctxt->userData, name);
1.98 daniel 3152: }
3153: if (cur != NULL) {
3154: if (cur->orig != NULL)
1.119 daniel 3155: xmlFree(orig);
1.98 daniel 3156: else
3157: cur->orig = orig;
3158: } else
1.119 daniel 3159: xmlFree(orig);
1.78 daniel 3160: }
1.119 daniel 3161: if (name != NULL) xmlFree(name);
3162: if (value != NULL) xmlFree(value);
3163: if (URI != NULL) xmlFree(URI);
3164: if (literal != NULL) xmlFree(literal);
3165: if (ndata != NULL) xmlFree(ndata);
1.22 daniel 3166: }
3167: }
3168:
1.50 daniel 3169: /**
1.59 daniel 3170: * xmlParseDefaultDecl:
3171: * @ctxt: an XML parser context
3172: * @value: Receive a possible fixed default value for the attribute
3173: *
3174: * Parse an attribute default declaration
3175: *
3176: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3177: *
1.99 daniel 3178: * [ VC: Required Attribute ]
1.117 daniel 3179: * if the default declaration is the keyword #REQUIRED, then the
3180: * attribute must be specified for all elements of the type in the
3181: * attribute-list declaration.
1.99 daniel 3182: *
3183: * [ VC: Attribute Default Legal ]
1.102 daniel 3184: * The declared default value must meet the lexical constraints of
3185: * the declared attribute type c.f. xmlValidateAttributeDecl()
1.99 daniel 3186: *
3187: * [ VC: Fixed Attribute Default ]
1.117 daniel 3188: * if an attribute has a default value declared with the #FIXED
3189: * keyword, instances of that attribute must match the default value.
1.99 daniel 3190: *
3191: * [ WFC: No < in Attribute Values ]
3192: * handled in xmlParseAttValue()
3193: *
1.59 daniel 3194: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3195: * or XML_ATTRIBUTE_FIXED.
3196: */
3197:
3198: int
1.123 daniel 3199: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
1.59 daniel 3200: int val;
1.123 daniel 3201: xmlChar *ret;
1.59 daniel 3202:
3203: *value = NULL;
1.152 daniel 3204: if ((RAW == '#') && (NXT(1) == 'R') &&
1.59 daniel 3205: (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3206: (NXT(4) == 'U') && (NXT(5) == 'I') &&
3207: (NXT(6) == 'R') && (NXT(7) == 'E') &&
3208: (NXT(8) == 'D')) {
3209: SKIP(9);
3210: return(XML_ATTRIBUTE_REQUIRED);
3211: }
1.152 daniel 3212: if ((RAW == '#') && (NXT(1) == 'I') &&
1.59 daniel 3213: (NXT(2) == 'M') && (NXT(3) == 'P') &&
3214: (NXT(4) == 'L') && (NXT(5) == 'I') &&
3215: (NXT(6) == 'E') && (NXT(7) == 'D')) {
3216: SKIP(8);
3217: return(XML_ATTRIBUTE_IMPLIED);
3218: }
3219: val = XML_ATTRIBUTE_NONE;
1.152 daniel 3220: if ((RAW == '#') && (NXT(1) == 'F') &&
1.59 daniel 3221: (NXT(2) == 'I') && (NXT(3) == 'X') &&
3222: (NXT(4) == 'E') && (NXT(5) == 'D')) {
3223: SKIP(6);
3224: val = XML_ATTRIBUTE_FIXED;
3225: if (!IS_BLANK(CUR)) {
1.230 veillard 3226: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3227: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3228: ctxt->sax->error(ctxt->userData,
3229: "Space required after '#FIXED'\n");
1.59 daniel 3230: ctxt->wellFormed = 0;
1.180 daniel 3231: ctxt->disableSAX = 1;
1.59 daniel 3232: }
3233: SKIP_BLANKS;
3234: }
3235: ret = xmlParseAttValue(ctxt);
1.96 daniel 3236: ctxt->instate = XML_PARSER_DTD;
1.59 daniel 3237: if (ret == NULL) {
3238: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3239: ctxt->sax->error(ctxt->userData,
1.59 daniel 3240: "Attribute default value declaration error\n");
3241: ctxt->wellFormed = 0;
1.180 daniel 3242: ctxt->disableSAX = 1;
1.59 daniel 3243: } else
3244: *value = ret;
3245: return(val);
3246: }
3247:
3248: /**
1.66 daniel 3249: * xmlParseNotationType:
3250: * @ctxt: an XML parser context
3251: *
3252: * parse an Notation attribute type.
3253: *
1.99 daniel 3254: * Note: the leading 'NOTATION' S part has already being parsed...
3255: *
1.66 daniel 3256: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3257: *
1.99 daniel 3258: * [ VC: Notation Attributes ]
1.117 daniel 3259: * Values of this type must match one of the notation names included
1.99 daniel 3260: * in the declaration; all notation names in the declaration must be declared.
1.66 daniel 3261: *
3262: * Returns: the notation attribute tree built while parsing
3263: */
3264:
3265: xmlEnumerationPtr
3266: xmlParseNotationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 3267: xmlChar *name;
1.66 daniel 3268: xmlEnumerationPtr ret = NULL, last = NULL, cur;
3269:
1.152 daniel 3270: if (RAW != '(') {
1.230 veillard 3271: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.66 daniel 3272: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3273: ctxt->sax->error(ctxt->userData,
3274: "'(' required to start 'NOTATION'\n");
1.66 daniel 3275: ctxt->wellFormed = 0;
1.180 daniel 3276: ctxt->disableSAX = 1;
1.66 daniel 3277: return(NULL);
3278: }
1.91 daniel 3279: SHRINK;
1.66 daniel 3280: do {
3281: NEXT;
3282: SKIP_BLANKS;
3283: name = xmlParseName(ctxt);
3284: if (name == NULL) {
1.230 veillard 3285: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.66 daniel 3286: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3287: ctxt->sax->error(ctxt->userData,
1.66 daniel 3288: "Name expected in NOTATION declaration\n");
3289: ctxt->wellFormed = 0;
1.180 daniel 3290: ctxt->disableSAX = 1;
1.66 daniel 3291: return(ret);
3292: }
3293: cur = xmlCreateEnumeration(name);
1.119 daniel 3294: xmlFree(name);
1.66 daniel 3295: if (cur == NULL) return(ret);
3296: if (last == NULL) ret = last = cur;
3297: else {
3298: last->next = cur;
3299: last = cur;
3300: }
3301: SKIP_BLANKS;
1.152 daniel 3302: } while (RAW == '|');
3303: if (RAW != ')') {
1.230 veillard 3304: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.66 daniel 3305: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3306: ctxt->sax->error(ctxt->userData,
1.66 daniel 3307: "')' required to finish NOTATION declaration\n");
3308: ctxt->wellFormed = 0;
1.180 daniel 3309: ctxt->disableSAX = 1;
1.170 daniel 3310: if ((last != NULL) && (last != ret))
3311: xmlFreeEnumeration(last);
1.66 daniel 3312: return(ret);
3313: }
3314: NEXT;
3315: return(ret);
3316: }
3317:
3318: /**
3319: * xmlParseEnumerationType:
3320: * @ctxt: an XML parser context
3321: *
3322: * parse an Enumeration attribute type.
3323: *
3324: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3325: *
1.99 daniel 3326: * [ VC: Enumeration ]
1.117 daniel 3327: * Values of this type must match one of the Nmtoken tokens in
1.99 daniel 3328: * the declaration
3329: *
1.66 daniel 3330: * Returns: the enumeration attribute tree built while parsing
3331: */
3332:
3333: xmlEnumerationPtr
3334: xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 3335: xmlChar *name;
1.66 daniel 3336: xmlEnumerationPtr ret = NULL, last = NULL, cur;
3337:
1.152 daniel 3338: if (RAW != '(') {
1.230 veillard 3339: ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
1.66 daniel 3340: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3341: ctxt->sax->error(ctxt->userData,
1.66 daniel 3342: "'(' required to start ATTLIST enumeration\n");
3343: ctxt->wellFormed = 0;
1.180 daniel 3344: ctxt->disableSAX = 1;
1.66 daniel 3345: return(NULL);
3346: }
1.91 daniel 3347: SHRINK;
1.66 daniel 3348: do {
3349: NEXT;
3350: SKIP_BLANKS;
3351: name = xmlParseNmtoken(ctxt);
3352: if (name == NULL) {
1.230 veillard 3353: ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
1.66 daniel 3354: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3355: ctxt->sax->error(ctxt->userData,
1.66 daniel 3356: "NmToken expected in ATTLIST enumeration\n");
3357: ctxt->wellFormed = 0;
1.180 daniel 3358: ctxt->disableSAX = 1;
1.66 daniel 3359: return(ret);
3360: }
3361: cur = xmlCreateEnumeration(name);
1.119 daniel 3362: xmlFree(name);
1.66 daniel 3363: if (cur == NULL) return(ret);
3364: if (last == NULL) ret = last = cur;
3365: else {
3366: last->next = cur;
3367: last = cur;
3368: }
3369: SKIP_BLANKS;
1.152 daniel 3370: } while (RAW == '|');
3371: if (RAW != ')') {
1.230 veillard 3372: ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
1.66 daniel 3373: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3374: ctxt->sax->error(ctxt->userData,
1.66 daniel 3375: "')' required to finish ATTLIST enumeration\n");
3376: ctxt->wellFormed = 0;
1.180 daniel 3377: ctxt->disableSAX = 1;
1.66 daniel 3378: return(ret);
3379: }
3380: NEXT;
3381: return(ret);
3382: }
3383:
3384: /**
1.50 daniel 3385: * xmlParseEnumeratedType:
3386: * @ctxt: an XML parser context
1.66 daniel 3387: * @tree: the enumeration tree built while parsing
1.50 daniel 3388: *
1.66 daniel 3389: * parse an Enumerated attribute type.
1.22 daniel 3390: *
3391: * [57] EnumeratedType ::= NotationType | Enumeration
3392: *
3393: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3394: *
1.50 daniel 3395: *
1.66 daniel 3396: * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
1.22 daniel 3397: */
3398:
1.66 daniel 3399: int
3400: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.152 daniel 3401: if ((RAW == 'N') && (NXT(1) == 'O') &&
1.66 daniel 3402: (NXT(2) == 'T') && (NXT(3) == 'A') &&
3403: (NXT(4) == 'T') && (NXT(5) == 'I') &&
3404: (NXT(6) == 'O') && (NXT(7) == 'N')) {
3405: SKIP(8);
3406: if (!IS_BLANK(CUR)) {
1.230 veillard 3407: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.66 daniel 3408: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3409: ctxt->sax->error(ctxt->userData,
3410: "Space required after 'NOTATION'\n");
1.66 daniel 3411: ctxt->wellFormed = 0;
1.180 daniel 3412: ctxt->disableSAX = 1;
1.66 daniel 3413: return(0);
3414: }
3415: SKIP_BLANKS;
3416: *tree = xmlParseNotationType(ctxt);
3417: if (*tree == NULL) return(0);
3418: return(XML_ATTRIBUTE_NOTATION);
3419: }
3420: *tree = xmlParseEnumerationType(ctxt);
3421: if (*tree == NULL) return(0);
3422: return(XML_ATTRIBUTE_ENUMERATION);
1.22 daniel 3423: }
3424:
1.50 daniel 3425: /**
3426: * xmlParseAttributeType:
3427: * @ctxt: an XML parser context
1.66 daniel 3428: * @tree: the enumeration tree built while parsing
1.50 daniel 3429: *
1.59 daniel 3430: * parse the Attribute list def for an element
1.22 daniel 3431: *
3432: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3433: *
3434: * [55] StringType ::= 'CDATA'
3435: *
3436: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3437: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1.50 daniel 3438: *
1.102 daniel 3439: * Validity constraints for attribute values syntax are checked in
3440: * xmlValidateAttributeValue()
3441: *
1.99 daniel 3442: * [ VC: ID ]
1.117 daniel 3443: * Values of type ID must match the Name production. A name must not
1.99 daniel 3444: * appear more than once in an XML document as a value of this type;
3445: * i.e., ID values must uniquely identify the elements which bear them.
3446: *
3447: * [ VC: One ID per Element Type ]
1.117 daniel 3448: * No element type may have more than one ID attribute specified.
1.99 daniel 3449: *
3450: * [ VC: ID Attribute Default ]
1.117 daniel 3451: * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
1.99 daniel 3452: *
3453: * [ VC: IDREF ]
1.102 daniel 3454: * Values of type IDREF must match the Name production, and values
1.140 daniel 3455: * of type IDREFS must match Names; each IDREF Name must match the value
1.117 daniel 3456: * of an ID attribute on some element in the XML document; i.e. IDREF
1.99 daniel 3457: * values must match the value of some ID attribute.
3458: *
3459: * [ VC: Entity Name ]
1.102 daniel 3460: * Values of type ENTITY must match the Name production, values
1.140 daniel 3461: * of type ENTITIES must match Names; each Entity Name must match the
1.117 daniel 3462: * name of an unparsed entity declared in the DTD.
1.99 daniel 3463: *
3464: * [ VC: Name Token ]
1.102 daniel 3465: * Values of type NMTOKEN must match the Nmtoken production; values
1.99 daniel 3466: * of type NMTOKENS must match Nmtokens.
3467: *
1.69 daniel 3468: * Returns the attribute type
1.22 daniel 3469: */
1.59 daniel 3470: int
1.66 daniel 3471: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.91 daniel 3472: SHRINK;
1.152 daniel 3473: if ((RAW == 'C') && (NXT(1) == 'D') &&
1.40 daniel 3474: (NXT(2) == 'A') && (NXT(3) == 'T') &&
3475: (NXT(4) == 'A')) {
3476: SKIP(5);
1.66 daniel 3477: return(XML_ATTRIBUTE_CDATA);
1.152 daniel 3478: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.40 daniel 3479: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.97 daniel 3480: (NXT(4) == 'F') && (NXT(5) == 'S')) {
3481: SKIP(6);
3482: return(XML_ATTRIBUTE_IDREFS);
1.152 daniel 3483: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.97 daniel 3484: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.40 daniel 3485: (NXT(4) == 'F')) {
3486: SKIP(5);
1.59 daniel 3487: return(XML_ATTRIBUTE_IDREF);
1.152 daniel 3488: } else if ((RAW == 'I') && (NXT(1) == 'D')) {
1.66 daniel 3489: SKIP(2);
3490: return(XML_ATTRIBUTE_ID);
1.152 daniel 3491: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 3492: (NXT(2) == 'T') && (NXT(3) == 'I') &&
3493: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3494: SKIP(6);
1.59 daniel 3495: return(XML_ATTRIBUTE_ENTITY);
1.152 daniel 3496: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 3497: (NXT(2) == 'T') && (NXT(3) == 'I') &&
3498: (NXT(4) == 'T') && (NXT(5) == 'I') &&
3499: (NXT(6) == 'E') && (NXT(7) == 'S')) {
3500: SKIP(8);
1.59 daniel 3501: return(XML_ATTRIBUTE_ENTITIES);
1.152 daniel 3502: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.40 daniel 3503: (NXT(2) == 'T') && (NXT(3) == 'O') &&
3504: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.66 daniel 3505: (NXT(6) == 'N') && (NXT(7) == 'S')) {
3506: SKIP(8);
3507: return(XML_ATTRIBUTE_NMTOKENS);
1.152 daniel 3508: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.66 daniel 3509: (NXT(2) == 'T') && (NXT(3) == 'O') &&
3510: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.40 daniel 3511: (NXT(6) == 'N')) {
3512: SKIP(7);
1.59 daniel 3513: return(XML_ATTRIBUTE_NMTOKEN);
1.22 daniel 3514: }
1.66 daniel 3515: return(xmlParseEnumeratedType(ctxt, tree));
1.22 daniel 3516: }
3517:
1.50 daniel 3518: /**
3519: * xmlParseAttributeListDecl:
3520: * @ctxt: an XML parser context
3521: *
3522: * : parse the Attribute list def for an element
1.22 daniel 3523: *
3524: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3525: *
3526: * [53] AttDef ::= S Name S AttType S DefaultDecl
1.50 daniel 3527: *
1.22 daniel 3528: */
1.55 daniel 3529: void
3530: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 3531: xmlChar *elemName;
3532: xmlChar *attrName;
1.103 daniel 3533: xmlEnumerationPtr tree;
1.22 daniel 3534:
1.152 daniel 3535: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 3536: (NXT(2) == 'A') && (NXT(3) == 'T') &&
3537: (NXT(4) == 'T') && (NXT(5) == 'L') &&
3538: (NXT(6) == 'I') && (NXT(7) == 'S') &&
1.59 daniel 3539: (NXT(8) == 'T')) {
1.187 daniel 3540: xmlParserInputPtr input = ctxt->input;
3541:
1.40 daniel 3542: SKIP(9);
1.59 daniel 3543: if (!IS_BLANK(CUR)) {
1.230 veillard 3544: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3545: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3546: ctxt->sax->error(ctxt->userData,
3547: "Space required after '<!ATTLIST'\n");
1.59 daniel 3548: ctxt->wellFormed = 0;
1.180 daniel 3549: ctxt->disableSAX = 1;
1.59 daniel 3550: }
1.42 daniel 3551: SKIP_BLANKS;
1.59 daniel 3552: elemName = xmlParseName(ctxt);
3553: if (elemName == NULL) {
1.230 veillard 3554: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 3555: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3556: ctxt->sax->error(ctxt->userData,
3557: "ATTLIST: no name for Element\n");
1.59 daniel 3558: ctxt->wellFormed = 0;
1.180 daniel 3559: ctxt->disableSAX = 1;
1.22 daniel 3560: return;
3561: }
1.42 daniel 3562: SKIP_BLANKS;
1.220 veillard 3563: GROW;
1.152 daniel 3564: while (RAW != '>') {
1.123 daniel 3565: const xmlChar *check = CUR_PTR;
1.59 daniel 3566: int type;
3567: int def;
1.123 daniel 3568: xmlChar *defaultValue = NULL;
1.59 daniel 3569:
1.220 veillard 3570: GROW;
1.103 daniel 3571: tree = NULL;
1.59 daniel 3572: attrName = xmlParseName(ctxt);
3573: if (attrName == NULL) {
1.230 veillard 3574: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 3575: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3576: ctxt->sax->error(ctxt->userData,
3577: "ATTLIST: no name for Attribute\n");
1.59 daniel 3578: ctxt->wellFormed = 0;
1.180 daniel 3579: ctxt->disableSAX = 1;
1.59 daniel 3580: break;
3581: }
1.97 daniel 3582: GROW;
1.59 daniel 3583: if (!IS_BLANK(CUR)) {
1.230 veillard 3584: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3585: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3586: ctxt->sax->error(ctxt->userData,
1.59 daniel 3587: "Space required after the attribute name\n");
3588: ctxt->wellFormed = 0;
1.180 daniel 3589: ctxt->disableSAX = 1;
1.170 daniel 3590: if (attrName != NULL)
3591: xmlFree(attrName);
3592: if (defaultValue != NULL)
3593: xmlFree(defaultValue);
1.59 daniel 3594: break;
3595: }
3596: SKIP_BLANKS;
3597:
1.66 daniel 3598: type = xmlParseAttributeType(ctxt, &tree);
1.170 daniel 3599: if (type <= 0) {
3600: if (attrName != NULL)
3601: xmlFree(attrName);
3602: if (defaultValue != NULL)
3603: xmlFree(defaultValue);
3604: break;
3605: }
1.22 daniel 3606:
1.97 daniel 3607: GROW;
1.59 daniel 3608: if (!IS_BLANK(CUR)) {
1.230 veillard 3609: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3610: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3611: ctxt->sax->error(ctxt->userData,
1.59 daniel 3612: "Space required after the attribute type\n");
3613: ctxt->wellFormed = 0;
1.180 daniel 3614: ctxt->disableSAX = 1;
1.170 daniel 3615: if (attrName != NULL)
3616: xmlFree(attrName);
3617: if (defaultValue != NULL)
3618: xmlFree(defaultValue);
3619: if (tree != NULL)
3620: xmlFreeEnumeration(tree);
1.59 daniel 3621: break;
3622: }
1.42 daniel 3623: SKIP_BLANKS;
1.59 daniel 3624:
3625: def = xmlParseDefaultDecl(ctxt, &defaultValue);
1.170 daniel 3626: if (def <= 0) {
3627: if (attrName != NULL)
3628: xmlFree(attrName);
3629: if (defaultValue != NULL)
3630: xmlFree(defaultValue);
3631: if (tree != NULL)
3632: xmlFreeEnumeration(tree);
3633: break;
3634: }
1.59 daniel 3635:
1.97 daniel 3636: GROW;
1.152 daniel 3637: if (RAW != '>') {
1.59 daniel 3638: if (!IS_BLANK(CUR)) {
1.230 veillard 3639: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3640: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3641: ctxt->sax->error(ctxt->userData,
1.59 daniel 3642: "Space required after the attribute default value\n");
3643: ctxt->wellFormed = 0;
1.180 daniel 3644: ctxt->disableSAX = 1;
1.170 daniel 3645: if (attrName != NULL)
3646: xmlFree(attrName);
3647: if (defaultValue != NULL)
3648: xmlFree(defaultValue);
3649: if (tree != NULL)
3650: xmlFreeEnumeration(tree);
1.59 daniel 3651: break;
3652: }
3653: SKIP_BLANKS;
3654: }
1.40 daniel 3655: if (check == CUR_PTR) {
1.230 veillard 3656: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 3657: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3658: ctxt->sax->error(ctxt->userData,
1.59 daniel 3659: "xmlParseAttributeListDecl: detected internal error\n");
1.170 daniel 3660: if (attrName != NULL)
3661: xmlFree(attrName);
3662: if (defaultValue != NULL)
3663: xmlFree(defaultValue);
3664: if (tree != NULL)
3665: xmlFreeEnumeration(tree);
1.22 daniel 3666: break;
3667: }
1.171 daniel 3668: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3669: (ctxt->sax->attributeDecl != NULL))
1.74 daniel 3670: ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
1.66 daniel 3671: type, def, defaultValue, tree);
1.59 daniel 3672: if (attrName != NULL)
1.119 daniel 3673: xmlFree(attrName);
1.59 daniel 3674: if (defaultValue != NULL)
1.119 daniel 3675: xmlFree(defaultValue);
1.97 daniel 3676: GROW;
1.22 daniel 3677: }
1.187 daniel 3678: if (RAW == '>') {
3679: if (input != ctxt->input) {
1.230 veillard 3680: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 3681: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3682: ctxt->sax->error(ctxt->userData,
3683: "Attribute list declaration doesn't start and stop in the same entity\n");
3684: ctxt->wellFormed = 0;
3685: ctxt->disableSAX = 1;
3686: }
1.40 daniel 3687: NEXT;
1.187 daniel 3688: }
1.22 daniel 3689:
1.119 daniel 3690: xmlFree(elemName);
1.22 daniel 3691: }
3692: }
3693:
1.50 daniel 3694: /**
1.61 daniel 3695: * xmlParseElementMixedContentDecl:
3696: * @ctxt: an XML parser context
3697: *
3698: * parse the declaration for a Mixed Element content
3699: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3700: *
3701: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
3702: * '(' S? '#PCDATA' S? ')'
3703: *
1.99 daniel 3704: * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
3705: *
3706: * [ VC: No Duplicate Types ]
1.117 daniel 3707: * The same name must not appear more than once in a single
3708: * mixed-content declaration.
1.99 daniel 3709: *
1.61 daniel 3710: * returns: the list of the xmlElementContentPtr describing the element choices
3711: */
3712: xmlElementContentPtr
1.62 daniel 3713: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
1.64 daniel 3714: xmlElementContentPtr ret = NULL, cur = NULL, n;
1.123 daniel 3715: xmlChar *elem = NULL;
1.61 daniel 3716:
1.97 daniel 3717: GROW;
1.152 daniel 3718: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 3719: (NXT(2) == 'C') && (NXT(3) == 'D') &&
3720: (NXT(4) == 'A') && (NXT(5) == 'T') &&
3721: (NXT(6) == 'A')) {
3722: SKIP(7);
3723: SKIP_BLANKS;
1.91 daniel 3724: SHRINK;
1.152 daniel 3725: if (RAW == ')') {
1.187 daniel 3726: ctxt->entity = ctxt->input;
1.63 daniel 3727: NEXT;
3728: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
1.152 daniel 3729: if (RAW == '*') {
1.136 daniel 3730: ret->ocur = XML_ELEMENT_CONTENT_MULT;
3731: NEXT;
3732: }
1.63 daniel 3733: return(ret);
3734: }
1.152 daniel 3735: if ((RAW == '(') || (RAW == '|')) {
1.61 daniel 3736: ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3737: if (ret == NULL) return(NULL);
1.99 daniel 3738: }
1.152 daniel 3739: while (RAW == '|') {
1.64 daniel 3740: NEXT;
1.61 daniel 3741: if (elem == NULL) {
3742: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3743: if (ret == NULL) return(NULL);
3744: ret->c1 = cur;
1.64 daniel 3745: cur = ret;
1.61 daniel 3746: } else {
1.64 daniel 3747: n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3748: if (n == NULL) return(NULL);
3749: n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
3750: cur->c2 = n;
3751: cur = n;
1.119 daniel 3752: xmlFree(elem);
1.61 daniel 3753: }
3754: SKIP_BLANKS;
3755: elem = xmlParseName(ctxt);
3756: if (elem == NULL) {
1.230 veillard 3757: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.61 daniel 3758: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3759: ctxt->sax->error(ctxt->userData,
1.61 daniel 3760: "xmlParseElementMixedContentDecl : Name expected\n");
3761: ctxt->wellFormed = 0;
1.180 daniel 3762: ctxt->disableSAX = 1;
1.61 daniel 3763: xmlFreeElementContent(cur);
3764: return(NULL);
3765: }
3766: SKIP_BLANKS;
1.97 daniel 3767: GROW;
1.61 daniel 3768: }
1.152 daniel 3769: if ((RAW == ')') && (NXT(1) == '*')) {
1.66 daniel 3770: if (elem != NULL) {
1.61 daniel 3771: cur->c2 = xmlNewElementContent(elem,
3772: XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 3773: xmlFree(elem);
1.66 daniel 3774: }
1.65 daniel 3775: ret->ocur = XML_ELEMENT_CONTENT_MULT;
1.187 daniel 3776: ctxt->entity = ctxt->input;
1.64 daniel 3777: SKIP(2);
1.61 daniel 3778: } else {
1.119 daniel 3779: if (elem != NULL) xmlFree(elem);
1.230 veillard 3780: xmlFreeElementContent(ret);
3781: ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
1.61 daniel 3782: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3783: ctxt->sax->error(ctxt->userData,
1.63 daniel 3784: "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
1.61 daniel 3785: ctxt->wellFormed = 0;
1.180 daniel 3786: ctxt->disableSAX = 1;
1.61 daniel 3787: return(NULL);
3788: }
3789:
3790: } else {
1.230 veillard 3791: ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
1.61 daniel 3792: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3793: ctxt->sax->error(ctxt->userData,
1.61 daniel 3794: "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
3795: ctxt->wellFormed = 0;
1.180 daniel 3796: ctxt->disableSAX = 1;
1.61 daniel 3797: }
3798: return(ret);
3799: }
3800:
3801: /**
3802: * xmlParseElementChildrenContentDecl:
1.50 daniel 3803: * @ctxt: an XML parser context
3804: *
1.61 daniel 3805: * parse the declaration for a Mixed Element content
3806: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
1.22 daniel 3807: *
1.61 daniel 3808: *
1.22 daniel 3809: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
3810: *
3811: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
3812: *
3813: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
3814: *
3815: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
3816: *
1.99 daniel 3817: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
3818: * TODO Parameter-entity replacement text must be properly nested
3819: * with parenthetized groups. That is to say, if either of the
3820: * opening or closing parentheses in a choice, seq, or Mixed
3821: * construct is contained in the replacement text for a parameter
3822: * entity, both must be contained in the same replacement text. For
3823: * interoperability, if a parameter-entity reference appears in a
3824: * choice, seq, or Mixed construct, its replacement text should not
3825: * be empty, and neither the first nor last non-blank character of
3826: * the replacement text should be a connector (| or ,).
3827: *
1.62 daniel 3828: * returns: the tree of xmlElementContentPtr describing the element
1.61 daniel 3829: * hierarchy.
3830: */
3831: xmlElementContentPtr
1.62 daniel 3832: xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
1.63 daniel 3833: xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
1.123 daniel 3834: xmlChar *elem;
3835: xmlChar type = 0;
1.62 daniel 3836:
3837: SKIP_BLANKS;
1.94 daniel 3838: GROW;
1.152 daniel 3839: if (RAW == '(') {
1.63 daniel 3840: /* Recurse on first child */
1.62 daniel 3841: NEXT;
3842: SKIP_BLANKS;
3843: cur = ret = xmlParseElementChildrenContentDecl(ctxt);
3844: SKIP_BLANKS;
1.101 daniel 3845: GROW;
1.62 daniel 3846: } else {
3847: elem = xmlParseName(ctxt);
3848: if (elem == NULL) {
1.230 veillard 3849: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 3850: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3851: ctxt->sax->error(ctxt->userData,
1.62 daniel 3852: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
3853: ctxt->wellFormed = 0;
1.180 daniel 3854: ctxt->disableSAX = 1;
1.62 daniel 3855: return(NULL);
3856: }
3857: cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.101 daniel 3858: GROW;
1.152 daniel 3859: if (RAW == '?') {
1.104 daniel 3860: cur->ocur = XML_ELEMENT_CONTENT_OPT;
1.62 daniel 3861: NEXT;
1.152 daniel 3862: } else if (RAW == '*') {
1.104 daniel 3863: cur->ocur = XML_ELEMENT_CONTENT_MULT;
1.62 daniel 3864: NEXT;
1.152 daniel 3865: } else if (RAW == '+') {
1.104 daniel 3866: cur->ocur = XML_ELEMENT_CONTENT_PLUS;
1.62 daniel 3867: NEXT;
3868: } else {
1.104 daniel 3869: cur->ocur = XML_ELEMENT_CONTENT_ONCE;
1.62 daniel 3870: }
1.119 daniel 3871: xmlFree(elem);
1.101 daniel 3872: GROW;
1.62 daniel 3873: }
3874: SKIP_BLANKS;
1.91 daniel 3875: SHRINK;
1.152 daniel 3876: while (RAW != ')') {
1.63 daniel 3877: /*
3878: * Each loop we parse one separator and one element.
3879: */
1.152 daniel 3880: if (RAW == ',') {
1.62 daniel 3881: if (type == 0) type = CUR;
3882:
3883: /*
3884: * Detect "Name | Name , Name" error
3885: */
3886: else if (type != CUR) {
1.230 veillard 3887: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 3888: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3889: ctxt->sax->error(ctxt->userData,
1.62 daniel 3890: "xmlParseElementChildrenContentDecl : '%c' expected\n",
3891: type);
3892: ctxt->wellFormed = 0;
1.180 daniel 3893: ctxt->disableSAX = 1;
1.170 daniel 3894: if ((op != NULL) && (op != ret))
3895: xmlFreeElementContent(op);
1.211 veillard 3896: if ((last != NULL) && (last != ret) &&
3897: (last != ret->c1) && (last != ret->c2))
1.170 daniel 3898: xmlFreeElementContent(last);
3899: if (ret != NULL)
3900: xmlFreeElementContent(ret);
1.62 daniel 3901: return(NULL);
3902: }
1.64 daniel 3903: NEXT;
1.62 daniel 3904:
1.63 daniel 3905: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
3906: if (op == NULL) {
3907: xmlFreeElementContent(ret);
3908: return(NULL);
3909: }
3910: if (last == NULL) {
3911: op->c1 = ret;
1.65 daniel 3912: ret = cur = op;
1.63 daniel 3913: } else {
3914: cur->c2 = op;
3915: op->c1 = last;
3916: cur =op;
1.65 daniel 3917: last = NULL;
1.63 daniel 3918: }
1.152 daniel 3919: } else if (RAW == '|') {
1.62 daniel 3920: if (type == 0) type = CUR;
3921:
3922: /*
1.63 daniel 3923: * Detect "Name , Name | Name" error
1.62 daniel 3924: */
3925: else if (type != CUR) {
1.230 veillard 3926: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 3927: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3928: ctxt->sax->error(ctxt->userData,
1.62 daniel 3929: "xmlParseElementChildrenContentDecl : '%c' expected\n",
3930: type);
3931: ctxt->wellFormed = 0;
1.180 daniel 3932: ctxt->disableSAX = 1;
1.211 veillard 3933: if ((op != NULL) && (op != ret) && (op != last))
1.170 daniel 3934: xmlFreeElementContent(op);
1.211 veillard 3935: if ((last != NULL) && (last != ret) &&
3936: (last != ret->c1) && (last != ret->c2))
1.170 daniel 3937: xmlFreeElementContent(last);
3938: if (ret != NULL)
3939: xmlFreeElementContent(ret);
1.62 daniel 3940: return(NULL);
3941: }
1.64 daniel 3942: NEXT;
1.62 daniel 3943:
1.63 daniel 3944: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3945: if (op == NULL) {
1.170 daniel 3946: if ((op != NULL) && (op != ret))
3947: xmlFreeElementContent(op);
1.211 veillard 3948: if ((last != NULL) && (last != ret) &&
3949: (last != ret->c1) && (last != ret->c2))
1.170 daniel 3950: xmlFreeElementContent(last);
3951: if (ret != NULL)
3952: xmlFreeElementContent(ret);
1.63 daniel 3953: return(NULL);
3954: }
3955: if (last == NULL) {
3956: op->c1 = ret;
1.65 daniel 3957: ret = cur = op;
1.63 daniel 3958: } else {
3959: cur->c2 = op;
3960: op->c1 = last;
3961: cur =op;
1.65 daniel 3962: last = NULL;
1.63 daniel 3963: }
1.62 daniel 3964: } else {
1.230 veillard 3965: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
1.62 daniel 3966: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3967: ctxt->sax->error(ctxt->userData,
1.62 daniel 3968: "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
3969: ctxt->wellFormed = 0;
1.180 daniel 3970: ctxt->disableSAX = 1;
1.170 daniel 3971: if ((op != NULL) && (op != ret))
3972: xmlFreeElementContent(op);
1.211 veillard 3973: if ((last != NULL) && (last != ret) &&
3974: (last != ret->c1) && (last != ret->c2))
1.170 daniel 3975: xmlFreeElementContent(last);
3976: if (ret != NULL)
3977: xmlFreeElementContent(ret);
1.62 daniel 3978: return(NULL);
3979: }
1.101 daniel 3980: GROW;
1.62 daniel 3981: SKIP_BLANKS;
1.101 daniel 3982: GROW;
1.152 daniel 3983: if (RAW == '(') {
1.63 daniel 3984: /* Recurse on second child */
1.62 daniel 3985: NEXT;
3986: SKIP_BLANKS;
1.65 daniel 3987: last = xmlParseElementChildrenContentDecl(ctxt);
1.62 daniel 3988: SKIP_BLANKS;
3989: } else {
3990: elem = xmlParseName(ctxt);
3991: if (elem == NULL) {
1.230 veillard 3992: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 3993: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3994: ctxt->sax->error(ctxt->userData,
1.122 daniel 3995: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.62 daniel 3996: ctxt->wellFormed = 0;
1.180 daniel 3997: ctxt->disableSAX = 1;
1.170 daniel 3998: if ((op != NULL) && (op != ret))
3999: xmlFreeElementContent(op);
1.211 veillard 4000: if ((last != NULL) && (last != ret) &&
4001: (last != ret->c1) && (last != ret->c2))
1.170 daniel 4002: xmlFreeElementContent(last);
4003: if (ret != NULL)
4004: xmlFreeElementContent(ret);
1.62 daniel 4005: return(NULL);
4006: }
1.65 daniel 4007: last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 4008: xmlFree(elem);
1.152 daniel 4009: if (RAW == '?') {
1.105 daniel 4010: last->ocur = XML_ELEMENT_CONTENT_OPT;
4011: NEXT;
1.152 daniel 4012: } else if (RAW == '*') {
1.105 daniel 4013: last->ocur = XML_ELEMENT_CONTENT_MULT;
4014: NEXT;
1.152 daniel 4015: } else if (RAW == '+') {
1.105 daniel 4016: last->ocur = XML_ELEMENT_CONTENT_PLUS;
4017: NEXT;
4018: } else {
4019: last->ocur = XML_ELEMENT_CONTENT_ONCE;
4020: }
1.63 daniel 4021: }
4022: SKIP_BLANKS;
1.97 daniel 4023: GROW;
1.64 daniel 4024: }
1.65 daniel 4025: if ((cur != NULL) && (last != NULL)) {
4026: cur->c2 = last;
1.62 daniel 4027: }
1.187 daniel 4028: ctxt->entity = ctxt->input;
1.62 daniel 4029: NEXT;
1.152 daniel 4030: if (RAW == '?') {
1.62 daniel 4031: ret->ocur = XML_ELEMENT_CONTENT_OPT;
4032: NEXT;
1.152 daniel 4033: } else if (RAW == '*') {
1.62 daniel 4034: ret->ocur = XML_ELEMENT_CONTENT_MULT;
4035: NEXT;
1.152 daniel 4036: } else if (RAW == '+') {
1.62 daniel 4037: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
4038: NEXT;
4039: }
4040: return(ret);
1.61 daniel 4041: }
4042:
4043: /**
4044: * xmlParseElementContentDecl:
4045: * @ctxt: an XML parser context
4046: * @name: the name of the element being defined.
4047: * @result: the Element Content pointer will be stored here if any
1.22 daniel 4048: *
1.61 daniel 4049: * parse the declaration for an Element content either Mixed or Children,
4050: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4051: *
4052: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1.50 daniel 4053: *
1.61 daniel 4054: * returns: the type of element content XML_ELEMENT_TYPE_xxx
1.22 daniel 4055: */
4056:
1.61 daniel 4057: int
1.123 daniel 4058: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
1.61 daniel 4059: xmlElementContentPtr *result) {
4060:
4061: xmlElementContentPtr tree = NULL;
1.187 daniel 4062: xmlParserInputPtr input = ctxt->input;
1.61 daniel 4063: int res;
4064:
4065: *result = NULL;
4066:
1.152 daniel 4067: if (RAW != '(') {
1.230 veillard 4068: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.61 daniel 4069: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4070: ctxt->sax->error(ctxt->userData,
1.61 daniel 4071: "xmlParseElementContentDecl : '(' expected\n");
4072: ctxt->wellFormed = 0;
1.180 daniel 4073: ctxt->disableSAX = 1;
1.61 daniel 4074: return(-1);
4075: }
4076: NEXT;
1.97 daniel 4077: GROW;
1.61 daniel 4078: SKIP_BLANKS;
1.152 daniel 4079: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 4080: (NXT(2) == 'C') && (NXT(3) == 'D') &&
4081: (NXT(4) == 'A') && (NXT(5) == 'T') &&
4082: (NXT(6) == 'A')) {
1.62 daniel 4083: tree = xmlParseElementMixedContentDecl(ctxt);
1.61 daniel 4084: res = XML_ELEMENT_TYPE_MIXED;
4085: } else {
1.62 daniel 4086: tree = xmlParseElementChildrenContentDecl(ctxt);
1.61 daniel 4087: res = XML_ELEMENT_TYPE_ELEMENT;
4088: }
1.187 daniel 4089: if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
1.230 veillard 4090: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 4091: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4092: ctxt->sax->error(ctxt->userData,
4093: "Element content declaration doesn't start and stop in the same entity\n");
4094: ctxt->wellFormed = 0;
4095: ctxt->disableSAX = 1;
4096: }
1.61 daniel 4097: SKIP_BLANKS;
1.63 daniel 4098: *result = tree;
1.61 daniel 4099: return(res);
1.22 daniel 4100: }
4101:
1.50 daniel 4102: /**
4103: * xmlParseElementDecl:
4104: * @ctxt: an XML parser context
4105: *
4106: * parse an Element declaration.
1.22 daniel 4107: *
4108: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4109: *
1.99 daniel 4110: * [ VC: Unique Element Type Declaration ]
1.117 daniel 4111: * No element type may be declared more than once
1.69 daniel 4112: *
4113: * Returns the type of the element, or -1 in case of error
1.22 daniel 4114: */
1.59 daniel 4115: int
1.55 daniel 4116: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4117: xmlChar *name;
1.59 daniel 4118: int ret = -1;
1.61 daniel 4119: xmlElementContentPtr content = NULL;
1.22 daniel 4120:
1.97 daniel 4121: GROW;
1.152 daniel 4122: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4123: (NXT(2) == 'E') && (NXT(3) == 'L') &&
4124: (NXT(4) == 'E') && (NXT(5) == 'M') &&
4125: (NXT(6) == 'E') && (NXT(7) == 'N') &&
1.59 daniel 4126: (NXT(8) == 'T')) {
1.187 daniel 4127: xmlParserInputPtr input = ctxt->input;
4128:
1.40 daniel 4129: SKIP(9);
1.59 daniel 4130: if (!IS_BLANK(CUR)) {
1.230 veillard 4131: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4132: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4133: ctxt->sax->error(ctxt->userData,
1.59 daniel 4134: "Space required after 'ELEMENT'\n");
4135: ctxt->wellFormed = 0;
1.180 daniel 4136: ctxt->disableSAX = 1;
1.59 daniel 4137: }
1.42 daniel 4138: SKIP_BLANKS;
1.22 daniel 4139: name = xmlParseName(ctxt);
4140: if (name == NULL) {
1.230 veillard 4141: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 4142: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4143: ctxt->sax->error(ctxt->userData,
1.59 daniel 4144: "xmlParseElementDecl: no name for Element\n");
4145: ctxt->wellFormed = 0;
1.180 daniel 4146: ctxt->disableSAX = 1;
1.59 daniel 4147: return(-1);
4148: }
4149: if (!IS_BLANK(CUR)) {
1.230 veillard 4150: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4151: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4152: ctxt->sax->error(ctxt->userData,
1.59 daniel 4153: "Space required after the element name\n");
4154: ctxt->wellFormed = 0;
1.180 daniel 4155: ctxt->disableSAX = 1;
1.22 daniel 4156: }
1.42 daniel 4157: SKIP_BLANKS;
1.152 daniel 4158: if ((RAW == 'E') && (NXT(1) == 'M') &&
1.40 daniel 4159: (NXT(2) == 'P') && (NXT(3) == 'T') &&
4160: (NXT(4) == 'Y')) {
4161: SKIP(5);
1.22 daniel 4162: /*
4163: * Element must always be empty.
4164: */
1.59 daniel 4165: ret = XML_ELEMENT_TYPE_EMPTY;
1.152 daniel 4166: } else if ((RAW == 'A') && (NXT(1) == 'N') &&
1.40 daniel 4167: (NXT(2) == 'Y')) {
4168: SKIP(3);
1.22 daniel 4169: /*
4170: * Element is a generic container.
4171: */
1.59 daniel 4172: ret = XML_ELEMENT_TYPE_ANY;
1.152 daniel 4173: } else if (RAW == '(') {
1.61 daniel 4174: ret = xmlParseElementContentDecl(ctxt, name, &content);
1.22 daniel 4175: } else {
1.98 daniel 4176: /*
4177: * [ WFC: PEs in Internal Subset ] error handling.
4178: */
1.152 daniel 4179: if ((RAW == '%') && (ctxt->external == 0) &&
1.98 daniel 4180: (ctxt->inputNr == 1)) {
1.230 veillard 4181: ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
1.98 daniel 4182: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4183: ctxt->sax->error(ctxt->userData,
4184: "PEReference: forbidden within markup decl in internal subset\n");
4185: } else {
1.230 veillard 4186: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.98 daniel 4187: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4188: ctxt->sax->error(ctxt->userData,
4189: "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4190: }
1.61 daniel 4191: ctxt->wellFormed = 0;
1.180 daniel 4192: ctxt->disableSAX = 1;
1.119 daniel 4193: if (name != NULL) xmlFree(name);
1.61 daniel 4194: return(-1);
1.22 daniel 4195: }
1.142 daniel 4196:
4197: SKIP_BLANKS;
4198: /*
4199: * Pop-up of finished entities.
4200: */
1.152 daniel 4201: while ((RAW == 0) && (ctxt->inputNr > 1))
1.142 daniel 4202: xmlPopInput(ctxt);
1.42 daniel 4203: SKIP_BLANKS;
1.142 daniel 4204:
1.152 daniel 4205: if (RAW != '>') {
1.230 veillard 4206: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.55 daniel 4207: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4208: ctxt->sax->error(ctxt->userData,
1.31 daniel 4209: "xmlParseElementDecl: expected '>' at the end\n");
1.59 daniel 4210: ctxt->wellFormed = 0;
1.180 daniel 4211: ctxt->disableSAX = 1;
1.61 daniel 4212: } else {
1.187 daniel 4213: if (input != ctxt->input) {
1.230 veillard 4214: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 4215: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4216: ctxt->sax->error(ctxt->userData,
4217: "Element declaration doesn't start and stop in the same entity\n");
4218: ctxt->wellFormed = 0;
4219: ctxt->disableSAX = 1;
4220: }
4221:
1.40 daniel 4222: NEXT;
1.171 daniel 4223: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4224: (ctxt->sax->elementDecl != NULL))
1.76 daniel 4225: ctxt->sax->elementDecl(ctxt->userData, name, ret,
4226: content);
1.61 daniel 4227: }
1.84 daniel 4228: if (content != NULL) {
4229: xmlFreeElementContent(content);
4230: }
1.61 daniel 4231: if (name != NULL) {
1.119 daniel 4232: xmlFree(name);
1.61 daniel 4233: }
1.22 daniel 4234: }
1.59 daniel 4235: return(ret);
1.22 daniel 4236: }
4237:
1.50 daniel 4238: /**
4239: * xmlParseMarkupDecl:
4240: * @ctxt: an XML parser context
4241: *
4242: * parse Markup declarations
1.22 daniel 4243: *
4244: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4245: * NotationDecl | PI | Comment
4246: *
1.98 daniel 4247: * [ VC: Proper Declaration/PE Nesting ]
1.229 veillard 4248: * Parameter-entity replacement text must be properly nested with
1.98 daniel 4249: * markup declarations. That is to say, if either the first character
4250: * or the last character of a markup declaration (markupdecl above) is
4251: * contained in the replacement text for a parameter-entity reference,
4252: * both must be contained in the same replacement text.
4253: *
4254: * [ WFC: PEs in Internal Subset ]
4255: * In the internal DTD subset, parameter-entity references can occur
4256: * only where markup declarations can occur, not within markup declarations.
4257: * (This does not apply to references that occur in external parameter
4258: * entities or to the external subset.)
1.22 daniel 4259: */
1.55 daniel 4260: void
4261: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1.94 daniel 4262: GROW;
1.22 daniel 4263: xmlParseElementDecl(ctxt);
4264: xmlParseAttributeListDecl(ctxt);
4265: xmlParseEntityDecl(ctxt);
4266: xmlParseNotationDecl(ctxt);
4267: xmlParsePI(ctxt);
1.114 daniel 4268: xmlParseComment(ctxt);
1.98 daniel 4269: /*
4270: * This is only for internal subset. On external entities,
4271: * the replacement is done before parsing stage
4272: */
4273: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4274: xmlParsePEReference(ctxt);
1.97 daniel 4275: ctxt->instate = XML_PARSER_DTD;
1.22 daniel 4276: }
4277:
1.50 daniel 4278: /**
1.76 daniel 4279: * xmlParseTextDecl:
4280: * @ctxt: an XML parser context
4281: *
4282: * parse an XML declaration header for external entities
4283: *
4284: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
1.176 daniel 4285: *
4286: * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
1.76 daniel 4287: */
4288:
1.172 daniel 4289: void
1.76 daniel 4290: xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4291: xmlChar *version;
1.76 daniel 4292:
4293: /*
4294: * We know that '<?xml' is here.
4295: */
1.193 daniel 4296: if ((RAW == '<') && (NXT(1) == '?') &&
4297: (NXT(2) == 'x') && (NXT(3) == 'm') &&
4298: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4299: SKIP(5);
4300: } else {
1.230 veillard 4301: ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
1.193 daniel 4302: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4303: ctxt->sax->error(ctxt->userData,
4304: "Text declaration '<?xml' required\n");
4305: ctxt->wellFormed = 0;
4306: ctxt->disableSAX = 1;
4307:
4308: return;
4309: }
1.76 daniel 4310:
4311: if (!IS_BLANK(CUR)) {
1.230 veillard 4312: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 4313: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4314: ctxt->sax->error(ctxt->userData,
4315: "Space needed after '<?xml'\n");
1.76 daniel 4316: ctxt->wellFormed = 0;
1.180 daniel 4317: ctxt->disableSAX = 1;
1.76 daniel 4318: }
4319: SKIP_BLANKS;
4320:
4321: /*
4322: * We may have the VersionInfo here.
4323: */
4324: version = xmlParseVersionInfo(ctxt);
4325: if (version == NULL)
4326: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.172 daniel 4327: ctxt->input->version = version;
1.76 daniel 4328:
4329: /*
4330: * We must have the encoding declaration
4331: */
4332: if (!IS_BLANK(CUR)) {
1.230 veillard 4333: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 4334: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4335: ctxt->sax->error(ctxt->userData, "Space needed here\n");
1.76 daniel 4336: ctxt->wellFormed = 0;
1.180 daniel 4337: ctxt->disableSAX = 1;
1.76 daniel 4338: }
1.195 daniel 4339: xmlParseEncodingDecl(ctxt);
1.193 daniel 4340: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4341: /*
4342: * The XML REC instructs us to stop parsing right here
4343: */
4344: return;
4345: }
1.76 daniel 4346:
4347: SKIP_BLANKS;
1.152 daniel 4348: if ((RAW == '?') && (NXT(1) == '>')) {
1.76 daniel 4349: SKIP(2);
1.152 daniel 4350: } else if (RAW == '>') {
1.76 daniel 4351: /* Deprecated old WD ... */
1.230 veillard 4352: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 4353: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4354: ctxt->sax->error(ctxt->userData,
4355: "XML declaration must end-up with '?>'\n");
1.76 daniel 4356: ctxt->wellFormed = 0;
1.180 daniel 4357: ctxt->disableSAX = 1;
1.76 daniel 4358: NEXT;
4359: } else {
1.230 veillard 4360: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 4361: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4362: ctxt->sax->error(ctxt->userData,
4363: "parsing XML declaration: '?>' expected\n");
1.76 daniel 4364: ctxt->wellFormed = 0;
1.180 daniel 4365: ctxt->disableSAX = 1;
1.76 daniel 4366: MOVETO_ENDTAG(CUR_PTR);
4367: NEXT;
4368: }
4369: }
4370:
4371: /*
4372: * xmlParseConditionalSections
4373: * @ctxt: an XML parser context
4374: *
4375: * [61] conditionalSect ::= includeSect | ignoreSect
4376: * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4377: * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4378: * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4379: * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4380: */
4381:
4382: void
4383: xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
1.165 daniel 4384: SKIP(3);
4385: SKIP_BLANKS;
1.168 daniel 4386: if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4387: (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4388: (NXT(6) == 'E')) {
1.165 daniel 4389: SKIP(7);
1.168 daniel 4390: SKIP_BLANKS;
4391: if (RAW != '[') {
1.230 veillard 4392: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
1.168 daniel 4393: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4394: ctxt->sax->error(ctxt->userData,
4395: "XML conditional section '[' expected\n");
4396: ctxt->wellFormed = 0;
1.180 daniel 4397: ctxt->disableSAX = 1;
1.168 daniel 4398: } else {
4399: NEXT;
4400: }
1.220 veillard 4401: if (xmlParserDebugEntities) {
4402: if ((ctxt->input != NULL) && (ctxt->input->filename))
4403: fprintf(stderr, "%s(%d): ", ctxt->input->filename,
4404: ctxt->input->line);
4405: fprintf(stderr, "Entering INCLUDE Conditional Section\n");
4406: }
4407:
1.165 daniel 4408: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4409: (NXT(2) != '>'))) {
4410: const xmlChar *check = CUR_PTR;
4411: int cons = ctxt->input->consumed;
4412: int tok = ctxt->token;
4413:
4414: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4415: xmlParseConditionalSections(ctxt);
4416: } else if (IS_BLANK(CUR)) {
4417: NEXT;
4418: } else if (RAW == '%') {
4419: xmlParsePEReference(ctxt);
4420: } else
4421: xmlParseMarkupDecl(ctxt);
4422:
4423: /*
4424: * Pop-up of finished entities.
4425: */
4426: while ((RAW == 0) && (ctxt->inputNr > 1))
4427: xmlPopInput(ctxt);
4428:
4429: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4430: (tok == ctxt->token)) {
1.230 veillard 4431: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.165 daniel 4432: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4433: ctxt->sax->error(ctxt->userData,
4434: "Content error in the external subset\n");
4435: ctxt->wellFormed = 0;
1.180 daniel 4436: ctxt->disableSAX = 1;
1.165 daniel 4437: break;
4438: }
4439: }
1.220 veillard 4440: if (xmlParserDebugEntities) {
4441: if ((ctxt->input != NULL) && (ctxt->input->filename))
4442: fprintf(stderr, "%s(%d): ", ctxt->input->filename,
4443: ctxt->input->line);
4444: fprintf(stderr, "Leaving INCLUDE Conditional Section\n");
4445: }
4446:
1.168 daniel 4447: } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4448: (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
1.171 daniel 4449: int state;
4450:
1.168 daniel 4451: SKIP(6);
4452: SKIP_BLANKS;
4453: if (RAW != '[') {
1.230 veillard 4454: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
1.168 daniel 4455: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4456: ctxt->sax->error(ctxt->userData,
4457: "XML conditional section '[' expected\n");
4458: ctxt->wellFormed = 0;
1.180 daniel 4459: ctxt->disableSAX = 1;
1.168 daniel 4460: } else {
4461: NEXT;
4462: }
1.220 veillard 4463: if (xmlParserDebugEntities) {
4464: if ((ctxt->input != NULL) && (ctxt->input->filename))
4465: fprintf(stderr, "%s(%d): ", ctxt->input->filename,
4466: ctxt->input->line);
4467: fprintf(stderr, "Entering IGNORE Conditional Section\n");
4468: }
1.171 daniel 4469:
1.143 daniel 4470: /*
1.171 daniel 4471: * Parse up to the end of the conditionnal section
4472: * But disable SAX event generating DTD building in the meantime
1.143 daniel 4473: */
1.171 daniel 4474: state = ctxt->disableSAX;
1.220 veillard 4475: ctxt->disableSAX = 1;
1.165 daniel 4476: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4477: (NXT(2) != '>'))) {
1.171 daniel 4478: const xmlChar *check = CUR_PTR;
4479: int cons = ctxt->input->consumed;
4480: int tok = ctxt->token;
4481:
4482: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4483: xmlParseConditionalSections(ctxt);
4484: } else if (IS_BLANK(CUR)) {
4485: NEXT;
4486: } else if (RAW == '%') {
4487: xmlParsePEReference(ctxt);
4488: } else
4489: xmlParseMarkupDecl(ctxt);
4490:
1.165 daniel 4491: /*
4492: * Pop-up of finished entities.
4493: */
4494: while ((RAW == 0) && (ctxt->inputNr > 1))
4495: xmlPopInput(ctxt);
1.143 daniel 4496:
1.171 daniel 4497: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4498: (tok == ctxt->token)) {
1.230 veillard 4499: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.171 daniel 4500: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4501: ctxt->sax->error(ctxt->userData,
4502: "Content error in the external subset\n");
4503: ctxt->wellFormed = 0;
1.180 daniel 4504: ctxt->disableSAX = 1;
1.171 daniel 4505: break;
4506: }
1.165 daniel 4507: }
1.171 daniel 4508: ctxt->disableSAX = state;
1.220 veillard 4509: if (xmlParserDebugEntities) {
4510: if ((ctxt->input != NULL) && (ctxt->input->filename))
4511: fprintf(stderr, "%s(%d): ", ctxt->input->filename,
4512: ctxt->input->line);
4513: fprintf(stderr, "Leaving IGNORE Conditional Section\n");
4514: }
4515:
1.168 daniel 4516: } else {
1.230 veillard 4517: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
1.168 daniel 4518: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4519: ctxt->sax->error(ctxt->userData,
4520: "XML conditional section INCLUDE or IGNORE keyword expected\n");
4521: ctxt->wellFormed = 0;
1.180 daniel 4522: ctxt->disableSAX = 1;
1.143 daniel 4523: }
4524:
1.152 daniel 4525: if (RAW == 0)
1.143 daniel 4526: SHRINK;
4527:
1.152 daniel 4528: if (RAW == 0) {
1.230 veillard 4529: ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
1.76 daniel 4530: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4531: ctxt->sax->error(ctxt->userData,
4532: "XML conditional section not closed\n");
4533: ctxt->wellFormed = 0;
1.180 daniel 4534: ctxt->disableSAX = 1;
1.143 daniel 4535: } else {
4536: SKIP(3);
1.76 daniel 4537: }
4538: }
4539:
4540: /**
1.124 daniel 4541: * xmlParseExternalSubset:
1.76 daniel 4542: * @ctxt: an XML parser context
1.124 daniel 4543: * @ExternalID: the external identifier
4544: * @SystemID: the system identifier (or URL)
1.76 daniel 4545: *
4546: * parse Markup declarations from an external subset
4547: *
4548: * [30] extSubset ::= textDecl? extSubsetDecl
4549: *
4550: * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
4551: */
4552: void
1.123 daniel 4553: xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
4554: const xmlChar *SystemID) {
1.132 daniel 4555: GROW;
1.152 daniel 4556: if ((RAW == '<') && (NXT(1) == '?') &&
1.76 daniel 4557: (NXT(2) == 'x') && (NXT(3) == 'm') &&
4558: (NXT(4) == 'l')) {
1.172 daniel 4559: xmlParseTextDecl(ctxt);
1.193 daniel 4560: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4561: /*
4562: * The XML REC instructs us to stop parsing right here
4563: */
4564: ctxt->instate = XML_PARSER_EOF;
4565: return;
4566: }
1.76 daniel 4567: }
1.79 daniel 4568: if (ctxt->myDoc == NULL) {
1.116 daniel 4569: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
1.79 daniel 4570: }
4571: if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4572: xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4573:
1.96 daniel 4574: ctxt->instate = XML_PARSER_DTD;
1.101 daniel 4575: ctxt->external = 1;
1.152 daniel 4576: while (((RAW == '<') && (NXT(1) == '?')) ||
4577: ((RAW == '<') && (NXT(1) == '!')) ||
1.164 daniel 4578: IS_BLANK(CUR)) {
1.123 daniel 4579: const xmlChar *check = CUR_PTR;
1.115 daniel 4580: int cons = ctxt->input->consumed;
1.164 daniel 4581: int tok = ctxt->token;
1.115 daniel 4582:
1.221 veillard 4583: GROW;
1.152 daniel 4584: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
1.76 daniel 4585: xmlParseConditionalSections(ctxt);
4586: } else if (IS_BLANK(CUR)) {
4587: NEXT;
1.152 daniel 4588: } else if (RAW == '%') {
1.76 daniel 4589: xmlParsePEReference(ctxt);
4590: } else
4591: xmlParseMarkupDecl(ctxt);
1.77 daniel 4592:
4593: /*
4594: * Pop-up of finished entities.
4595: */
1.166 daniel 4596: while ((RAW == 0) && (ctxt->inputNr > 1))
1.77 daniel 4597: xmlPopInput(ctxt);
4598:
1.164 daniel 4599: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4600: (tok == ctxt->token)) {
1.230 veillard 4601: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.115 daniel 4602: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4603: ctxt->sax->error(ctxt->userData,
4604: "Content error in the external subset\n");
4605: ctxt->wellFormed = 0;
1.180 daniel 4606: ctxt->disableSAX = 1;
1.115 daniel 4607: break;
4608: }
1.76 daniel 4609: }
4610:
1.152 daniel 4611: if (RAW != 0) {
1.230 veillard 4612: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.76 daniel 4613: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4614: ctxt->sax->error(ctxt->userData,
4615: "Extra content at the end of the document\n");
4616: ctxt->wellFormed = 0;
1.180 daniel 4617: ctxt->disableSAX = 1;
1.76 daniel 4618: }
4619:
4620: }
4621:
4622: /**
1.77 daniel 4623: * xmlParseReference:
4624: * @ctxt: an XML parser context
4625: *
4626: * parse and handle entity references in content, depending on the SAX
4627: * interface, this may end-up in a call to character() if this is a
1.79 daniel 4628: * CharRef, a predefined entity, if there is no reference() callback.
4629: * or if the parser was asked to switch to that mode.
1.77 daniel 4630: *
4631: * [67] Reference ::= EntityRef | CharRef
4632: */
4633: void
4634: xmlParseReference(xmlParserCtxtPtr ctxt) {
4635: xmlEntityPtr ent;
1.123 daniel 4636: xmlChar *val;
1.152 daniel 4637: if (RAW != '&') return;
1.77 daniel 4638:
4639: if (NXT(1) == '#') {
1.152 daniel 4640: int i = 0;
1.153 daniel 4641: xmlChar out[10];
4642: int hex = NXT(2);
1.77 daniel 4643: int val = xmlParseCharRef(ctxt);
1.152 daniel 4644:
1.198 daniel 4645: if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
1.153 daniel 4646: /*
4647: * So we are using non-UTF-8 buffers
4648: * Check that the char fit on 8bits, if not
4649: * generate a CharRef.
4650: */
4651: if (val <= 0xFF) {
4652: out[0] = val;
4653: out[1] = 0;
1.171 daniel 4654: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4655: (!ctxt->disableSAX))
1.153 daniel 4656: ctxt->sax->characters(ctxt->userData, out, 1);
4657: } else {
4658: if ((hex == 'x') || (hex == 'X'))
4659: sprintf((char *)out, "#x%X", val);
4660: else
4661: sprintf((char *)out, "#%d", val);
1.171 daniel 4662: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4663: (!ctxt->disableSAX))
1.153 daniel 4664: ctxt->sax->reference(ctxt->userData, out);
4665: }
4666: } else {
4667: /*
4668: * Just encode the value in UTF-8
4669: */
4670: COPY_BUF(0 ,out, i, val);
4671: out[i] = 0;
1.171 daniel 4672: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4673: (!ctxt->disableSAX))
1.153 daniel 4674: ctxt->sax->characters(ctxt->userData, out, i);
4675: }
1.77 daniel 4676: } else {
4677: ent = xmlParseEntityRef(ctxt);
4678: if (ent == NULL) return;
4679: if ((ent->name != NULL) &&
1.159 daniel 4680: (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
1.180 daniel 4681: xmlNodePtr list = NULL;
4682: int ret;
4683:
4684:
4685: /*
4686: * The first reference to the entity trigger a parsing phase
4687: * where the ent->children is filled with the result from
4688: * the parsing.
4689: */
4690: if (ent->children == NULL) {
4691: xmlChar *value;
4692: value = ent->content;
4693:
4694: /*
4695: * Check that this entity is well formed
4696: */
4697: if ((value != NULL) &&
4698: (value[1] == 0) && (value[0] == '<') &&
4699: (!xmlStrcmp(ent->name, BAD_CAST "lt"))) {
4700: /*
1.222 veillard 4701: * DONE: get definite answer on this !!!
1.180 daniel 4702: * Lots of entity decls are used to declare a single
4703: * char
4704: * <!ENTITY lt "<">
4705: * Which seems to be valid since
4706: * 2.4: The ampersand character (&) and the left angle
4707: * bracket (<) may appear in their literal form only
4708: * when used ... They are also legal within the literal
4709: * entity value of an internal entity declaration;i
4710: * see "4.3.2 Well-Formed Parsed Entities".
4711: * IMHO 2.4 and 4.3.2 are directly in contradiction.
4712: * Looking at the OASIS test suite and James Clark
4713: * tests, this is broken. However the XML REC uses
4714: * it. Is the XML REC not well-formed ????
4715: * This is a hack to avoid this problem
1.222 veillard 4716: *
4717: * ANSWER: since lt gt amp .. are already defined,
4718: * this is a redefinition and hence the fact that the
4719: * contentis not well balanced is not a Wf error, this
4720: * is lousy but acceptable.
1.180 daniel 4721: */
4722: list = xmlNewDocText(ctxt->myDoc, value);
4723: if (list != NULL) {
4724: if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4725: (ent->children == NULL)) {
4726: ent->children = list;
4727: ent->last = list;
4728: list->parent = (xmlNodePtr) ent;
4729: } else {
4730: xmlFreeNodeList(list);
4731: }
4732: } else if (list != NULL) {
4733: xmlFreeNodeList(list);
4734: }
1.181 daniel 4735: } else {
1.180 daniel 4736: /*
4737: * 4.3.2: An internal general parsed entity is well-formed
4738: * if its replacement text matches the production labeled
4739: * content.
4740: */
1.185 daniel 4741: if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
4742: ctxt->depth++;
1.180 daniel 4743: ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
1.185 daniel 4744: ctxt->sax, NULL, ctxt->depth,
4745: value, &list);
4746: ctxt->depth--;
4747: } else if (ent->etype ==
4748: XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
4749: ctxt->depth++;
1.180 daniel 4750: ret = xmlParseExternalEntity(ctxt->myDoc,
1.185 daniel 4751: ctxt->sax, NULL, ctxt->depth,
1.228 veillard 4752: ent->URI, ent->ExternalID, &list);
1.185 daniel 4753: ctxt->depth--;
4754: } else {
1.180 daniel 4755: ret = -1;
4756: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4757: ctxt->sax->error(ctxt->userData,
4758: "Internal: invalid entity type\n");
4759: }
1.185 daniel 4760: if (ret == XML_ERR_ENTITY_LOOP) {
1.230 veillard 4761: ctxt->errNo = XML_ERR_ENTITY_LOOP;
1.185 daniel 4762: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4763: ctxt->sax->error(ctxt->userData,
4764: "Detected entity reference loop\n");
4765: ctxt->wellFormed = 0;
4766: ctxt->disableSAX = 1;
4767: } else if ((ret == 0) && (list != NULL)) {
1.180 daniel 4768: if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4769: (ent->children == NULL)) {
4770: ent->children = list;
4771: while (list != NULL) {
4772: list->parent = (xmlNodePtr) ent;
4773: if (list->next == NULL)
4774: ent->last = list;
4775: list = list->next;
4776: }
4777: } else {
4778: xmlFreeNodeList(list);
4779: }
4780: } else if (ret > 0) {
1.230 veillard 4781: ctxt->errNo = ret;
1.180 daniel 4782: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4783: ctxt->sax->error(ctxt->userData,
4784: "Entity value required\n");
4785: ctxt->wellFormed = 0;
4786: ctxt->disableSAX = 1;
4787: } else if (list != NULL) {
4788: xmlFreeNodeList(list);
4789: }
4790: }
4791: }
1.113 daniel 4792: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
1.171 daniel 4793: (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
1.113 daniel 4794: /*
4795: * Create a node.
4796: */
4797: ctxt->sax->reference(ctxt->userData, ent->name);
4798: return;
4799: } else if (ctxt->replaceEntities) {
1.222 veillard 4800: if ((ctxt->node != NULL) && (ent->children != NULL)) {
4801: /*
4802: * Seems we are generating the DOM content, do
4803: * a simple tree copy
4804: */
4805: xmlNodePtr new;
4806: new = xmlCopyNodeList(ent->children);
4807:
4808: xmlAddChildList(ctxt->node, new);
4809: /*
4810: * This is to avoid a nasty side effect, see
4811: * characters() in SAX.c
4812: */
4813: ctxt->nodemem = 0;
4814: ctxt->nodelen = 0;
4815: return;
4816: } else {
4817: /*
4818: * Probably running in SAX mode
4819: */
4820: xmlParserInputPtr input;
1.79 daniel 4821:
1.222 veillard 4822: input = xmlNewEntityInputStream(ctxt, ent);
4823: xmlPushInput(ctxt, input);
4824: if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
4825: (RAW == '<') && (NXT(1) == '?') &&
4826: (NXT(2) == 'x') && (NXT(3) == 'm') &&
4827: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4828: xmlParseTextDecl(ctxt);
4829: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4830: /*
4831: * The XML REC instructs us to stop parsing right here
4832: */
4833: ctxt->instate = XML_PARSER_EOF;
4834: return;
4835: }
4836: if (input->standalone == 1) {
1.230 veillard 4837: ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
1.222 veillard 4838: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4839: ctxt->sax->error(ctxt->userData,
4840: "external parsed entities cannot be standalone\n");
4841: ctxt->wellFormed = 0;
4842: ctxt->disableSAX = 1;
4843: }
1.167 daniel 4844: }
1.222 veillard 4845: return;
1.167 daniel 4846: }
1.113 daniel 4847: }
1.222 veillard 4848: } else {
4849: val = ent->content;
4850: if (val == NULL) return;
4851: /*
4852: * inline the entity.
4853: */
4854: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4855: (!ctxt->disableSAX))
4856: ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
1.77 daniel 4857: }
4858: }
1.24 daniel 4859: }
4860:
1.50 daniel 4861: /**
4862: * xmlParseEntityRef:
4863: * @ctxt: an XML parser context
4864: *
4865: * parse ENTITY references declarations
1.24 daniel 4866: *
4867: * [68] EntityRef ::= '&' Name ';'
1.68 daniel 4868: *
1.98 daniel 4869: * [ WFC: Entity Declared ]
4870: * In a document without any DTD, a document with only an internal DTD
4871: * subset which contains no parameter entity references, or a document
4872: * with "standalone='yes'", the Name given in the entity reference
4873: * must match that in an entity declaration, except that well-formed
4874: * documents need not declare any of the following entities: amp, lt,
4875: * gt, apos, quot. The declaration of a parameter entity must precede
4876: * any reference to it. Similarly, the declaration of a general entity
4877: * must precede any reference to it which appears in a default value in an
4878: * attribute-list declaration. Note that if entities are declared in the
4879: * external subset or in external parameter entities, a non-validating
4880: * processor is not obligated to read and process their declarations;
4881: * for such documents, the rule that an entity must be declared is a
4882: * well-formedness constraint only if standalone='yes'.
4883: *
4884: * [ WFC: Parsed Entity ]
4885: * An entity reference must not contain the name of an unparsed entity
4886: *
1.77 daniel 4887: * Returns the xmlEntityPtr if found, or NULL otherwise.
1.24 daniel 4888: */
1.77 daniel 4889: xmlEntityPtr
1.55 daniel 4890: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1.123 daniel 4891: xmlChar *name;
1.72 daniel 4892: xmlEntityPtr ent = NULL;
1.24 daniel 4893:
1.91 daniel 4894: GROW;
1.111 daniel 4895:
1.152 daniel 4896: if (RAW == '&') {
1.40 daniel 4897: NEXT;
1.24 daniel 4898: name = xmlParseName(ctxt);
4899: if (name == NULL) {
1.230 veillard 4900: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 4901: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 4902: ctxt->sax->error(ctxt->userData,
4903: "xmlParseEntityRef: no name\n");
1.59 daniel 4904: ctxt->wellFormed = 0;
1.180 daniel 4905: ctxt->disableSAX = 1;
1.24 daniel 4906: } else {
1.152 daniel 4907: if (RAW == ';') {
1.40 daniel 4908: NEXT;
1.24 daniel 4909: /*
1.77 daniel 4910: * Ask first SAX for entity resolution, otherwise try the
4911: * predefined set.
4912: */
4913: if (ctxt->sax != NULL) {
4914: if (ctxt->sax->getEntity != NULL)
4915: ent = ctxt->sax->getEntity(ctxt->userData, name);
4916: if (ent == NULL)
4917: ent = xmlGetPredefinedEntity(name);
4918: }
4919: /*
1.98 daniel 4920: * [ WFC: Entity Declared ]
4921: * In a document without any DTD, a document with only an
4922: * internal DTD subset which contains no parameter entity
4923: * references, or a document with "standalone='yes'", the
4924: * Name given in the entity reference must match that in an
4925: * entity declaration, except that well-formed documents
4926: * need not declare any of the following entities: amp, lt,
4927: * gt, apos, quot.
4928: * The declaration of a parameter entity must precede any
4929: * reference to it.
4930: * Similarly, the declaration of a general entity must
4931: * precede any reference to it which appears in a default
4932: * value in an attribute-list declaration. Note that if
4933: * entities are declared in the external subset or in
4934: * external parameter entities, a non-validating processor
4935: * is not obligated to read and process their declarations;
4936: * for such documents, the rule that an entity must be
4937: * declared is a well-formedness constraint only if
4938: * standalone='yes'.
1.59 daniel 4939: */
1.77 daniel 4940: if (ent == NULL) {
1.98 daniel 4941: if ((ctxt->standalone == 1) ||
4942: ((ctxt->hasExternalSubset == 0) &&
4943: (ctxt->hasPErefs == 0))) {
1.230 veillard 4944: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.98 daniel 4945: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 4946: ctxt->sax->error(ctxt->userData,
4947: "Entity '%s' not defined\n", name);
4948: ctxt->wellFormed = 0;
1.180 daniel 4949: ctxt->disableSAX = 1;
1.77 daniel 4950: } else {
1.230 veillard 4951: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
1.98 daniel 4952: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
4953: ctxt->sax->warning(ctxt->userData,
4954: "Entity '%s' not defined\n", name);
1.59 daniel 4955: }
1.77 daniel 4956: }
1.59 daniel 4957:
4958: /*
1.98 daniel 4959: * [ WFC: Parsed Entity ]
4960: * An entity reference must not contain the name of an
4961: * unparsed entity
4962: */
1.159 daniel 4963: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.230 veillard 4964: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 4965: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4966: ctxt->sax->error(ctxt->userData,
4967: "Entity reference to unparsed entity %s\n", name);
4968: ctxt->wellFormed = 0;
1.180 daniel 4969: ctxt->disableSAX = 1;
1.98 daniel 4970: }
4971:
4972: /*
4973: * [ WFC: No External Entity References ]
4974: * Attribute values cannot contain direct or indirect
4975: * entity references to external entities.
4976: */
4977: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 4978: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.230 veillard 4979: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
1.98 daniel 4980: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4981: ctxt->sax->error(ctxt->userData,
4982: "Attribute references external entity '%s'\n", name);
4983: ctxt->wellFormed = 0;
1.180 daniel 4984: ctxt->disableSAX = 1;
1.98 daniel 4985: }
4986: /*
4987: * [ WFC: No < in Attribute Values ]
4988: * The replacement text of any entity referred to directly or
4989: * indirectly in an attribute value (other than "<") must
4990: * not contain a <.
1.59 daniel 4991: */
1.98 daniel 4992: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.116 daniel 4993: (ent != NULL) &&
4994: (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
1.98 daniel 4995: (ent->content != NULL) &&
4996: (xmlStrchr(ent->content, '<'))) {
1.230 veillard 4997: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.98 daniel 4998: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4999: ctxt->sax->error(ctxt->userData,
5000: "'<' in entity '%s' is not allowed in attributes values\n", name);
5001: ctxt->wellFormed = 0;
1.180 daniel 5002: ctxt->disableSAX = 1;
1.98 daniel 5003: }
5004:
5005: /*
5006: * Internal check, no parameter entities here ...
5007: */
5008: else {
1.159 daniel 5009: switch (ent->etype) {
1.59 daniel 5010: case XML_INTERNAL_PARAMETER_ENTITY:
5011: case XML_EXTERNAL_PARAMETER_ENTITY:
1.230 veillard 5012: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
1.59 daniel 5013: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5014: ctxt->sax->error(ctxt->userData,
1.59 daniel 5015: "Attempt to reference the parameter entity '%s'\n", name);
5016: ctxt->wellFormed = 0;
1.180 daniel 5017: ctxt->disableSAX = 1;
5018: break;
5019: default:
1.59 daniel 5020: break;
5021: }
5022: }
5023:
5024: /*
1.98 daniel 5025: * [ WFC: No Recursion ]
1.229 veillard 5026: * A parsed entity must not contain a recursive reference
1.117 daniel 5027: * to itself, either directly or indirectly.
1.229 veillard 5028: * Done somewhere else
1.59 daniel 5029: */
1.77 daniel 5030:
1.24 daniel 5031: } else {
1.230 veillard 5032: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.55 daniel 5033: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5034: ctxt->sax->error(ctxt->userData,
1.59 daniel 5035: "xmlParseEntityRef: expecting ';'\n");
5036: ctxt->wellFormed = 0;
1.180 daniel 5037: ctxt->disableSAX = 1;
1.24 daniel 5038: }
1.119 daniel 5039: xmlFree(name);
1.24 daniel 5040: }
5041: }
1.77 daniel 5042: return(ent);
1.24 daniel 5043: }
1.229 veillard 5044:
1.135 daniel 5045: /**
5046: * xmlParseStringEntityRef:
5047: * @ctxt: an XML parser context
5048: * @str: a pointer to an index in the string
5049: *
5050: * parse ENTITY references declarations, but this version parses it from
5051: * a string value.
5052: *
5053: * [68] EntityRef ::= '&' Name ';'
5054: *
5055: * [ WFC: Entity Declared ]
5056: * In a document without any DTD, a document with only an internal DTD
5057: * subset which contains no parameter entity references, or a document
5058: * with "standalone='yes'", the Name given in the entity reference
5059: * must match that in an entity declaration, except that well-formed
5060: * documents need not declare any of the following entities: amp, lt,
5061: * gt, apos, quot. The declaration of a parameter entity must precede
5062: * any reference to it. Similarly, the declaration of a general entity
5063: * must precede any reference to it which appears in a default value in an
5064: * attribute-list declaration. Note that if entities are declared in the
5065: * external subset or in external parameter entities, a non-validating
5066: * processor is not obligated to read and process their declarations;
5067: * for such documents, the rule that an entity must be declared is a
5068: * well-formedness constraint only if standalone='yes'.
5069: *
5070: * [ WFC: Parsed Entity ]
5071: * An entity reference must not contain the name of an unparsed entity
5072: *
5073: * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5074: * is updated to the current location in the string.
5075: */
5076: xmlEntityPtr
5077: xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5078: xmlChar *name;
5079: const xmlChar *ptr;
5080: xmlChar cur;
5081: xmlEntityPtr ent = NULL;
5082:
1.156 daniel 5083: if ((str == NULL) || (*str == NULL))
5084: return(NULL);
1.135 daniel 5085: ptr = *str;
5086: cur = *ptr;
5087: if (cur == '&') {
5088: ptr++;
5089: cur = *ptr;
5090: name = xmlParseStringName(ctxt, &ptr);
5091: if (name == NULL) {
1.230 veillard 5092: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.135 daniel 5093: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5094: ctxt->sax->error(ctxt->userData,
5095: "xmlParseEntityRef: no name\n");
5096: ctxt->wellFormed = 0;
1.180 daniel 5097: ctxt->disableSAX = 1;
1.135 daniel 5098: } else {
1.185 daniel 5099: if (*ptr == ';') {
5100: ptr++;
1.135 daniel 5101: /*
5102: * Ask first SAX for entity resolution, otherwise try the
5103: * predefined set.
5104: */
5105: if (ctxt->sax != NULL) {
5106: if (ctxt->sax->getEntity != NULL)
5107: ent = ctxt->sax->getEntity(ctxt->userData, name);
5108: if (ent == NULL)
5109: ent = xmlGetPredefinedEntity(name);
5110: }
5111: /*
5112: * [ WFC: Entity Declared ]
5113: * In a document without any DTD, a document with only an
5114: * internal DTD subset which contains no parameter entity
5115: * references, or a document with "standalone='yes'", the
5116: * Name given in the entity reference must match that in an
5117: * entity declaration, except that well-formed documents
5118: * need not declare any of the following entities: amp, lt,
5119: * gt, apos, quot.
5120: * The declaration of a parameter entity must precede any
5121: * reference to it.
5122: * Similarly, the declaration of a general entity must
5123: * precede any reference to it which appears in a default
5124: * value in an attribute-list declaration. Note that if
5125: * entities are declared in the external subset or in
5126: * external parameter entities, a non-validating processor
5127: * is not obligated to read and process their declarations;
5128: * for such documents, the rule that an entity must be
5129: * declared is a well-formedness constraint only if
5130: * standalone='yes'.
5131: */
5132: if (ent == NULL) {
5133: if ((ctxt->standalone == 1) ||
5134: ((ctxt->hasExternalSubset == 0) &&
5135: (ctxt->hasPErefs == 0))) {
1.230 veillard 5136: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.135 daniel 5137: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5138: ctxt->sax->error(ctxt->userData,
5139: "Entity '%s' not defined\n", name);
5140: ctxt->wellFormed = 0;
1.180 daniel 5141: ctxt->disableSAX = 1;
1.135 daniel 5142: } else {
1.230 veillard 5143: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
1.135 daniel 5144: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5145: ctxt->sax->warning(ctxt->userData,
5146: "Entity '%s' not defined\n", name);
5147: }
5148: }
5149:
5150: /*
5151: * [ WFC: Parsed Entity ]
5152: * An entity reference must not contain the name of an
5153: * unparsed entity
5154: */
1.159 daniel 5155: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.230 veillard 5156: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.135 daniel 5157: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5158: ctxt->sax->error(ctxt->userData,
5159: "Entity reference to unparsed entity %s\n", name);
5160: ctxt->wellFormed = 0;
1.180 daniel 5161: ctxt->disableSAX = 1;
1.135 daniel 5162: }
5163:
5164: /*
5165: * [ WFC: No External Entity References ]
5166: * Attribute values cannot contain direct or indirect
5167: * entity references to external entities.
5168: */
5169: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 5170: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.230 veillard 5171: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
1.135 daniel 5172: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5173: ctxt->sax->error(ctxt->userData,
5174: "Attribute references external entity '%s'\n", name);
5175: ctxt->wellFormed = 0;
1.180 daniel 5176: ctxt->disableSAX = 1;
1.135 daniel 5177: }
5178: /*
5179: * [ WFC: No < in Attribute Values ]
5180: * The replacement text of any entity referred to directly or
5181: * indirectly in an attribute value (other than "<") must
5182: * not contain a <.
5183: */
5184: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5185: (ent != NULL) &&
5186: (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
5187: (ent->content != NULL) &&
5188: (xmlStrchr(ent->content, '<'))) {
1.230 veillard 5189: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.135 daniel 5190: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5191: ctxt->sax->error(ctxt->userData,
5192: "'<' in entity '%s' is not allowed in attributes values\n", name);
5193: ctxt->wellFormed = 0;
1.180 daniel 5194: ctxt->disableSAX = 1;
1.135 daniel 5195: }
5196:
5197: /*
5198: * Internal check, no parameter entities here ...
5199: */
5200: else {
1.159 daniel 5201: switch (ent->etype) {
1.135 daniel 5202: case XML_INTERNAL_PARAMETER_ENTITY:
5203: case XML_EXTERNAL_PARAMETER_ENTITY:
1.230 veillard 5204: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
1.135 daniel 5205: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5206: ctxt->sax->error(ctxt->userData,
5207: "Attempt to reference the parameter entity '%s'\n", name);
5208: ctxt->wellFormed = 0;
1.180 daniel 5209: ctxt->disableSAX = 1;
5210: break;
5211: default:
1.135 daniel 5212: break;
5213: }
5214: }
5215:
5216: /*
5217: * [ WFC: No Recursion ]
1.229 veillard 5218: * A parsed entity must not contain a recursive reference
1.135 daniel 5219: * to itself, either directly or indirectly.
1.229 veillard 5220: * Done somewhwere else
1.135 daniel 5221: */
5222:
5223: } else {
1.230 veillard 5224: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.135 daniel 5225: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5226: ctxt->sax->error(ctxt->userData,
5227: "xmlParseEntityRef: expecting ';'\n");
5228: ctxt->wellFormed = 0;
1.180 daniel 5229: ctxt->disableSAX = 1;
1.135 daniel 5230: }
5231: xmlFree(name);
5232: }
5233: }
1.185 daniel 5234: *str = ptr;
1.135 daniel 5235: return(ent);
5236: }
1.24 daniel 5237:
1.50 daniel 5238: /**
5239: * xmlParsePEReference:
5240: * @ctxt: an XML parser context
5241: *
5242: * parse PEReference declarations
1.77 daniel 5243: * The entity content is handled directly by pushing it's content as
5244: * a new input stream.
1.22 daniel 5245: *
5246: * [69] PEReference ::= '%' Name ';'
1.68 daniel 5247: *
1.98 daniel 5248: * [ WFC: No Recursion ]
1.229 veillard 5249: * A parsed entity must not contain a recursive
1.98 daniel 5250: * reference to itself, either directly or indirectly.
5251: *
5252: * [ WFC: Entity Declared ]
5253: * In a document without any DTD, a document with only an internal DTD
5254: * subset which contains no parameter entity references, or a document
5255: * with "standalone='yes'", ... ... The declaration of a parameter
5256: * entity must precede any reference to it...
5257: *
5258: * [ VC: Entity Declared ]
5259: * In a document with an external subset or external parameter entities
5260: * with "standalone='no'", ... ... The declaration of a parameter entity
5261: * must precede any reference to it...
5262: *
5263: * [ WFC: In DTD ]
5264: * Parameter-entity references may only appear in the DTD.
5265: * NOTE: misleading but this is handled.
1.22 daniel 5266: */
1.77 daniel 5267: void
1.55 daniel 5268: xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 5269: xmlChar *name;
1.72 daniel 5270: xmlEntityPtr entity = NULL;
1.50 daniel 5271: xmlParserInputPtr input;
1.22 daniel 5272:
1.152 daniel 5273: if (RAW == '%') {
1.40 daniel 5274: NEXT;
1.22 daniel 5275: name = xmlParseName(ctxt);
5276: if (name == NULL) {
1.230 veillard 5277: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 5278: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5279: ctxt->sax->error(ctxt->userData,
5280: "xmlParsePEReference: no name\n");
1.59 daniel 5281: ctxt->wellFormed = 0;
1.180 daniel 5282: ctxt->disableSAX = 1;
1.22 daniel 5283: } else {
1.152 daniel 5284: if (RAW == ';') {
1.40 daniel 5285: NEXT;
1.98 daniel 5286: if ((ctxt->sax != NULL) &&
5287: (ctxt->sax->getParameterEntity != NULL))
5288: entity = ctxt->sax->getParameterEntity(ctxt->userData,
5289: name);
1.45 daniel 5290: if (entity == NULL) {
1.98 daniel 5291: /*
5292: * [ WFC: Entity Declared ]
5293: * In a document without any DTD, a document with only an
5294: * internal DTD subset which contains no parameter entity
5295: * references, or a document with "standalone='yes'", ...
5296: * ... The declaration of a parameter entity must precede
5297: * any reference to it...
5298: */
5299: if ((ctxt->standalone == 1) ||
5300: ((ctxt->hasExternalSubset == 0) &&
5301: (ctxt->hasPErefs == 0))) {
1.230 veillard 5302: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.220 veillard 5303: if ((!ctxt->disableSAX) &&
5304: (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 5305: ctxt->sax->error(ctxt->userData,
5306: "PEReference: %%%s; not found\n", name);
5307: ctxt->wellFormed = 0;
1.180 daniel 5308: ctxt->disableSAX = 1;
1.98 daniel 5309: } else {
5310: /*
5311: * [ VC: Entity Declared ]
5312: * In a document with an external subset or external
5313: * parameter entities with "standalone='no'", ...
5314: * ... The declaration of a parameter entity must precede
5315: * any reference to it...
5316: */
1.220 veillard 5317: if ((!ctxt->disableSAX) &&
5318: (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1.98 daniel 5319: ctxt->sax->warning(ctxt->userData,
5320: "PEReference: %%%s; not found\n", name);
5321: ctxt->valid = 0;
5322: }
1.50 daniel 5323: } else {
1.98 daniel 5324: /*
5325: * Internal checking in case the entity quest barfed
5326: */
1.159 daniel 5327: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5328: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.98 daniel 5329: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5330: ctxt->sax->warning(ctxt->userData,
5331: "Internal: %%%s; is not a parameter entity\n", name);
5332: } else {
1.164 daniel 5333: /*
5334: * TODO !!!
5335: * handle the extra spaces added before and after
5336: * c.f. http://www.w3.org/TR/REC-xml#as-PE
5337: */
1.98 daniel 5338: input = xmlNewEntityInputStream(ctxt, entity);
5339: xmlPushInput(ctxt, input);
1.164 daniel 5340: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5341: (RAW == '<') && (NXT(1) == '?') &&
5342: (NXT(2) == 'x') && (NXT(3) == 'm') &&
5343: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 5344: xmlParseTextDecl(ctxt);
1.193 daniel 5345: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5346: /*
5347: * The XML REC instructs us to stop parsing
5348: * right here
5349: */
5350: ctxt->instate = XML_PARSER_EOF;
5351: xmlFree(name);
5352: return;
5353: }
1.164 daniel 5354: }
5355: if (ctxt->token == 0)
5356: ctxt->token = ' ';
1.98 daniel 5357: }
1.45 daniel 5358: }
1.98 daniel 5359: ctxt->hasPErefs = 1;
1.22 daniel 5360: } else {
1.230 veillard 5361: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.55 daniel 5362: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5363: ctxt->sax->error(ctxt->userData,
1.59 daniel 5364: "xmlParsePEReference: expecting ';'\n");
5365: ctxt->wellFormed = 0;
1.180 daniel 5366: ctxt->disableSAX = 1;
1.22 daniel 5367: }
1.119 daniel 5368: xmlFree(name);
1.3 veillard 5369: }
5370: }
5371: }
5372:
1.50 daniel 5373: /**
1.135 daniel 5374: * xmlParseStringPEReference:
5375: * @ctxt: an XML parser context
5376: * @str: a pointer to an index in the string
5377: *
5378: * parse PEReference declarations
5379: *
5380: * [69] PEReference ::= '%' Name ';'
5381: *
5382: * [ WFC: No Recursion ]
1.229 veillard 5383: * A parsed entity must not contain a recursive
1.135 daniel 5384: * reference to itself, either directly or indirectly.
5385: *
5386: * [ WFC: Entity Declared ]
5387: * In a document without any DTD, a document with only an internal DTD
5388: * subset which contains no parameter entity references, or a document
5389: * with "standalone='yes'", ... ... The declaration of a parameter
5390: * entity must precede any reference to it...
5391: *
5392: * [ VC: Entity Declared ]
5393: * In a document with an external subset or external parameter entities
5394: * with "standalone='no'", ... ... The declaration of a parameter entity
5395: * must precede any reference to it...
5396: *
5397: * [ WFC: In DTD ]
5398: * Parameter-entity references may only appear in the DTD.
5399: * NOTE: misleading but this is handled.
5400: *
5401: * Returns the string of the entity content.
5402: * str is updated to the current value of the index
5403: */
5404: xmlEntityPtr
5405: xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5406: const xmlChar *ptr;
5407: xmlChar cur;
5408: xmlChar *name;
5409: xmlEntityPtr entity = NULL;
5410:
5411: if ((str == NULL) || (*str == NULL)) return(NULL);
5412: ptr = *str;
5413: cur = *ptr;
5414: if (cur == '%') {
5415: ptr++;
5416: cur = *ptr;
5417: name = xmlParseStringName(ctxt, &ptr);
5418: if (name == NULL) {
1.230 veillard 5419: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.135 daniel 5420: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5421: ctxt->sax->error(ctxt->userData,
5422: "xmlParseStringPEReference: no name\n");
5423: ctxt->wellFormed = 0;
1.180 daniel 5424: ctxt->disableSAX = 1;
1.135 daniel 5425: } else {
5426: cur = *ptr;
5427: if (cur == ';') {
5428: ptr++;
5429: cur = *ptr;
5430: if ((ctxt->sax != NULL) &&
5431: (ctxt->sax->getParameterEntity != NULL))
5432: entity = ctxt->sax->getParameterEntity(ctxt->userData,
5433: name);
5434: if (entity == NULL) {
5435: /*
5436: * [ WFC: Entity Declared ]
5437: * In a document without any DTD, a document with only an
5438: * internal DTD subset which contains no parameter entity
5439: * references, or a document with "standalone='yes'", ...
5440: * ... The declaration of a parameter entity must precede
5441: * any reference to it...
5442: */
5443: if ((ctxt->standalone == 1) ||
5444: ((ctxt->hasExternalSubset == 0) &&
5445: (ctxt->hasPErefs == 0))) {
1.230 veillard 5446: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.135 daniel 5447: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5448: ctxt->sax->error(ctxt->userData,
5449: "PEReference: %%%s; not found\n", name);
5450: ctxt->wellFormed = 0;
1.180 daniel 5451: ctxt->disableSAX = 1;
1.135 daniel 5452: } else {
5453: /*
5454: * [ VC: Entity Declared ]
5455: * In a document with an external subset or external
5456: * parameter entities with "standalone='no'", ...
5457: * ... The declaration of a parameter entity must
5458: * precede any reference to it...
5459: */
5460: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5461: ctxt->sax->warning(ctxt->userData,
5462: "PEReference: %%%s; not found\n", name);
5463: ctxt->valid = 0;
5464: }
5465: } else {
5466: /*
5467: * Internal checking in case the entity quest barfed
5468: */
1.159 daniel 5469: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5470: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.135 daniel 5471: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5472: ctxt->sax->warning(ctxt->userData,
5473: "Internal: %%%s; is not a parameter entity\n", name);
5474: }
5475: }
5476: ctxt->hasPErefs = 1;
5477: } else {
1.230 veillard 5478: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.135 daniel 5479: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5480: ctxt->sax->error(ctxt->userData,
5481: "xmlParseStringPEReference: expecting ';'\n");
5482: ctxt->wellFormed = 0;
1.180 daniel 5483: ctxt->disableSAX = 1;
1.135 daniel 5484: }
5485: xmlFree(name);
5486: }
5487: }
5488: *str = ptr;
5489: return(entity);
5490: }
5491:
5492: /**
1.181 daniel 5493: * xmlParseDocTypeDecl:
1.50 daniel 5494: * @ctxt: an XML parser context
5495: *
5496: * parse a DOCTYPE declaration
1.21 daniel 5497: *
1.22 daniel 5498: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
5499: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.98 daniel 5500: *
5501: * [ VC: Root Element Type ]
1.99 daniel 5502: * The Name in the document type declaration must match the element
1.98 daniel 5503: * type of the root element.
1.21 daniel 5504: */
5505:
1.55 daniel 5506: void
5507: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.165 daniel 5508: xmlChar *name = NULL;
1.123 daniel 5509: xmlChar *ExternalID = NULL;
5510: xmlChar *URI = NULL;
1.21 daniel 5511:
5512: /*
5513: * We know that '<!DOCTYPE' has been detected.
5514: */
1.40 daniel 5515: SKIP(9);
1.21 daniel 5516:
1.42 daniel 5517: SKIP_BLANKS;
1.21 daniel 5518:
5519: /*
5520: * Parse the DOCTYPE name.
5521: */
5522: name = xmlParseName(ctxt);
5523: if (name == NULL) {
1.230 veillard 5524: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 5525: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5526: ctxt->sax->error(ctxt->userData,
5527: "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.59 daniel 5528: ctxt->wellFormed = 0;
1.180 daniel 5529: ctxt->disableSAX = 1;
1.21 daniel 5530: }
1.165 daniel 5531: ctxt->intSubName = name;
1.21 daniel 5532:
1.42 daniel 5533: SKIP_BLANKS;
1.21 daniel 5534:
5535: /*
1.22 daniel 5536: * Check for SystemID and ExternalID
5537: */
1.67 daniel 5538: URI = xmlParseExternalID(ctxt, &ExternalID, 1);
1.98 daniel 5539:
5540: if ((URI != NULL) || (ExternalID != NULL)) {
5541: ctxt->hasExternalSubset = 1;
5542: }
1.165 daniel 5543: ctxt->extSubURI = URI;
5544: ctxt->extSubSystem = ExternalID;
1.98 daniel 5545:
1.42 daniel 5546: SKIP_BLANKS;
1.36 daniel 5547:
1.76 daniel 5548: /*
1.165 daniel 5549: * Create and update the internal subset.
1.76 daniel 5550: */
1.171 daniel 5551: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
5552: (!ctxt->disableSAX))
1.74 daniel 5553: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
1.22 daniel 5554:
5555: /*
1.140 daniel 5556: * Is there any internal subset declarations ?
5557: * they are handled separately in xmlParseInternalSubset()
5558: */
1.152 daniel 5559: if (RAW == '[')
1.140 daniel 5560: return;
5561:
5562: /*
5563: * We should be at the end of the DOCTYPE declaration.
5564: */
1.152 daniel 5565: if (RAW != '>') {
1.230 veillard 5566: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
1.140 daniel 5567: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5568: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5569: ctxt->wellFormed = 0;
1.180 daniel 5570: ctxt->disableSAX = 1;
1.140 daniel 5571: }
5572: NEXT;
5573: }
5574:
5575: /**
1.181 daniel 5576: * xmlParseInternalsubset:
1.140 daniel 5577: * @ctxt: an XML parser context
5578: *
5579: * parse the internal subset declaration
5580: *
5581: * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5582: */
5583:
5584: void
5585: xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
5586: /*
1.22 daniel 5587: * Is there any DTD definition ?
5588: */
1.152 daniel 5589: if (RAW == '[') {
1.96 daniel 5590: ctxt->instate = XML_PARSER_DTD;
1.40 daniel 5591: NEXT;
1.22 daniel 5592: /*
5593: * Parse the succession of Markup declarations and
5594: * PEReferences.
5595: * Subsequence (markupdecl | PEReference | S)*
5596: */
1.152 daniel 5597: while (RAW != ']') {
1.123 daniel 5598: const xmlChar *check = CUR_PTR;
1.115 daniel 5599: int cons = ctxt->input->consumed;
1.22 daniel 5600:
1.42 daniel 5601: SKIP_BLANKS;
1.22 daniel 5602: xmlParseMarkupDecl(ctxt);
1.50 daniel 5603: xmlParsePEReference(ctxt);
1.22 daniel 5604:
1.115 daniel 5605: /*
5606: * Pop-up of finished entities.
5607: */
1.152 daniel 5608: while ((RAW == 0) && (ctxt->inputNr > 1))
1.115 daniel 5609: xmlPopInput(ctxt);
5610:
1.118 daniel 5611: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
1.230 veillard 5612: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 5613: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5614: ctxt->sax->error(ctxt->userData,
1.140 daniel 5615: "xmlParseInternalSubset: error detected in Markup declaration\n");
1.59 daniel 5616: ctxt->wellFormed = 0;
1.180 daniel 5617: ctxt->disableSAX = 1;
1.22 daniel 5618: break;
5619: }
5620: }
1.209 veillard 5621: if (RAW == ']') {
5622: NEXT;
5623: SKIP_BLANKS;
5624: }
1.22 daniel 5625: }
5626:
5627: /*
5628: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 5629: */
1.152 daniel 5630: if (RAW != '>') {
1.230 veillard 5631: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
1.55 daniel 5632: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5633: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
1.59 daniel 5634: ctxt->wellFormed = 0;
1.180 daniel 5635: ctxt->disableSAX = 1;
1.21 daniel 5636: }
1.40 daniel 5637: NEXT;
1.21 daniel 5638: }
5639:
1.50 daniel 5640: /**
5641: * xmlParseAttribute:
5642: * @ctxt: an XML parser context
1.123 daniel 5643: * @value: a xmlChar ** used to store the value of the attribute
1.50 daniel 5644: *
5645: * parse an attribute
1.3 veillard 5646: *
1.22 daniel 5647: * [41] Attribute ::= Name Eq AttValue
5648: *
1.98 daniel 5649: * [ WFC: No External Entity References ]
5650: * Attribute values cannot contain direct or indirect entity references
5651: * to external entities.
5652: *
5653: * [ WFC: No < in Attribute Values ]
5654: * The replacement text of any entity referred to directly or indirectly in
5655: * an attribute value (other than "<") must not contain a <.
5656: *
5657: * [ VC: Attribute Value Type ]
1.117 daniel 5658: * The attribute must have been declared; the value must be of the type
1.99 daniel 5659: * declared for it.
1.98 daniel 5660: *
1.22 daniel 5661: * [25] Eq ::= S? '=' S?
5662: *
1.29 daniel 5663: * With namespace:
5664: *
5665: * [NS 11] Attribute ::= QName Eq AttValue
1.43 daniel 5666: *
5667: * Also the case QName == xmlns:??? is handled independently as a namespace
5668: * definition.
1.69 daniel 5669: *
1.72 daniel 5670: * Returns the attribute name, and the value in *value.
1.3 veillard 5671: */
5672:
1.123 daniel 5673: xmlChar *
5674: xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
5675: xmlChar *name, *val;
1.3 veillard 5676:
1.72 daniel 5677: *value = NULL;
5678: name = xmlParseName(ctxt);
1.22 daniel 5679: if (name == NULL) {
1.230 veillard 5680: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 5681: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5682: ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
1.59 daniel 5683: ctxt->wellFormed = 0;
1.180 daniel 5684: ctxt->disableSAX = 1;
1.52 daniel 5685: return(NULL);
1.3 veillard 5686: }
5687:
5688: /*
1.29 daniel 5689: * read the value
1.3 veillard 5690: */
1.42 daniel 5691: SKIP_BLANKS;
1.152 daniel 5692: if (RAW == '=') {
1.40 daniel 5693: NEXT;
1.42 daniel 5694: SKIP_BLANKS;
1.72 daniel 5695: val = xmlParseAttValue(ctxt);
1.96 daniel 5696: ctxt->instate = XML_PARSER_CONTENT;
1.29 daniel 5697: } else {
1.230 veillard 5698: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
1.55 daniel 5699: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5700: ctxt->sax->error(ctxt->userData,
1.59 daniel 5701: "Specification mandate value for attribute %s\n", name);
5702: ctxt->wellFormed = 0;
1.180 daniel 5703: ctxt->disableSAX = 1;
1.170 daniel 5704: xmlFree(name);
1.52 daniel 5705: return(NULL);
1.43 daniel 5706: }
5707:
1.172 daniel 5708: /*
5709: * Check that xml:lang conforms to the specification
1.222 veillard 5710: * No more registered as an error, just generate a warning now
5711: * since this was deprecated in XML second edition
1.172 daniel 5712: */
1.229 veillard 5713: if ((ctxt->pedantic) && (!xmlStrcmp(name, BAD_CAST "xml:lang"))) {
1.172 daniel 5714: if (!xmlCheckLanguageID(val)) {
1.222 veillard 5715: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5716: ctxt->sax->warning(ctxt->userData,
5717: "Malformed value for xml:lang : %s\n", val);
1.172 daniel 5718: }
5719: }
5720:
1.176 daniel 5721: /*
5722: * Check that xml:space conforms to the specification
5723: */
5724: if (!xmlStrcmp(name, BAD_CAST "xml:space")) {
5725: if (!xmlStrcmp(val, BAD_CAST "default"))
5726: *(ctxt->space) = 0;
5727: else if (!xmlStrcmp(val, BAD_CAST "preserve"))
5728: *(ctxt->space) = 1;
5729: else {
1.230 veillard 5730: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
1.176 daniel 5731: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5732: ctxt->sax->error(ctxt->userData,
5733: "Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
5734: val);
5735: ctxt->wellFormed = 0;
1.180 daniel 5736: ctxt->disableSAX = 1;
1.176 daniel 5737: }
5738: }
5739:
1.72 daniel 5740: *value = val;
5741: return(name);
1.3 veillard 5742: }
5743:
1.50 daniel 5744: /**
5745: * xmlParseStartTag:
5746: * @ctxt: an XML parser context
5747: *
5748: * parse a start of tag either for rule element or
5749: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 5750: *
5751: * [40] STag ::= '<' Name (S Attribute)* S? '>'
5752: *
1.98 daniel 5753: * [ WFC: Unique Att Spec ]
5754: * No attribute name may appear more than once in the same start-tag or
5755: * empty-element tag.
5756: *
1.29 daniel 5757: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
5758: *
1.98 daniel 5759: * [ WFC: Unique Att Spec ]
5760: * No attribute name may appear more than once in the same start-tag or
5761: * empty-element tag.
5762: *
1.29 daniel 5763: * With namespace:
5764: *
5765: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
5766: *
5767: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.83 daniel 5768: *
1.192 daniel 5769: * Returns the element name parsed
1.2 veillard 5770: */
5771:
1.123 daniel 5772: xmlChar *
1.69 daniel 5773: xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 5774: xmlChar *name;
5775: xmlChar *attname;
5776: xmlChar *attvalue;
5777: const xmlChar **atts = NULL;
1.72 daniel 5778: int nbatts = 0;
5779: int maxatts = 0;
5780: int i;
1.2 veillard 5781:
1.152 daniel 5782: if (RAW != '<') return(NULL);
1.40 daniel 5783: NEXT;
1.3 veillard 5784:
1.72 daniel 5785: name = xmlParseName(ctxt);
1.59 daniel 5786: if (name == NULL) {
1.230 veillard 5787: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5788: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5789: ctxt->sax->error(ctxt->userData,
1.59 daniel 5790: "xmlParseStartTag: invalid element name\n");
5791: ctxt->wellFormed = 0;
1.180 daniel 5792: ctxt->disableSAX = 1;
1.83 daniel 5793: return(NULL);
1.50 daniel 5794: }
5795:
5796: /*
1.3 veillard 5797: * Now parse the attributes, it ends up with the ending
5798: *
5799: * (S Attribute)* S?
5800: */
1.42 daniel 5801: SKIP_BLANKS;
1.91 daniel 5802: GROW;
1.168 daniel 5803:
1.153 daniel 5804: while ((IS_CHAR(RAW)) &&
1.152 daniel 5805: (RAW != '>') &&
5806: ((RAW != '/') || (NXT(1) != '>'))) {
1.123 daniel 5807: const xmlChar *q = CUR_PTR;
1.91 daniel 5808: int cons = ctxt->input->consumed;
1.29 daniel 5809:
1.72 daniel 5810: attname = xmlParseAttribute(ctxt, &attvalue);
5811: if ((attname != NULL) && (attvalue != NULL)) {
5812: /*
1.98 daniel 5813: * [ WFC: Unique Att Spec ]
5814: * No attribute name may appear more than once in the same
5815: * start-tag or empty-element tag.
1.72 daniel 5816: */
5817: for (i = 0; i < nbatts;i += 2) {
5818: if (!xmlStrcmp(atts[i], attname)) {
1.230 veillard 5819: ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
1.72 daniel 5820: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 5821: ctxt->sax->error(ctxt->userData,
5822: "Attribute %s redefined\n",
5823: attname);
1.72 daniel 5824: ctxt->wellFormed = 0;
1.180 daniel 5825: ctxt->disableSAX = 1;
1.119 daniel 5826: xmlFree(attname);
5827: xmlFree(attvalue);
1.98 daniel 5828: goto failed;
1.72 daniel 5829: }
5830: }
5831:
5832: /*
5833: * Add the pair to atts
5834: */
5835: if (atts == NULL) {
5836: maxatts = 10;
1.123 daniel 5837: atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
1.72 daniel 5838: if (atts == NULL) {
1.86 daniel 5839: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 5840: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 5841: return(NULL);
1.72 daniel 5842: }
1.127 daniel 5843: } else if (nbatts + 4 > maxatts) {
1.72 daniel 5844: maxatts *= 2;
1.123 daniel 5845: atts = (const xmlChar **) xmlRealloc(atts,
5846: maxatts * sizeof(xmlChar *));
1.72 daniel 5847: if (atts == NULL) {
1.86 daniel 5848: fprintf(stderr, "realloc of %ld byte failed\n",
1.123 daniel 5849: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 5850: return(NULL);
1.72 daniel 5851: }
5852: }
5853: atts[nbatts++] = attname;
5854: atts[nbatts++] = attvalue;
5855: atts[nbatts] = NULL;
5856: atts[nbatts + 1] = NULL;
1.176 daniel 5857: } else {
5858: if (attname != NULL)
5859: xmlFree(attname);
5860: if (attvalue != NULL)
5861: xmlFree(attvalue);
1.72 daniel 5862: }
5863:
1.116 daniel 5864: failed:
1.168 daniel 5865:
5866: if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
5867: break;
5868: if (!IS_BLANK(RAW)) {
1.230 veillard 5869: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.168 daniel 5870: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5871: ctxt->sax->error(ctxt->userData,
5872: "attributes construct error\n");
5873: ctxt->wellFormed = 0;
1.180 daniel 5874: ctxt->disableSAX = 1;
1.168 daniel 5875: }
1.42 daniel 5876: SKIP_BLANKS;
1.91 daniel 5877: if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
1.230 veillard 5878: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 5879: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5880: ctxt->sax->error(ctxt->userData,
1.31 daniel 5881: "xmlParseStartTag: problem parsing attributes\n");
1.59 daniel 5882: ctxt->wellFormed = 0;
1.180 daniel 5883: ctxt->disableSAX = 1;
1.29 daniel 5884: break;
1.3 veillard 5885: }
1.91 daniel 5886: GROW;
1.3 veillard 5887: }
5888:
1.43 daniel 5889: /*
1.72 daniel 5890: * SAX: Start of Element !
1.43 daniel 5891: */
1.171 daniel 5892: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
5893: (!ctxt->disableSAX))
1.74 daniel 5894: ctxt->sax->startElement(ctxt->userData, name, atts);
1.43 daniel 5895:
1.72 daniel 5896: if (atts != NULL) {
1.123 daniel 5897: for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
1.119 daniel 5898: xmlFree(atts);
1.72 daniel 5899: }
1.83 daniel 5900: return(name);
1.3 veillard 5901: }
5902:
1.50 daniel 5903: /**
5904: * xmlParseEndTag:
5905: * @ctxt: an XML parser context
5906: *
5907: * parse an end of tag
1.27 daniel 5908: *
5909: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 5910: *
5911: * With namespace
5912: *
1.72 daniel 5913: * [NS 9] ETag ::= '</' QName S? '>'
1.7 veillard 5914: */
5915:
1.55 daniel 5916: void
1.140 daniel 5917: xmlParseEndTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 5918: xmlChar *name;
1.140 daniel 5919: xmlChar *oldname;
1.7 veillard 5920:
1.91 daniel 5921: GROW;
1.152 daniel 5922: if ((RAW != '<') || (NXT(1) != '/')) {
1.230 veillard 5923: ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
1.55 daniel 5924: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5925: ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
1.59 daniel 5926: ctxt->wellFormed = 0;
1.180 daniel 5927: ctxt->disableSAX = 1;
1.27 daniel 5928: return;
5929: }
1.40 daniel 5930: SKIP(2);
1.7 veillard 5931:
1.72 daniel 5932: name = xmlParseName(ctxt);
1.7 veillard 5933:
5934: /*
5935: * We should definitely be at the ending "S? '>'" part
5936: */
1.91 daniel 5937: GROW;
1.42 daniel 5938: SKIP_BLANKS;
1.153 daniel 5939: if ((!IS_CHAR(RAW)) || (RAW != '>')) {
1.230 veillard 5940: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.55 daniel 5941: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5942: ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
1.59 daniel 5943: ctxt->wellFormed = 0;
1.180 daniel 5944: ctxt->disableSAX = 1;
1.7 veillard 5945: } else
1.40 daniel 5946: NEXT;
1.7 veillard 5947:
1.72 daniel 5948: /*
1.98 daniel 5949: * [ WFC: Element Type Match ]
5950: * The Name in an element's end-tag must match the element type in the
5951: * start-tag.
5952: *
1.83 daniel 5953: */
1.147 daniel 5954: if ((name == NULL) || (ctxt->name == NULL) ||
5955: (xmlStrcmp(name, ctxt->name))) {
1.230 veillard 5956: ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
1.147 daniel 5957: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
5958: if ((name != NULL) && (ctxt->name != NULL)) {
5959: ctxt->sax->error(ctxt->userData,
5960: "Opening and ending tag mismatch: %s and %s\n",
5961: ctxt->name, name);
5962: } else if (ctxt->name != NULL) {
5963: ctxt->sax->error(ctxt->userData,
5964: "Ending tag eror for: %s\n", ctxt->name);
5965: } else {
5966: ctxt->sax->error(ctxt->userData,
5967: "Ending tag error: internal error ???\n");
5968: }
1.122 daniel 5969:
1.147 daniel 5970: }
1.83 daniel 5971: ctxt->wellFormed = 0;
1.180 daniel 5972: ctxt->disableSAX = 1;
1.83 daniel 5973: }
5974:
5975: /*
1.72 daniel 5976: * SAX: End of Tag
5977: */
1.171 daniel 5978: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
5979: (!ctxt->disableSAX))
1.74 daniel 5980: ctxt->sax->endElement(ctxt->userData, name);
1.72 daniel 5981:
5982: if (name != NULL)
1.119 daniel 5983: xmlFree(name);
1.140 daniel 5984: oldname = namePop(ctxt);
1.176 daniel 5985: spacePop(ctxt);
1.140 daniel 5986: if (oldname != NULL) {
5987: #ifdef DEBUG_STACK
5988: fprintf(stderr,"Close: popped %s\n", oldname);
5989: #endif
5990: xmlFree(oldname);
5991: }
1.7 veillard 5992: return;
5993: }
5994:
1.50 daniel 5995: /**
5996: * xmlParseCDSect:
5997: * @ctxt: an XML parser context
5998: *
5999: * Parse escaped pure raw content.
1.29 daniel 6000: *
6001: * [18] CDSect ::= CDStart CData CDEnd
6002: *
6003: * [19] CDStart ::= '<![CDATA['
6004: *
6005: * [20] Data ::= (Char* - (Char* ']]>' Char*))
6006: *
6007: * [21] CDEnd ::= ']]>'
1.3 veillard 6008: */
1.55 daniel 6009: void
6010: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.135 daniel 6011: xmlChar *buf = NULL;
6012: int len = 0;
1.140 daniel 6013: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 6014: int r, rl;
6015: int s, sl;
6016: int cur, l;
1.3 veillard 6017:
1.106 daniel 6018: if ((NXT(0) == '<') && (NXT(1) == '!') &&
1.40 daniel 6019: (NXT(2) == '[') && (NXT(3) == 'C') &&
6020: (NXT(4) == 'D') && (NXT(5) == 'A') &&
6021: (NXT(6) == 'T') && (NXT(7) == 'A') &&
6022: (NXT(8) == '[')) {
6023: SKIP(9);
1.29 daniel 6024: } else
1.45 daniel 6025: return;
1.109 daniel 6026:
6027: ctxt->instate = XML_PARSER_CDATA_SECTION;
1.152 daniel 6028: r = CUR_CHAR(rl);
6029: if (!IS_CHAR(r)) {
1.230 veillard 6030: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.55 daniel 6031: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6032: ctxt->sax->error(ctxt->userData,
1.135 daniel 6033: "CData section not finished\n");
1.59 daniel 6034: ctxt->wellFormed = 0;
1.180 daniel 6035: ctxt->disableSAX = 1;
1.109 daniel 6036: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 6037: return;
1.3 veillard 6038: }
1.152 daniel 6039: NEXTL(rl);
6040: s = CUR_CHAR(sl);
6041: if (!IS_CHAR(s)) {
1.230 veillard 6042: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.55 daniel 6043: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6044: ctxt->sax->error(ctxt->userData,
1.135 daniel 6045: "CData section not finished\n");
1.59 daniel 6046: ctxt->wellFormed = 0;
1.180 daniel 6047: ctxt->disableSAX = 1;
1.109 daniel 6048: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 6049: return;
1.3 veillard 6050: }
1.152 daniel 6051: NEXTL(sl);
6052: cur = CUR_CHAR(l);
1.135 daniel 6053: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6054: if (buf == NULL) {
6055: fprintf(stderr, "malloc of %d byte failed\n", size);
6056: return;
6057: }
1.108 veillard 6058: while (IS_CHAR(cur) &&
1.110 daniel 6059: ((r != ']') || (s != ']') || (cur != '>'))) {
1.152 daniel 6060: if (len + 5 >= size) {
1.135 daniel 6061: size *= 2;
1.204 veillard 6062: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 6063: if (buf == NULL) {
6064: fprintf(stderr, "realloc of %d byte failed\n", size);
6065: return;
6066: }
6067: }
1.152 daniel 6068: COPY_BUF(rl,buf,len,r);
1.110 daniel 6069: r = s;
1.152 daniel 6070: rl = sl;
1.110 daniel 6071: s = cur;
1.152 daniel 6072: sl = l;
6073: NEXTL(l);
6074: cur = CUR_CHAR(l);
1.3 veillard 6075: }
1.135 daniel 6076: buf[len] = 0;
1.109 daniel 6077: ctxt->instate = XML_PARSER_CONTENT;
1.152 daniel 6078: if (cur != '>') {
1.230 veillard 6079: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.55 daniel 6080: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6081: ctxt->sax->error(ctxt->userData,
1.135 daniel 6082: "CData section not finished\n%.50s\n", buf);
1.59 daniel 6083: ctxt->wellFormed = 0;
1.180 daniel 6084: ctxt->disableSAX = 1;
1.135 daniel 6085: xmlFree(buf);
1.45 daniel 6086: return;
1.3 veillard 6087: }
1.152 daniel 6088: NEXTL(l);
1.16 daniel 6089:
1.45 daniel 6090: /*
1.135 daniel 6091: * Ok the buffer is to be consumed as cdata.
1.45 daniel 6092: */
1.171 daniel 6093: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.107 daniel 6094: if (ctxt->sax->cdataBlock != NULL)
1.135 daniel 6095: ctxt->sax->cdataBlock(ctxt->userData, buf, len);
1.45 daniel 6096: }
1.135 daniel 6097: xmlFree(buf);
1.2 veillard 6098: }
6099:
1.50 daniel 6100: /**
6101: * xmlParseContent:
6102: * @ctxt: an XML parser context
6103: *
6104: * Parse a content:
1.2 veillard 6105: *
1.27 daniel 6106: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 6107: */
6108:
1.55 daniel 6109: void
6110: xmlParseContent(xmlParserCtxtPtr ctxt) {
1.97 daniel 6111: GROW;
1.176 daniel 6112: while (((RAW != 0) || (ctxt->token != 0)) &&
6113: ((RAW != '<') || (NXT(1) != '/'))) {
1.123 daniel 6114: const xmlChar *test = CUR_PTR;
1.91 daniel 6115: int cons = ctxt->input->consumed;
1.123 daniel 6116: xmlChar tok = ctxt->token;
1.27 daniel 6117:
6118: /*
1.152 daniel 6119: * Handle possible processed charrefs.
6120: */
6121: if (ctxt->token != 0) {
6122: xmlParseCharData(ctxt, 0);
6123: }
6124: /*
1.27 daniel 6125: * First case : a Processing Instruction.
6126: */
1.152 daniel 6127: else if ((RAW == '<') && (NXT(1) == '?')) {
1.27 daniel 6128: xmlParsePI(ctxt);
6129: }
1.72 daniel 6130:
1.27 daniel 6131: /*
6132: * Second case : a CDSection
6133: */
1.152 daniel 6134: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 6135: (NXT(2) == '[') && (NXT(3) == 'C') &&
6136: (NXT(4) == 'D') && (NXT(5) == 'A') &&
6137: (NXT(6) == 'T') && (NXT(7) == 'A') &&
6138: (NXT(8) == '[')) {
1.45 daniel 6139: xmlParseCDSect(ctxt);
1.27 daniel 6140: }
1.72 daniel 6141:
1.27 daniel 6142: /*
6143: * Third case : a comment
6144: */
1.152 daniel 6145: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 6146: (NXT(2) == '-') && (NXT(3) == '-')) {
1.114 daniel 6147: xmlParseComment(ctxt);
1.97 daniel 6148: ctxt->instate = XML_PARSER_CONTENT;
1.27 daniel 6149: }
1.72 daniel 6150:
1.27 daniel 6151: /*
6152: * Fourth case : a sub-element.
6153: */
1.152 daniel 6154: else if (RAW == '<') {
1.72 daniel 6155: xmlParseElement(ctxt);
1.45 daniel 6156: }
1.72 daniel 6157:
1.45 daniel 6158: /*
1.50 daniel 6159: * Fifth case : a reference. If if has not been resolved,
6160: * parsing returns it's Name, create the node
1.45 daniel 6161: */
1.97 daniel 6162:
1.152 daniel 6163: else if (RAW == '&') {
1.77 daniel 6164: xmlParseReference(ctxt);
1.27 daniel 6165: }
1.72 daniel 6166:
1.27 daniel 6167: /*
6168: * Last case, text. Note that References are handled directly.
6169: */
6170: else {
1.45 daniel 6171: xmlParseCharData(ctxt, 0);
1.3 veillard 6172: }
1.14 veillard 6173:
1.91 daniel 6174: GROW;
1.14 veillard 6175: /*
1.45 daniel 6176: * Pop-up of finished entities.
1.14 veillard 6177: */
1.152 daniel 6178: while ((RAW == 0) && (ctxt->inputNr > 1))
1.69 daniel 6179: xmlPopInput(ctxt);
1.135 daniel 6180: SHRINK;
1.45 daniel 6181:
1.113 daniel 6182: if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6183: (tok == ctxt->token)) {
1.230 veillard 6184: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 6185: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6186: ctxt->sax->error(ctxt->userData,
1.59 daniel 6187: "detected an error in element content\n");
6188: ctxt->wellFormed = 0;
1.180 daniel 6189: ctxt->disableSAX = 1;
1.224 veillard 6190: ctxt->instate = XML_PARSER_EOF;
1.29 daniel 6191: break;
6192: }
1.3 veillard 6193: }
1.2 veillard 6194: }
6195:
1.50 daniel 6196: /**
6197: * xmlParseElement:
6198: * @ctxt: an XML parser context
6199: *
6200: * parse an XML element, this is highly recursive
1.26 daniel 6201: *
6202: * [39] element ::= EmptyElemTag | STag content ETag
6203: *
1.98 daniel 6204: * [ WFC: Element Type Match ]
6205: * The Name in an element's end-tag must match the element type in the
6206: * start-tag.
6207: *
6208: * [ VC: Element Valid ]
1.117 daniel 6209: * An element is valid if there is a declaration matching elementdecl
1.99 daniel 6210: * where the Name matches the element type and one of the following holds:
6211: * - The declaration matches EMPTY and the element has no content.
6212: * - The declaration matches children and the sequence of child elements
6213: * belongs to the language generated by the regular expression in the
6214: * content model, with optional white space (characters matching the
6215: * nonterminal S) between each pair of child elements.
6216: * - The declaration matches Mixed and the content consists of character
6217: * data and child elements whose types match names in the content model.
6218: * - The declaration matches ANY, and the types of any child elements have
6219: * been declared.
1.2 veillard 6220: */
1.26 daniel 6221:
1.72 daniel 6222: void
1.69 daniel 6223: xmlParseElement(xmlParserCtxtPtr ctxt) {
1.123 daniel 6224: const xmlChar *openTag = CUR_PTR;
6225: xmlChar *name;
1.140 daniel 6226: xmlChar *oldname;
1.32 daniel 6227: xmlParserNodeInfo node_info;
1.118 daniel 6228: xmlNodePtr ret;
1.2 veillard 6229:
1.32 daniel 6230: /* Capture start position */
1.118 daniel 6231: if (ctxt->record_info) {
6232: node_info.begin_pos = ctxt->input->consumed +
6233: (CUR_PTR - ctxt->input->base);
6234: node_info.begin_line = ctxt->input->line;
6235: }
1.32 daniel 6236:
1.176 daniel 6237: if (ctxt->spaceNr == 0)
6238: spacePush(ctxt, -1);
6239: else
6240: spacePush(ctxt, *ctxt->space);
6241:
1.83 daniel 6242: name = xmlParseStartTag(ctxt);
6243: if (name == NULL) {
1.176 daniel 6244: spacePop(ctxt);
1.83 daniel 6245: return;
6246: }
1.140 daniel 6247: namePush(ctxt, name);
1.118 daniel 6248: ret = ctxt->node;
1.2 veillard 6249:
6250: /*
1.99 daniel 6251: * [ VC: Root Element Type ]
6252: * The Name in the document type declaration must match the element
6253: * type of the root element.
6254: */
1.105 daniel 6255: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 6256: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.102 daniel 6257: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
1.99 daniel 6258:
6259: /*
1.2 veillard 6260: * Check for an Empty Element.
6261: */
1.152 daniel 6262: if ((RAW == '/') && (NXT(1) == '>')) {
1.40 daniel 6263: SKIP(2);
1.171 daniel 6264: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6265: (!ctxt->disableSAX))
1.83 daniel 6266: ctxt->sax->endElement(ctxt->userData, name);
1.140 daniel 6267: oldname = namePop(ctxt);
1.176 daniel 6268: spacePop(ctxt);
1.140 daniel 6269: if (oldname != NULL) {
6270: #ifdef DEBUG_STACK
6271: fprintf(stderr,"Close: popped %s\n", oldname);
6272: #endif
6273: xmlFree(oldname);
1.211 veillard 6274: }
6275: if ( ret != NULL && ctxt->record_info ) {
6276: node_info.end_pos = ctxt->input->consumed +
6277: (CUR_PTR - ctxt->input->base);
6278: node_info.end_line = ctxt->input->line;
6279: node_info.node = ret;
6280: xmlParserAddNodeInfo(ctxt, &node_info);
1.140 daniel 6281: }
1.72 daniel 6282: return;
1.2 veillard 6283: }
1.152 daniel 6284: if (RAW == '>') {
1.91 daniel 6285: NEXT;
6286: } else {
1.230 veillard 6287: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.55 daniel 6288: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6289: ctxt->sax->error(ctxt->userData,
6290: "Couldn't find end of Start Tag\n%.30s\n",
1.57 daniel 6291: openTag);
1.59 daniel 6292: ctxt->wellFormed = 0;
1.180 daniel 6293: ctxt->disableSAX = 1;
1.45 daniel 6294:
6295: /*
6296: * end of parsing of this node.
6297: */
6298: nodePop(ctxt);
1.140 daniel 6299: oldname = namePop(ctxt);
1.176 daniel 6300: spacePop(ctxt);
1.140 daniel 6301: if (oldname != NULL) {
6302: #ifdef DEBUG_STACK
6303: fprintf(stderr,"Close: popped %s\n", oldname);
6304: #endif
6305: xmlFree(oldname);
6306: }
1.118 daniel 6307:
6308: /*
6309: * Capture end position and add node
6310: */
6311: if ( ret != NULL && ctxt->record_info ) {
6312: node_info.end_pos = ctxt->input->consumed +
6313: (CUR_PTR - ctxt->input->base);
6314: node_info.end_line = ctxt->input->line;
6315: node_info.node = ret;
6316: xmlParserAddNodeInfo(ctxt, &node_info);
6317: }
1.72 daniel 6318: return;
1.2 veillard 6319: }
6320:
6321: /*
6322: * Parse the content of the element:
6323: */
1.45 daniel 6324: xmlParseContent(ctxt);
1.153 daniel 6325: if (!IS_CHAR(RAW)) {
1.230 veillard 6326: ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
1.55 daniel 6327: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6328: ctxt->sax->error(ctxt->userData,
1.57 daniel 6329: "Premature end of data in tag %.30s\n", openTag);
1.59 daniel 6330: ctxt->wellFormed = 0;
1.180 daniel 6331: ctxt->disableSAX = 1;
1.45 daniel 6332:
6333: /*
6334: * end of parsing of this node.
6335: */
6336: nodePop(ctxt);
1.140 daniel 6337: oldname = namePop(ctxt);
1.176 daniel 6338: spacePop(ctxt);
1.140 daniel 6339: if (oldname != NULL) {
6340: #ifdef DEBUG_STACK
6341: fprintf(stderr,"Close: popped %s\n", oldname);
6342: #endif
6343: xmlFree(oldname);
6344: }
1.72 daniel 6345: return;
1.2 veillard 6346: }
6347:
6348: /*
1.27 daniel 6349: * parse the end of tag: '</' should be here.
1.2 veillard 6350: */
1.140 daniel 6351: xmlParseEndTag(ctxt);
1.118 daniel 6352:
6353: /*
6354: * Capture end position and add node
6355: */
6356: if ( ret != NULL && ctxt->record_info ) {
6357: node_info.end_pos = ctxt->input->consumed +
6358: (CUR_PTR - ctxt->input->base);
6359: node_info.end_line = ctxt->input->line;
6360: node_info.node = ret;
6361: xmlParserAddNodeInfo(ctxt, &node_info);
6362: }
1.2 veillard 6363: }
6364:
1.50 daniel 6365: /**
6366: * xmlParseVersionNum:
6367: * @ctxt: an XML parser context
6368: *
6369: * parse the XML version value.
1.29 daniel 6370: *
6371: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
1.68 daniel 6372: *
6373: * Returns the string giving the XML version number, or NULL
1.29 daniel 6374: */
1.123 daniel 6375: xmlChar *
1.55 daniel 6376: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.135 daniel 6377: xmlChar *buf = NULL;
6378: int len = 0;
6379: int size = 10;
6380: xmlChar cur;
1.29 daniel 6381:
1.135 daniel 6382: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6383: if (buf == NULL) {
6384: fprintf(stderr, "malloc of %d byte failed\n", size);
6385: return(NULL);
6386: }
6387: cur = CUR;
1.152 daniel 6388: while (((cur >= 'a') && (cur <= 'z')) ||
6389: ((cur >= 'A') && (cur <= 'Z')) ||
6390: ((cur >= '0') && (cur <= '9')) ||
6391: (cur == '_') || (cur == '.') ||
6392: (cur == ':') || (cur == '-')) {
1.135 daniel 6393: if (len + 1 >= size) {
6394: size *= 2;
1.204 veillard 6395: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 6396: if (buf == NULL) {
6397: fprintf(stderr, "realloc of %d byte failed\n", size);
6398: return(NULL);
6399: }
6400: }
6401: buf[len++] = cur;
6402: NEXT;
6403: cur=CUR;
6404: }
6405: buf[len] = 0;
6406: return(buf);
1.29 daniel 6407: }
6408:
1.50 daniel 6409: /**
6410: * xmlParseVersionInfo:
6411: * @ctxt: an XML parser context
6412: *
6413: * parse the XML version.
1.29 daniel 6414: *
6415: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6416: *
6417: * [25] Eq ::= S? '=' S?
1.50 daniel 6418: *
1.68 daniel 6419: * Returns the version string, e.g. "1.0"
1.29 daniel 6420: */
6421:
1.123 daniel 6422: xmlChar *
1.55 daniel 6423: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
1.123 daniel 6424: xmlChar *version = NULL;
6425: const xmlChar *q;
1.29 daniel 6426:
1.152 daniel 6427: if ((RAW == 'v') && (NXT(1) == 'e') &&
1.40 daniel 6428: (NXT(2) == 'r') && (NXT(3) == 's') &&
6429: (NXT(4) == 'i') && (NXT(5) == 'o') &&
6430: (NXT(6) == 'n')) {
6431: SKIP(7);
1.42 daniel 6432: SKIP_BLANKS;
1.152 daniel 6433: if (RAW != '=') {
1.230 veillard 6434: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.55 daniel 6435: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6436: ctxt->sax->error(ctxt->userData,
6437: "xmlParseVersionInfo : expected '='\n");
1.59 daniel 6438: ctxt->wellFormed = 0;
1.180 daniel 6439: ctxt->disableSAX = 1;
1.31 daniel 6440: return(NULL);
6441: }
1.40 daniel 6442: NEXT;
1.42 daniel 6443: SKIP_BLANKS;
1.152 daniel 6444: if (RAW == '"') {
1.40 daniel 6445: NEXT;
6446: q = CUR_PTR;
1.29 daniel 6447: version = xmlParseVersionNum(ctxt);
1.152 daniel 6448: if (RAW != '"') {
1.230 veillard 6449: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 6450: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6451: ctxt->sax->error(ctxt->userData,
6452: "String not closed\n%.50s\n", q);
1.59 daniel 6453: ctxt->wellFormed = 0;
1.180 daniel 6454: ctxt->disableSAX = 1;
1.55 daniel 6455: } else
1.40 daniel 6456: NEXT;
1.152 daniel 6457: } else if (RAW == '\''){
1.40 daniel 6458: NEXT;
6459: q = CUR_PTR;
1.29 daniel 6460: version = xmlParseVersionNum(ctxt);
1.152 daniel 6461: if (RAW != '\'') {
1.230 veillard 6462: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 6463: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6464: ctxt->sax->error(ctxt->userData,
6465: "String not closed\n%.50s\n", q);
1.59 daniel 6466: ctxt->wellFormed = 0;
1.180 daniel 6467: ctxt->disableSAX = 1;
1.55 daniel 6468: } else
1.40 daniel 6469: NEXT;
1.31 daniel 6470: } else {
1.230 veillard 6471: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.55 daniel 6472: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6473: ctxt->sax->error(ctxt->userData,
1.59 daniel 6474: "xmlParseVersionInfo : expected ' or \"\n");
1.122 daniel 6475: ctxt->wellFormed = 0;
1.180 daniel 6476: ctxt->disableSAX = 1;
1.29 daniel 6477: }
6478: }
6479: return(version);
6480: }
6481:
1.50 daniel 6482: /**
6483: * xmlParseEncName:
6484: * @ctxt: an XML parser context
6485: *
6486: * parse the XML encoding name
1.29 daniel 6487: *
6488: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1.50 daniel 6489: *
1.68 daniel 6490: * Returns the encoding name value or NULL
1.29 daniel 6491: */
1.123 daniel 6492: xmlChar *
1.55 daniel 6493: xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.135 daniel 6494: xmlChar *buf = NULL;
6495: int len = 0;
6496: int size = 10;
6497: xmlChar cur;
1.29 daniel 6498:
1.135 daniel 6499: cur = CUR;
6500: if (((cur >= 'a') && (cur <= 'z')) ||
6501: ((cur >= 'A') && (cur <= 'Z'))) {
6502: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6503: if (buf == NULL) {
6504: fprintf(stderr, "malloc of %d byte failed\n", size);
6505: return(NULL);
6506: }
6507:
6508: buf[len++] = cur;
1.40 daniel 6509: NEXT;
1.135 daniel 6510: cur = CUR;
1.152 daniel 6511: while (((cur >= 'a') && (cur <= 'z')) ||
6512: ((cur >= 'A') && (cur <= 'Z')) ||
6513: ((cur >= '0') && (cur <= '9')) ||
6514: (cur == '.') || (cur == '_') ||
6515: (cur == '-')) {
1.135 daniel 6516: if (len + 1 >= size) {
6517: size *= 2;
1.204 veillard 6518: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 6519: if (buf == NULL) {
6520: fprintf(stderr, "realloc of %d byte failed\n", size);
6521: return(NULL);
6522: }
6523: }
6524: buf[len++] = cur;
6525: NEXT;
6526: cur = CUR;
6527: if (cur == 0) {
6528: SHRINK;
6529: GROW;
6530: cur = CUR;
6531: }
6532: }
6533: buf[len] = 0;
1.29 daniel 6534: } else {
1.230 veillard 6535: ctxt->errNo = XML_ERR_ENCODING_NAME;
1.55 daniel 6536: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6537: ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
1.59 daniel 6538: ctxt->wellFormed = 0;
1.180 daniel 6539: ctxt->disableSAX = 1;
1.29 daniel 6540: }
1.135 daniel 6541: return(buf);
1.29 daniel 6542: }
6543:
1.50 daniel 6544: /**
6545: * xmlParseEncodingDecl:
6546: * @ctxt: an XML parser context
6547: *
6548: * parse the XML encoding declaration
1.29 daniel 6549: *
6550: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
1.50 daniel 6551: *
1.229 veillard 6552: * this setups the conversion filters.
1.50 daniel 6553: *
1.68 daniel 6554: * Returns the encoding value or NULL
1.29 daniel 6555: */
6556:
1.123 daniel 6557: xmlChar *
1.55 daniel 6558: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 6559: xmlChar *encoding = NULL;
6560: const xmlChar *q;
1.29 daniel 6561:
1.42 daniel 6562: SKIP_BLANKS;
1.152 daniel 6563: if ((RAW == 'e') && (NXT(1) == 'n') &&
1.40 daniel 6564: (NXT(2) == 'c') && (NXT(3) == 'o') &&
6565: (NXT(4) == 'd') && (NXT(5) == 'i') &&
6566: (NXT(6) == 'n') && (NXT(7) == 'g')) {
6567: SKIP(8);
1.42 daniel 6568: SKIP_BLANKS;
1.152 daniel 6569: if (RAW != '=') {
1.230 veillard 6570: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.55 daniel 6571: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6572: ctxt->sax->error(ctxt->userData,
6573: "xmlParseEncodingDecl : expected '='\n");
1.59 daniel 6574: ctxt->wellFormed = 0;
1.180 daniel 6575: ctxt->disableSAX = 1;
1.31 daniel 6576: return(NULL);
6577: }
1.40 daniel 6578: NEXT;
1.42 daniel 6579: SKIP_BLANKS;
1.152 daniel 6580: if (RAW == '"') {
1.40 daniel 6581: NEXT;
6582: q = CUR_PTR;
1.29 daniel 6583: encoding = xmlParseEncName(ctxt);
1.152 daniel 6584: if (RAW != '"') {
1.230 veillard 6585: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 6586: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6587: ctxt->sax->error(ctxt->userData,
6588: "String not closed\n%.50s\n", q);
1.59 daniel 6589: ctxt->wellFormed = 0;
1.180 daniel 6590: ctxt->disableSAX = 1;
1.55 daniel 6591: } else
1.40 daniel 6592: NEXT;
1.152 daniel 6593: } else if (RAW == '\''){
1.40 daniel 6594: NEXT;
6595: q = CUR_PTR;
1.29 daniel 6596: encoding = xmlParseEncName(ctxt);
1.152 daniel 6597: if (RAW != '\'') {
1.230 veillard 6598: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 6599: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6600: ctxt->sax->error(ctxt->userData,
6601: "String not closed\n%.50s\n", q);
1.59 daniel 6602: ctxt->wellFormed = 0;
1.180 daniel 6603: ctxt->disableSAX = 1;
1.55 daniel 6604: } else
1.40 daniel 6605: NEXT;
1.152 daniel 6606: } else if (RAW == '"'){
1.230 veillard 6607: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.55 daniel 6608: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6609: ctxt->sax->error(ctxt->userData,
1.59 daniel 6610: "xmlParseEncodingDecl : expected ' or \"\n");
6611: ctxt->wellFormed = 0;
1.180 daniel 6612: ctxt->disableSAX = 1;
1.29 daniel 6613: }
1.193 daniel 6614: if (encoding != NULL) {
6615: xmlCharEncoding enc;
6616: xmlCharEncodingHandlerPtr handler;
6617:
1.195 daniel 6618: if (ctxt->input->encoding != NULL)
6619: xmlFree((xmlChar *) ctxt->input->encoding);
6620: ctxt->input->encoding = encoding;
6621:
1.193 daniel 6622: enc = xmlParseCharEncoding((const char *) encoding);
6623: /*
6624: * registered set of known encodings
6625: */
6626: if (enc != XML_CHAR_ENCODING_ERROR) {
6627: xmlSwitchEncoding(ctxt, enc);
6628: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6629: xmlFree(encoding);
6630: return(NULL);
6631: }
6632: } else {
6633: /*
6634: * fallback for unknown encodings
6635: */
6636: handler = xmlFindCharEncodingHandler((const char *) encoding);
6637: if (handler != NULL) {
6638: xmlSwitchToEncoding(ctxt, handler);
6639: } else {
6640: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.208 veillard 6641: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6642: ctxt->sax->error(ctxt->userData,
6643: "Unsupported encoding %s\n", encoding);
1.193 daniel 6644: return(NULL);
6645: }
6646: }
6647: }
1.29 daniel 6648: }
6649: return(encoding);
6650: }
6651:
1.50 daniel 6652: /**
6653: * xmlParseSDDecl:
6654: * @ctxt: an XML parser context
6655: *
6656: * parse the XML standalone declaration
1.29 daniel 6657: *
6658: * [32] SDDecl ::= S 'standalone' Eq
6659: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
1.99 daniel 6660: *
6661: * [ VC: Standalone Document Declaration ]
6662: * TODO The standalone document declaration must have the value "no"
6663: * if any external markup declarations contain declarations of:
6664: * - attributes with default values, if elements to which these
6665: * attributes apply appear in the document without specifications
6666: * of values for these attributes, or
6667: * - entities (other than amp, lt, gt, apos, quot), if references
6668: * to those entities appear in the document, or
6669: * - attributes with values subject to normalization, where the
6670: * attribute appears in the document with a value which will change
6671: * as a result of normalization, or
6672: * - element types with element content, if white space occurs directly
6673: * within any instance of those types.
1.68 daniel 6674: *
6675: * Returns 1 if standalone, 0 otherwise
1.29 daniel 6676: */
6677:
1.55 daniel 6678: int
6679: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 6680: int standalone = -1;
6681:
1.42 daniel 6682: SKIP_BLANKS;
1.152 daniel 6683: if ((RAW == 's') && (NXT(1) == 't') &&
1.40 daniel 6684: (NXT(2) == 'a') && (NXT(3) == 'n') &&
6685: (NXT(4) == 'd') && (NXT(5) == 'a') &&
6686: (NXT(6) == 'l') && (NXT(7) == 'o') &&
6687: (NXT(8) == 'n') && (NXT(9) == 'e')) {
6688: SKIP(10);
1.81 daniel 6689: SKIP_BLANKS;
1.152 daniel 6690: if (RAW != '=') {
1.230 veillard 6691: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.55 daniel 6692: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6693: ctxt->sax->error(ctxt->userData,
1.59 daniel 6694: "XML standalone declaration : expected '='\n");
6695: ctxt->wellFormed = 0;
1.180 daniel 6696: ctxt->disableSAX = 1;
1.32 daniel 6697: return(standalone);
6698: }
1.40 daniel 6699: NEXT;
1.42 daniel 6700: SKIP_BLANKS;
1.152 daniel 6701: if (RAW == '\''){
1.40 daniel 6702: NEXT;
1.152 daniel 6703: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 6704: standalone = 0;
1.40 daniel 6705: SKIP(2);
1.152 daniel 6706: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 6707: (NXT(2) == 's')) {
1.29 daniel 6708: standalone = 1;
1.40 daniel 6709: SKIP(3);
1.29 daniel 6710: } else {
1.230 veillard 6711: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.55 daniel 6712: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6713: ctxt->sax->error(ctxt->userData,
6714: "standalone accepts only 'yes' or 'no'\n");
1.59 daniel 6715: ctxt->wellFormed = 0;
1.180 daniel 6716: ctxt->disableSAX = 1;
1.29 daniel 6717: }
1.152 daniel 6718: if (RAW != '\'') {
1.230 veillard 6719: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 6720: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6721: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 6722: ctxt->wellFormed = 0;
1.180 daniel 6723: ctxt->disableSAX = 1;
1.55 daniel 6724: } else
1.40 daniel 6725: NEXT;
1.152 daniel 6726: } else if (RAW == '"'){
1.40 daniel 6727: NEXT;
1.152 daniel 6728: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 6729: standalone = 0;
1.40 daniel 6730: SKIP(2);
1.152 daniel 6731: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 6732: (NXT(2) == 's')) {
1.29 daniel 6733: standalone = 1;
1.40 daniel 6734: SKIP(3);
1.29 daniel 6735: } else {
1.230 veillard 6736: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.55 daniel 6737: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6738: ctxt->sax->error(ctxt->userData,
1.59 daniel 6739: "standalone accepts only 'yes' or 'no'\n");
6740: ctxt->wellFormed = 0;
1.180 daniel 6741: ctxt->disableSAX = 1;
1.29 daniel 6742: }
1.152 daniel 6743: if (RAW != '"') {
1.230 veillard 6744: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 6745: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6746: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 6747: ctxt->wellFormed = 0;
1.180 daniel 6748: ctxt->disableSAX = 1;
1.55 daniel 6749: } else
1.40 daniel 6750: NEXT;
1.37 daniel 6751: } else {
1.230 veillard 6752: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.55 daniel 6753: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6754: ctxt->sax->error(ctxt->userData,
6755: "Standalone value not found\n");
1.59 daniel 6756: ctxt->wellFormed = 0;
1.180 daniel 6757: ctxt->disableSAX = 1;
1.37 daniel 6758: }
1.29 daniel 6759: }
6760: return(standalone);
6761: }
6762:
1.50 daniel 6763: /**
6764: * xmlParseXMLDecl:
6765: * @ctxt: an XML parser context
6766: *
6767: * parse an XML declaration header
1.29 daniel 6768: *
6769: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 6770: */
6771:
1.55 daniel 6772: void
6773: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 6774: xmlChar *version;
1.1 veillard 6775:
6776: /*
1.19 daniel 6777: * We know that '<?xml' is here.
1.1 veillard 6778: */
1.40 daniel 6779: SKIP(5);
1.1 veillard 6780:
1.153 daniel 6781: if (!IS_BLANK(RAW)) {
1.230 veillard 6782: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 6783: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6784: ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
1.59 daniel 6785: ctxt->wellFormed = 0;
1.180 daniel 6786: ctxt->disableSAX = 1;
1.59 daniel 6787: }
1.42 daniel 6788: SKIP_BLANKS;
1.1 veillard 6789:
6790: /*
1.29 daniel 6791: * We should have the VersionInfo here.
1.1 veillard 6792: */
1.29 daniel 6793: version = xmlParseVersionInfo(ctxt);
6794: if (version == NULL)
1.45 daniel 6795: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.72 daniel 6796: ctxt->version = xmlStrdup(version);
1.119 daniel 6797: xmlFree(version);
1.29 daniel 6798:
6799: /*
6800: * We may have the encoding declaration
6801: */
1.153 daniel 6802: if (!IS_BLANK(RAW)) {
1.152 daniel 6803: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 6804: SKIP(2);
6805: return;
6806: }
1.230 veillard 6807: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 6808: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6809: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 6810: ctxt->wellFormed = 0;
1.180 daniel 6811: ctxt->disableSAX = 1;
1.59 daniel 6812: }
1.195 daniel 6813: xmlParseEncodingDecl(ctxt);
1.193 daniel 6814: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6815: /*
6816: * The XML REC instructs us to stop parsing right here
6817: */
6818: return;
6819: }
1.1 veillard 6820:
6821: /*
1.29 daniel 6822: * We may have the standalone status.
1.1 veillard 6823: */
1.164 daniel 6824: if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
1.152 daniel 6825: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 6826: SKIP(2);
6827: return;
6828: }
1.230 veillard 6829: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 6830: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6831: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 6832: ctxt->wellFormed = 0;
1.180 daniel 6833: ctxt->disableSAX = 1;
1.59 daniel 6834: }
6835: SKIP_BLANKS;
1.167 daniel 6836: ctxt->input->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 6837:
1.42 daniel 6838: SKIP_BLANKS;
1.152 daniel 6839: if ((RAW == '?') && (NXT(1) == '>')) {
1.40 daniel 6840: SKIP(2);
1.152 daniel 6841: } else if (RAW == '>') {
1.31 daniel 6842: /* Deprecated old WD ... */
1.230 veillard 6843: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.55 daniel 6844: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6845: ctxt->sax->error(ctxt->userData,
6846: "XML declaration must end-up with '?>'\n");
1.59 daniel 6847: ctxt->wellFormed = 0;
1.180 daniel 6848: ctxt->disableSAX = 1;
1.40 daniel 6849: NEXT;
1.29 daniel 6850: } else {
1.230 veillard 6851: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.55 daniel 6852: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6853: ctxt->sax->error(ctxt->userData,
6854: "parsing XML declaration: '?>' expected\n");
1.59 daniel 6855: ctxt->wellFormed = 0;
1.180 daniel 6856: ctxt->disableSAX = 1;
1.40 daniel 6857: MOVETO_ENDTAG(CUR_PTR);
6858: NEXT;
1.29 daniel 6859: }
1.1 veillard 6860: }
6861:
1.50 daniel 6862: /**
6863: * xmlParseMisc:
6864: * @ctxt: an XML parser context
6865: *
6866: * parse an XML Misc* optionnal field.
1.21 daniel 6867: *
1.22 daniel 6868: * [27] Misc ::= Comment | PI | S
1.1 veillard 6869: */
6870:
1.55 daniel 6871: void
6872: xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.152 daniel 6873: while (((RAW == '<') && (NXT(1) == '?')) ||
6874: ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 6875: (NXT(2) == '-') && (NXT(3) == '-')) ||
6876: IS_BLANK(CUR)) {
1.152 daniel 6877: if ((RAW == '<') && (NXT(1) == '?')) {
1.16 daniel 6878: xmlParsePI(ctxt);
1.40 daniel 6879: } else if (IS_BLANK(CUR)) {
6880: NEXT;
1.1 veillard 6881: } else
1.114 daniel 6882: xmlParseComment(ctxt);
1.1 veillard 6883: }
6884: }
6885:
1.50 daniel 6886: /**
1.181 daniel 6887: * xmlParseDocument:
1.50 daniel 6888: * @ctxt: an XML parser context
6889: *
6890: * parse an XML document (and build a tree if using the standard SAX
6891: * interface).
1.21 daniel 6892: *
1.22 daniel 6893: * [1] document ::= prolog element Misc*
1.29 daniel 6894: *
6895: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.50 daniel 6896: *
1.68 daniel 6897: * Returns 0, -1 in case of error. the parser context is augmented
1.50 daniel 6898: * as a result of the parsing.
1.1 veillard 6899: */
6900:
1.55 daniel 6901: int
6902: xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.156 daniel 6903: xmlChar start[4];
6904: xmlCharEncoding enc;
6905:
1.45 daniel 6906: xmlDefaultSAXHandlerInit();
6907:
1.91 daniel 6908: GROW;
6909:
1.14 veillard 6910: /*
1.44 daniel 6911: * SAX: beginning of the document processing.
6912: */
1.72 daniel 6913: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
1.74 daniel 6914: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
1.44 daniel 6915:
1.156 daniel 6916: /*
6917: * Get the 4 first bytes and decode the charset
6918: * if enc != XML_CHAR_ENCODING_NONE
6919: * plug some encoding conversion routines.
6920: */
6921: start[0] = RAW;
6922: start[1] = NXT(1);
6923: start[2] = NXT(2);
6924: start[3] = NXT(3);
6925: enc = xmlDetectCharEncoding(start, 4);
6926: if (enc != XML_CHAR_ENCODING_NONE) {
6927: xmlSwitchEncoding(ctxt, enc);
6928: }
6929:
1.1 veillard 6930:
1.59 daniel 6931: if (CUR == 0) {
1.230 veillard 6932: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 6933: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6934: ctxt->sax->error(ctxt->userData, "Document is empty\n");
1.59 daniel 6935: ctxt->wellFormed = 0;
1.180 daniel 6936: ctxt->disableSAX = 1;
1.59 daniel 6937: }
1.1 veillard 6938:
6939: /*
6940: * Check for the XMLDecl in the Prolog.
6941: */
1.91 daniel 6942: GROW;
1.152 daniel 6943: if ((RAW == '<') && (NXT(1) == '?') &&
1.40 daniel 6944: (NXT(2) == 'x') && (NXT(3) == 'm') &&
1.142 daniel 6945: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.196 daniel 6946:
6947: /*
6948: * Note that we will switch encoding on the fly.
6949: */
1.19 daniel 6950: xmlParseXMLDecl(ctxt);
1.193 daniel 6951: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6952: /*
6953: * The XML REC instructs us to stop parsing right here
6954: */
6955: return(-1);
6956: }
1.167 daniel 6957: ctxt->standalone = ctxt->input->standalone;
1.42 daniel 6958: SKIP_BLANKS;
1.1 veillard 6959: } else {
1.72 daniel 6960: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.1 veillard 6961: }
1.171 daniel 6962: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
1.74 daniel 6963: ctxt->sax->startDocument(ctxt->userData);
1.1 veillard 6964:
6965: /*
6966: * The Misc part of the Prolog
6967: */
1.91 daniel 6968: GROW;
1.16 daniel 6969: xmlParseMisc(ctxt);
1.1 veillard 6970:
6971: /*
1.29 daniel 6972: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 6973: * (doctypedecl Misc*)?
6974: */
1.91 daniel 6975: GROW;
1.152 daniel 6976: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 6977: (NXT(2) == 'D') && (NXT(3) == 'O') &&
6978: (NXT(4) == 'C') && (NXT(5) == 'T') &&
6979: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
6980: (NXT(8) == 'E')) {
1.165 daniel 6981:
1.166 daniel 6982: ctxt->inSubset = 1;
1.22 daniel 6983: xmlParseDocTypeDecl(ctxt);
1.152 daniel 6984: if (RAW == '[') {
1.140 daniel 6985: ctxt->instate = XML_PARSER_DTD;
6986: xmlParseInternalSubset(ctxt);
6987: }
1.165 daniel 6988:
6989: /*
6990: * Create and update the external subset.
6991: */
1.166 daniel 6992: ctxt->inSubset = 2;
1.171 daniel 6993: if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
6994: (!ctxt->disableSAX))
1.165 daniel 6995: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
6996: ctxt->extSubSystem, ctxt->extSubURI);
1.166 daniel 6997: ctxt->inSubset = 0;
1.165 daniel 6998:
6999:
1.96 daniel 7000: ctxt->instate = XML_PARSER_PROLOG;
1.22 daniel 7001: xmlParseMisc(ctxt);
1.21 daniel 7002: }
7003:
7004: /*
7005: * Time to start parsing the tree itself
1.1 veillard 7006: */
1.91 daniel 7007: GROW;
1.152 daniel 7008: if (RAW != '<') {
1.230 veillard 7009: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 7010: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7011: ctxt->sax->error(ctxt->userData,
1.151 daniel 7012: "Start tag expected, '<' not found\n");
1.59 daniel 7013: ctxt->wellFormed = 0;
1.180 daniel 7014: ctxt->disableSAX = 1;
1.140 daniel 7015: ctxt->instate = XML_PARSER_EOF;
7016: } else {
7017: ctxt->instate = XML_PARSER_CONTENT;
7018: xmlParseElement(ctxt);
7019: ctxt->instate = XML_PARSER_EPILOG;
7020:
7021:
7022: /*
7023: * The Misc part at the end
7024: */
7025: xmlParseMisc(ctxt);
7026:
1.152 daniel 7027: if (RAW != 0) {
1.230 veillard 7028: ctxt->errNo = XML_ERR_DOCUMENT_END;
1.140 daniel 7029: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7030: ctxt->sax->error(ctxt->userData,
7031: "Extra content at the end of the document\n");
7032: ctxt->wellFormed = 0;
1.180 daniel 7033: ctxt->disableSAX = 1;
1.140 daniel 7034: }
7035: ctxt->instate = XML_PARSER_EOF;
1.59 daniel 7036: }
7037:
1.44 daniel 7038: /*
7039: * SAX: end of the document processing.
7040: */
1.171 daniel 7041: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7042: (!ctxt->disableSAX))
1.74 daniel 7043: ctxt->sax->endDocument(ctxt->userData);
1.151 daniel 7044:
1.59 daniel 7045: if (! ctxt->wellFormed) return(-1);
1.16 daniel 7046: return(0);
7047: }
7048:
1.229 veillard 7049: /**
7050: * xmlParseExtParsedEnt:
7051: * @ctxt: an XML parser context
7052: *
7053: * parse a general parsed entity
7054: * An external general parsed entity is well-formed if it matches the
7055: * production labeled extParsedEnt.
7056: *
7057: * [78] extParsedEnt ::= TextDecl? content
7058: *
7059: * Returns 0, -1 in case of error. the parser context is augmented
7060: * as a result of the parsing.
7061: */
7062:
7063: int
7064: xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7065: xmlChar start[4];
7066: xmlCharEncoding enc;
7067:
7068: xmlDefaultSAXHandlerInit();
7069:
7070: GROW;
7071:
7072: /*
7073: * SAX: beginning of the document processing.
7074: */
7075: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7076: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7077:
7078: /*
7079: * Get the 4 first bytes and decode the charset
7080: * if enc != XML_CHAR_ENCODING_NONE
7081: * plug some encoding conversion routines.
7082: */
7083: start[0] = RAW;
7084: start[1] = NXT(1);
7085: start[2] = NXT(2);
7086: start[3] = NXT(3);
7087: enc = xmlDetectCharEncoding(start, 4);
7088: if (enc != XML_CHAR_ENCODING_NONE) {
7089: xmlSwitchEncoding(ctxt, enc);
7090: }
7091:
7092:
7093: if (CUR == 0) {
1.230 veillard 7094: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.229 veillard 7095: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7096: ctxt->sax->error(ctxt->userData, "Document is empty\n");
7097: ctxt->wellFormed = 0;
7098: ctxt->disableSAX = 1;
7099: }
7100:
7101: /*
7102: * Check for the XMLDecl in the Prolog.
7103: */
7104: GROW;
7105: if ((RAW == '<') && (NXT(1) == '?') &&
7106: (NXT(2) == 'x') && (NXT(3) == 'm') &&
7107: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7108:
7109: /*
7110: * Note that we will switch encoding on the fly.
7111: */
7112: xmlParseXMLDecl(ctxt);
7113: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7114: /*
7115: * The XML REC instructs us to stop parsing right here
7116: */
7117: return(-1);
7118: }
7119: SKIP_BLANKS;
7120: } else {
7121: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7122: }
7123: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7124: ctxt->sax->startDocument(ctxt->userData);
7125:
7126: /*
7127: * Doing validity checking on chunk doesn't make sense
7128: */
7129: ctxt->instate = XML_PARSER_CONTENT;
7130: ctxt->validate = 0;
7131: ctxt->depth = 0;
7132:
7133: xmlParseContent(ctxt);
7134:
7135: if ((RAW == '<') && (NXT(1) == '/')) {
1.230 veillard 7136: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.229 veillard 7137: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7138: ctxt->sax->error(ctxt->userData,
7139: "chunk is not well balanced\n");
7140: ctxt->wellFormed = 0;
7141: ctxt->disableSAX = 1;
7142: } else if (RAW != 0) {
1.230 veillard 7143: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
1.229 veillard 7144: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7145: ctxt->sax->error(ctxt->userData,
7146: "extra content at the end of well balanced chunk\n");
7147: ctxt->wellFormed = 0;
7148: ctxt->disableSAX = 1;
7149: }
7150:
7151: /*
7152: * SAX: end of the document processing.
7153: */
7154: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7155: (!ctxt->disableSAX))
7156: ctxt->sax->endDocument(ctxt->userData);
7157:
7158: if (! ctxt->wellFormed) return(-1);
7159: return(0);
7160: }
7161:
1.98 daniel 7162: /************************************************************************
7163: * *
1.128 daniel 7164: * Progressive parsing interfaces *
7165: * *
7166: ************************************************************************/
7167:
7168: /**
7169: * xmlParseLookupSequence:
7170: * @ctxt: an XML parser context
7171: * @first: the first char to lookup
1.140 daniel 7172: * @next: the next char to lookup or zero
7173: * @third: the next char to lookup or zero
1.128 daniel 7174: *
1.140 daniel 7175: * Try to find if a sequence (first, next, third) or just (first next) or
7176: * (first) is available in the input stream.
7177: * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7178: * to avoid rescanning sequences of bytes, it DOES change the state of the
7179: * parser, do not use liberally.
1.128 daniel 7180: *
1.140 daniel 7181: * Returns the index to the current parsing point if the full sequence
7182: * is available, -1 otherwise.
1.128 daniel 7183: */
7184: int
1.140 daniel 7185: xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7186: xmlChar next, xmlChar third) {
7187: int base, len;
7188: xmlParserInputPtr in;
7189: const xmlChar *buf;
7190:
7191: in = ctxt->input;
7192: if (in == NULL) return(-1);
7193: base = in->cur - in->base;
7194: if (base < 0) return(-1);
7195: if (ctxt->checkIndex > base)
7196: base = ctxt->checkIndex;
7197: if (in->buf == NULL) {
7198: buf = in->base;
7199: len = in->length;
7200: } else {
7201: buf = in->buf->buffer->content;
7202: len = in->buf->buffer->use;
7203: }
7204: /* take into account the sequence length */
7205: if (third) len -= 2;
7206: else if (next) len --;
7207: for (;base < len;base++) {
7208: if (buf[base] == first) {
7209: if (third != 0) {
7210: if ((buf[base + 1] != next) ||
7211: (buf[base + 2] != third)) continue;
7212: } else if (next != 0) {
7213: if (buf[base + 1] != next) continue;
7214: }
7215: ctxt->checkIndex = 0;
7216: #ifdef DEBUG_PUSH
7217: if (next == 0)
7218: fprintf(stderr, "PP: lookup '%c' found at %d\n",
7219: first, base);
7220: else if (third == 0)
7221: fprintf(stderr, "PP: lookup '%c%c' found at %d\n",
7222: first, next, base);
7223: else
7224: fprintf(stderr, "PP: lookup '%c%c%c' found at %d\n",
7225: first, next, third, base);
7226: #endif
7227: return(base - (in->cur - in->base));
7228: }
7229: }
7230: ctxt->checkIndex = base;
7231: #ifdef DEBUG_PUSH
7232: if (next == 0)
7233: fprintf(stderr, "PP: lookup '%c' failed\n", first);
7234: else if (third == 0)
7235: fprintf(stderr, "PP: lookup '%c%c' failed\n", first, next);
7236: else
7237: fprintf(stderr, "PP: lookup '%c%c%c' failed\n", first, next, third);
7238: #endif
7239: return(-1);
1.128 daniel 7240: }
7241:
7242: /**
1.143 daniel 7243: * xmlParseTryOrFinish:
1.128 daniel 7244: * @ctxt: an XML parser context
1.143 daniel 7245: * @terminate: last chunk indicator
1.128 daniel 7246: *
7247: * Try to progress on parsing
7248: *
7249: * Returns zero if no parsing was possible
7250: */
7251: int
1.143 daniel 7252: xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
1.128 daniel 7253: int ret = 0;
1.140 daniel 7254: int avail;
7255: xmlChar cur, next;
7256:
7257: #ifdef DEBUG_PUSH
7258: switch (ctxt->instate) {
7259: case XML_PARSER_EOF:
7260: fprintf(stderr, "PP: try EOF\n"); break;
7261: case XML_PARSER_START:
7262: fprintf(stderr, "PP: try START\n"); break;
7263: case XML_PARSER_MISC:
7264: fprintf(stderr, "PP: try MISC\n");break;
7265: case XML_PARSER_COMMENT:
7266: fprintf(stderr, "PP: try COMMENT\n");break;
7267: case XML_PARSER_PROLOG:
7268: fprintf(stderr, "PP: try PROLOG\n");break;
7269: case XML_PARSER_START_TAG:
7270: fprintf(stderr, "PP: try START_TAG\n");break;
7271: case XML_PARSER_CONTENT:
7272: fprintf(stderr, "PP: try CONTENT\n");break;
7273: case XML_PARSER_CDATA_SECTION:
7274: fprintf(stderr, "PP: try CDATA_SECTION\n");break;
7275: case XML_PARSER_END_TAG:
7276: fprintf(stderr, "PP: try END_TAG\n");break;
7277: case XML_PARSER_ENTITY_DECL:
7278: fprintf(stderr, "PP: try ENTITY_DECL\n");break;
7279: case XML_PARSER_ENTITY_VALUE:
7280: fprintf(stderr, "PP: try ENTITY_VALUE\n");break;
7281: case XML_PARSER_ATTRIBUTE_VALUE:
7282: fprintf(stderr, "PP: try ATTRIBUTE_VALUE\n");break;
7283: case XML_PARSER_DTD:
7284: fprintf(stderr, "PP: try DTD\n");break;
7285: case XML_PARSER_EPILOG:
7286: fprintf(stderr, "PP: try EPILOG\n");break;
7287: case XML_PARSER_PI:
7288: fprintf(stderr, "PP: try PI\n");break;
7289: }
7290: #endif
1.128 daniel 7291:
7292: while (1) {
1.140 daniel 7293: /*
7294: * Pop-up of finished entities.
7295: */
1.152 daniel 7296: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 7297: xmlPopInput(ctxt);
7298:
1.184 daniel 7299: if (ctxt->input ==NULL) break;
7300: if (ctxt->input->buf == NULL)
7301: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7302: else
1.184 daniel 7303: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7304: if (avail < 1)
7305: goto done;
1.128 daniel 7306: switch (ctxt->instate) {
7307: case XML_PARSER_EOF:
1.140 daniel 7308: /*
7309: * Document parsing is done !
7310: */
7311: goto done;
7312: case XML_PARSER_START:
7313: /*
7314: * Very first chars read from the document flow.
7315: */
1.184 daniel 7316: cur = ctxt->input->cur[0];
1.140 daniel 7317: if (IS_BLANK(cur)) {
7318: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7319: ctxt->sax->setDocumentLocator(ctxt->userData,
7320: &xmlDefaultSAXLocator);
1.230 veillard 7321: ctxt->errNo = XML_ERR_DOCUMENT_START;
1.140 daniel 7322: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7323: ctxt->sax->error(ctxt->userData,
7324: "Extra spaces at the beginning of the document are not allowed\n");
7325: ctxt->wellFormed = 0;
1.180 daniel 7326: ctxt->disableSAX = 1;
1.140 daniel 7327: SKIP_BLANKS;
7328: ret++;
1.184 daniel 7329: if (ctxt->input->buf == NULL)
7330: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7331: else
1.184 daniel 7332: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7333: }
7334: if (avail < 2)
7335: goto done;
7336:
1.184 daniel 7337: cur = ctxt->input->cur[0];
7338: next = ctxt->input->cur[1];
1.140 daniel 7339: if (cur == 0) {
7340: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7341: ctxt->sax->setDocumentLocator(ctxt->userData,
7342: &xmlDefaultSAXLocator);
1.230 veillard 7343: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.140 daniel 7344: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7345: ctxt->sax->error(ctxt->userData, "Document is empty\n");
7346: ctxt->wellFormed = 0;
1.180 daniel 7347: ctxt->disableSAX = 1;
1.140 daniel 7348: ctxt->instate = XML_PARSER_EOF;
7349: #ifdef DEBUG_PUSH
7350: fprintf(stderr, "PP: entering EOF\n");
7351: #endif
7352: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7353: ctxt->sax->endDocument(ctxt->userData);
7354: goto done;
7355: }
7356: if ((cur == '<') && (next == '?')) {
7357: /* PI or XML decl */
7358: if (avail < 5) return(ret);
1.143 daniel 7359: if ((!terminate) &&
7360: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 7361: return(ret);
7362: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7363: ctxt->sax->setDocumentLocator(ctxt->userData,
7364: &xmlDefaultSAXLocator);
1.184 daniel 7365: if ((ctxt->input->cur[2] == 'x') &&
7366: (ctxt->input->cur[3] == 'm') &&
7367: (ctxt->input->cur[4] == 'l') &&
7368: (IS_BLANK(ctxt->input->cur[5]))) {
1.140 daniel 7369: ret += 5;
7370: #ifdef DEBUG_PUSH
7371: fprintf(stderr, "PP: Parsing XML Decl\n");
7372: #endif
7373: xmlParseXMLDecl(ctxt);
1.193 daniel 7374: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7375: /*
7376: * The XML REC instructs us to stop parsing right
7377: * here
7378: */
7379: ctxt->instate = XML_PARSER_EOF;
7380: return(0);
7381: }
1.167 daniel 7382: ctxt->standalone = ctxt->input->standalone;
1.164 daniel 7383: if ((ctxt->encoding == NULL) &&
7384: (ctxt->input->encoding != NULL))
7385: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1.171 daniel 7386: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7387: (!ctxt->disableSAX))
1.140 daniel 7388: ctxt->sax->startDocument(ctxt->userData);
7389: ctxt->instate = XML_PARSER_MISC;
7390: #ifdef DEBUG_PUSH
7391: fprintf(stderr, "PP: entering MISC\n");
7392: #endif
7393: } else {
7394: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 daniel 7395: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7396: (!ctxt->disableSAX))
1.140 daniel 7397: ctxt->sax->startDocument(ctxt->userData);
7398: ctxt->instate = XML_PARSER_MISC;
7399: #ifdef DEBUG_PUSH
7400: fprintf(stderr, "PP: entering MISC\n");
7401: #endif
7402: }
7403: } else {
7404: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7405: ctxt->sax->setDocumentLocator(ctxt->userData,
7406: &xmlDefaultSAXLocator);
7407: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 daniel 7408: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7409: (!ctxt->disableSAX))
1.140 daniel 7410: ctxt->sax->startDocument(ctxt->userData);
7411: ctxt->instate = XML_PARSER_MISC;
7412: #ifdef DEBUG_PUSH
7413: fprintf(stderr, "PP: entering MISC\n");
7414: #endif
7415: }
7416: break;
7417: case XML_PARSER_MISC:
7418: SKIP_BLANKS;
1.184 daniel 7419: if (ctxt->input->buf == NULL)
7420: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7421: else
1.184 daniel 7422: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7423: if (avail < 2)
7424: goto done;
1.184 daniel 7425: cur = ctxt->input->cur[0];
7426: next = ctxt->input->cur[1];
1.140 daniel 7427: if ((cur == '<') && (next == '?')) {
1.143 daniel 7428: if ((!terminate) &&
7429: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 7430: goto done;
7431: #ifdef DEBUG_PUSH
7432: fprintf(stderr, "PP: Parsing PI\n");
7433: #endif
7434: xmlParsePI(ctxt);
7435: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 7436: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 7437: if ((!terminate) &&
7438: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 7439: goto done;
7440: #ifdef DEBUG_PUSH
7441: fprintf(stderr, "PP: Parsing Comment\n");
7442: #endif
7443: xmlParseComment(ctxt);
7444: ctxt->instate = XML_PARSER_MISC;
7445: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 7446: (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
7447: (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
7448: (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
7449: (ctxt->input->cur[8] == 'E')) {
1.143 daniel 7450: if ((!terminate) &&
7451: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 7452: goto done;
7453: #ifdef DEBUG_PUSH
7454: fprintf(stderr, "PP: Parsing internal subset\n");
7455: #endif
1.166 daniel 7456: ctxt->inSubset = 1;
1.140 daniel 7457: xmlParseDocTypeDecl(ctxt);
1.152 daniel 7458: if (RAW == '[') {
1.140 daniel 7459: ctxt->instate = XML_PARSER_DTD;
7460: #ifdef DEBUG_PUSH
7461: fprintf(stderr, "PP: entering DTD\n");
7462: #endif
7463: } else {
1.166 daniel 7464: /*
7465: * Create and update the external subset.
7466: */
7467: ctxt->inSubset = 2;
1.171 daniel 7468: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 7469: (ctxt->sax->externalSubset != NULL))
7470: ctxt->sax->externalSubset(ctxt->userData,
7471: ctxt->intSubName, ctxt->extSubSystem,
7472: ctxt->extSubURI);
7473: ctxt->inSubset = 0;
1.140 daniel 7474: ctxt->instate = XML_PARSER_PROLOG;
7475: #ifdef DEBUG_PUSH
7476: fprintf(stderr, "PP: entering PROLOG\n");
7477: #endif
7478: }
7479: } else if ((cur == '<') && (next == '!') &&
7480: (avail < 9)) {
7481: goto done;
7482: } else {
7483: ctxt->instate = XML_PARSER_START_TAG;
7484: #ifdef DEBUG_PUSH
7485: fprintf(stderr, "PP: entering START_TAG\n");
7486: #endif
7487: }
7488: break;
1.128 daniel 7489: case XML_PARSER_PROLOG:
1.140 daniel 7490: SKIP_BLANKS;
1.184 daniel 7491: if (ctxt->input->buf == NULL)
7492: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7493: else
1.184 daniel 7494: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7495: if (avail < 2)
7496: goto done;
1.184 daniel 7497: cur = ctxt->input->cur[0];
7498: next = ctxt->input->cur[1];
1.140 daniel 7499: if ((cur == '<') && (next == '?')) {
1.143 daniel 7500: if ((!terminate) &&
7501: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 7502: goto done;
7503: #ifdef DEBUG_PUSH
7504: fprintf(stderr, "PP: Parsing PI\n");
7505: #endif
7506: xmlParsePI(ctxt);
7507: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 7508: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 7509: if ((!terminate) &&
7510: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 7511: goto done;
7512: #ifdef DEBUG_PUSH
7513: fprintf(stderr, "PP: Parsing Comment\n");
7514: #endif
7515: xmlParseComment(ctxt);
7516: ctxt->instate = XML_PARSER_PROLOG;
7517: } else if ((cur == '<') && (next == '!') &&
7518: (avail < 4)) {
7519: goto done;
7520: } else {
7521: ctxt->instate = XML_PARSER_START_TAG;
7522: #ifdef DEBUG_PUSH
7523: fprintf(stderr, "PP: entering START_TAG\n");
7524: #endif
7525: }
7526: break;
7527: case XML_PARSER_EPILOG:
7528: SKIP_BLANKS;
1.184 daniel 7529: if (ctxt->input->buf == NULL)
7530: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7531: else
1.184 daniel 7532: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7533: if (avail < 2)
7534: goto done;
1.184 daniel 7535: cur = ctxt->input->cur[0];
7536: next = ctxt->input->cur[1];
1.140 daniel 7537: if ((cur == '<') && (next == '?')) {
1.143 daniel 7538: if ((!terminate) &&
7539: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 7540: goto done;
7541: #ifdef DEBUG_PUSH
7542: fprintf(stderr, "PP: Parsing PI\n");
7543: #endif
7544: xmlParsePI(ctxt);
7545: ctxt->instate = XML_PARSER_EPILOG;
7546: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 7547: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 7548: if ((!terminate) &&
7549: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 7550: goto done;
7551: #ifdef DEBUG_PUSH
7552: fprintf(stderr, "PP: Parsing Comment\n");
7553: #endif
7554: xmlParseComment(ctxt);
7555: ctxt->instate = XML_PARSER_EPILOG;
7556: } else if ((cur == '<') && (next == '!') &&
7557: (avail < 4)) {
7558: goto done;
7559: } else {
1.230 veillard 7560: ctxt->errNo = XML_ERR_DOCUMENT_END;
1.140 daniel 7561: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7562: ctxt->sax->error(ctxt->userData,
7563: "Extra content at the end of the document\n");
7564: ctxt->wellFormed = 0;
1.180 daniel 7565: ctxt->disableSAX = 1;
1.140 daniel 7566: ctxt->instate = XML_PARSER_EOF;
7567: #ifdef DEBUG_PUSH
7568: fprintf(stderr, "PP: entering EOF\n");
7569: #endif
1.171 daniel 7570: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7571: (!ctxt->disableSAX))
1.140 daniel 7572: ctxt->sax->endDocument(ctxt->userData);
7573: goto done;
7574: }
7575: break;
7576: case XML_PARSER_START_TAG: {
7577: xmlChar *name, *oldname;
7578:
1.184 daniel 7579: if ((avail < 2) && (ctxt->inputNr == 1))
1.140 daniel 7580: goto done;
1.184 daniel 7581: cur = ctxt->input->cur[0];
1.140 daniel 7582: if (cur != '<') {
1.230 veillard 7583: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.140 daniel 7584: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7585: ctxt->sax->error(ctxt->userData,
7586: "Start tag expect, '<' not found\n");
7587: ctxt->wellFormed = 0;
1.180 daniel 7588: ctxt->disableSAX = 1;
1.140 daniel 7589: ctxt->instate = XML_PARSER_EOF;
7590: #ifdef DEBUG_PUSH
7591: fprintf(stderr, "PP: entering EOF\n");
7592: #endif
1.171 daniel 7593: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7594: (!ctxt->disableSAX))
1.140 daniel 7595: ctxt->sax->endDocument(ctxt->userData);
7596: goto done;
7597: }
1.143 daniel 7598: if ((!terminate) &&
7599: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 7600: goto done;
1.176 daniel 7601: if (ctxt->spaceNr == 0)
7602: spacePush(ctxt, -1);
7603: else
7604: spacePush(ctxt, *ctxt->space);
1.140 daniel 7605: name = xmlParseStartTag(ctxt);
7606: if (name == NULL) {
1.176 daniel 7607: spacePop(ctxt);
1.140 daniel 7608: ctxt->instate = XML_PARSER_EOF;
7609: #ifdef DEBUG_PUSH
7610: fprintf(stderr, "PP: entering EOF\n");
7611: #endif
1.171 daniel 7612: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7613: (!ctxt->disableSAX))
1.140 daniel 7614: ctxt->sax->endDocument(ctxt->userData);
7615: goto done;
7616: }
7617: namePush(ctxt, xmlStrdup(name));
7618:
7619: /*
7620: * [ VC: Root Element Type ]
7621: * The Name in the document type declaration must match
7622: * the element type of the root element.
7623: */
7624: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 7625: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.140 daniel 7626: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7627:
7628: /*
7629: * Check for an Empty Element.
7630: */
1.152 daniel 7631: if ((RAW == '/') && (NXT(1) == '>')) {
1.140 daniel 7632: SKIP(2);
1.171 daniel 7633: if ((ctxt->sax != NULL) &&
7634: (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
1.140 daniel 7635: ctxt->sax->endElement(ctxt->userData, name);
7636: xmlFree(name);
7637: oldname = namePop(ctxt);
1.176 daniel 7638: spacePop(ctxt);
1.140 daniel 7639: if (oldname != NULL) {
7640: #ifdef DEBUG_STACK
7641: fprintf(stderr,"Close: popped %s\n", oldname);
7642: #endif
7643: xmlFree(oldname);
7644: }
7645: if (ctxt->name == NULL) {
7646: ctxt->instate = XML_PARSER_EPILOG;
7647: #ifdef DEBUG_PUSH
7648: fprintf(stderr, "PP: entering EPILOG\n");
7649: #endif
7650: } else {
7651: ctxt->instate = XML_PARSER_CONTENT;
7652: #ifdef DEBUG_PUSH
7653: fprintf(stderr, "PP: entering CONTENT\n");
7654: #endif
7655: }
7656: break;
7657: }
1.152 daniel 7658: if (RAW == '>') {
1.140 daniel 7659: NEXT;
7660: } else {
1.230 veillard 7661: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.140 daniel 7662: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7663: ctxt->sax->error(ctxt->userData,
7664: "Couldn't find end of Start Tag %s\n",
7665: name);
7666: ctxt->wellFormed = 0;
1.180 daniel 7667: ctxt->disableSAX = 1;
1.140 daniel 7668:
7669: /*
7670: * end of parsing of this node.
7671: */
7672: nodePop(ctxt);
7673: oldname = namePop(ctxt);
1.176 daniel 7674: spacePop(ctxt);
1.140 daniel 7675: if (oldname != NULL) {
7676: #ifdef DEBUG_STACK
7677: fprintf(stderr,"Close: popped %s\n", oldname);
7678: #endif
7679: xmlFree(oldname);
7680: }
7681: }
7682: xmlFree(name);
7683: ctxt->instate = XML_PARSER_CONTENT;
7684: #ifdef DEBUG_PUSH
7685: fprintf(stderr, "PP: entering CONTENT\n");
7686: #endif
7687: break;
7688: }
1.224 veillard 7689: case XML_PARSER_CONTENT: {
7690: const xmlChar *test;
7691: int cons;
7692: xmlChar tok;
7693:
1.140 daniel 7694: /*
7695: * Handle preparsed entities and charRef
7696: */
7697: if (ctxt->token != 0) {
7698: xmlChar cur[2] = { 0 , 0 } ;
7699:
7700: cur[0] = (xmlChar) ctxt->token;
1.171 daniel 7701: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
7702: (ctxt->sax->characters != NULL))
1.140 daniel 7703: ctxt->sax->characters(ctxt->userData, cur, 1);
7704: ctxt->token = 0;
7705: }
1.184 daniel 7706: if ((avail < 2) && (ctxt->inputNr == 1))
1.140 daniel 7707: goto done;
1.184 daniel 7708: cur = ctxt->input->cur[0];
7709: next = ctxt->input->cur[1];
1.224 veillard 7710:
7711: test = CUR_PTR;
7712: cons = ctxt->input->consumed;
7713: tok = ctxt->token;
1.140 daniel 7714: if ((cur == '<') && (next == '?')) {
1.143 daniel 7715: if ((!terminate) &&
7716: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 7717: goto done;
7718: #ifdef DEBUG_PUSH
7719: fprintf(stderr, "PP: Parsing PI\n");
7720: #endif
7721: xmlParsePI(ctxt);
7722: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 7723: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 7724: if ((!terminate) &&
7725: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 7726: goto done;
7727: #ifdef DEBUG_PUSH
7728: fprintf(stderr, "PP: Parsing Comment\n");
7729: #endif
7730: xmlParseComment(ctxt);
7731: ctxt->instate = XML_PARSER_CONTENT;
1.184 daniel 7732: } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
7733: (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
7734: (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
7735: (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
7736: (ctxt->input->cur[8] == '[')) {
1.140 daniel 7737: SKIP(9);
7738: ctxt->instate = XML_PARSER_CDATA_SECTION;
7739: #ifdef DEBUG_PUSH
7740: fprintf(stderr, "PP: entering CDATA_SECTION\n");
7741: #endif
7742: break;
7743: } else if ((cur == '<') && (next == '!') &&
7744: (avail < 9)) {
7745: goto done;
7746: } else if ((cur == '<') && (next == '/')) {
7747: ctxt->instate = XML_PARSER_END_TAG;
7748: #ifdef DEBUG_PUSH
7749: fprintf(stderr, "PP: entering END_TAG\n");
7750: #endif
7751: break;
7752: } else if (cur == '<') {
7753: ctxt->instate = XML_PARSER_START_TAG;
7754: #ifdef DEBUG_PUSH
7755: fprintf(stderr, "PP: entering START_TAG\n");
7756: #endif
7757: break;
7758: } else if (cur == '&') {
1.143 daniel 7759: if ((!terminate) &&
7760: (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
1.140 daniel 7761: goto done;
7762: #ifdef DEBUG_PUSH
7763: fprintf(stderr, "PP: Parsing Reference\n");
7764: #endif
7765: xmlParseReference(ctxt);
7766: } else {
1.156 daniel 7767: /* TODO Avoid the extra copy, handle directly !!! */
1.140 daniel 7768: /*
1.181 daniel 7769: * Goal of the following test is:
1.140 daniel 7770: * - minimize calls to the SAX 'character' callback
7771: * when they are mergeable
7772: * - handle an problem for isBlank when we only parse
7773: * a sequence of blank chars and the next one is
7774: * not available to check against '<' presence.
7775: * - tries to homogenize the differences in SAX
7776: * callbacks beween the push and pull versions
7777: * of the parser.
7778: */
7779: if ((ctxt->inputNr == 1) &&
7780: (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
1.143 daniel 7781: if ((!terminate) &&
7782: (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
1.140 daniel 7783: goto done;
7784: }
7785: ctxt->checkIndex = 0;
7786: #ifdef DEBUG_PUSH
7787: fprintf(stderr, "PP: Parsing char data\n");
7788: #endif
7789: xmlParseCharData(ctxt, 0);
7790: }
7791: /*
7792: * Pop-up of finished entities.
7793: */
1.152 daniel 7794: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 7795: xmlPopInput(ctxt);
1.224 veillard 7796: if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
7797: (tok == ctxt->token)) {
1.230 veillard 7798: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.224 veillard 7799: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7800: ctxt->sax->error(ctxt->userData,
7801: "detected an error in element content\n");
7802: ctxt->wellFormed = 0;
7803: ctxt->disableSAX = 1;
7804: ctxt->instate = XML_PARSER_EOF;
7805: break;
7806: }
1.140 daniel 7807: break;
1.224 veillard 7808: }
1.140 daniel 7809: case XML_PARSER_CDATA_SECTION: {
7810: /*
7811: * The Push mode need to have the SAX callback for
7812: * cdataBlock merge back contiguous callbacks.
7813: */
7814: int base;
7815:
7816: base = xmlParseLookupSequence(ctxt, ']', ']', '>');
7817: if (base < 0) {
7818: if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
1.171 daniel 7819: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.140 daniel 7820: if (ctxt->sax->cdataBlock != NULL)
1.184 daniel 7821: ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
1.140 daniel 7822: XML_PARSER_BIG_BUFFER_SIZE);
7823: }
7824: SKIP(XML_PARSER_BIG_BUFFER_SIZE);
7825: ctxt->checkIndex = 0;
7826: }
7827: goto done;
7828: } else {
1.171 daniel 7829: if ((ctxt->sax != NULL) && (base > 0) &&
7830: (!ctxt->disableSAX)) {
1.140 daniel 7831: if (ctxt->sax->cdataBlock != NULL)
7832: ctxt->sax->cdataBlock(ctxt->userData,
1.184 daniel 7833: ctxt->input->cur, base);
1.140 daniel 7834: }
7835: SKIP(base + 3);
7836: ctxt->checkIndex = 0;
7837: ctxt->instate = XML_PARSER_CONTENT;
7838: #ifdef DEBUG_PUSH
7839: fprintf(stderr, "PP: entering CONTENT\n");
7840: #endif
7841: }
7842: break;
7843: }
1.141 daniel 7844: case XML_PARSER_END_TAG:
1.140 daniel 7845: if (avail < 2)
7846: goto done;
1.143 daniel 7847: if ((!terminate) &&
7848: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 7849: goto done;
7850: xmlParseEndTag(ctxt);
7851: if (ctxt->name == NULL) {
7852: ctxt->instate = XML_PARSER_EPILOG;
7853: #ifdef DEBUG_PUSH
7854: fprintf(stderr, "PP: entering EPILOG\n");
7855: #endif
7856: } else {
7857: ctxt->instate = XML_PARSER_CONTENT;
7858: #ifdef DEBUG_PUSH
7859: fprintf(stderr, "PP: entering CONTENT\n");
7860: #endif
7861: }
7862: break;
7863: case XML_PARSER_DTD: {
7864: /*
7865: * Sorry but progressive parsing of the internal subset
7866: * is not expected to be supported. We first check that
7867: * the full content of the internal subset is available and
7868: * the parsing is launched only at that point.
7869: * Internal subset ends up with "']' S? '>'" in an unescaped
7870: * section and not in a ']]>' sequence which are conditional
7871: * sections (whoever argued to keep that crap in XML deserve
7872: * a place in hell !).
7873: */
7874: int base, i;
7875: xmlChar *buf;
7876: xmlChar quote = 0;
7877:
1.184 daniel 7878: base = ctxt->input->cur - ctxt->input->base;
1.140 daniel 7879: if (base < 0) return(0);
7880: if (ctxt->checkIndex > base)
7881: base = ctxt->checkIndex;
1.184 daniel 7882: buf = ctxt->input->buf->buffer->content;
1.202 daniel 7883: for (;(unsigned int) base < ctxt->input->buf->buffer->use;
7884: base++) {
1.140 daniel 7885: if (quote != 0) {
7886: if (buf[base] == quote)
7887: quote = 0;
7888: continue;
7889: }
7890: if (buf[base] == '"') {
7891: quote = '"';
7892: continue;
7893: }
7894: if (buf[base] == '\'') {
7895: quote = '\'';
7896: continue;
7897: }
7898: if (buf[base] == ']') {
1.202 daniel 7899: if ((unsigned int) base +1 >=
7900: ctxt->input->buf->buffer->use)
1.140 daniel 7901: break;
7902: if (buf[base + 1] == ']') {
7903: /* conditional crap, skip both ']' ! */
7904: base++;
7905: continue;
7906: }
1.202 daniel 7907: for (i = 0;
7908: (unsigned int) base + i < ctxt->input->buf->buffer->use;
7909: i++) {
1.140 daniel 7910: if (buf[base + i] == '>')
7911: goto found_end_int_subset;
7912: }
7913: break;
7914: }
7915: }
7916: /*
7917: * We didn't found the end of the Internal subset
7918: */
7919: if (quote == 0)
7920: ctxt->checkIndex = base;
7921: #ifdef DEBUG_PUSH
7922: if (next == 0)
7923: fprintf(stderr, "PP: lookup of int subset end filed\n");
7924: #endif
7925: goto done;
7926:
7927: found_end_int_subset:
7928: xmlParseInternalSubset(ctxt);
1.166 daniel 7929: ctxt->inSubset = 2;
1.171 daniel 7930: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 7931: (ctxt->sax->externalSubset != NULL))
7932: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7933: ctxt->extSubSystem, ctxt->extSubURI);
7934: ctxt->inSubset = 0;
1.140 daniel 7935: ctxt->instate = XML_PARSER_PROLOG;
7936: ctxt->checkIndex = 0;
7937: #ifdef DEBUG_PUSH
7938: fprintf(stderr, "PP: entering PROLOG\n");
7939: #endif
7940: break;
7941: }
7942: case XML_PARSER_COMMENT:
7943: fprintf(stderr, "PP: internal error, state == COMMENT\n");
7944: ctxt->instate = XML_PARSER_CONTENT;
7945: #ifdef DEBUG_PUSH
7946: fprintf(stderr, "PP: entering CONTENT\n");
7947: #endif
7948: break;
7949: case XML_PARSER_PI:
7950: fprintf(stderr, "PP: internal error, state == PI\n");
7951: ctxt->instate = XML_PARSER_CONTENT;
7952: #ifdef DEBUG_PUSH
7953: fprintf(stderr, "PP: entering CONTENT\n");
7954: #endif
7955: break;
1.128 daniel 7956: case XML_PARSER_ENTITY_DECL:
1.140 daniel 7957: fprintf(stderr, "PP: internal error, state == ENTITY_DECL\n");
7958: ctxt->instate = XML_PARSER_DTD;
7959: #ifdef DEBUG_PUSH
7960: fprintf(stderr, "PP: entering DTD\n");
7961: #endif
7962: break;
1.128 daniel 7963: case XML_PARSER_ENTITY_VALUE:
1.140 daniel 7964: fprintf(stderr, "PP: internal error, state == ENTITY_VALUE\n");
7965: ctxt->instate = XML_PARSER_CONTENT;
7966: #ifdef DEBUG_PUSH
7967: fprintf(stderr, "PP: entering DTD\n");
7968: #endif
7969: break;
1.128 daniel 7970: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 7971: fprintf(stderr, "PP: internal error, state == ATTRIBUTE_VALUE\n");
1.168 daniel 7972: ctxt->instate = XML_PARSER_START_TAG;
7973: #ifdef DEBUG_PUSH
7974: fprintf(stderr, "PP: entering START_TAG\n");
7975: #endif
7976: break;
7977: case XML_PARSER_SYSTEM_LITERAL:
7978: fprintf(stderr, "PP: internal error, state == SYSTEM_LITERAL\n");
1.140 daniel 7979: ctxt->instate = XML_PARSER_START_TAG;
7980: #ifdef DEBUG_PUSH
7981: fprintf(stderr, "PP: entering START_TAG\n");
7982: #endif
7983: break;
1.128 daniel 7984: }
7985: }
1.140 daniel 7986: done:
7987: #ifdef DEBUG_PUSH
7988: fprintf(stderr, "PP: done %d\n", ret);
7989: #endif
1.128 daniel 7990: return(ret);
7991: }
7992:
7993: /**
1.143 daniel 7994: * xmlParseTry:
7995: * @ctxt: an XML parser context
7996: *
7997: * Try to progress on parsing
7998: *
7999: * Returns zero if no parsing was possible
8000: */
8001: int
8002: xmlParseTry(xmlParserCtxtPtr ctxt) {
8003: return(xmlParseTryOrFinish(ctxt, 0));
8004: }
8005:
8006: /**
1.128 daniel 8007: * xmlParseChunk:
8008: * @ctxt: an XML parser context
8009: * @chunk: an char array
8010: * @size: the size in byte of the chunk
8011: * @terminate: last chunk indicator
8012: *
8013: * Parse a Chunk of memory
8014: *
8015: * Returns zero if no error, the xmlParserErrors otherwise.
8016: */
1.140 daniel 8017: int
1.128 daniel 8018: xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8019: int terminate) {
1.132 daniel 8020: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
1.140 daniel 8021: (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8022: int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8023: int cur = ctxt->input->cur - ctxt->input->base;
8024:
1.132 daniel 8025: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
1.140 daniel 8026: ctxt->input->base = ctxt->input->buf->buffer->content + base;
8027: ctxt->input->cur = ctxt->input->base + cur;
8028: #ifdef DEBUG_PUSH
8029: fprintf(stderr, "PP: pushed %d\n", size);
8030: #endif
8031:
1.150 daniel 8032: if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8033: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 8034: } else if (ctxt->instate != XML_PARSER_EOF)
1.143 daniel 8035: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 8036: if (terminate) {
1.151 daniel 8037: /*
8038: * Check for termination
8039: */
1.140 daniel 8040: if ((ctxt->instate != XML_PARSER_EOF) &&
8041: (ctxt->instate != XML_PARSER_EPILOG)) {
1.230 veillard 8042: ctxt->errNo = XML_ERR_DOCUMENT_END;
1.140 daniel 8043: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8044: ctxt->sax->error(ctxt->userData,
8045: "Extra content at the end of the document\n");
8046: ctxt->wellFormed = 0;
1.180 daniel 8047: ctxt->disableSAX = 1;
1.140 daniel 8048: }
8049: if (ctxt->instate != XML_PARSER_EOF) {
1.171 daniel 8050: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8051: (!ctxt->disableSAX))
1.140 daniel 8052: ctxt->sax->endDocument(ctxt->userData);
8053: }
8054: ctxt->instate = XML_PARSER_EOF;
1.128 daniel 8055: }
8056: return((xmlParserErrors) ctxt->errNo);
8057: }
8058:
8059: /************************************************************************
8060: * *
1.98 daniel 8061: * I/O front end functions to the parser *
8062: * *
8063: ************************************************************************/
1.201 daniel 8064:
8065: /**
1.229 veillard 8066: * xmlStopParser:
1.201 daniel 8067: * @ctxt: an XML parser context
8068: *
8069: * Blocks further parser processing
8070: */
8071: void
8072: xmlStopParser(xmlParserCtxtPtr ctxt) {
8073: ctxt->instate = XML_PARSER_EOF;
8074: if (ctxt->input != NULL)
8075: ctxt->input->cur = BAD_CAST"";
8076: }
1.98 daniel 8077:
1.50 daniel 8078: /**
1.181 daniel 8079: * xmlCreatePushParserCtxt:
1.140 daniel 8080: * @sax: a SAX handler
8081: * @user_data: The user data returned on SAX callbacks
8082: * @chunk: a pointer to an array of chars
8083: * @size: number of chars in the array
8084: * @filename: an optional file name or URI
8085: *
8086: * Create a parser context for using the XML parser in push mode
8087: * To allow content encoding detection, @size should be >= 4
8088: * The value of @filename is used for fetching external entities
8089: * and error/warning reports.
8090: *
8091: * Returns the new parser context or NULL
8092: */
8093: xmlParserCtxtPtr
8094: xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8095: const char *chunk, int size, const char *filename) {
8096: xmlParserCtxtPtr ctxt;
8097: xmlParserInputPtr inputStream;
8098: xmlParserInputBufferPtr buf;
8099: xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8100:
8101: /*
1.156 daniel 8102: * plug some encoding conversion routines
1.140 daniel 8103: */
8104: if ((chunk != NULL) && (size >= 4))
1.156 daniel 8105: enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
1.140 daniel 8106:
8107: buf = xmlAllocParserInputBuffer(enc);
8108: if (buf == NULL) return(NULL);
8109:
8110: ctxt = xmlNewParserCtxt();
8111: if (ctxt == NULL) {
8112: xmlFree(buf);
8113: return(NULL);
8114: }
8115: if (sax != NULL) {
8116: if (ctxt->sax != &xmlDefaultSAXHandler)
8117: xmlFree(ctxt->sax);
8118: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8119: if (ctxt->sax == NULL) {
8120: xmlFree(buf);
8121: xmlFree(ctxt);
8122: return(NULL);
8123: }
8124: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8125: if (user_data != NULL)
8126: ctxt->userData = user_data;
8127: }
8128: if (filename == NULL) {
8129: ctxt->directory = NULL;
8130: } else {
8131: ctxt->directory = xmlParserGetDirectory(filename);
8132: }
8133:
8134: inputStream = xmlNewInputStream(ctxt);
8135: if (inputStream == NULL) {
8136: xmlFreeParserCtxt(ctxt);
8137: return(NULL);
8138: }
8139:
8140: if (filename == NULL)
8141: inputStream->filename = NULL;
8142: else
8143: inputStream->filename = xmlMemStrdup(filename);
8144: inputStream->buf = buf;
8145: inputStream->base = inputStream->buf->buffer->content;
8146: inputStream->cur = inputStream->buf->buffer->content;
1.156 daniel 8147: if (enc != XML_CHAR_ENCODING_NONE) {
8148: xmlSwitchEncoding(ctxt, enc);
8149: }
1.140 daniel 8150:
8151: inputPush(ctxt, inputStream);
8152:
8153: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8154: (ctxt->input->buf != NULL)) {
8155: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8156: #ifdef DEBUG_PUSH
8157: fprintf(stderr, "PP: pushed %d\n", size);
8158: #endif
8159: }
1.190 daniel 8160:
8161: return(ctxt);
8162: }
8163:
8164: /**
8165: * xmlCreateIOParserCtxt:
8166: * @sax: a SAX handler
8167: * @user_data: The user data returned on SAX callbacks
8168: * @ioread: an I/O read function
8169: * @ioclose: an I/O close function
8170: * @ioctx: an I/O handler
8171: * @enc: the charset encoding if known
8172: *
8173: * Create a parser context for using the XML parser with an existing
8174: * I/O stream
8175: *
8176: * Returns the new parser context or NULL
8177: */
8178: xmlParserCtxtPtr
8179: xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8180: xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8181: void *ioctx, xmlCharEncoding enc) {
8182: xmlParserCtxtPtr ctxt;
8183: xmlParserInputPtr inputStream;
8184: xmlParserInputBufferPtr buf;
8185:
8186: buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8187: if (buf == NULL) return(NULL);
8188:
8189: ctxt = xmlNewParserCtxt();
8190: if (ctxt == NULL) {
8191: xmlFree(buf);
8192: return(NULL);
8193: }
8194: if (sax != NULL) {
8195: if (ctxt->sax != &xmlDefaultSAXHandler)
8196: xmlFree(ctxt->sax);
8197: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8198: if (ctxt->sax == NULL) {
8199: xmlFree(buf);
8200: xmlFree(ctxt);
8201: return(NULL);
8202: }
8203: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8204: if (user_data != NULL)
8205: ctxt->userData = user_data;
8206: }
8207:
1.229 veillard 8208: inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8209: if (inputStream == NULL) {
8210: xmlFreeParserCtxt(ctxt);
8211: return(NULL);
1.74 daniel 8212: }
1.229 veillard 8213: inputPush(ctxt, inputStream);
1.69 daniel 8214:
1.229 veillard 8215: return(ctxt);
1.1 veillard 8216: }
8217:
1.229 veillard 8218: /************************************************************************
8219: * *
8220: * Front ends when parsing a Dtd *
8221: * *
8222: ************************************************************************/
1.76 daniel 8223:
8224: /**
1.181 daniel 8225: * xmlSAXParseDTD:
1.76 daniel 8226: * @sax: the SAX handler block
8227: * @ExternalID: a NAME* containing the External ID of the DTD
8228: * @SystemID: a NAME* containing the URL to the DTD
8229: *
8230: * Load and parse an external subset.
8231: *
8232: * Returns the resulting xmlDtdPtr or NULL in case of error.
8233: */
8234:
8235: xmlDtdPtr
1.123 daniel 8236: xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8237: const xmlChar *SystemID) {
1.76 daniel 8238: xmlDtdPtr ret = NULL;
8239: xmlParserCtxtPtr ctxt;
1.83 daniel 8240: xmlParserInputPtr input = NULL;
1.76 daniel 8241: xmlCharEncoding enc;
8242:
8243: if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
8244:
1.97 daniel 8245: ctxt = xmlNewParserCtxt();
1.76 daniel 8246: if (ctxt == NULL) {
8247: return(NULL);
8248: }
8249:
8250: /*
8251: * Set-up the SAX context
8252: */
8253: if (sax != NULL) {
1.93 veillard 8254: if (ctxt->sax != NULL)
1.119 daniel 8255: xmlFree(ctxt->sax);
1.76 daniel 8256: ctxt->sax = sax;
8257: ctxt->userData = NULL;
8258: }
8259:
8260: /*
8261: * Ask the Entity resolver to load the damn thing
8262: */
8263:
8264: if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
8265: input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
8266: if (input == NULL) {
1.86 daniel 8267: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 8268: xmlFreeParserCtxt(ctxt);
8269: return(NULL);
8270: }
8271:
8272: /*
1.156 daniel 8273: * plug some encoding conversion routines here.
1.76 daniel 8274: */
8275: xmlPushInput(ctxt, input);
1.156 daniel 8276: enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
1.76 daniel 8277: xmlSwitchEncoding(ctxt, enc);
8278:
1.95 veillard 8279: if (input->filename == NULL)
1.156 daniel 8280: input->filename = (char *) xmlStrdup(SystemID);
1.76 daniel 8281: input->line = 1;
8282: input->col = 1;
8283: input->base = ctxt->input->cur;
8284: input->cur = ctxt->input->cur;
8285: input->free = NULL;
8286:
8287: /*
8288: * let's parse that entity knowing it's an external subset.
8289: */
1.191 daniel 8290: ctxt->inSubset = 2;
8291: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8292: ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8293: ExternalID, SystemID);
1.79 daniel 8294: xmlParseExternalSubset(ctxt, ExternalID, SystemID);
1.76 daniel 8295:
8296: if (ctxt->myDoc != NULL) {
8297: if (ctxt->wellFormed) {
1.191 daniel 8298: ret = ctxt->myDoc->extSubset;
8299: ctxt->myDoc->extSubset = NULL;
1.76 daniel 8300: } else {
8301: ret = NULL;
8302: }
8303: xmlFreeDoc(ctxt->myDoc);
8304: ctxt->myDoc = NULL;
8305: }
1.86 daniel 8306: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 8307: xmlFreeParserCtxt(ctxt);
8308:
8309: return(ret);
8310: }
8311:
8312: /**
1.181 daniel 8313: * xmlParseDTD:
1.76 daniel 8314: * @ExternalID: a NAME* containing the External ID of the DTD
8315: * @SystemID: a NAME* containing the URL to the DTD
8316: *
8317: * Load and parse an external subset.
8318: *
8319: * Returns the resulting xmlDtdPtr or NULL in case of error.
8320: */
8321:
8322: xmlDtdPtr
1.123 daniel 8323: xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
1.76 daniel 8324: return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
1.59 daniel 8325: }
8326:
1.229 veillard 8327: /************************************************************************
8328: * *
8329: * Front ends when parsing an Entity *
8330: * *
8331: ************************************************************************/
8332:
1.59 daniel 8333: /**
1.181 daniel 8334: * xmlSAXParseBalancedChunk:
1.144 daniel 8335: * @ctx: an XML parser context (possibly NULL)
8336: * @sax: the SAX handler bloc (possibly NULL)
8337: * @user_data: The user data returned on SAX callbacks (possibly NULL)
8338: * @input: a parser input stream
8339: * @enc: the encoding
8340: *
8341: * Parse a well-balanced chunk of an XML document
8342: * The user has to provide SAX callback block whose routines will be
8343: * called by the parser
8344: * The allowed sequence for the Well Balanced Chunk is the one defined by
8345: * the content production in the XML grammar:
8346: *
8347: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8348: *
1.176 daniel 8349: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
1.144 daniel 8350: * the error code otherwise
8351: */
8352:
8353: int
8354: xmlSAXParseBalancedChunk(xmlParserCtxtPtr ctx, xmlSAXHandlerPtr sax,
8355: void *user_data, xmlParserInputPtr input,
8356: xmlCharEncoding enc) {
8357: xmlParserCtxtPtr ctxt;
8358: int ret;
8359:
8360: if (input == NULL) return(-1);
8361:
8362: if (ctx != NULL)
8363: ctxt = ctx;
8364: else {
8365: ctxt = xmlNewParserCtxt();
8366: if (ctxt == NULL)
8367: return(-1);
8368: if (sax == NULL)
8369: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8370: }
8371:
8372: /*
8373: * Set-up the SAX context
8374: */
8375: if (sax != NULL) {
8376: if (ctxt->sax != NULL)
8377: xmlFree(ctxt->sax);
8378: ctxt->sax = sax;
8379: ctxt->userData = user_data;
8380: }
8381:
8382: /*
8383: * plug some encoding conversion routines here.
8384: */
8385: xmlPushInput(ctxt, input);
8386: if (enc != XML_CHAR_ENCODING_NONE)
8387: xmlSwitchEncoding(ctxt, enc);
8388:
8389: /*
8390: * let's parse that entity knowing it's an external subset.
8391: */
8392: xmlParseContent(ctxt);
8393: ret = ctxt->errNo;
8394:
8395: if (ctx == NULL) {
8396: if (sax != NULL)
8397: ctxt->sax = NULL;
8398: else
8399: xmlFreeDoc(ctxt->myDoc);
8400: xmlFreeParserCtxt(ctxt);
8401: }
8402: return(ret);
8403: }
8404:
8405: /**
1.213 veillard 8406: * xmlParseCtxtExternalEntity:
8407: * @ctx: the existing parsing context
8408: * @URL: the URL for the entity to load
8409: * @ID: the System ID for the entity to load
8410: * @list: the return value for the set of parsed nodes
8411: *
8412: * Parse an external general entity within an existing parsing context
8413: * An external general parsed entity is well-formed if it matches the
8414: * production labeled extParsedEnt.
8415: *
8416: * [78] extParsedEnt ::= TextDecl? content
8417: *
8418: * Returns 0 if the entity is well formed, -1 in case of args problem and
8419: * the parser error code otherwise
8420: */
8421:
8422: int
8423: xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
8424: const xmlChar *ID, xmlNodePtr *list) {
8425: xmlParserCtxtPtr ctxt;
8426: xmlDocPtr newDoc;
8427: xmlSAXHandlerPtr oldsax = NULL;
8428: int ret = 0;
8429:
8430: if (ctx->depth > 40) {
8431: return(XML_ERR_ENTITY_LOOP);
8432: }
8433:
8434: if (list != NULL)
8435: *list = NULL;
8436: if ((URL == NULL) && (ID == NULL))
8437: return(-1);
8438: if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
8439: return(-1);
8440:
8441:
1.228 veillard 8442: ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
1.213 veillard 8443: if (ctxt == NULL) return(-1);
8444: ctxt->userData = ctxt;
8445: oldsax = ctxt->sax;
8446: ctxt->sax = ctx->sax;
8447: newDoc = xmlNewDoc(BAD_CAST "1.0");
8448: if (newDoc == NULL) {
8449: xmlFreeParserCtxt(ctxt);
8450: return(-1);
8451: }
8452: if (ctx->myDoc != NULL) {
8453: newDoc->intSubset = ctx->myDoc->intSubset;
8454: newDoc->extSubset = ctx->myDoc->extSubset;
8455: }
8456: if (ctx->myDoc->URL != NULL) {
8457: newDoc->URL = xmlStrdup(ctx->myDoc->URL);
8458: }
8459: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8460: if (newDoc->children == NULL) {
8461: ctxt->sax = oldsax;
8462: xmlFreeParserCtxt(ctxt);
8463: newDoc->intSubset = NULL;
8464: newDoc->extSubset = NULL;
8465: xmlFreeDoc(newDoc);
8466: return(-1);
8467: }
8468: nodePush(ctxt, newDoc->children);
8469: if (ctx->myDoc == NULL) {
8470: ctxt->myDoc = newDoc;
8471: } else {
8472: ctxt->myDoc = ctx->myDoc;
8473: newDoc->children->doc = ctx->myDoc;
8474: }
8475:
8476: /*
8477: * Parse a possible text declaration first
8478: */
8479: GROW;
8480: if ((RAW == '<') && (NXT(1) == '?') &&
8481: (NXT(2) == 'x') && (NXT(3) == 'm') &&
8482: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8483: xmlParseTextDecl(ctxt);
8484: }
8485:
8486: /*
8487: * Doing validity checking on chunk doesn't make sense
8488: */
8489: ctxt->instate = XML_PARSER_CONTENT;
8490: ctxt->validate = ctx->validate;
8491: ctxt->depth = ctx->depth + 1;
8492: ctxt->replaceEntities = ctx->replaceEntities;
8493: if (ctxt->validate) {
8494: ctxt->vctxt.error = ctx->vctxt.error;
8495: ctxt->vctxt.warning = ctx->vctxt.warning;
8496: /* Allocate the Node stack */
8497: ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
1.228 veillard 8498: if (ctxt->vctxt.nodeTab == NULL) {
8499: fprintf(stderr, "xmlParseCtxtExternalEntity: out of memory\n");
8500: ctxt->validate = 0;
8501: ctxt->vctxt.error = NULL;
8502: ctxt->vctxt.warning = NULL;
8503: } else {
8504: ctxt->vctxt.nodeNr = 0;
8505: ctxt->vctxt.nodeMax = 4;
8506: ctxt->vctxt.node = NULL;
8507: }
1.213 veillard 8508: } else {
8509: ctxt->vctxt.error = NULL;
8510: ctxt->vctxt.warning = NULL;
8511: }
8512:
8513: xmlParseContent(ctxt);
8514:
8515: if ((RAW == '<') && (NXT(1) == '/')) {
1.230 veillard 8516: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.213 veillard 8517: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8518: ctxt->sax->error(ctxt->userData,
8519: "chunk is not well balanced\n");
8520: ctxt->wellFormed = 0;
8521: ctxt->disableSAX = 1;
8522: } else if (RAW != 0) {
1.230 veillard 8523: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
1.213 veillard 8524: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8525: ctxt->sax->error(ctxt->userData,
8526: "extra content at the end of well balanced chunk\n");
8527: ctxt->wellFormed = 0;
8528: ctxt->disableSAX = 1;
8529: }
8530: if (ctxt->node != newDoc->children) {
1.230 veillard 8531: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.213 veillard 8532: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8533: ctxt->sax->error(ctxt->userData,
8534: "chunk is not well balanced\n");
8535: ctxt->wellFormed = 0;
8536: ctxt->disableSAX = 1;
8537: }
8538:
8539: if (!ctxt->wellFormed) {
8540: if (ctxt->errNo == 0)
8541: ret = 1;
8542: else
8543: ret = ctxt->errNo;
8544: } else {
8545: if (list != NULL) {
8546: xmlNodePtr cur;
8547:
8548: /*
8549: * Return the newly created nodeset after unlinking it from
8550: * they pseudo parent.
8551: */
8552: cur = newDoc->children->children;
8553: *list = cur;
8554: while (cur != NULL) {
8555: cur->parent = NULL;
8556: cur = cur->next;
8557: }
8558: newDoc->children->children = NULL;
8559: }
8560: ret = 0;
8561: }
8562: ctxt->sax = oldsax;
8563: xmlFreeParserCtxt(ctxt);
8564: newDoc->intSubset = NULL;
8565: newDoc->extSubset = NULL;
8566: xmlFreeDoc(newDoc);
8567:
8568: return(ret);
8569: }
8570:
8571: /**
1.181 daniel 8572: * xmlParseExternalEntity:
8573: * @doc: the document the chunk pertains to
8574: * @sax: the SAX handler bloc (possibly NULL)
8575: * @user_data: The user data returned on SAX callbacks (possibly NULL)
1.185 daniel 8576: * @depth: Used for loop detection, use 0
1.181 daniel 8577: * @URL: the URL for the entity to load
8578: * @ID: the System ID for the entity to load
8579: * @list: the return value for the set of parsed nodes
8580: *
8581: * Parse an external general entity
8582: * An external general parsed entity is well-formed if it matches the
8583: * production labeled extParsedEnt.
8584: *
8585: * [78] extParsedEnt ::= TextDecl? content
8586: *
8587: * Returns 0 if the entity is well formed, -1 in case of args problem and
8588: * the parser error code otherwise
8589: */
8590:
8591: int
8592: xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
1.185 daniel 8593: int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
1.181 daniel 8594: xmlParserCtxtPtr ctxt;
8595: xmlDocPtr newDoc;
8596: xmlSAXHandlerPtr oldsax = NULL;
8597: int ret = 0;
8598:
1.185 daniel 8599: if (depth > 40) {
8600: return(XML_ERR_ENTITY_LOOP);
8601: }
8602:
8603:
1.181 daniel 8604:
8605: if (list != NULL)
8606: *list = NULL;
8607: if ((URL == NULL) && (ID == NULL))
1.213 veillard 8608: return(-1);
8609: if (doc == NULL) /* @@ relax but check for dereferences */
1.181 daniel 8610: return(-1);
8611:
8612:
1.228 veillard 8613: ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
1.181 daniel 8614: if (ctxt == NULL) return(-1);
8615: ctxt->userData = ctxt;
8616: if (sax != NULL) {
8617: oldsax = ctxt->sax;
8618: ctxt->sax = sax;
8619: if (user_data != NULL)
8620: ctxt->userData = user_data;
8621: }
8622: newDoc = xmlNewDoc(BAD_CAST "1.0");
8623: if (newDoc == NULL) {
8624: xmlFreeParserCtxt(ctxt);
8625: return(-1);
8626: }
8627: if (doc != NULL) {
8628: newDoc->intSubset = doc->intSubset;
8629: newDoc->extSubset = doc->extSubset;
8630: }
8631: if (doc->URL != NULL) {
8632: newDoc->URL = xmlStrdup(doc->URL);
8633: }
8634: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8635: if (newDoc->children == NULL) {
8636: if (sax != NULL)
8637: ctxt->sax = oldsax;
8638: xmlFreeParserCtxt(ctxt);
8639: newDoc->intSubset = NULL;
8640: newDoc->extSubset = NULL;
8641: xmlFreeDoc(newDoc);
8642: return(-1);
8643: }
8644: nodePush(ctxt, newDoc->children);
8645: if (doc == NULL) {
8646: ctxt->myDoc = newDoc;
8647: } else {
8648: ctxt->myDoc = doc;
8649: newDoc->children->doc = doc;
8650: }
8651:
8652: /*
8653: * Parse a possible text declaration first
8654: */
8655: GROW;
8656: if ((RAW == '<') && (NXT(1) == '?') &&
8657: (NXT(2) == 'x') && (NXT(3) == 'm') &&
8658: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8659: xmlParseTextDecl(ctxt);
8660: }
8661:
8662: /*
8663: * Doing validity checking on chunk doesn't make sense
8664: */
8665: ctxt->instate = XML_PARSER_CONTENT;
8666: ctxt->validate = 0;
1.185 daniel 8667: ctxt->depth = depth;
1.181 daniel 8668:
8669: xmlParseContent(ctxt);
8670:
8671: if ((RAW == '<') && (NXT(1) == '/')) {
1.230 veillard 8672: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.181 daniel 8673: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8674: ctxt->sax->error(ctxt->userData,
8675: "chunk is not well balanced\n");
8676: ctxt->wellFormed = 0;
8677: ctxt->disableSAX = 1;
8678: } else if (RAW != 0) {
1.230 veillard 8679: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
1.181 daniel 8680: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8681: ctxt->sax->error(ctxt->userData,
8682: "extra content at the end of well balanced chunk\n");
8683: ctxt->wellFormed = 0;
8684: ctxt->disableSAX = 1;
8685: }
8686: if (ctxt->node != newDoc->children) {
1.230 veillard 8687: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.181 daniel 8688: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8689: ctxt->sax->error(ctxt->userData,
8690: "chunk is not well balanced\n");
8691: ctxt->wellFormed = 0;
8692: ctxt->disableSAX = 1;
8693: }
8694:
8695: if (!ctxt->wellFormed) {
8696: if (ctxt->errNo == 0)
8697: ret = 1;
8698: else
8699: ret = ctxt->errNo;
8700: } else {
8701: if (list != NULL) {
8702: xmlNodePtr cur;
8703:
8704: /*
8705: * Return the newly created nodeset after unlinking it from
8706: * they pseudo parent.
8707: */
8708: cur = newDoc->children->children;
8709: *list = cur;
8710: while (cur != NULL) {
8711: cur->parent = NULL;
8712: cur = cur->next;
8713: }
8714: newDoc->children->children = NULL;
8715: }
8716: ret = 0;
8717: }
8718: if (sax != NULL)
8719: ctxt->sax = oldsax;
8720: xmlFreeParserCtxt(ctxt);
8721: newDoc->intSubset = NULL;
8722: newDoc->extSubset = NULL;
8723: xmlFreeDoc(newDoc);
8724:
8725: return(ret);
8726: }
8727:
8728: /**
8729: * xmlParseBalancedChunk:
1.176 daniel 8730: * @doc: the document the chunk pertains to
8731: * @sax: the SAX handler bloc (possibly NULL)
8732: * @user_data: The user data returned on SAX callbacks (possibly NULL)
1.185 daniel 8733: * @depth: Used for loop detection, use 0
1.176 daniel 8734: * @string: the input string in UTF8 or ISO-Latin (zero terminated)
8735: * @list: the return value for the set of parsed nodes
8736: *
8737: * Parse a well-balanced chunk of an XML document
8738: * called by the parser
8739: * The allowed sequence for the Well Balanced Chunk is the one defined by
8740: * the content production in the XML grammar:
1.144 daniel 8741: *
1.175 daniel 8742: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8743: *
1.176 daniel 8744: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
8745: * the parser error code otherwise
1.144 daniel 8746: */
8747:
1.175 daniel 8748: int
8749: xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
1.185 daniel 8750: void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
1.176 daniel 8751: xmlParserCtxtPtr ctxt;
1.175 daniel 8752: xmlDocPtr newDoc;
1.181 daniel 8753: xmlSAXHandlerPtr oldsax = NULL;
1.175 daniel 8754: int size;
1.176 daniel 8755: int ret = 0;
1.175 daniel 8756:
1.185 daniel 8757: if (depth > 40) {
8758: return(XML_ERR_ENTITY_LOOP);
8759: }
8760:
1.175 daniel 8761:
1.176 daniel 8762: if (list != NULL)
8763: *list = NULL;
8764: if (string == NULL)
8765: return(-1);
8766:
8767: size = xmlStrlen(string);
8768:
1.183 daniel 8769: ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
1.176 daniel 8770: if (ctxt == NULL) return(-1);
8771: ctxt->userData = ctxt;
1.175 daniel 8772: if (sax != NULL) {
1.176 daniel 8773: oldsax = ctxt->sax;
8774: ctxt->sax = sax;
8775: if (user_data != NULL)
8776: ctxt->userData = user_data;
1.175 daniel 8777: }
8778: newDoc = xmlNewDoc(BAD_CAST "1.0");
1.176 daniel 8779: if (newDoc == NULL) {
8780: xmlFreeParserCtxt(ctxt);
8781: return(-1);
8782: }
1.175 daniel 8783: if (doc != NULL) {
8784: newDoc->intSubset = doc->intSubset;
8785: newDoc->extSubset = doc->extSubset;
8786: }
1.176 daniel 8787: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8788: if (newDoc->children == NULL) {
8789: if (sax != NULL)
8790: ctxt->sax = oldsax;
8791: xmlFreeParserCtxt(ctxt);
8792: newDoc->intSubset = NULL;
8793: newDoc->extSubset = NULL;
8794: xmlFreeDoc(newDoc);
8795: return(-1);
8796: }
8797: nodePush(ctxt, newDoc->children);
8798: if (doc == NULL) {
8799: ctxt->myDoc = newDoc;
8800: } else {
8801: ctxt->myDoc = doc;
8802: newDoc->children->doc = doc;
8803: }
8804: ctxt->instate = XML_PARSER_CONTENT;
1.185 daniel 8805: ctxt->depth = depth;
1.176 daniel 8806:
8807: /*
8808: * Doing validity checking on chunk doesn't make sense
8809: */
8810: ctxt->validate = 0;
8811:
1.175 daniel 8812: xmlParseContent(ctxt);
1.176 daniel 8813:
8814: if ((RAW == '<') && (NXT(1) == '/')) {
1.230 veillard 8815: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.176 daniel 8816: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8817: ctxt->sax->error(ctxt->userData,
8818: "chunk is not well balanced\n");
8819: ctxt->wellFormed = 0;
1.180 daniel 8820: ctxt->disableSAX = 1;
1.176 daniel 8821: } else if (RAW != 0) {
1.230 veillard 8822: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
1.176 daniel 8823: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8824: ctxt->sax->error(ctxt->userData,
8825: "extra content at the end of well balanced chunk\n");
8826: ctxt->wellFormed = 0;
1.180 daniel 8827: ctxt->disableSAX = 1;
1.176 daniel 8828: }
8829: if (ctxt->node != newDoc->children) {
1.230 veillard 8830: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.176 daniel 8831: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8832: ctxt->sax->error(ctxt->userData,
8833: "chunk is not well balanced\n");
8834: ctxt->wellFormed = 0;
1.180 daniel 8835: ctxt->disableSAX = 1;
1.176 daniel 8836: }
1.175 daniel 8837:
1.176 daniel 8838: if (!ctxt->wellFormed) {
8839: if (ctxt->errNo == 0)
8840: ret = 1;
8841: else
8842: ret = ctxt->errNo;
8843: } else {
8844: if (list != NULL) {
8845: xmlNodePtr cur;
1.175 daniel 8846:
1.176 daniel 8847: /*
8848: * Return the newly created nodeset after unlinking it from
8849: * they pseudo parent.
8850: */
8851: cur = newDoc->children->children;
8852: *list = cur;
8853: while (cur != NULL) {
8854: cur->parent = NULL;
8855: cur = cur->next;
8856: }
8857: newDoc->children->children = NULL;
8858: }
8859: ret = 0;
1.175 daniel 8860: }
1.176 daniel 8861: if (sax != NULL)
8862: ctxt->sax = oldsax;
1.175 daniel 8863: xmlFreeParserCtxt(ctxt);
8864: newDoc->intSubset = NULL;
8865: newDoc->extSubset = NULL;
1.176 daniel 8866: xmlFreeDoc(newDoc);
1.175 daniel 8867:
1.176 daniel 8868: return(ret);
1.144 daniel 8869: }
8870:
8871: /**
1.229 veillard 8872: * xmlSAXParseEntity:
8873: * @sax: the SAX handler block
8874: * @filename: the filename
8875: *
8876: * parse an XML external entity out of context and build a tree.
8877: * It use the given SAX function block to handle the parsing callback.
8878: * If sax is NULL, fallback to the default DOM tree building routines.
8879: *
8880: * [78] extParsedEnt ::= TextDecl? content
8881: *
8882: * This correspond to a "Well Balanced" chunk
1.144 daniel 8883: *
1.229 veillard 8884: * Returns the resulting document tree
1.144 daniel 8885: */
8886:
1.229 veillard 8887: xmlDocPtr
8888: xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
8889: xmlDocPtr ret;
8890: xmlParserCtxtPtr ctxt;
8891: char *directory = NULL;
8892:
8893: ctxt = xmlCreateFileParserCtxt(filename);
8894: if (ctxt == NULL) {
8895: return(NULL);
8896: }
8897: if (sax != NULL) {
8898: if (ctxt->sax != NULL)
8899: xmlFree(ctxt->sax);
8900: ctxt->sax = sax;
8901: ctxt->userData = NULL;
8902: }
8903:
8904: if ((ctxt->directory == NULL) && (directory == NULL))
8905: directory = xmlParserGetDirectory(filename);
8906:
8907: xmlParseExtParsedEnt(ctxt);
8908:
8909: if (ctxt->wellFormed)
8910: ret = ctxt->myDoc;
8911: else {
8912: ret = NULL;
8913: xmlFreeDoc(ctxt->myDoc);
8914: ctxt->myDoc = NULL;
8915: }
8916: if (sax != NULL)
8917: ctxt->sax = NULL;
8918: xmlFreeParserCtxt(ctxt);
8919:
8920: return(ret);
1.144 daniel 8921: }
8922:
8923: /**
1.229 veillard 8924: * xmlParseEntity:
8925: * @filename: the filename
8926: *
8927: * parse an XML external entity out of context and build a tree.
8928: *
8929: * [78] extParsedEnt ::= TextDecl? content
8930: *
8931: * This correspond to a "Well Balanced" chunk
1.59 daniel 8932: *
1.68 daniel 8933: * Returns the resulting document tree
1.59 daniel 8934: */
8935:
1.69 daniel 8936: xmlDocPtr
1.229 veillard 8937: xmlParseEntity(const char *filename) {
8938: return(xmlSAXParseEntity(NULL, filename));
1.55 daniel 8939: }
8940:
8941: /**
1.181 daniel 8942: * xmlCreateEntityParserCtxt:
8943: * @URL: the entity URL
8944: * @ID: the entity PUBLIC ID
8945: * @base: a posible base for the target URI
8946: *
8947: * Create a parser context for an external entity
8948: * Automatic support for ZLIB/Compress compressed document is provided
8949: * by default if found at compile-time.
8950: *
8951: * Returns the new parser context or NULL
8952: */
8953: xmlParserCtxtPtr
8954: xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
8955: const xmlChar *base) {
8956: xmlParserCtxtPtr ctxt;
8957: xmlParserInputPtr inputStream;
8958: char *directory = NULL;
1.210 veillard 8959: xmlChar *uri;
8960:
1.181 daniel 8961: ctxt = xmlNewParserCtxt();
8962: if (ctxt == NULL) {
8963: return(NULL);
8964: }
8965:
1.210 veillard 8966: uri = xmlBuildURI(URL, base);
8967:
8968: if (uri == NULL) {
8969: inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
8970: if (inputStream == NULL) {
8971: xmlFreeParserCtxt(ctxt);
8972: return(NULL);
8973: }
8974:
8975: inputPush(ctxt, inputStream);
8976:
8977: if ((ctxt->directory == NULL) && (directory == NULL))
8978: directory = xmlParserGetDirectory((char *)URL);
8979: if ((ctxt->directory == NULL) && (directory != NULL))
8980: ctxt->directory = directory;
8981: } else {
8982: inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
8983: if (inputStream == NULL) {
8984: xmlFreeParserCtxt(ctxt);
8985: return(NULL);
8986: }
1.181 daniel 8987:
1.210 veillard 8988: inputPush(ctxt, inputStream);
1.181 daniel 8989:
1.210 veillard 8990: if ((ctxt->directory == NULL) && (directory == NULL))
8991: directory = xmlParserGetDirectory((char *)uri);
8992: if ((ctxt->directory == NULL) && (directory != NULL))
8993: ctxt->directory = directory;
8994: xmlFree(uri);
8995: }
1.181 daniel 8996:
8997: return(ctxt);
8998: }
8999:
1.229 veillard 9000: /************************************************************************
9001: * *
9002: * Front ends when parsing from a file *
9003: * *
9004: ************************************************************************/
9005:
1.181 daniel 9006: /**
9007: * xmlCreateFileParserCtxt:
1.50 daniel 9008: * @filename: the filename
9009: *
1.69 daniel 9010: * Create a parser context for a file content.
9011: * Automatic support for ZLIB/Compress compressed document is provided
9012: * by default if found at compile-time.
1.50 daniel 9013: *
1.69 daniel 9014: * Returns the new parser context or NULL
1.9 httpng 9015: */
1.69 daniel 9016: xmlParserCtxtPtr
9017: xmlCreateFileParserCtxt(const char *filename)
9018: {
9019: xmlParserCtxtPtr ctxt;
1.40 daniel 9020: xmlParserInputPtr inputStream;
1.91 daniel 9021: xmlParserInputBufferPtr buf;
1.111 daniel 9022: char *directory = NULL;
1.9 httpng 9023:
1.91 daniel 9024: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1.228 veillard 9025: if (buf == NULL) {
9026: return(NULL);
9027: }
1.9 httpng 9028:
1.97 daniel 9029: ctxt = xmlNewParserCtxt();
1.16 daniel 9030: if (ctxt == NULL) {
1.228 veillard 9031: if (xmlDefaultSAXHandler.error != NULL) {
9032: xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9033: }
1.16 daniel 9034: return(NULL);
9035: }
1.97 daniel 9036:
1.96 daniel 9037: inputStream = xmlNewInputStream(ctxt);
1.40 daniel 9038: if (inputStream == NULL) {
1.97 daniel 9039: xmlFreeParserCtxt(ctxt);
1.40 daniel 9040: return(NULL);
9041: }
9042:
1.119 daniel 9043: inputStream->filename = xmlMemStrdup(filename);
1.91 daniel 9044: inputStream->buf = buf;
9045: inputStream->base = inputStream->buf->buffer->content;
9046: inputStream->cur = inputStream->buf->buffer->content;
1.16 daniel 9047:
1.40 daniel 9048: inputPush(ctxt, inputStream);
1.110 daniel 9049: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 9050: directory = xmlParserGetDirectory(filename);
9051: if ((ctxt->directory == NULL) && (directory != NULL))
1.110 daniel 9052: ctxt->directory = directory;
1.106 daniel 9053:
1.69 daniel 9054: return(ctxt);
9055: }
9056:
9057: /**
1.181 daniel 9058: * xmlSAXParseFile:
1.69 daniel 9059: * @sax: the SAX handler block
9060: * @filename: the filename
9061: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9062: * documents
9063: *
9064: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9065: * compressed document is provided by default if found at compile-time.
9066: * It use the given SAX function block to handle the parsing callback.
9067: * If sax is NULL, fallback to the default DOM tree building routines.
9068: *
9069: * Returns the resulting document tree
9070: */
9071:
1.79 daniel 9072: xmlDocPtr
9073: xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
1.69 daniel 9074: int recovery) {
9075: xmlDocPtr ret;
1.229 veillard 9076: xmlParserCtxtPtr ctxt;
9077: char *directory = NULL;
9078:
9079: ctxt = xmlCreateFileParserCtxt(filename);
9080: if (ctxt == NULL) {
9081: return(NULL);
9082: }
9083: if (sax != NULL) {
9084: if (ctxt->sax != NULL)
9085: xmlFree(ctxt->sax);
9086: ctxt->sax = sax;
9087: ctxt->userData = NULL;
9088: }
9089:
9090: if ((ctxt->directory == NULL) && (directory == NULL))
9091: directory = xmlParserGetDirectory(filename);
9092: if ((ctxt->directory == NULL) && (directory != NULL))
9093: ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9094:
9095: xmlParseDocument(ctxt);
9096:
9097: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9098: else {
9099: ret = NULL;
9100: xmlFreeDoc(ctxt->myDoc);
9101: ctxt->myDoc = NULL;
9102: }
9103: if (sax != NULL)
9104: ctxt->sax = NULL;
9105: xmlFreeParserCtxt(ctxt);
9106:
9107: return(ret);
9108: }
9109:
9110: /**
9111: * xmlRecoverDoc:
9112: * @cur: a pointer to an array of xmlChar
9113: *
9114: * parse an XML in-memory document and build a tree.
9115: * In the case the document is not Well Formed, a tree is built anyway
9116: *
9117: * Returns the resulting document tree
9118: */
9119:
9120: xmlDocPtr
9121: xmlRecoverDoc(xmlChar *cur) {
9122: return(xmlSAXParseDoc(NULL, cur, 1));
9123: }
9124:
9125: /**
9126: * xmlParseFile:
9127: * @filename: the filename
9128: *
9129: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9130: * compressed document is provided by default if found at compile-time.
9131: *
9132: * Returns the resulting document tree
9133: */
9134:
9135: xmlDocPtr
9136: xmlParseFile(const char *filename) {
9137: return(xmlSAXParseFile(NULL, filename, 0));
9138: }
9139:
9140: /**
9141: * xmlRecoverFile:
9142: * @filename: the filename
9143: *
9144: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9145: * compressed document is provided by default if found at compile-time.
9146: * In the case the document is not Well Formed, a tree is built anyway
9147: *
9148: * Returns the resulting document tree
9149: */
9150:
9151: xmlDocPtr
9152: xmlRecoverFile(const char *filename) {
9153: return(xmlSAXParseFile(NULL, filename, 1));
9154: }
9155:
9156:
9157: /**
9158: * xmlSetupParserForBuffer:
9159: * @ctxt: an XML parser context
9160: * @buffer: a xmlChar * buffer
9161: * @filename: a file name
9162: *
9163: * Setup the parser context to parse a new buffer; Clears any prior
9164: * contents from the parser context. The buffer parameter must not be
9165: * NULL, but the filename parameter can be
9166: */
9167: void
9168: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9169: const char* filename)
9170: {
9171: xmlParserInputPtr input;
9172:
9173: input = xmlNewInputStream(ctxt);
9174: if (input == NULL) {
9175: perror("malloc");
9176: xmlFree(ctxt);
9177: return;
9178: }
9179:
9180: xmlClearParserCtxt(ctxt);
9181: if (filename != NULL)
9182: input->filename = xmlMemStrdup(filename);
9183: input->base = buffer;
9184: input->cur = buffer;
9185: inputPush(ctxt, input);
9186: }
9187:
9188: /**
9189: * xmlSAXUserParseFile:
9190: * @sax: a SAX handler
9191: * @user_data: The user data returned on SAX callbacks
9192: * @filename: a file name
9193: *
9194: * parse an XML file and call the given SAX handler routines.
9195: * Automatic support for ZLIB/Compress compressed document is provided
9196: *
9197: * Returns 0 in case of success or a error number otherwise
9198: */
9199: int
9200: xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9201: const char *filename) {
9202: int ret = 0;
9203: xmlParserCtxtPtr ctxt;
9204:
9205: ctxt = xmlCreateFileParserCtxt(filename);
9206: if (ctxt == NULL) return -1;
9207: if (ctxt->sax != &xmlDefaultSAXHandler)
9208: xmlFree(ctxt->sax);
9209: ctxt->sax = sax;
9210: if (user_data != NULL)
9211: ctxt->userData = user_data;
9212:
1.16 daniel 9213: xmlParseDocument(ctxt);
1.229 veillard 9214:
9215: if (ctxt->wellFormed)
9216: ret = 0;
1.59 daniel 9217: else {
1.229 veillard 9218: if (ctxt->errNo != 0)
9219: ret = ctxt->errNo;
9220: else
9221: ret = -1;
1.59 daniel 9222: }
1.86 daniel 9223: if (sax != NULL)
1.229 veillard 9224: ctxt->sax = NULL;
1.69 daniel 9225: xmlFreeParserCtxt(ctxt);
1.20 daniel 9226:
1.229 veillard 9227: return ret;
1.20 daniel 9228: }
9229:
1.229 veillard 9230: /************************************************************************
9231: * *
9232: * Front ends when parsing from memory *
9233: * *
9234: ************************************************************************/
1.32 daniel 9235:
1.50 daniel 9236: /**
1.181 daniel 9237: * xmlCreateMemoryParserCtxt:
1.229 veillard 9238: * @buffer: a pointer to a char array
9239: * @size: the size of the array
1.50 daniel 9240: *
1.69 daniel 9241: * Create a parser context for an XML in-memory document.
1.50 daniel 9242: *
1.69 daniel 9243: * Returns the new parser context or NULL
1.20 daniel 9244: */
1.69 daniel 9245: xmlParserCtxtPtr
9246: xmlCreateMemoryParserCtxt(char *buffer, int size) {
1.20 daniel 9247: xmlParserCtxtPtr ctxt;
1.40 daniel 9248: xmlParserInputPtr input;
1.209 veillard 9249: xmlParserInputBufferPtr buf;
1.40 daniel 9250:
1.229 veillard 9251: if (buffer == NULL)
9252: return(NULL);
9253: if (size <= 0)
1.181 daniel 9254: return(NULL);
1.40 daniel 9255:
1.97 daniel 9256: ctxt = xmlNewParserCtxt();
1.181 daniel 9257: if (ctxt == NULL)
1.20 daniel 9258: return(NULL);
1.97 daniel 9259:
1.209 veillard 9260: buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
9261: if (buf == NULL) return(NULL);
9262:
1.96 daniel 9263: input = xmlNewInputStream(ctxt);
1.40 daniel 9264: if (input == NULL) {
1.97 daniel 9265: xmlFreeParserCtxt(ctxt);
1.40 daniel 9266: return(NULL);
9267: }
1.20 daniel 9268:
1.40 daniel 9269: input->filename = NULL;
1.209 veillard 9270: input->buf = buf;
9271: input->base = input->buf->buffer->content;
9272: input->cur = input->buf->buffer->content;
1.20 daniel 9273:
1.40 daniel 9274: inputPush(ctxt, input);
1.69 daniel 9275: return(ctxt);
9276: }
9277:
9278: /**
1.181 daniel 9279: * xmlSAXParseMemory:
1.69 daniel 9280: * @sax: the SAX handler block
9281: * @buffer: an pointer to a char array
1.127 daniel 9282: * @size: the size of the array
9283: * @recovery: work in recovery mode, i.e. tries to read not Well Formed
1.69 daniel 9284: * documents
9285: *
9286: * parse an XML in-memory block and use the given SAX function block
9287: * to handle the parsing callback. If sax is NULL, fallback to the default
9288: * DOM tree building routines.
9289: *
9290: * Returns the resulting document tree
9291: */
9292: xmlDocPtr
9293: xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
9294: xmlDocPtr ret;
9295: xmlParserCtxtPtr ctxt;
9296:
9297: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9298: if (ctxt == NULL) return(NULL);
1.74 daniel 9299: if (sax != NULL) {
9300: ctxt->sax = sax;
9301: ctxt->userData = NULL;
9302: }
1.20 daniel 9303:
9304: xmlParseDocument(ctxt);
1.40 daniel 9305:
1.72 daniel 9306: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 9307: else {
9308: ret = NULL;
1.72 daniel 9309: xmlFreeDoc(ctxt->myDoc);
9310: ctxt->myDoc = NULL;
1.59 daniel 9311: }
1.86 daniel 9312: if (sax != NULL)
9313: ctxt->sax = NULL;
1.69 daniel 9314: xmlFreeParserCtxt(ctxt);
1.16 daniel 9315:
1.9 httpng 9316: return(ret);
1.17 daniel 9317: }
9318:
1.55 daniel 9319: /**
1.181 daniel 9320: * xmlParseMemory:
1.68 daniel 9321: * @buffer: an pointer to a char array
1.55 daniel 9322: * @size: the size of the array
9323: *
9324: * parse an XML in-memory block and build a tree.
9325: *
1.68 daniel 9326: * Returns the resulting document tree
1.55 daniel 9327: */
9328:
9329: xmlDocPtr xmlParseMemory(char *buffer, int size) {
1.59 daniel 9330: return(xmlSAXParseMemory(NULL, buffer, size, 0));
9331: }
9332:
9333: /**
1.181 daniel 9334: * xmlRecoverMemory:
1.68 daniel 9335: * @buffer: an pointer to a char array
1.59 daniel 9336: * @size: the size of the array
9337: *
9338: * parse an XML in-memory block and build a tree.
9339: * In the case the document is not Well Formed, a tree is built anyway
9340: *
1.68 daniel 9341: * Returns the resulting document tree
1.59 daniel 9342: */
9343:
9344: xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
9345: return(xmlSAXParseMemory(NULL, buffer, size, 1));
1.17 daniel 9346: }
9347:
1.123 daniel 9348: /**
9349: * xmlSAXUserParseMemory:
9350: * @sax: a SAX handler
9351: * @user_data: The user data returned on SAX callbacks
9352: * @buffer: an in-memory XML document input
1.127 daniel 9353: * @size: the length of the XML document in bytes
1.123 daniel 9354: *
9355: * A better SAX parsing routine.
9356: * parse an XML in-memory buffer and call the given SAX handler routines.
9357: *
9358: * Returns 0 in case of success or a error number otherwise
9359: */
9360: int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
9361: char *buffer, int size) {
9362: int ret = 0;
9363: xmlParserCtxtPtr ctxt;
1.218 veillard 9364: xmlSAXHandlerPtr oldsax = NULL;
1.123 daniel 9365:
9366: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9367: if (ctxt == NULL) return -1;
1.216 veillard 9368: if (sax != NULL) {
9369: oldsax = ctxt->sax;
9370: ctxt->sax = sax;
9371: }
1.123 daniel 9372: ctxt->userData = user_data;
9373:
9374: xmlParseDocument(ctxt);
9375:
9376: if (ctxt->wellFormed)
9377: ret = 0;
9378: else {
9379: if (ctxt->errNo != 0)
9380: ret = ctxt->errNo;
9381: else
9382: ret = -1;
9383: }
1.216 veillard 9384: if (sax != NULL) {
9385: ctxt->sax = oldsax;
9386: }
1.123 daniel 9387: xmlFreeParserCtxt(ctxt);
9388:
9389: return ret;
9390: }
9391:
1.132 daniel 9392: /**
1.229 veillard 9393: * xmlCreateDocParserCtxt:
9394: * @cur: a pointer to an array of xmlChar
9395: *
9396: * Creates a parser context for an XML in-memory document.
1.132 daniel 9397: *
1.229 veillard 9398: * Returns the new parser context or NULL
1.132 daniel 9399: */
1.229 veillard 9400: xmlParserCtxtPtr
9401: xmlCreateDocParserCtxt(xmlChar *cur) {
9402: int len;
1.132 daniel 9403:
1.229 veillard 9404: if (cur == NULL)
9405: return(NULL);
9406: len = xmlStrlen(cur);
9407: return(xmlCreateMemoryParserCtxt((char *)cur, len));
1.132 daniel 9408: }
1.98 daniel 9409:
1.50 daniel 9410: /**
1.229 veillard 9411: * xmlSAXParseDoc:
9412: * @sax: the SAX handler block
9413: * @cur: a pointer to an array of xmlChar
9414: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9415: * documents
1.50 daniel 9416: *
1.229 veillard 9417: * parse an XML in-memory document and build a tree.
9418: * It use the given SAX function block to handle the parsing callback.
9419: * If sax is NULL, fallback to the default DOM tree building routines.
1.50 daniel 9420: *
1.229 veillard 9421: * Returns the resulting document tree
1.32 daniel 9422: */
9423:
1.229 veillard 9424: xmlDocPtr
9425: xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
9426: xmlDocPtr ret;
9427: xmlParserCtxtPtr ctxt;
9428:
9429: if (cur == NULL) return(NULL);
1.32 daniel 9430:
9431:
1.229 veillard 9432: ctxt = xmlCreateDocParserCtxt(cur);
9433: if (ctxt == NULL) return(NULL);
9434: if (sax != NULL) {
9435: ctxt->sax = sax;
9436: ctxt->userData = NULL;
9437: }
1.32 daniel 9438:
1.229 veillard 9439: xmlParseDocument(ctxt);
9440: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9441: else {
9442: ret = NULL;
9443: xmlFreeDoc(ctxt->myDoc);
9444: ctxt->myDoc = NULL;
9445: }
9446: if (sax != NULL)
9447: ctxt->sax = NULL;
9448: xmlFreeParserCtxt(ctxt);
9449:
9450: return(ret);
1.32 daniel 9451: }
9452:
1.50 daniel 9453: /**
1.229 veillard 9454: * xmlParseDoc:
9455: * @cur: a pointer to an array of xmlChar
1.50 daniel 9456: *
1.229 veillard 9457: * parse an XML in-memory document and build a tree.
1.50 daniel 9458: *
1.229 veillard 9459: * Returns the resulting document tree
1.32 daniel 9460: */
9461:
1.229 veillard 9462: xmlDocPtr
9463: xmlParseDoc(xmlChar *cur) {
9464: return(xmlSAXParseDoc(NULL, cur, 0));
9465: }
1.32 daniel 9466:
9467:
1.229 veillard 9468: /************************************************************************
9469: * *
9470: * Miscellaneous *
9471: * *
9472: ************************************************************************/
1.32 daniel 9473:
/**
 * xmlCleanupParser:
 *
 * Cleanup function for the XML parser. It tries to reclaim all
 * parsing related global memory allocated for the parser processing:
 * it runs the cleanup routines of the character encoding handlers and
 * of the predefined entities.
 * It doesn't deallocate any document related memory. Calling this
 * function should not prevent reusing the parser.
 */

void
xmlCleanupParser(void)
{
    /* Encoding handlers first, then the predefined entities. */
    xmlCleanupCharEncodingHandlers();
    xmlCleanupPredefinedEntities();
}
1.220 veillard 9488:
9489: /**
9490: * xmlPedanticParserDefault:
9491: * @val: int 0 or 1
9492: *
9493: * Set and return the previous value for enabling pedantic warnings.
9494: *
9495: * Returns the last value for 0 for no substitution, 1 for substitution.
9496: */
9497:
9498: int
9499: xmlPedanticParserDefault(int val) {
9500: int old = xmlPedanticParserDefaultValue;
9501:
9502: xmlPedanticParserDefaultValue = val;
9503: return(old);
9504: }
1.98 daniel 9505:
9506: /**
1.181 daniel 9507: * xmlSubstituteEntitiesDefault:
1.98 daniel 9508: * @val: int 0 or 1
9509: *
9510: * Set and return the previous value for default entity support.
9511: * Initially the parser always keep entity references instead of substituting
9512: * entity values in the output. This function has to be used to change the
9513: * default parser behaviour
9514: * SAX::subtituteEntities() has to be used for changing that on a file by
9515: * file basis.
9516: *
9517: * Returns the last value for 0 for no substitution, 1 for substitution.
9518: */
9519:
9520: int
9521: xmlSubstituteEntitiesDefault(int val) {
9522: int old = xmlSubstituteEntitiesDefaultValue;
9523:
9524: xmlSubstituteEntitiesDefaultValue = val;
1.180 daniel 9525: return(old);
9526: }
9527:
9528: /**
9529: * xmlKeepBlanksDefault:
9530: * @val: int 0 or 1
9531: *
9532: * Set and return the previous value for default blanks text nodes support.
9533: * The 1.x version of the parser used an heuristic to try to detect
9534: * ignorable white spaces. As a result the SAX callback was generating
9535: * ignorableWhitespace() callbacks instead of characters() one, and when
9536: * using the DOM output text nodes containing those blanks were not generated.
9537: * The 2.x and later version will switch to the XML standard way and
9538: * ignorableWhitespace() are only generated when running the parser in
9539: * validating mode and when the current element doesn't allow CDATA or
9540: * mixed content.
9541: * This function is provided as a way to force the standard behaviour
9542: * on 1.X libs and to switch back to the old mode for compatibility when
9543: * running 1.X client code on 2.X . Upgrade of 1.X code should be done
9544: * by using xmlIsBlankNode() commodity function to detect the "empty"
9545: * nodes generated.
9546: * This value also affect autogeneration of indentation when saving code
9547: * if blanks sections are kept, indentation is not generated.
9548: *
9549: * Returns the last value for 0 for no substitution, 1 for substitution.
9550: */
9551:
9552: int
9553: xmlKeepBlanksDefault(int val) {
9554: int old = xmlKeepBlanksDefaultValue;
9555:
9556: xmlKeepBlanksDefaultValue = val;
9557: xmlIndentTreeOutput = !val;
1.98 daniel 9558: return(old);
9559: }
1.77 daniel 9560:
Webmaster