Annotation of XML/parser.c, revision 1.249
1.1 veillard 1: /*
1.229 veillard 2: * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3: * implemented on top of the SAX interfaces
1.15 veillard 4: *
1.222 veillard 5: * References:
6: * The XML specification:
7: * http://www.w3.org/TR/REC-xml
8: * Original 1.0 version:
9: * http://www.w3.org/TR/1998/REC-xml-19980210
10: * XML second edition working draft
11: * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12: *
1.229 veillard 13: * Okay this is a big file, the parser core is around 7000 lines, then it
14: * is followed by the progressive parser top routines, then the various
15: * high level APIs to call the parser and a few miscellaneous functions.
16: * A number of helper functions and deprecated ones have been moved to
17: * parserInternals.c to reduce this file size.
18: * As much as possible the functions are associated with their relative
19: * production in the XML specification. A few productions defining the
20: * different ranges of characters are actually implemented either in
21: * parserInternals.h or parserInternals.c
22: * The DOM tree build is realized from the default SAX callbacks in
23: * the module SAX.c.
24: * The routines doing the validation checks are in valid.c and called either
25: * from the SAX callbacks or as standalone functions using a preparsed
26: * document.
27: *
1.15 veillard 28: * See Copyright for the status of this software.
29: *
1.60 daniel 30: * Daniel.Veillard@w3.org
1.246 veillard 31: *
32: * 14 Nov 2000 ht - truncated definitions of xmlSubstituteEntitiesDefaultValue
33: * and xmlDoValidityCheckingDefaultValue for VMS
1.1 veillard 34: */
35:
1.26 daniel 36: #ifdef WIN32
1.138 daniel 37: #include "win32config.h"
1.226 veillard 38: #define XML_DIR_SEP '\\'
1.26 daniel 39: #else
1.121 daniel 40: #include "config.h"
1.226 veillard 41: #define XML_DIR_SEP '/'
1.26 daniel 42: #endif
1.121 daniel 43:
1.1 veillard 44: #include <stdio.h>
1.238 veillard 45: #include <stdlib.h>
1.204 veillard 46: #include <string.h>
1.238 veillard 47: #include <libxml/xmlmemory.h>
48: #include <libxml/tree.h>
49: #include <libxml/parser.h>
50: #include <libxml/parserInternals.h>
51: #include <libxml/valid.h>
52: #include <libxml/entities.h>
53: #include <libxml/xmlerror.h>
54: #include <libxml/encoding.h>
55: #include <libxml/xmlIO.h>
56: #include <libxml/uri.h>
57:
1.121 daniel 58: #ifdef HAVE_CTYPE_H
1.1 veillard 59: #include <ctype.h>
1.121 daniel 60: #endif
61: #ifdef HAVE_STDLIB_H
1.50 daniel 62: #include <stdlib.h>
1.121 daniel 63: #endif
64: #ifdef HAVE_SYS_STAT_H
1.9 httpng 65: #include <sys/stat.h>
1.121 daniel 66: #endif
1.9 httpng 67: #ifdef HAVE_FCNTL_H
68: #include <fcntl.h>
69: #endif
1.10 httpng 70: #ifdef HAVE_UNISTD_H
71: #include <unistd.h>
72: #endif
1.20 daniel 73: #ifdef HAVE_ZLIB_H
74: #include <zlib.h>
75: #endif
1.1 veillard 76:
77:
1.140 daniel 78: #define XML_PARSER_BIG_BUFFER_SIZE 1000
79: #define XML_PARSER_BUFFER_SIZE 100
80:
1.229 veillard 81: /*
82: * Various global defaults for parsing
83: */
/*
 * All of the variables below are non-static file-scope ints, so they are
 * visible to other translation units.
 */
/*
 * Initialised to 1.
 * NOTE(review): presumably the default for warning reporting; it is not
 * referenced in the visible part of this file -- confirm against its users.
 */
1.160 daniel 84: int xmlGetWarningsDefaultValue = 1;
/*
 * When non-zero, the entity handling code of this file traces pushed and
 * popped inputs and decoded entity references through xmlGenericError().
 */
1.220 veillard 85: int xmlParserDebugEntities = 0;
/*
 * On VMS the two following variables are defined under shortened names and
 * the full-length names are mapped onto them with macros; everywhere else
 * the full-length names are defined directly. Both start at 0.
 * NOTE(review): presumably the defaults for entity substitution and for
 * validation -- confirm against the code that reads them.
 */
1.246 veillard 86: #ifdef VMS
87: int xmlSubstituteEntitiesDefaultVal = 0;
88: #define xmlSubstituteEntitiesDefaultValue xmlSubstituteEntitiesDefaultVal
89: int xmlDoValidityCheckingDefaultVal = 0;
90: #define xmlDoValidityCheckingDefaultValue xmlDoValidityCheckingDefaultVal
91: #else
1.229 veillard 92: int xmlSubstituteEntitiesDefaultValue = 0;
93: int xmlDoValidityCheckingDefaultValue = 0;
1.246 veillard 94: #endif
/*
 * Initialised to 0 and 1 respectively.
 * NOTE(review): neither is read in the visible part of this file; their
 * exact effect should be confirmed where the parser context is set up.
 */
1.229 veillard 95: int xmlPedanticParserDefaultValue = 0;
96: int xmlKeepBlanksDefaultValue = 1;
1.86 daniel 97:
1.139 daniel 98: /*
99: * List of XML prefixed PI allowed by W3C specs
100: *
 * The array holds C strings and is terminated by a NULL entry, so it can
 * be walked without knowing its length.
 */
101:
102: const char *xmlW3CPIs[] = {
103: "xml-stylesheet",
104: NULL
105: };
1.91 daniel 106:
1.229 veillard 107: /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
1.151 daniel 108: void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
109: xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
110: const xmlChar **str);
1.91 daniel 111:
112:
1.45 daniel 113: /************************************************************************
114: * *
115: * Parser stacks related functions and macros *
116: * *
117: ************************************************************************/
1.79 daniel 118:
1.135 daniel 119: xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
120: const xmlChar ** str);
1.79 daniel 121:
1.1 veillard 122: /*
1.40 daniel 123: * Generic function for accessing stacks in the Parser Context
1.1 veillard 124: */
125:
1.140 daniel 126: #define PUSH_AND_POP(scope, type, name) \
127: scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
1.31 daniel 128: if (ctxt->name##Nr >= ctxt->name##Max) { \
129: ctxt->name##Max *= 2; \
1.204 veillard 130: ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
1.40 daniel 131: ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
132: if (ctxt->name##Tab == NULL) { \
1.241 veillard 133: xmlGenericError(xmlGenericErrorContext, \
134: "realloc failed !\n"); \
1.145 daniel 135: return(0); \
1.31 daniel 136: } \
137: } \
1.40 daniel 138: ctxt->name##Tab[ctxt->name##Nr] = value; \
139: ctxt->name = value; \
140: return(ctxt->name##Nr++); \
1.31 daniel 141: } \
1.140 daniel 142: scope type name##Pop(xmlParserCtxtPtr ctxt) { \
1.69 daniel 143: type ret; \
1.40 daniel 144: if (ctxt->name##Nr <= 0) return(0); \
145: ctxt->name##Nr--; \
1.50 daniel 146: if (ctxt->name##Nr > 0) \
147: ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
148: else \
149: ctxt->name = NULL; \
1.69 daniel 150: ret = ctxt->name##Tab[ctxt->name##Nr]; \
151: ctxt->name##Tab[ctxt->name##Nr] = 0; \
152: return(ret); \
1.31 daniel 153: } \
154:
1.229 veillard 155: /*
156: * Those macros actually generate the functions
157: */
1.140 daniel 158: PUSH_AND_POP(extern, xmlParserInputPtr, input)
159: PUSH_AND_POP(extern, xmlNodePtr, node)
160: PUSH_AND_POP(extern, xmlChar*, name)
1.40 daniel 161:
1.176 daniel 162: int spacePush(xmlParserCtxtPtr ctxt, int val) {
163: if (ctxt->spaceNr >= ctxt->spaceMax) {
164: ctxt->spaceMax *= 2;
1.204 veillard 165: ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1.176 daniel 166: ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
167: if (ctxt->spaceTab == NULL) {
1.241 veillard 168: xmlGenericError(xmlGenericErrorContext,
169: "realloc failed !\n");
1.176 daniel 170: return(0);
171: }
172: }
173: ctxt->spaceTab[ctxt->spaceNr] = val;
174: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
175: return(ctxt->spaceNr++);
176: }
177:
178: int spacePop(xmlParserCtxtPtr ctxt) {
179: int ret;
180: if (ctxt->spaceNr <= 0) return(0);
181: ctxt->spaceNr--;
182: if (ctxt->spaceNr > 0)
183: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
184: else
185: ctxt->space = NULL;
186: ret = ctxt->spaceTab[ctxt->spaceNr];
187: ctxt->spaceTab[ctxt->spaceNr] = -1;
188: return(ret);
189: }
190:
1.55 daniel 191: /*
192: * Macros for accessing the content. Those should be used only by the parser,
193: * and not exported.
194: *
1.229 veillard 195: * Dirty macros, i.e. one often needs to make assumptions about the context to
196: * use them
1.55 daniel 197: *
1.123 daniel 198: * CUR_PTR return the current pointer to the xmlChar to be parsed.
1.151 daniel 199: * To be used with extreme caution since operations consuming
200: * characters may move the input buffer to a different location !
1.123 daniel 201: * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1.151 daniel 202: * This should be used internally by the parser
1.55 daniel 203: * only to compare to ASCII values otherwise it would break when
204: * running with UTF-8 encoding.
1.229 veillard 205: * RAW same as CUR but in the input buffer, bypass any token
206: * extraction that may have been done
1.123 daniel 207: * NXT(n) returns the n'th next xmlChar. Same as CUR it should be used only
1.55 daniel 208: * to compare on ASCII based substring.
1.123 daniel 209: * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1.55 daniel 210: * strings within the parser.
211: *
1.77 daniel 212: * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1.55 daniel 213: *
214: * NEXT Skip to the next character, this does the proper decoding
215: * in UTF-8 mode. It also pop-up unfinished entities on the fly.
1.229 veillard 216: * NEXTL(l) Skip l xmlChars in the input buffer
217: * CUR_CHAR(l) returns the current unicode character (int), set l
218: * to the number of xmlChars used for the encoding [0-5].
219: * CUR_SCHAR same but operate on a string instead of the context
220: * COPY_BUF copy the current unicode char to the target buffer, increment
221: * the index
222: * GROW, SHRINK handling of input buffers
1.55 daniel 223: */
1.45 daniel 224:
/*
 * All the macros below expect a variable named ctxt (the parser context)
 * to be in scope at the point of use.
 */

/* current byte of the input buffer, or -1 while a token is pending */
#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
/* pending token if there is one, current byte of the input otherwise */
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
/* byte located val positions after the current one */
#define NXT(val) ctxt->input->cur[(val)]
/* the current position itself; usable as an lvalue */
#define CUR_PTR ctxt->input->cur

/*
 * Advance by val bytes, then hand over to the parameter entity handler if
 * a '%' is reached and pop the input if it is exhausted and cannot grow.
 */
#define SKIP(val) do { \
    ctxt->nbChars += (val),ctxt->input->cur += (val); \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
    /* DEPR if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); */\
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
        xmlPopInput(ctxt); \
  } while (0)

/* Shrink the input buffer, popping the input if nothing is left in it. */
#define SHRINK do { \
    xmlParserInputShrink(ctxt->input); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
        xmlPopInput(ctxt); \
  } while (0)

/* Grow the input buffer, popping the input if nothing is left in it. */
#define GROW do { \
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
        xmlPopInput(ctxt); \
  } while (0)

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * Advance by l bytes, keeping the line/column counters up to date and
 * dropping any pending token. The argument is parenthesized so that an
 * expression can be passed safely.
 */
#define NEXTL(l) do { \
    if (*(ctxt->input->cur) == '\n') { \
        ctxt->input->line++; ctxt->input->col = 1; \
    } else ctxt->input->col++; \
    ctxt->token = 0; ctxt->input->cur += (l); \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
    /* DEPR if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); */\
  } while (0)

/* l must be an lvalue: it receives the length of the character read */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * Append the character v, encoded on l bytes, to the buffer b at index i
 * and advance i. Wrapped in do { } while (0) so that it behaves as a single
 * statement (no dangling else); it must be followed by a semicolon.
 */
#define COPY_BUF(l,b,i,v) do { \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v); \
    else (i) += xmlCopyChar((l),&(b)[(i)],(v)); \
  } while (0)
1.151 daniel 272:
273: /**
1.229 veillard 274: * xmlSkipBlankChars:
1.151 daniel 275: * @ctxt: the XML parser context
276: *
1.229 veillard 277: * skip all blanks character found at that point in the input streams.
278: * It pops up finished entities in the process if allowable at that point.
279: *
280: * Returns the number of space chars skipped
1.151 daniel 281: */
1.55 daniel 282:
1.229 veillard 283: int
284: xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
285: int cur, res = 0;
1.201 daniel 286:
1.176 daniel 287: /*
1.229 veillard 288: * It's Okay to use CUR/NEXT here since all the blanks are on
289: * the ASCII range.
290: */
291: do {
292: cur = CUR;
293: while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
294: NEXT;
295: cur = CUR;
296: res++;
1.151 daniel 297: }
1.229 veillard 298: while ((cur == 0) && (ctxt->inputNr > 1) &&
299: (ctxt->instate != XML_PARSER_COMMENT)) {
1.168 daniel 300: xmlPopInput(ctxt);
1.229 veillard 301: cur = CUR;
302: }
1.222 veillard 303: /*
304: * Need to handle support of entities branching here
305: */
1.155 daniel 306: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1.229 veillard 307: /* DEPR if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); */
1.222 veillard 308: } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1.155 daniel 309: return(res);
1.152 daniel 310: }
311:
1.97 daniel 312: /************************************************************************
313: * *
1.229 veillard 314: * Commodity functions to handle entities *
1.97 daniel 315: * *
316: ************************************************************************/
1.40 daniel 317:
1.50 daniel 318: /**
319: * xmlPopInput:
320: * @ctxt: an XML parser context
321: *
1.40 daniel 322: * xmlPopInput: the current input pointed by ctxt->input came to an end
323: * pop it and return the next char.
1.45 daniel 324: *
1.123 daniel 325: * Returns the current xmlChar in the parser context
1.40 daniel 326: */
1.123 daniel 327: xmlChar
1.55 daniel 328: xmlPopInput(xmlParserCtxtPtr ctxt) {
1.40 daniel 329: if (ctxt->inputNr == 1) return(0); /* End of main Input */
1.220 veillard 330: if (xmlParserDebugEntities)
1.241 veillard 331: xmlGenericError(xmlGenericErrorContext,
332: "Popping input %d\n", ctxt->inputNr);
1.69 daniel 333: xmlFreeInputStream(inputPop(ctxt));
1.97 daniel 334: if ((*ctxt->input->cur == 0) &&
335: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
336: return(xmlPopInput(ctxt));
1.40 daniel 337: return(CUR);
338: }
339:
1.50 daniel 340: /**
1.229 veillard 341: * xmlPushInput:
1.174 daniel 342: * @ctxt: an XML parser context
1.229 veillard 343: * @input: an XML parser input fragment (entity, XML fragment ...).
1.174 daniel 344: *
1.229 veillard 345: * xmlPushInput: switch to a new input stream which is stacked on top
346: * of the previous one(s).
1.174 daniel 347: */
1.229 veillard 348: void
349: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
350: if (input == NULL) return;
1.174 daniel 351:
1.229 veillard 352: if (xmlParserDebugEntities) {
353: if ((ctxt->input != NULL) && (ctxt->input->filename))
1.241 veillard 354: xmlGenericError(xmlGenericErrorContext,
355: "%s(%d): ", ctxt->input->filename,
1.229 veillard 356: ctxt->input->line);
1.241 veillard 357: xmlGenericError(xmlGenericErrorContext,
358: "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1.229 veillard 359: }
360: inputPush(ctxt, input);
361: GROW;
1.174 daniel 362: }
1.97 daniel 363:
364: /**
365: * xmlParseCharRef:
366: * @ctxt: an XML parser context
367: *
368: * parse Reference declarations
369: *
370: * [66] CharRef ::= '&#' [0-9]+ ';' |
371: * '&#x' [0-9a-fA-F]+ ';'
372: *
1.98 daniel 373: * [ WFC: Legal Character ]
374: * Characters referred to using character references must match the
375: * production for Char.
376: *
1.135 daniel 377: * Returns the value parsed (as an int), 0 in case of error
1.77 daniel 378: */
1.97 daniel 379: int
380: xmlParseCharRef(xmlParserCtxtPtr ctxt) {
381: int val = 0;
1.222 veillard 382: int count = 0;
1.97 daniel 383:
1.111 daniel 384: if (ctxt->token != 0) {
385: val = ctxt->token;
386: ctxt->token = 0;
387: return(val);
388: }
1.222 veillard 389: /*
390: * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
391: */
1.152 daniel 392: if ((RAW == '&') && (NXT(1) == '#') &&
1.97 daniel 393: (NXT(2) == 'x')) {
394: SKIP(3);
1.222 veillard 395: GROW;
396: while (RAW != ';') { /* loop blocked by count */
397: if ((RAW >= '0') && (RAW <= '9') && (count < 20))
1.97 daniel 398: val = val * 16 + (CUR - '0');
1.222 veillard 399: else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1.97 daniel 400: val = val * 16 + (CUR - 'a') + 10;
1.222 veillard 401: else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1.97 daniel 402: val = val * 16 + (CUR - 'A') + 10;
403: else {
1.123 daniel 404: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1.97 daniel 405: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
406: ctxt->sax->error(ctxt->userData,
407: "xmlParseCharRef: invalid hexadecimal value\n");
408: ctxt->wellFormed = 0;
1.180 daniel 409: ctxt->disableSAX = 1;
1.97 daniel 410: val = 0;
411: break;
412: }
413: NEXT;
1.222 veillard 414: count++;
1.97 daniel 415: }
1.164 daniel 416: if (RAW == ';') {
417: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
418: ctxt->nbChars ++;
419: ctxt->input->cur++;
420: }
1.152 daniel 421: } else if ((RAW == '&') && (NXT(1) == '#')) {
1.97 daniel 422: SKIP(2);
1.222 veillard 423: GROW;
424: while (RAW != ';') { /* loop blocked by count */
425: if ((RAW >= '0') && (RAW <= '9') && (count < 20))
1.97 daniel 426: val = val * 10 + (CUR - '0');
427: else {
1.123 daniel 428: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1.97 daniel 429: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
430: ctxt->sax->error(ctxt->userData,
431: "xmlParseCharRef: invalid decimal value\n");
432: ctxt->wellFormed = 0;
1.180 daniel 433: ctxt->disableSAX = 1;
1.97 daniel 434: val = 0;
435: break;
436: }
437: NEXT;
1.222 veillard 438: count++;
1.97 daniel 439: }
1.164 daniel 440: if (RAW == ';') {
441: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
442: ctxt->nbChars ++;
443: ctxt->input->cur++;
444: }
1.97 daniel 445: } else {
1.123 daniel 446: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1.97 daniel 447: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.229 veillard 448: ctxt->sax->error(ctxt->userData,
449: "xmlParseCharRef: invalid value\n");
1.97 daniel 450: ctxt->wellFormed = 0;
1.180 daniel 451: ctxt->disableSAX = 1;
1.97 daniel 452: }
1.229 veillard 453:
454: /*
455: * [ WFC: Legal Character ]
456: * Characters referred to using character references must match the
457: * production for Char.
458: */
459: if (IS_CHAR(val)) {
460: return(val);
461: } else {
462: ctxt->errNo = XML_ERR_INVALID_CHAR;
1.97 daniel 463: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.229 veillard 464: ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
465: val);
1.97 daniel 466: ctxt->wellFormed = 0;
1.180 daniel 467: ctxt->disableSAX = 1;
1.97 daniel 468: }
1.229 veillard 469: return(0);
470: }
471:
472: /**
473: * xmlParseStringCharRef:
474: * @ctxt: an XML parser context
475: * @str: a pointer to an index in the string
476: *
477: * parse Reference declarations, variant parsing from a string rather
478: * than an an input flow.
479: *
480: * [66] CharRef ::= '&#' [0-9]+ ';' |
481: * '&#x' [0-9a-fA-F]+ ';'
482: *
483: * [ WFC: Legal Character ]
484: * Characters referred to using character references must match the
485: * production for Char.
486: *
487: * Returns the value parsed (as an int), 0 in case of error, str will be
488: * updated to the current value of the index
489: */
490: int
491: xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
492: const xmlChar *ptr;
493: xmlChar cur;
494: int val = 0;
1.98 daniel 495:
1.229 veillard 496: if ((str == NULL) || (*str == NULL)) return(0);
497: ptr = *str;
498: cur = *ptr;
499: if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
500: ptr += 3;
501: cur = *ptr;
502: while (cur != ';') { /* Non input consuming loop */
503: if ((cur >= '0') && (cur <= '9'))
504: val = val * 16 + (cur - '0');
505: else if ((cur >= 'a') && (cur <= 'f'))
506: val = val * 16 + (cur - 'a') + 10;
507: else if ((cur >= 'A') && (cur <= 'F'))
508: val = val * 16 + (cur - 'A') + 10;
509: else {
510: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
511: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
512: ctxt->sax->error(ctxt->userData,
513: "xmlParseStringCharRef: invalid hexadecimal value\n");
514: ctxt->wellFormed = 0;
515: ctxt->disableSAX = 1;
516: val = 0;
517: break;
518: }
519: ptr++;
520: cur = *ptr;
521: }
522: if (cur == ';')
523: ptr++;
524: } else if ((cur == '&') && (ptr[1] == '#')){
525: ptr += 2;
526: cur = *ptr;
527: while (cur != ';') { /* Non input consuming loops */
528: if ((cur >= '0') && (cur <= '9'))
529: val = val * 10 + (cur - '0');
530: else {
531: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
532: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
533: ctxt->sax->error(ctxt->userData,
534: "xmlParseStringCharRef: invalid decimal value\n");
535: ctxt->wellFormed = 0;
536: ctxt->disableSAX = 1;
537: val = 0;
538: break;
539: }
540: ptr++;
541: cur = *ptr;
542: }
543: if (cur == ';')
544: ptr++;
545: } else {
546: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1.97 daniel 547: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.229 veillard 548: ctxt->sax->error(ctxt->userData,
549: "xmlParseCharRef: invalid value\n");
1.97 daniel 550: ctxt->wellFormed = 0;
1.180 daniel 551: ctxt->disableSAX = 1;
1.229 veillard 552: return(0);
1.97 daniel 553: }
1.229 veillard 554: *str = ptr;
1.98 daniel 555:
556: /*
1.229 veillard 557: * [ WFC: Legal Character ]
558: * Characters referred to using character references must match the
559: * production for Char.
1.98 daniel 560: */
1.229 veillard 561: if (IS_CHAR(val)) {
562: return(val);
563: } else {
564: ctxt->errNo = XML_ERR_INVALID_CHAR;
1.98 daniel 565: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.229 veillard 566: ctxt->sax->error(ctxt->userData,
567: "CharRef: invalid xmlChar value %d\n", val);
1.98 daniel 568: ctxt->wellFormed = 0;
1.180 daniel 569: ctxt->disableSAX = 1;
1.98 daniel 570: }
1.229 veillard 571: return(0);
1.96 daniel 572: }
573:
574: /**
575: * xmlParserHandlePEReference:
576: * @ctxt: the parser context
577: *
578: * [69] PEReference ::= '%' Name ';'
579: *
1.98 daniel 580: * [ WFC: No Recursion ]
1.229 veillard 581: * A parsed entity must not contain a recursive
1.98 daniel 582: * reference to itself, either directly or indirectly.
583: *
584: * [ WFC: Entity Declared ]
585: * In a document without any DTD, a document with only an internal DTD
586: * subset which contains no parameter entity references, or a document
587: * with "standalone='yes'", ... ... The declaration of a parameter
588: * entity must precede any reference to it...
589: *
590: * [ VC: Entity Declared ]
591: * In a document with an external subset or external parameter entities
592: * with "standalone='no'", ... ... The declaration of a parameter entity
593: * must precede any reference to it...
594: *
595: * [ WFC: In DTD ]
596: * Parameter-entity references may only appear in the DTD.
597: * NOTE: misleading but this is handled.
598: *
599: * A PEReference may have been detected in the current input stream
1.96 daniel 600: * the handling is done accordingly to
601: * http://www.w3.org/TR/REC-xml#entproc
602: * i.e.
603: * - Included in literal in entity values
604: * - Included as Paraemeter Entity reference within DTDs
605: */
606: void
607: xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 608: xmlChar *name;
1.96 daniel 609: xmlEntityPtr entity = NULL;
610: xmlParserInputPtr input;
611:
1.126 daniel 612: if (ctxt->token != 0) {
613: return;
614: }
1.152 daniel 615: if (RAW != '%') return;
1.96 daniel 616: switch(ctxt->instate) {
1.109 daniel 617: case XML_PARSER_CDATA_SECTION:
618: return;
1.97 daniel 619: case XML_PARSER_COMMENT:
620: return;
1.140 daniel 621: case XML_PARSER_START_TAG:
622: return;
623: case XML_PARSER_END_TAG:
624: return;
1.96 daniel 625: case XML_PARSER_EOF:
1.123 daniel 626: ctxt->errNo = XML_ERR_PEREF_AT_EOF;
1.96 daniel 627: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
628: ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
629: ctxt->wellFormed = 0;
1.180 daniel 630: ctxt->disableSAX = 1;
1.96 daniel 631: return;
632: case XML_PARSER_PROLOG:
1.140 daniel 633: case XML_PARSER_START:
634: case XML_PARSER_MISC:
1.123 daniel 635: ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
1.96 daniel 636: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
637: ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
638: ctxt->wellFormed = 0;
1.180 daniel 639: ctxt->disableSAX = 1;
1.96 daniel 640: return;
1.97 daniel 641: case XML_PARSER_ENTITY_DECL:
1.96 daniel 642: case XML_PARSER_CONTENT:
643: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 644: case XML_PARSER_PI:
1.168 daniel 645: case XML_PARSER_SYSTEM_LITERAL:
1.96 daniel 646: /* we just ignore it there */
647: return;
648: case XML_PARSER_EPILOG:
1.123 daniel 649: ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
1.96 daniel 650: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.97 daniel 651: ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
1.96 daniel 652: ctxt->wellFormed = 0;
1.180 daniel 653: ctxt->disableSAX = 1;
1.96 daniel 654: return;
1.97 daniel 655: case XML_PARSER_ENTITY_VALUE:
656: /*
657: * NOTE: in the case of entity values, we don't do the
1.127 daniel 658: * substitution here since we need the literal
1.97 daniel 659: * entity value to be able to save the internal
660: * subset of the document.
1.222 veillard 661: * This will be handled by xmlStringDecodeEntities
1.97 daniel 662: */
663: return;
1.96 daniel 664: case XML_PARSER_DTD:
1.98 daniel 665: /*
666: * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
667: * In the internal DTD subset, parameter-entity references
668: * can occur only where markup declarations can occur, not
669: * within markup declarations.
670: * In that case this is handled in xmlParseMarkupDecl
671: */
672: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
673: return;
1.245 veillard 674: break;
675: case XML_PARSER_IGNORE:
676: return;
1.96 daniel 677: }
678:
679: NEXT;
680: name = xmlParseName(ctxt);
1.220 veillard 681: if (xmlParserDebugEntities)
1.241 veillard 682: xmlGenericError(xmlGenericErrorContext,
683: "PE Reference: %s\n", name);
1.96 daniel 684: if (name == NULL) {
1.123 daniel 685: ctxt->errNo = XML_ERR_PEREF_NO_NAME;
1.96 daniel 686: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
687: ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
688: ctxt->wellFormed = 0;
1.180 daniel 689: ctxt->disableSAX = 1;
1.96 daniel 690: } else {
1.152 daniel 691: if (RAW == ';') {
1.96 daniel 692: NEXT;
1.98 daniel 693: if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
694: entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.96 daniel 695: if (entity == NULL) {
1.98 daniel 696:
697: /*
698: * [ WFC: Entity Declared ]
699: * In a document without any DTD, a document with only an
700: * internal DTD subset which contains no parameter entity
701: * references, or a document with "standalone='yes'", ...
702: * ... The declaration of a parameter entity must precede
703: * any reference to it...
704: */
705: if ((ctxt->standalone == 1) ||
706: ((ctxt->hasExternalSubset == 0) &&
707: (ctxt->hasPErefs == 0))) {
708: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
709: ctxt->sax->error(ctxt->userData,
710: "PEReference: %%%s; not found\n", name);
711: ctxt->wellFormed = 0;
1.180 daniel 712: ctxt->disableSAX = 1;
1.98 daniel 713: } else {
714: /*
715: * [ VC: Entity Declared ]
716: * In a document with an external subset or external
717: * parameter entities with "standalone='no'", ...
718: * ... The declaration of a parameter entity must precede
719: * any reference to it...
720: */
1.220 veillard 721: if ((!ctxt->disableSAX) &&
722: (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
1.212 veillard 723: ctxt->vctxt.error(ctxt->vctxt.userData,
724: "PEReference: %%%s; not found\n", name);
1.220 veillard 725: } else if ((!ctxt->disableSAX) &&
726: (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1.98 daniel 727: ctxt->sax->warning(ctxt->userData,
728: "PEReference: %%%s; not found\n", name);
729: ctxt->valid = 0;
730: }
1.96 daniel 731: } else {
1.159 daniel 732: if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
733: (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
1.96 daniel 734: /*
1.229 veillard 735: * handle the extra spaces added before and after
1.96 daniel 736: * c.f. http://www.w3.org/TR/REC-xml#as-PE
1.229 veillard 737: * this is done independantly.
1.96 daniel 738: */
739: input = xmlNewEntityInputStream(ctxt, entity);
740: xmlPushInput(ctxt, input);
1.164 daniel 741: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
742: (RAW == '<') && (NXT(1) == '?') &&
743: (NXT(2) == 'x') && (NXT(3) == 'm') &&
744: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 745: xmlParseTextDecl(ctxt);
1.164 daniel 746: }
747: if (ctxt->token == 0)
748: ctxt->token = ' ';
1.96 daniel 749: } else {
750: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
751: ctxt->sax->error(ctxt->userData,
752: "xmlHandlePEReference: %s is not a parameter entity\n",
753: name);
754: ctxt->wellFormed = 0;
1.180 daniel 755: ctxt->disableSAX = 1;
1.96 daniel 756: }
757: }
758: } else {
1.123 daniel 759: ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
1.96 daniel 760: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
761: ctxt->sax->error(ctxt->userData,
762: "xmlHandlePEReference: expecting ';'\n");
763: ctxt->wellFormed = 0;
1.180 daniel 764: ctxt->disableSAX = 1;
1.96 daniel 765: }
1.119 daniel 766: xmlFree(name);
1.97 daniel 767: }
768: }
769:
/*
 * Macro used to grow the current buffer.
 *
 * Doubles the size of the xmlChar buffer @buffer, whose allocated size is
 * expected in the companion variable buffer##_size. It can only be used
 * in a function returning a pointer: if the reallocation fails the error
 * is reported with perror(), the buffer is released (it would be leaked
 * otherwise since the enclosing function is left at once) and the
 * enclosing function returns NULL. The size is only updated once the
 * reallocation succeeded.
 */
#define growBuffer(buffer) { \
    xmlChar *buffer##_grown = (xmlChar *) \
        xmlRealloc(buffer, 2 * buffer##_size * sizeof(xmlChar)); \
    if (buffer##_grown == NULL) { \
        perror("realloc failed"); \
        xmlFree(buffer); \
        return(NULL); \
    } \
    buffer = buffer##_grown; \
    buffer##_size *= 2; \
}
1.77 daniel 782:
783: /**
1.135 daniel 784: * xmlStringDecodeEntities:
785: * @ctxt: the parser context
786: * @str: the input string
787: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
788: * @end: an end marker xmlChar, 0 if none
789: * @end2: an end marker xmlChar, 0 if none
790: * @end3: an end marker xmlChar, 0 if none
791: *
1.222 veillard 792: * Takes an entity string content and processes it to do the adequate substitutions.
793: *
1.135 daniel 794: * [67] Reference ::= EntityRef | CharRef
795: *
796: * [69] PEReference ::= '%' Name ';'
797: *
798: * Returns A newly allocated string with the substitution done. The caller
799: * must deallocate it !
800: */
801: xmlChar *
802: xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
803: xmlChar end, xmlChar end2, xmlChar end3) {
804: xmlChar *buffer = NULL;
805: int buffer_size = 0;
806:
807: xmlChar *current = NULL;
808: xmlEntityPtr ent;
1.176 daniel 809: int c,l;
810: int nbchars = 0;
1.135 daniel 811:
1.211 veillard 812: if (str == NULL)
813: return(NULL);
814:
1.185 daniel 815: if (ctxt->depth > 40) {
1.230 veillard 816: ctxt->errNo = XML_ERR_ENTITY_LOOP;
1.185 daniel 817: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
818: ctxt->sax->error(ctxt->userData,
819: "Detected entity reference loop\n");
820: ctxt->wellFormed = 0;
821: ctxt->disableSAX = 1;
822: return(NULL);
823: }
824:
1.135 daniel 825: /*
826: * allocate a translation buffer.
827: */
1.140 daniel 828: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.135 daniel 829: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
830: if (buffer == NULL) {
831: perror("xmlDecodeEntities: malloc failed");
832: return(NULL);
833: }
834:
835: /*
836: * Ok loop until we reach one of the ending char or a size limit.
1.222 veillard 837: * we are operating on already parsed values.
1.135 daniel 838: */
1.176 daniel 839: c = CUR_SCHAR(str, l);
1.222 veillard 840: while ((c != 0) && (c != end) && /* non input consuming loop */
841: (c != end2) && (c != end3)) {
1.135 daniel 842:
1.176 daniel 843: if (c == 0) break;
844: if ((c == '&') && (str[1] == '#')) {
1.135 daniel 845: int val = xmlParseStringCharRef(ctxt, &str);
1.176 daniel 846: if (val != 0) {
847: COPY_BUF(0,buffer,nbchars,val);
848: }
849: } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1.220 veillard 850: if (xmlParserDebugEntities)
1.241 veillard 851: xmlGenericError(xmlGenericErrorContext,
852: "String decoding Entity Reference: %.30s\n",
1.220 veillard 853: str);
1.135 daniel 854: ent = xmlParseStringEntityRef(ctxt, &str);
1.222 veillard 855: if ((ent != NULL) &&
856: (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1.219 veillard 857: if (ent->content != NULL) {
858: COPY_BUF(0,buffer,nbchars,ent->content[0]);
859: } else {
860: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
861: ctxt->sax->error(ctxt->userData,
862: "internal error entity has no content\n");
863: }
864: } else if ((ent != NULL) && (ent->content != NULL)) {
1.185 daniel 865: xmlChar *rep;
866:
867: ctxt->depth++;
868: rep = xmlStringDecodeEntities(ctxt, ent->content, what,
869: 0, 0, 0);
870: ctxt->depth--;
871: if (rep != NULL) {
872: current = rep;
1.222 veillard 873: while (*current != 0) { /* non input consuming loop */
1.185 daniel 874: buffer[nbchars++] = *current++;
875: if (nbchars >
876: buffer_size - XML_PARSER_BUFFER_SIZE) {
877: growBuffer(buffer);
878: }
1.135 daniel 879: }
1.185 daniel 880: xmlFree(rep);
1.135 daniel 881: }
882: } else if (ent != NULL) {
883: int i = xmlStrlen(ent->name);
884: const xmlChar *cur = ent->name;
885:
1.176 daniel 886: buffer[nbchars++] = '&';
887: if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 888: growBuffer(buffer);
889: }
890: for (;i > 0;i--)
1.176 daniel 891: buffer[nbchars++] = *cur++;
892: buffer[nbchars++] = ';';
1.135 daniel 893: }
1.176 daniel 894: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1.220 veillard 895: if (xmlParserDebugEntities)
1.241 veillard 896: xmlGenericError(xmlGenericErrorContext,
897: "String decoding PE Reference: %.30s\n", str);
1.135 daniel 898: ent = xmlParseStringPEReference(ctxt, &str);
899: if (ent != NULL) {
1.185 daniel 900: xmlChar *rep;
901:
902: ctxt->depth++;
903: rep = xmlStringDecodeEntities(ctxt, ent->content, what,
904: 0, 0, 0);
905: ctxt->depth--;
906: if (rep != NULL) {
907: current = rep;
1.222 veillard 908: while (*current != 0) { /* non input consuming loop */
1.185 daniel 909: buffer[nbchars++] = *current++;
910: if (nbchars >
911: buffer_size - XML_PARSER_BUFFER_SIZE) {
912: growBuffer(buffer);
913: }
1.135 daniel 914: }
1.185 daniel 915: xmlFree(rep);
1.135 daniel 916: }
917: }
918: } else {
1.176 daniel 919: COPY_BUF(l,buffer,nbchars,c);
920: str += l;
921: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 922: growBuffer(buffer);
923: }
924: }
1.176 daniel 925: c = CUR_SCHAR(str, l);
1.135 daniel 926: }
1.229 veillard 927: buffer[nbchars++] = 0;
928: return(buffer);
1.172 daniel 929: }
930:
1.229 veillard 931:
932: /************************************************************************
933: * *
1.123 daniel 934: * Commodity functions to handle xmlChars *
1.28 daniel 935: * *
936: ************************************************************************/
937:
1.50 daniel 938: /**
939: * xmlStrndup:
1.123 daniel 940: * @cur: the input xmlChar *
1.50 daniel 941: * @len: the len of @cur
942: *
1.123 daniel 943: * a strndup for array of xmlChar's
1.68 daniel 944: *
1.123 daniel 945: * Returns a new xmlChar * or NULL
1.1 veillard 946: */
1.123 daniel 947: xmlChar *
948: xmlStrndup(const xmlChar *cur, int len) {
1.135 daniel 949: xmlChar *ret;
950:
951: if ((cur == NULL) || (len < 0)) return(NULL);
1.204 veillard 952: ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1.1 veillard 953: if (ret == NULL) {
1.241 veillard 954: xmlGenericError(xmlGenericErrorContext,
955: "malloc of %ld byte failed\n",
1.123 daniel 956: (len + 1) * (long)sizeof(xmlChar));
1.1 veillard 957: return(NULL);
958: }
1.123 daniel 959: memcpy(ret, cur, len * sizeof(xmlChar));
1.1 veillard 960: ret[len] = 0;
961: return(ret);
962: }
963:
1.50 daniel 964: /**
965: * xmlStrdup:
1.123 daniel 966: * @cur: the input xmlChar *
1.50 daniel 967: *
1.152 daniel 968: * a strdup for array of xmlChar's. Since they are supposed to be
969: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
970: * a termination mark of '0'.
1.68 daniel 971: *
1.123 daniel 972: * Returns a new xmlChar * or NULL
1.1 veillard 973: */
1.123 daniel 974: xmlChar *
975: xmlStrdup(const xmlChar *cur) {
976: const xmlChar *p = cur;
1.1 veillard 977:
1.135 daniel 978: if (cur == NULL) return(NULL);
1.222 veillard 979: while (*p != 0) p++; /* non input consuming */
1.1 veillard 980: return(xmlStrndup(cur, p - cur));
981: }
982:
1.50 daniel 983: /**
984: * xmlCharStrndup:
985: * @cur: the input char *
986: * @len: the len of @cur
987: *
1.123 daniel 988: * a strndup for char's to xmlChar's
1.68 daniel 989: *
1.123 daniel 990: * Returns a new xmlChar * or NULL
1.45 daniel 991: */
992:
1.123 daniel 993: xmlChar *
1.55 daniel 994: xmlCharStrndup(const char *cur, int len) {
1.45 daniel 995: int i;
1.135 daniel 996: xmlChar *ret;
997:
998: if ((cur == NULL) || (len < 0)) return(NULL);
1.204 veillard 999: ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1.45 daniel 1000: if (ret == NULL) {
1.241 veillard 1001: xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1.123 daniel 1002: (len + 1) * (long)sizeof(xmlChar));
1.45 daniel 1003: return(NULL);
1004: }
1005: for (i = 0;i < len;i++)
1.123 daniel 1006: ret[i] = (xmlChar) cur[i];
1.45 daniel 1007: ret[len] = 0;
1008: return(ret);
1009: }
1010:
1.50 daniel 1011: /**
1012: * xmlCharStrdup:
1013: * @cur: the input char *
1014: * @len: the len of @cur
1015: *
1.123 daniel 1016: * a strdup for char's to xmlChar's
1.68 daniel 1017: *
1.123 daniel 1018: * Returns a new xmlChar * or NULL
1.45 daniel 1019: */
1020:
1.123 daniel 1021: xmlChar *
1.55 daniel 1022: xmlCharStrdup(const char *cur) {
1.45 daniel 1023: const char *p = cur;
1024:
1.135 daniel 1025: if (cur == NULL) return(NULL);
1.222 veillard 1026: while (*p != '\0') p++; /* non input consuming */
1.45 daniel 1027: return(xmlCharStrndup(cur, p - cur));
1028: }
1029:
1.50 daniel 1030: /**
1031: * xmlStrcmp:
1.123 daniel 1032: * @str1: the first xmlChar *
1033: * @str2: the second xmlChar *
1.50 daniel 1034: *
1.123 daniel 1035: * a strcmp for xmlChar's
1.68 daniel 1036: *
1037: * Returns the integer result of the comparison
1.14 veillard 1038: */
1039:
1.55 daniel 1040: int
1.123 daniel 1041: xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1.14 veillard 1042: register int tmp;
1043:
1.229 veillard 1044: if (str1 == str2) return(0);
1.135 daniel 1045: if (str1 == NULL) return(-1);
1046: if (str2 == NULL) return(1);
1.14 veillard 1047: do {
1.232 veillard 1048: tmp = *str1++ - *str2;
1.14 veillard 1049: if (tmp != 0) return(tmp);
1.232 veillard 1050: } while (*str2++ != 0);
1051: return 0;
1.14 veillard 1052: }
1053:
1.50 daniel 1054: /**
1.236 veillard 1055: * xmlStrEqual:
1056: * @str1: the first xmlChar *
1057: * @str2: the second xmlChar *
1058: *
1059: * Check if both string are equal of have same content
1060: * Should be a bit more readable and faster than xmlStrEqual()
1061: *
1062: * Returns 1 if they are equal, 0 if they are different
1063: */
1064:
1065: int
1066: xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1067: if (str1 == str2) return(1);
1068: if (str1 == NULL) return(0);
1069: if (str2 == NULL) return(0);
1070: do {
1071: if (*str1++ != *str2) return(0);
1072: } while (*str2++);
1073: return(1);
1074: }
1075:
1076: /**
1.50 daniel 1077: * xmlStrncmp:
1.123 daniel 1078: * @str1: the first xmlChar *
1079: * @str2: the second xmlChar *
1.50 daniel 1080: * @len: the max comparison length
1081: *
1.123 daniel 1082: * a strncmp for xmlChar's
1.68 daniel 1083: *
1084: * Returns the integer result of the comparison
1.14 veillard 1085: */
1086:
1.55 daniel 1087: int
1.123 daniel 1088: xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1.14 veillard 1089: register int tmp;
1090:
1091: if (len <= 0) return(0);
1.232 veillard 1092: if (str1 == str2) return(0);
1.135 daniel 1093: if (str1 == NULL) return(-1);
1094: if (str2 == NULL) return(1);
1.14 veillard 1095: do {
1.232 veillard 1096: tmp = *str1++ - *str2;
1097: if (tmp != 0 || --len == 0) return(tmp);
1098: } while (*str2++ != 0);
1099: return 0;
1100: }
1101:
1102: static xmlChar casemap[256] = {
1103: 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1104: 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1105: 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1106: 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1107: 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1108: 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1109: 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1110: 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1111: 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1112: 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1113: 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1114: 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1115: 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1116: 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1117: 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1118: 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1119: 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1120: 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1121: 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1122: 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1123: 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1124: 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1125: 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1126: 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1127: 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1128: 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1129: 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1130: 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1131: 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1132: 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1133: 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1134: 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1135: };
1136:
1137: /**
1138: * xmlStrcasecmp:
1139: * @str1: the first xmlChar *
1140: * @str2: the second xmlChar *
1141: *
1142: * a strcasecmp for xmlChar's
1143: *
1144: * Returns the integer result of the comparison
1145: */
1146:
1147: int
1148: xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1149: register int tmp;
1150:
1151: if (str1 == str2) return(0);
1152: if (str1 == NULL) return(-1);
1153: if (str2 == NULL) return(1);
1154: do {
1155: tmp = casemap[*str1++] - casemap[*str2];
1.14 veillard 1156: if (tmp != 0) return(tmp);
1.232 veillard 1157: } while (*str2++ != 0);
1158: return 0;
1159: }
1160:
1161: /**
1162: * xmlStrncasecmp:
1163: * @str1: the first xmlChar *
1164: * @str2: the second xmlChar *
1165: * @len: the max comparison length
1166: *
1167: * a strncasecmp for xmlChar's
1168: *
1169: * Returns the integer result of the comparison
1170: */
1171:
1172: int
1173: xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1174: register int tmp;
1175:
1176: if (len <= 0) return(0);
1177: if (str1 == str2) return(0);
1178: if (str1 == NULL) return(-1);
1179: if (str2 == NULL) return(1);
1180: do {
1181: tmp = casemap[*str1++] - casemap[*str2];
1182: if (tmp != 0 || --len == 0) return(tmp);
1183: } while (*str2++ != 0);
1184: return 0;
1.14 veillard 1185: }
1186:
1.50 daniel 1187: /**
1188: * xmlStrchr:
1.123 daniel 1189: * @str: the xmlChar * array
1190: * @val: the xmlChar to search
1.50 daniel 1191: *
1.123 daniel 1192: * a strchr for xmlChar's
1.68 daniel 1193: *
1.123 daniel 1194: * Returns the xmlChar * for the first occurence or NULL.
1.14 veillard 1195: */
1196:
1.123 daniel 1197: const xmlChar *
1198: xmlStrchr(const xmlChar *str, xmlChar val) {
1.135 daniel 1199: if (str == NULL) return(NULL);
1.222 veillard 1200: while (*str != 0) { /* non input consuming */
1.123 daniel 1201: if (*str == val) return((xmlChar *) str);
1.14 veillard 1202: str++;
1203: }
1204: return(NULL);
1.89 daniel 1205: }
1206:
1207: /**
1208: * xmlStrstr:
1.123 daniel 1209: * @str: the xmlChar * array (haystack)
1210: * @val: the xmlChar to search (needle)
1.89 daniel 1211: *
1.123 daniel 1212: * a strstr for xmlChar's
1.89 daniel 1213: *
1.123 daniel 1214: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 1215: */
1216:
1.123 daniel 1217: const xmlChar *
1218: xmlStrstr(const xmlChar *str, xmlChar *val) {
1.89 daniel 1219: int n;
1220:
1221: if (str == NULL) return(NULL);
1222: if (val == NULL) return(NULL);
1223: n = xmlStrlen(val);
1224:
1225: if (n == 0) return(str);
1.222 veillard 1226: while (*str != 0) { /* non input consuming */
1.89 daniel 1227: if (*str == *val) {
1.123 daniel 1228: if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1.89 daniel 1229: }
1.232 veillard 1230: str++;
1231: }
1232: return(NULL);
1233: }
1234:
1235: /**
1236: * xmlStrcasestr:
1237: * @str: the xmlChar * array (haystack)
1238: * @val: the xmlChar to search (needle)
1239: *
1240: * a case-ignoring strstr for xmlChar's
1241: *
1242: * Returns the xmlChar * for the first occurence or NULL.
1243: */
1244:
1245: const xmlChar *
1246: xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1247: int n;
1248:
1249: if (str == NULL) return(NULL);
1250: if (val == NULL) return(NULL);
1251: n = xmlStrlen(val);
1252:
1253: if (n == 0) return(str);
1254: while (*str != 0) { /* non input consuming */
1255: if (casemap[*str] == casemap[*val])
1256: if (!xmlStrncasecmp(str, val, n)) return(str);
1.89 daniel 1257: str++;
1258: }
1259: return(NULL);
1260: }
1261:
1262: /**
1263: * xmlStrsub:
1.123 daniel 1264: * @str: the xmlChar * array (haystack)
1.89 daniel 1265: * @start: the index of the first char (zero based)
1266: * @len: the length of the substring
1267: *
1268: * Extract a substring of a given string
1269: *
1.123 daniel 1270: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 1271: */
1272:
1.123 daniel 1273: xmlChar *
1274: xmlStrsub(const xmlChar *str, int start, int len) {
1.89 daniel 1275: int i;
1276:
1277: if (str == NULL) return(NULL);
1278: if (start < 0) return(NULL);
1.90 daniel 1279: if (len < 0) return(NULL);
1.89 daniel 1280:
1281: for (i = 0;i < start;i++) {
1282: if (*str == 0) return(NULL);
1283: str++;
1284: }
1285: if (*str == 0) return(NULL);
1286: return(xmlStrndup(str, len));
1.14 veillard 1287: }
1.28 daniel 1288:
1.50 daniel 1289: /**
1290: * xmlStrlen:
1.123 daniel 1291: * @str: the xmlChar * array
1.50 daniel 1292: *
1.127 daniel 1293: * length of a xmlChar's string
1.68 daniel 1294: *
1.123 daniel 1295: * Returns the number of xmlChar contained in the ARRAY.
1.45 daniel 1296: */
1297:
1.55 daniel 1298: int
1.123 daniel 1299: xmlStrlen(const xmlChar *str) {
1.45 daniel 1300: int len = 0;
1301:
1302: if (str == NULL) return(0);
1.222 veillard 1303: while (*str != 0) { /* non input consuming */
1.45 daniel 1304: str++;
1305: len++;
1306: }
1307: return(len);
1308: }
1309:
1.50 daniel 1310: /**
1311: * xmlStrncat:
1.123 daniel 1312: * @cur: the original xmlChar * array
1313: * @add: the xmlChar * array added
1.50 daniel 1314: * @len: the length of @add
1315: *
1.123 daniel 1316: * a strncat for array of xmlChar's
1.68 daniel 1317: *
1.123 daniel 1318: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 1319: */
1320:
1.123 daniel 1321: xmlChar *
1322: xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1.45 daniel 1323: int size;
1.123 daniel 1324: xmlChar *ret;
1.45 daniel 1325:
1326: if ((add == NULL) || (len == 0))
1327: return(cur);
1328: if (cur == NULL)
1329: return(xmlStrndup(add, len));
1330:
1331: size = xmlStrlen(cur);
1.204 veillard 1332: ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1.45 daniel 1333: if (ret == NULL) {
1.241 veillard 1334: xmlGenericError(xmlGenericErrorContext,
1335: "xmlStrncat: realloc of %ld byte failed\n",
1.123 daniel 1336: (size + len + 1) * (long)sizeof(xmlChar));
1.45 daniel 1337: return(cur);
1338: }
1.123 daniel 1339: memcpy(&ret[size], add, len * sizeof(xmlChar));
1.45 daniel 1340: ret[size + len] = 0;
1341: return(ret);
1342: }
1343:
1.50 daniel 1344: /**
1345: * xmlStrcat:
1.123 daniel 1346: * @cur: the original xmlChar * array
1347: * @add: the xmlChar * array added
1.50 daniel 1348: *
1.152 daniel 1349: * a strcat for array of xmlChar's. Since they are supposed to be
1350: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1351: * a termination mark of '0'.
1.68 daniel 1352: *
1.123 daniel 1353: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 1354: */
1.123 daniel 1355: xmlChar *
1356: xmlStrcat(xmlChar *cur, const xmlChar *add) {
1357: const xmlChar *p = add;
1.45 daniel 1358:
1359: if (add == NULL) return(cur);
1360: if (cur == NULL)
1361: return(xmlStrdup(add));
1362:
1.222 veillard 1363: while (*p != 0) p++; /* non input consuming */
1.45 daniel 1364: return(xmlStrncat(cur, add, p - add));
1365: }
1366:
1367: /************************************************************************
1368: * *
1369: * Commodity functions, cleanup needed ? *
1370: * *
1371: ************************************************************************/
1372:
1.50 daniel 1373: /**
1374: * areBlanks:
1375: * @ctxt: an XML parser context
1.123 daniel 1376: * @str: a xmlChar *
1.50 daniel 1377: * @len: the size of @str
1378: *
1.45 daniel 1379: * Is this a sequence of blank chars that one can ignore ?
1.50 daniel 1380: *
1.68 daniel 1381: * Returns 1 if ignorable 0 otherwise.
1.45 daniel 1382: */
1383:
1.123 daniel 1384: static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1.104 daniel 1385: int i, ret;
1.45 daniel 1386: xmlNodePtr lastChild;
1387:
1.176 daniel 1388: /*
1389: * Check for xml:space value.
1390: */
1391: if (*(ctxt->space) == 1)
1392: return(0);
1393:
1394: /*
1395: * Check that the string is made of blanks
1396: */
1.45 daniel 1397: for (i = 0;i < len;i++)
1398: if (!(IS_BLANK(str[i]))) return(0);
1399:
1.176 daniel 1400: /*
1401: * Look if the element is mixed content in the Dtd if available
1402: */
1.104 daniel 1403: if (ctxt->myDoc != NULL) {
1404: ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1405: if (ret == 0) return(1);
1406: if (ret == 1) return(0);
1407: }
1.176 daniel 1408:
1.104 daniel 1409: /*
1.176 daniel 1410: * Otherwise, heuristic :-\
1.104 daniel 1411: */
1.179 daniel 1412: if (ctxt->keepBlanks)
1413: return(0);
1414: if (RAW != '<') return(0);
1415: if (ctxt->node == NULL) return(0);
1416: if ((ctxt->node->children == NULL) &&
1417: (RAW == '<') && (NXT(1) == '/')) return(0);
1418:
1.45 daniel 1419: lastChild = xmlGetLastChild(ctxt->node);
1420: if (lastChild == NULL) {
1421: if (ctxt->node->content != NULL) return(0);
1422: } else if (xmlNodeIsText(lastChild))
1423: return(0);
1.157 daniel 1424: else if ((ctxt->node->children != NULL) &&
1425: (xmlNodeIsText(ctxt->node->children)))
1.104 daniel 1426: return(0);
1.45 daniel 1427: return(1);
1428: }
1429:
1430: /*
 * Forward declarations for recursive behaviour.
1432: */
1.77 daniel 1433: void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1434: void xmlParseReference(xmlParserCtxtPtr ctxt);
1.45 daniel 1435:
1.28 daniel 1436: /************************************************************************
1437: * *
1438: * Extra stuff for namespace support *
1439: * Relates to http://www.w3.org/TR/WD-xml-names *
1440: * *
1441: ************************************************************************/
1442:
1.50 daniel 1443: /**
1.72 daniel 1444: * xmlSplitQName:
1.162 daniel 1445: * @ctxt: an XML parser context
1.72 daniel 1446: * @name: an XML parser context
1.123 daniel 1447: * @prefix: a xmlChar **
1.72 daniel 1448: *
1.206 veillard 1449: * parse an UTF8 encoded XML qualified name string
1.72 daniel 1450: *
1451: * [NS 5] QName ::= (Prefix ':')? LocalPart
1452: *
1453: * [NS 6] Prefix ::= NCName
1454: *
1455: * [NS 7] LocalPart ::= NCName
1456: *
1.127 daniel 1457: * Returns the local part, and prefix is updated
1.72 daniel 1458: * to get the Prefix if any.
1459: */
1460:
1.123 daniel 1461: xmlChar *
1.162 daniel 1462: xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1463: xmlChar buf[XML_MAX_NAMELEN + 5];
1.222 veillard 1464: xmlChar *buffer = NULL;
1.162 daniel 1465: int len = 0;
1.222 veillard 1466: int max = XML_MAX_NAMELEN;
1.123 daniel 1467: xmlChar *ret = NULL;
1468: const xmlChar *cur = name;
1.206 veillard 1469: int c;
1.72 daniel 1470:
1471: *prefix = NULL;
1.113 daniel 1472:
1473: /* xml: prefix is not really a namespace */
1474: if ((cur[0] == 'x') && (cur[1] == 'm') &&
1475: (cur[2] == 'l') && (cur[3] == ':'))
1476: return(xmlStrdup(name));
1477:
1.162 daniel 1478: /* nasty but valid */
1479: if (cur[0] == ':')
1480: return(xmlStrdup(name));
1481:
1.206 veillard 1482: c = *cur++;
1.222 veillard 1483: while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1.206 veillard 1484: buf[len++] = c;
1485: c = *cur++;
1.162 daniel 1486: }
1.222 veillard 1487: if (len >= max) {
1488: /*
1489: * Okay someone managed to make a huge name, so he's ready to pay
1490: * for the processing speed.
1491: */
1492: max = len * 2;
1493:
1494: buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1495: if (buffer == NULL) {
1496: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1497: ctxt->sax->error(ctxt->userData,
1498: "xmlSplitQName: out of memory\n");
1499: return(NULL);
1500: }
1501: memcpy(buffer, buf, len);
1502: while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1503: if (len + 10 > max) {
1504: max *= 2;
1505: buffer = (xmlChar *) xmlRealloc(buffer,
1506: max * sizeof(xmlChar));
1507: if (buffer == NULL) {
1508: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1509: ctxt->sax->error(ctxt->userData,
1510: "xmlSplitQName: out of memory\n");
1511: return(NULL);
1512: }
1513: }
1514: buffer[len++] = c;
1515: c = *cur++;
1516: }
1517: buffer[len] = 0;
1518: }
1.72 daniel 1519:
1.222 veillard 1520: if (buffer == NULL)
1521: ret = xmlStrndup(buf, len);
1522: else {
1523: ret = buffer;
1524: buffer = NULL;
1525: max = XML_MAX_NAMELEN;
1526: }
1527:
1.72 daniel 1528:
1.162 daniel 1529: if (c == ':') {
1.206 veillard 1530: c = *cur++;
1531: if (c == 0) return(ret);
1.72 daniel 1532: *prefix = ret;
1.162 daniel 1533: len = 0;
1.72 daniel 1534:
1.222 veillard 1535: while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1.206 veillard 1536: buf[len++] = c;
1537: c = *cur++;
1.162 daniel 1538: }
1.222 veillard 1539: if (len >= max) {
1540: /*
1541: * Okay someone managed to make a huge name, so he's ready to pay
1542: * for the processing speed.
1543: */
1.229 veillard 1544: max = len * 2;
1545:
1546: buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1547: if (buffer == NULL) {
1.55 daniel 1548: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 1549: ctxt->sax->error(ctxt->userData,
1.229 veillard 1550: "xmlSplitQName: out of memory\n");
1551: return(NULL);
1552: }
1553: memcpy(buffer, buf, len);
1554: while (c != 0) { /* tested bigname2.xml */
1555: if (len + 10 > max) {
1556: max *= 2;
1557: buffer = (xmlChar *) xmlRealloc(buffer,
1558: max * sizeof(xmlChar));
1559: if (buffer == NULL) {
1560: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1561: ctxt->sax->error(ctxt->userData,
1562: "xmlSplitQName: out of memory\n");
1563: return(NULL);
1564: }
1565: }
1566: buffer[len++] = c;
1567: c = *cur++;
1.122 daniel 1568: }
1.229 veillard 1569: buffer[len] = 0;
1570: }
1571:
1572: if (buffer == NULL)
1573: ret = xmlStrndup(buf, len);
1574: else {
1575: ret = buffer;
1576: }
1.45 daniel 1577: }
1578:
1.229 veillard 1579: return(ret);
1.45 daniel 1580: }
1581:
1.28 daniel 1582: /************************************************************************
1583: * *
1584: * The parser itself *
1585: * Relates to http://www.w3.org/TR/REC-xml *
1586: * *
1587: ************************************************************************/
1.14 veillard 1588:
1.50 daniel 1589: /**
1590: * xmlParseName:
1591: * @ctxt: an XML parser context
1592: *
1593: * parse an XML name.
1.22 daniel 1594: *
1595: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1596: * CombiningChar | Extender
1597: *
1598: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1599: *
1600: * [6] Names ::= Name (S Name)*
1.68 daniel 1601: *
1602: * Returns the Name parsed or NULL
1.1 veillard 1603: */
1604:
1.123 daniel 1605: xmlChar *
1.55 daniel 1606: xmlParseName(xmlParserCtxtPtr ctxt) {
1.160 daniel 1607: xmlChar buf[XML_MAX_NAMELEN + 5];
1608: int len = 0, l;
1609: int c;
1.222 veillard 1610: int count = 0;
1.1 veillard 1611:
1.91 daniel 1612: GROW;
1.160 daniel 1613: c = CUR_CHAR(l);
1.190 daniel 1614: if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1615: (!IS_LETTER(c) && (c != '_') &&
1616: (c != ':'))) {
1.91 daniel 1617: return(NULL);
1618: }
1.40 daniel 1619:
1.222 veillard 1620: while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1.190 daniel 1621: ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1622: (c == '.') || (c == '-') ||
1623: (c == '_') || (c == ':') ||
1624: (IS_COMBINING(c)) ||
1625: (IS_EXTENDER(c)))) {
1.222 veillard 1626: if (count++ > 100) {
1627: count = 0;
1628: GROW;
1629: }
1.160 daniel 1630: COPY_BUF(l,buf,len,c);
1631: NEXTL(l);
1632: c = CUR_CHAR(l);
1.91 daniel 1633: if (len >= XML_MAX_NAMELEN) {
1.222 veillard 1634: /*
1635: * Okay someone managed to make a huge name, so he's ready to pay
1636: * for the processing speed.
1637: */
1638: xmlChar *buffer;
1639: int max = len * 2;
1640:
1641: buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1642: if (buffer == NULL) {
1643: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1644: ctxt->sax->error(ctxt->userData,
1645: "xmlParseName: out of memory\n");
1646: return(NULL);
1647: }
1648: memcpy(buffer, buf, len);
1649: while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1.160 daniel 1650: (c == '.') || (c == '-') ||
1651: (c == '_') || (c == ':') ||
1652: (IS_COMBINING(c)) ||
1653: (IS_EXTENDER(c))) {
1.222 veillard 1654: if (count++ > 100) {
1655: count = 0;
1656: GROW;
1657: }
1658: if (len + 10 > max) {
1659: max *= 2;
1660: buffer = (xmlChar *) xmlRealloc(buffer,
1661: max * sizeof(xmlChar));
1662: if (buffer == NULL) {
1663: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1664: ctxt->sax->error(ctxt->userData,
1665: "xmlParseName: out of memory\n");
1666: return(NULL);
1667: }
1668: }
1669: COPY_BUF(l,buffer,len,c);
1.160 daniel 1670: NEXTL(l);
1671: c = CUR_CHAR(l);
1.97 daniel 1672: }
1.222 veillard 1673: buffer[len] = 0;
1674: return(buffer);
1.91 daniel 1675: }
1676: }
1677: return(xmlStrndup(buf, len));
1.22 daniel 1678: }
1679:
1.50 daniel 1680: /**
1.135 daniel 1681: * xmlParseStringName:
1682: * @ctxt: an XML parser context
1.229 veillard 1683: * @str: a pointer to the string pointer (IN/OUT)
1.135 daniel 1684: *
1685: * parse an XML name.
1686: *
1687: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1688: * CombiningChar | Extender
1689: *
1690: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1691: *
1692: * [6] Names ::= Name (S Name)*
1693: *
1694: * Returns the Name parsed or NULL. The str pointer
1695: * is updated to the current location in the string.
1696: */
1697:
1698: xmlChar *
1699: xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1.176 daniel 1700: xmlChar buf[XML_MAX_NAMELEN + 5];
1701: const xmlChar *cur = *str;
1702: int len = 0, l;
1703: int c;
1.135 daniel 1704:
1.176 daniel 1705: c = CUR_SCHAR(cur, l);
1706: if (!IS_LETTER(c) && (c != '_') &&
1707: (c != ':')) {
1.135 daniel 1708: return(NULL);
1709: }
1710:
1.222 veillard 1711: while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1.176 daniel 1712: (c == '.') || (c == '-') ||
1713: (c == '_') || (c == ':') ||
1714: (IS_COMBINING(c)) ||
1715: (IS_EXTENDER(c))) {
1716: COPY_BUF(l,buf,len,c);
1717: cur += l;
1718: c = CUR_SCHAR(cur, l);
1.222 veillard 1719: if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1720: /*
1721: * Okay someone managed to make a huge name, so he's ready to pay
1722: * for the processing speed.
1723: */
1724: xmlChar *buffer;
1725: int max = len * 2;
1726:
1727: buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1728: if (buffer == NULL) {
1729: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1730: ctxt->sax->error(ctxt->userData,
1731: "xmlParseStringName: out of memory\n");
1732: return(NULL);
1733: }
1734: memcpy(buffer, buf, len);
1735: while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1.176 daniel 1736: (c == '.') || (c == '-') ||
1737: (c == '_') || (c == ':') ||
1738: (IS_COMBINING(c)) ||
1739: (IS_EXTENDER(c))) {
1.222 veillard 1740: if (len + 10 > max) {
1741: max *= 2;
1742: buffer = (xmlChar *) xmlRealloc(buffer,
1743: max * sizeof(xmlChar));
1744: if (buffer == NULL) {
1745: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1746: ctxt->sax->error(ctxt->userData,
1747: "xmlParseStringName: out of memory\n");
1748: return(NULL);
1749: }
1750: }
1751: COPY_BUF(l,buffer,len,c);
1.176 daniel 1752: cur += l;
1753: c = CUR_SCHAR(cur, l);
1754: }
1.222 veillard 1755: buffer[len] = 0;
1756: *str = cur;
1757: return(buffer);
1.176 daniel 1758: }
1.135 daniel 1759: }
1.176 daniel 1760: *str = cur;
1761: return(xmlStrndup(buf, len));
1.135 daniel 1762: }
1763:
1764: /**
1.50 daniel 1765: * xmlParseNmtoken:
1766: * @ctxt: an XML parser context
1767: *
1768: * parse an XML Nmtoken.
1.22 daniel 1769: *
1770: * [7] Nmtoken ::= (NameChar)+
1771: *
1772: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1.68 daniel 1773: *
1774: * Returns the Nmtoken parsed or NULL
1.22 daniel 1775: */
1776:
1.123 daniel 1777: xmlChar *
1.55 daniel 1778: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1.222 veillard 1779: xmlChar buf[XML_MAX_NAMELEN + 5];
1780: int len = 0, l;
1781: int c;
1782: int count = 0;
1.22 daniel 1783:
1.91 daniel 1784: GROW;
1.160 daniel 1785: c = CUR_CHAR(l);
1.222 veillard 1786:
1787: while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1.160 daniel 1788: (c == '.') || (c == '-') ||
1789: (c == '_') || (c == ':') ||
1790: (IS_COMBINING(c)) ||
1791: (IS_EXTENDER(c))) {
1.222 veillard 1792: if (count++ > 100) {
1793: count = 0;
1794: GROW;
1795: }
1.160 daniel 1796: COPY_BUF(l,buf,len,c);
1797: NEXTL(l);
1798: c = CUR_CHAR(l);
1.91 daniel 1799: if (len >= XML_MAX_NAMELEN) {
1.222 veillard 1800: /*
1801: * Okay someone managed to make a huge token, so he's ready to pay
1802: * for the processing speed.
1803: */
1804: xmlChar *buffer;
1805: int max = len * 2;
1806:
1807: buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1808: if (buffer == NULL) {
1809: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1810: ctxt->sax->error(ctxt->userData,
1811: "xmlParseNmtoken: out of memory\n");
1812: return(NULL);
1813: }
1814: memcpy(buffer, buf, len);
1815: while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1.160 daniel 1816: (c == '.') || (c == '-') ||
1817: (c == '_') || (c == ':') ||
1818: (IS_COMBINING(c)) ||
1819: (IS_EXTENDER(c))) {
1.222 veillard 1820: if (count++ > 100) {
1821: count = 0;
1822: GROW;
1823: }
1824: if (len + 10 > max) {
1825: max *= 2;
1826: buffer = (xmlChar *) xmlRealloc(buffer,
1827: max * sizeof(xmlChar));
1828: if (buffer == NULL) {
1829: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1830: ctxt->sax->error(ctxt->userData,
1831: "xmlParseName: out of memory\n");
1832: return(NULL);
1833: }
1834: }
1835: COPY_BUF(l,buffer,len,c);
1.160 daniel 1836: NEXTL(l);
1837: c = CUR_CHAR(l);
1838: }
1.222 veillard 1839: buffer[len] = 0;
1840: return(buffer);
1.91 daniel 1841: }
1842: }
1.168 daniel 1843: if (len == 0)
1844: return(NULL);
1.91 daniel 1845: return(xmlStrndup(buf, len));
1.1 veillard 1846: }
1847:
1.50 daniel 1848: /**
1849: * xmlParseEntityValue:
1850: * @ctxt: an XML parser context
1.78 daniel 1851: * @orig: if non-NULL store a copy of the original entity value
1.50 daniel 1852: *
1.229 veillard 1853: * parse a value for ENTITY declarations
1.24 daniel 1854: *
1855: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1856: * "'" ([^%&'] | PEReference | Reference)* "'"
1.68 daniel 1857: *
1.78 daniel 1858: * Returns the EntityValue parsed with reference substitued or NULL
1.24 daniel 1859: */
1860:
1.123 daniel 1861: xmlChar *
1862: xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1.135 daniel 1863: xmlChar *buf = NULL;
1864: int len = 0;
1.140 daniel 1865: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 1866: int c, l;
1.135 daniel 1867: xmlChar stop;
1.123 daniel 1868: xmlChar *ret = NULL;
1.176 daniel 1869: const xmlChar *cur = NULL;
1.98 daniel 1870: xmlParserInputPtr input;
1.24 daniel 1871:
1.152 daniel 1872: if (RAW == '"') stop = '"';
1873: else if (RAW == '\'') stop = '\'';
1.135 daniel 1874: else {
1875: ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
1876: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1877: ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
1878: ctxt->wellFormed = 0;
1.180 daniel 1879: ctxt->disableSAX = 1;
1.135 daniel 1880: return(NULL);
1881: }
1882: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
1883: if (buf == NULL) {
1.241 veillard 1884: xmlGenericError(xmlGenericErrorContext,
1885: "malloc of %d byte failed\n", size);
1.135 daniel 1886: return(NULL);
1887: }
1.94 daniel 1888:
1.135 daniel 1889: /*
1890: * The content of the entity definition is copied in a buffer.
1891: */
1.94 daniel 1892:
1.135 daniel 1893: ctxt->instate = XML_PARSER_ENTITY_VALUE;
1894: input = ctxt->input;
1895: GROW;
1896: NEXT;
1.152 daniel 1897: c = CUR_CHAR(l);
1.135 daniel 1898: /*
1899: * NOTE: 4.4.5 Included in Literal
1900: * When a parameter entity reference appears in a literal entity
1901: * value, ... a single or double quote character in the replacement
1902: * text is always treated as a normal data character and will not
1903: * terminate the literal.
1904: * In practice it means we stop the loop only when back at parsing
1905: * the initial entity and the quote is found
1906: */
1.222 veillard 1907: while ((IS_CHAR(c)) && ((c != stop) || /* checked */
1908: (ctxt->input != input))) {
1.152 daniel 1909: if (len + 5 >= size) {
1.135 daniel 1910: size *= 2;
1.204 veillard 1911: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 1912: if (buf == NULL) {
1.241 veillard 1913: xmlGenericError(xmlGenericErrorContext,
1914: "realloc of %d byte failed\n", size);
1.135 daniel 1915: return(NULL);
1.94 daniel 1916: }
1.79 daniel 1917: }
1.152 daniel 1918: COPY_BUF(l,buf,len,c);
1919: NEXTL(l);
1.98 daniel 1920: /*
1.135 daniel 1921: * Pop-up of finished entities.
1.98 daniel 1922: */
1.222 veillard 1923: while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
1.135 daniel 1924: xmlPopInput(ctxt);
1.152 daniel 1925:
1.221 veillard 1926: GROW;
1.152 daniel 1927: c = CUR_CHAR(l);
1.135 daniel 1928: if (c == 0) {
1.94 daniel 1929: GROW;
1.152 daniel 1930: c = CUR_CHAR(l);
1.79 daniel 1931: }
1.135 daniel 1932: }
1933: buf[len] = 0;
1934:
1935: /*
1.176 daniel 1936: * Raise problem w.r.t. '&' and '%' being used in non-entities
1937: * reference constructs. Note Charref will be handled in
1938: * xmlStringDecodeEntities()
1939: */
1940: cur = buf;
1.223 veillard 1941: while (*cur != 0) { /* non input consuming */
1.176 daniel 1942: if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
1943: xmlChar *name;
1944: xmlChar tmp = *cur;
1945:
1946: cur++;
1947: name = xmlParseStringName(ctxt, &cur);
1948: if ((name == NULL) || (*cur != ';')) {
1.230 veillard 1949: ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
1.176 daniel 1950: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1951: ctxt->sax->error(ctxt->userData,
1952: "EntityValue: '%c' forbidden except for entities references\n",
1953: tmp);
1954: ctxt->wellFormed = 0;
1.180 daniel 1955: ctxt->disableSAX = 1;
1.176 daniel 1956: }
1957: if ((ctxt->inSubset == 1) && (tmp == '%')) {
1.230 veillard 1958: ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
1.176 daniel 1959: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1960: ctxt->sax->error(ctxt->userData,
1961: "EntityValue: PEReferences forbidden in internal subset\n",
1962: tmp);
1963: ctxt->wellFormed = 0;
1.180 daniel 1964: ctxt->disableSAX = 1;
1.176 daniel 1965: }
1966: if (name != NULL)
1967: xmlFree(name);
1968: }
1969: cur++;
1970: }
1971:
1972: /*
1.135 daniel 1973: * Then PEReference entities are substituted.
1974: */
1975: if (c != stop) {
1976: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.55 daniel 1977: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 1978: ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
1.59 daniel 1979: ctxt->wellFormed = 0;
1.180 daniel 1980: ctxt->disableSAX = 1;
1.170 daniel 1981: xmlFree(buf);
1.135 daniel 1982: } else {
1983: NEXT;
1984: /*
1985: * NOTE: 4.4.7 Bypassed
1986: * When a general entity reference appears in the EntityValue in
1987: * an entity declaration, it is bypassed and left as is.
1.176 daniel 1988: * so XML_SUBSTITUTE_REF is not set here.
1.135 daniel 1989: */
1990: ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
1991: 0, 0, 0);
1992: if (orig != NULL)
1993: *orig = buf;
1994: else
1995: xmlFree(buf);
1.24 daniel 1996: }
1997:
1998: return(ret);
1999: }
2000:
1.50 daniel 2001: /**
2002: * xmlParseAttValue:
2003: * @ctxt: an XML parser context
2004: *
2005: * parse a value for an attribute
1.78 daniel 2006: * Note: the parser won't do substitution of entities here, this
1.113 daniel 2007: * will be handled later in xmlStringGetNodeList
1.29 daniel 2008: *
2009: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2010: * "'" ([^<&'] | Reference)* "'"
1.68 daniel 2011: *
1.129 daniel 2012: * 3.3.3 Attribute-Value Normalization:
2013: * Before the value of an attribute is passed to the application or
2014: * checked for validity, the XML processor must normalize it as follows:
2015: * - a character reference is processed by appending the referenced
2016: * character to the attribute value
2017: * - an entity reference is processed by recursively processing the
2018: * replacement text of the entity
2019: * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2020: * appending #x20 to the normalized value, except that only a single
2021: * #x20 is appended for a "#xD#xA" sequence that is part of an external
2022: * parsed entity or the literal entity value of an internal parsed entity
2023: * - other characters are processed by appending them to the normalized value
1.130 daniel 2024: * If the declared value is not CDATA, then the XML processor must further
2025: * process the normalized attribute value by discarding any leading and
2026: * trailing space (#x20) characters, and by replacing sequences of space
2027: * (#x20) characters by a single space (#x20) character.
2028: * All attributes for which no declaration has been read should be treated
2029: * by a non-validating parser as if declared CDATA.
1.129 daniel 2030: *
2031: * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
1.29 daniel 2032: */
2033:
1.123 daniel 2034: xmlChar *
1.55 daniel 2035: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.129 daniel 2036: xmlChar limit = 0;
1.198 daniel 2037: xmlChar *buf = NULL;
2038: int len = 0;
2039: int buf_size = 0;
2040: int c, l;
1.129 daniel 2041: xmlChar *current = NULL;
2042: xmlEntityPtr ent;
2043:
1.29 daniel 2044:
1.91 daniel 2045: SHRINK;
1.151 daniel 2046: if (NXT(0) == '"') {
1.96 daniel 2047: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.129 daniel 2048: limit = '"';
1.40 daniel 2049: NEXT;
1.151 daniel 2050: } else if (NXT(0) == '\'') {
1.129 daniel 2051: limit = '\'';
1.96 daniel 2052: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.40 daniel 2053: NEXT;
1.29 daniel 2054: } else {
1.123 daniel 2055: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
1.55 daniel 2056: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2057: ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
1.59 daniel 2058: ctxt->wellFormed = 0;
1.180 daniel 2059: ctxt->disableSAX = 1;
1.129 daniel 2060: return(NULL);
1.29 daniel 2061: }
2062:
1.129 daniel 2063: /*
2064: * allocate a translation buffer.
2065: */
1.198 daniel 2066: buf_size = XML_PARSER_BUFFER_SIZE;
2067: buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2068: if (buf == NULL) {
1.129 daniel 2069: perror("xmlParseAttValue: malloc failed");
2070: return(NULL);
2071: }
2072:
2073: /*
2074: * Ok loop until we reach one of the ending char or a size limit.
2075: */
1.198 daniel 2076: c = CUR_CHAR(l);
1.223 veillard 2077: while (((NXT(0) != limit) && /* checked */
2078: (c != '<')) || (ctxt->token != 0)) {
1.198 daniel 2079: if (c == 0) break;
1.205 veillard 2080: if (ctxt->token == '&') {
1.229 veillard 2081: /*
2082: * The reparsing will be done in xmlStringGetNodeList()
2083: * called by the attribute() function in SAX.c
2084: */
1.205 veillard 2085: static xmlChar buffer[6] = "&";
2086:
2087: if (len > buf_size - 10) {
2088: growBuffer(buf);
2089: }
2090: current = &buffer[0];
1.223 veillard 2091: while (*current != 0) { /* non input consuming */
1.205 veillard 2092: buf[len++] = *current++;
2093: }
2094: ctxt->token = 0;
1.249 ! veillard 2095: } else if (c == '&') {
! 2096: if (NXT(1) == '#') {
! 2097: int val = xmlParseCharRef(ctxt);
! 2098: if (val == '&') {
! 2099: /*
! 2100: * The reparsing will be done in xmlStringGetNodeList()
! 2101: * called by the attribute() function in SAX.c
! 2102: */
! 2103: static xmlChar buffer[6] = "&";
1.229 veillard 2104:
1.249 ! veillard 2105: if (len > buf_size - 10) {
! 2106: growBuffer(buf);
! 2107: }
! 2108: current = &buffer[0];
! 2109: while (*current != 0) { /* non input consuming */
! 2110: buf[len++] = *current++;
! 2111: }
! 2112: } else {
! 2113: len += xmlCopyChar(0, &buf[len], val);
1.229 veillard 2114: }
2115: } else {
1.249 ! veillard 2116: ent = xmlParseEntityRef(ctxt);
! 2117: if ((ent != NULL) &&
! 2118: (ctxt->replaceEntities != 0)) {
! 2119: xmlChar *rep;
1.185 daniel 2120:
1.249 ! veillard 2121: if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
! 2122: rep = xmlStringDecodeEntities(ctxt, ent->content,
! 2123: XML_SUBSTITUTE_REF, 0, 0, 0);
! 2124: if (rep != NULL) {
! 2125: current = rep;
! 2126: while (*current != 0) { /* non input consuming */
! 2127: buf[len++] = *current++;
! 2128: if (len > buf_size - 10) {
! 2129: growBuffer(buf);
! 2130: }
1.186 daniel 2131: }
1.249 ! veillard 2132: xmlFree(rep);
1.185 daniel 2133: }
1.249 ! veillard 2134: } else {
! 2135: if (ent->content != NULL)
! 2136: buf[len++] = ent->content[0];
1.129 daniel 2137: }
1.249 ! veillard 2138: } else if (ent != NULL) {
! 2139: int i = xmlStrlen(ent->name);
! 2140: const xmlChar *cur = ent->name;
1.129 daniel 2141:
1.249 ! veillard 2142: /*
! 2143: * This may look absurd but is needed to detect
! 2144: * entities problems
! 2145: */
! 2146: if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
! 2147: (ent->content != NULL)) {
! 2148: xmlChar *rep;
! 2149: rep = xmlStringDecodeEntities(ctxt, ent->content,
! 2150: XML_SUBSTITUTE_REF, 0, 0, 0);
! 2151: if (rep != NULL)
! 2152: xmlFree(rep);
! 2153: }
1.186 daniel 2154:
1.249 ! veillard 2155: /*
! 2156: * Just output the reference
! 2157: */
! 2158: buf[len++] = '&';
! 2159: if (len > buf_size - i - 10) {
! 2160: growBuffer(buf);
! 2161: }
! 2162: for (;i > 0;i--)
! 2163: buf[len++] = *cur++;
! 2164: buf[len++] = ';';
1.129 daniel 2165: }
2166: }
2167: } else {
1.198 daniel 2168: if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2169: COPY_BUF(l,buf,len,0x20);
2170: if (len > buf_size - 10) {
2171: growBuffer(buf);
1.129 daniel 2172: }
2173: } else {
1.198 daniel 2174: COPY_BUF(l,buf,len,c);
2175: if (len > buf_size - 10) {
2176: growBuffer(buf);
1.129 daniel 2177: }
2178: }
1.198 daniel 2179: NEXTL(l);
1.129 daniel 2180: }
1.198 daniel 2181: GROW;
2182: c = CUR_CHAR(l);
1.129 daniel 2183: }
1.198 daniel 2184: buf[len++] = 0;
1.152 daniel 2185: if (RAW == '<') {
1.230 veillard 2186: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.129 daniel 2187: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2188: ctxt->sax->error(ctxt->userData,
2189: "Unescaped '<' not allowed in attributes values\n");
2190: ctxt->wellFormed = 0;
1.180 daniel 2191: ctxt->disableSAX = 1;
1.152 daniel 2192: } else if (RAW != limit) {
1.230 veillard 2193: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
1.129 daniel 2194: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2195: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2196: ctxt->wellFormed = 0;
1.180 daniel 2197: ctxt->disableSAX = 1;
1.129 daniel 2198: } else
2199: NEXT;
1.198 daniel 2200: return(buf);
1.29 daniel 2201: }
2202:
1.50 daniel 2203: /**
2204: * xmlParseSystemLiteral:
2205: * @ctxt: an XML parser context
2206: *
2207: * parse an XML Literal
1.21 daniel 2208: *
1.22 daniel 2209: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.68 daniel 2210: *
2211: * Returns the SystemLiteral parsed or NULL
1.21 daniel 2212: */
2213:
1.123 daniel 2214: xmlChar *
1.55 daniel 2215: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 2216: xmlChar *buf = NULL;
2217: int len = 0;
1.140 daniel 2218: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 2219: int cur, l;
1.135 daniel 2220: xmlChar stop;
1.168 daniel 2221: int state = ctxt->instate;
1.223 veillard 2222: int count = 0;
1.21 daniel 2223:
1.91 daniel 2224: SHRINK;
1.152 daniel 2225: if (RAW == '"') {
1.40 daniel 2226: NEXT;
1.135 daniel 2227: stop = '"';
1.152 daniel 2228: } else if (RAW == '\'') {
1.40 daniel 2229: NEXT;
1.135 daniel 2230: stop = '\'';
1.21 daniel 2231: } else {
1.230 veillard 2232: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.55 daniel 2233: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2234: ctxt->sax->error(ctxt->userData,
2235: "SystemLiteral \" or ' expected\n");
1.59 daniel 2236: ctxt->wellFormed = 0;
1.180 daniel 2237: ctxt->disableSAX = 1;
1.135 daniel 2238: return(NULL);
1.21 daniel 2239: }
2240:
1.135 daniel 2241: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2242: if (buf == NULL) {
1.241 veillard 2243: xmlGenericError(xmlGenericErrorContext,
2244: "malloc of %d byte failed\n", size);
1.135 daniel 2245: return(NULL);
2246: }
1.168 daniel 2247: ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
1.152 daniel 2248: cur = CUR_CHAR(l);
1.223 veillard 2249: while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
1.152 daniel 2250: if (len + 5 >= size) {
1.135 daniel 2251: size *= 2;
1.204 veillard 2252: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 2253: if (buf == NULL) {
1.241 veillard 2254: xmlGenericError(xmlGenericErrorContext,
2255: "realloc of %d byte failed\n", size);
1.204 veillard 2256: ctxt->instate = (xmlParserInputState) state;
1.135 daniel 2257: return(NULL);
2258: }
2259: }
1.223 veillard 2260: count++;
2261: if (count > 50) {
2262: GROW;
2263: count = 0;
2264: }
1.152 daniel 2265: COPY_BUF(l,buf,len,cur);
2266: NEXTL(l);
2267: cur = CUR_CHAR(l);
1.135 daniel 2268: if (cur == 0) {
2269: GROW;
2270: SHRINK;
1.152 daniel 2271: cur = CUR_CHAR(l);
1.135 daniel 2272: }
2273: }
2274: buf[len] = 0;
1.204 veillard 2275: ctxt->instate = (xmlParserInputState) state;
1.135 daniel 2276: if (!IS_CHAR(cur)) {
1.230 veillard 2277: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
1.135 daniel 2278: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2279: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2280: ctxt->wellFormed = 0;
1.180 daniel 2281: ctxt->disableSAX = 1;
1.135 daniel 2282: } else {
2283: NEXT;
2284: }
2285: return(buf);
1.21 daniel 2286: }
2287:
1.50 daniel 2288: /**
2289: * xmlParsePubidLiteral:
2290: * @ctxt: an XML parser context
1.21 daniel 2291: *
1.50 daniel 2292: * parse an XML public literal
1.68 daniel 2293: *
2294: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2295: *
2296: * Returns the PubidLiteral parsed or NULL.
1.21 daniel 2297: */
2298:
1.123 daniel 2299: xmlChar *
1.55 daniel 2300: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 2301: xmlChar *buf = NULL;
2302: int len = 0;
1.140 daniel 2303: int size = XML_PARSER_BUFFER_SIZE;
1.135 daniel 2304: xmlChar cur;
2305: xmlChar stop;
1.223 veillard 2306: int count = 0;
1.125 daniel 2307:
1.91 daniel 2308: SHRINK;
1.152 daniel 2309: if (RAW == '"') {
1.40 daniel 2310: NEXT;
1.135 daniel 2311: stop = '"';
1.152 daniel 2312: } else if (RAW == '\'') {
1.40 daniel 2313: NEXT;
1.135 daniel 2314: stop = '\'';
1.21 daniel 2315: } else {
1.230 veillard 2316: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.55 daniel 2317: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2318: ctxt->sax->error(ctxt->userData,
2319: "SystemLiteral \" or ' expected\n");
1.59 daniel 2320: ctxt->wellFormed = 0;
1.180 daniel 2321: ctxt->disableSAX = 1;
1.135 daniel 2322: return(NULL);
2323: }
2324: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2325: if (buf == NULL) {
1.241 veillard 2326: xmlGenericError(xmlGenericErrorContext,
2327: "malloc of %d byte failed\n", size);
1.135 daniel 2328: return(NULL);
2329: }
2330: cur = CUR;
1.223 veillard 2331: while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
1.135 daniel 2332: if (len + 1 >= size) {
2333: size *= 2;
1.204 veillard 2334: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 2335: if (buf == NULL) {
1.241 veillard 2336: xmlGenericError(xmlGenericErrorContext,
2337: "realloc of %d byte failed\n", size);
1.135 daniel 2338: return(NULL);
2339: }
2340: }
2341: buf[len++] = cur;
1.223 veillard 2342: count++;
2343: if (count > 50) {
2344: GROW;
2345: count = 0;
2346: }
1.135 daniel 2347: NEXT;
2348: cur = CUR;
2349: if (cur == 0) {
2350: GROW;
2351: SHRINK;
2352: cur = CUR;
2353: }
2354: }
2355: buf[len] = 0;
2356: if (cur != stop) {
1.230 veillard 2357: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
1.135 daniel 2358: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2359: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2360: ctxt->wellFormed = 0;
1.180 daniel 2361: ctxt->disableSAX = 1;
1.135 daniel 2362: } else {
2363: NEXT;
1.21 daniel 2364: }
1.135 daniel 2365: return(buf);
1.21 daniel 2366: }
2367:
1.50 daniel 2368: /**
2369: * xmlParseCharData:
2370: * @ctxt: an XML parser context
2371: * @cdata: int indicating whether we are within a CDATA section
2372: *
2373: * parse a CharData section.
2374: * if we are within a CDATA section ']]>' marks an end of section.
1.27 daniel 2375: *
1.151 daniel 2376: * The right angle bracket (>) may be represented using the string ">",
2377: * and must, for compatibility, be escaped using ">" or a character
2378: * reference when it appears in the string "]]>" in content, when that
2379: * string is not marking the end of a CDATA section.
2380: *
1.27 daniel 2381: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2382: */
2383:
1.55 daniel 2384: void
2385: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1.152 daniel 2386: xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
1.91 daniel 2387: int nbchar = 0;
1.152 daniel 2388: int cur, l;
1.223 veillard 2389: int count = 0;
1.27 daniel 2390:
1.91 daniel 2391: SHRINK;
1.223 veillard 2392: GROW;
1.152 daniel 2393: cur = CUR_CHAR(l);
1.223 veillard 2394: while (((cur != '<') || (ctxt->token == '<')) && /* checked */
1.190 daniel 2395: ((cur != '&') || (ctxt->token == '&')) &&
1.229 veillard 2396: (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
1.97 daniel 2397: if ((cur == ']') && (NXT(1) == ']') &&
1.59 daniel 2398: (NXT(2) == '>')) {
2399: if (cdata) break;
2400: else {
1.230 veillard 2401: ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
1.59 daniel 2402: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.151 daniel 2403: ctxt->sax->error(ctxt->userData,
1.59 daniel 2404: "Sequence ']]>' not allowed in content\n");
1.151 daniel 2405: /* Should this be relaxed ??? I see a "must here */
2406: ctxt->wellFormed = 0;
1.180 daniel 2407: ctxt->disableSAX = 1;
1.59 daniel 2408: }
2409: }
1.152 daniel 2410: COPY_BUF(l,buf,nbchar,cur);
2411: if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
1.91 daniel 2412: /*
2413: * Ok the segment is to be consumed as chars.
2414: */
1.171 daniel 2415: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 2416: if (areBlanks(ctxt, buf, nbchar)) {
2417: if (ctxt->sax->ignorableWhitespace != NULL)
2418: ctxt->sax->ignorableWhitespace(ctxt->userData,
2419: buf, nbchar);
2420: } else {
2421: if (ctxt->sax->characters != NULL)
2422: ctxt->sax->characters(ctxt->userData, buf, nbchar);
2423: }
2424: }
2425: nbchar = 0;
2426: }
1.223 veillard 2427: count++;
2428: if (count > 50) {
2429: GROW;
2430: count = 0;
2431: }
1.152 daniel 2432: NEXTL(l);
2433: cur = CUR_CHAR(l);
1.27 daniel 2434: }
1.91 daniel 2435: if (nbchar != 0) {
2436: /*
2437: * Ok the segment is to be consumed as chars.
2438: */
1.171 daniel 2439: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 2440: if (areBlanks(ctxt, buf, nbchar)) {
2441: if (ctxt->sax->ignorableWhitespace != NULL)
2442: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2443: } else {
2444: if (ctxt->sax->characters != NULL)
2445: ctxt->sax->characters(ctxt->userData, buf, nbchar);
2446: }
2447: }
1.45 daniel 2448: }
1.27 daniel 2449: }
2450:
1.50 daniel 2451: /**
2452: * xmlParseExternalID:
2453: * @ctxt: an XML parser context
1.123 daniel 2454: * @publicID: a xmlChar** receiving PubidLiteral
1.67 daniel 2455: * @strict: indicate whether we should restrict parsing to only
2456: * production [75], see NOTE below
1.50 daniel 2457: *
1.67 daniel 2458: * Parse an External ID or a Public ID
2459: *
2460: * NOTE: Productions [75] and [83] interract badly since [75] can generate
2461: * 'PUBLIC' S PubidLiteral S SystemLiteral
1.22 daniel 2462: *
2463: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2464: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1.67 daniel 2465: *
2466: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2467: *
1.68 daniel 2468: * Returns the function returns SystemLiteral and in the second
1.67 daniel 2469: * case publicID receives PubidLiteral, is strict is off
2470: * it is possible to return NULL and have publicID set.
1.22 daniel 2471: */
2472:
1.123 daniel 2473: xmlChar *
2474: xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2475: xmlChar *URI = NULL;
1.22 daniel 2476:
1.91 daniel 2477: SHRINK;
1.152 daniel 2478: if ((RAW == 'S') && (NXT(1) == 'Y') &&
1.40 daniel 2479: (NXT(2) == 'S') && (NXT(3) == 'T') &&
2480: (NXT(4) == 'E') && (NXT(5) == 'M')) {
2481: SKIP(6);
1.59 daniel 2482: if (!IS_BLANK(CUR)) {
1.230 veillard 2483: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 2484: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2485: ctxt->sax->error(ctxt->userData,
1.59 daniel 2486: "Space required after 'SYSTEM'\n");
2487: ctxt->wellFormed = 0;
1.180 daniel 2488: ctxt->disableSAX = 1;
1.59 daniel 2489: }
1.42 daniel 2490: SKIP_BLANKS;
1.39 daniel 2491: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 2492: if (URI == NULL) {
1.230 veillard 2493: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.55 daniel 2494: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2495: ctxt->sax->error(ctxt->userData,
1.39 daniel 2496: "xmlParseExternalID: SYSTEM, no URI\n");
1.59 daniel 2497: ctxt->wellFormed = 0;
1.180 daniel 2498: ctxt->disableSAX = 1;
1.59 daniel 2499: }
1.152 daniel 2500: } else if ((RAW == 'P') && (NXT(1) == 'U') &&
1.40 daniel 2501: (NXT(2) == 'B') && (NXT(3) == 'L') &&
2502: (NXT(4) == 'I') && (NXT(5) == 'C')) {
2503: SKIP(6);
1.59 daniel 2504: if (!IS_BLANK(CUR)) {
1.230 veillard 2505: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 2506: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2507: ctxt->sax->error(ctxt->userData,
1.59 daniel 2508: "Space required after 'PUBLIC'\n");
2509: ctxt->wellFormed = 0;
1.180 daniel 2510: ctxt->disableSAX = 1;
1.59 daniel 2511: }
1.42 daniel 2512: SKIP_BLANKS;
1.39 daniel 2513: *publicID = xmlParsePubidLiteral(ctxt);
1.59 daniel 2514: if (*publicID == NULL) {
1.230 veillard 2515: ctxt->errNo = XML_ERR_PUBID_REQUIRED;
1.55 daniel 2516: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2517: ctxt->sax->error(ctxt->userData,
1.39 daniel 2518: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.59 daniel 2519: ctxt->wellFormed = 0;
1.180 daniel 2520: ctxt->disableSAX = 1;
1.59 daniel 2521: }
1.67 daniel 2522: if (strict) {
2523: /*
2524: * We don't handle [83] so "S SystemLiteral" is required.
2525: */
2526: if (!IS_BLANK(CUR)) {
1.230 veillard 2527: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 2528: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2529: ctxt->sax->error(ctxt->userData,
1.67 daniel 2530: "Space required after the Public Identifier\n");
2531: ctxt->wellFormed = 0;
1.180 daniel 2532: ctxt->disableSAX = 1;
1.67 daniel 2533: }
2534: } else {
2535: /*
2536: * We handle [83] so we return immediately, if
2537: * "S SystemLiteral" is not detected. From a purely parsing
2538: * point of view that's a nice mess.
2539: */
1.135 daniel 2540: const xmlChar *ptr;
2541: GROW;
2542:
2543: ptr = CUR_PTR;
1.67 daniel 2544: if (!IS_BLANK(*ptr)) return(NULL);
2545:
1.223 veillard 2546: while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
1.173 daniel 2547: if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
1.59 daniel 2548: }
1.42 daniel 2549: SKIP_BLANKS;
1.39 daniel 2550: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 2551: if (URI == NULL) {
1.230 veillard 2552: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.55 daniel 2553: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2554: ctxt->sax->error(ctxt->userData,
1.39 daniel 2555: "xmlParseExternalID: PUBLIC, no URI\n");
1.59 daniel 2556: ctxt->wellFormed = 0;
1.180 daniel 2557: ctxt->disableSAX = 1;
1.59 daniel 2558: }
1.22 daniel 2559: }
1.39 daniel 2560: return(URI);
1.22 daniel 2561: }
2562:
1.50 daniel 2563: /**
2564: * xmlParseComment:
1.69 daniel 2565: * @ctxt: an XML parser context
1.50 daniel 2566: *
1.3 veillard 2567: * Skip an XML (SGML) comment <!-- .... -->
1.38 daniel 2568: * The spec says that "For compatibility, the string "--" (double-hyphen)
2569: * must not occur within comments. "
1.22 daniel 2570: *
2571: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 2572: */
1.72 daniel 2573: void
1.114 daniel 2574: xmlParseComment(xmlParserCtxtPtr ctxt) {
1.135 daniel 2575: xmlChar *buf = NULL;
1.195 daniel 2576: int len;
1.140 daniel 2577: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 2578: int q, ql;
2579: int r, rl;
2580: int cur, l;
1.140 daniel 2581: xmlParserInputState state;
1.187 daniel 2582: xmlParserInputPtr input = ctxt->input;
1.223 veillard 2583: int count = 0;
1.3 veillard 2584:
2585: /*
1.22 daniel 2586: * Check that there is a comment right here.
1.3 veillard 2587: */
1.152 daniel 2588: if ((RAW != '<') || (NXT(1) != '!') ||
1.72 daniel 2589: (NXT(2) != '-') || (NXT(3) != '-')) return;
1.3 veillard 2590:
1.140 daniel 2591: state = ctxt->instate;
1.97 daniel 2592: ctxt->instate = XML_PARSER_COMMENT;
1.91 daniel 2593: SHRINK;
1.40 daniel 2594: SKIP(4);
1.135 daniel 2595: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2596: if (buf == NULL) {
1.241 veillard 2597: xmlGenericError(xmlGenericErrorContext,
2598: "malloc of %d byte failed\n", size);
1.140 daniel 2599: ctxt->instate = state;
1.135 daniel 2600: return;
2601: }
1.152 daniel 2602: q = CUR_CHAR(ql);
2603: NEXTL(ql);
2604: r = CUR_CHAR(rl);
2605: NEXTL(rl);
2606: cur = CUR_CHAR(l);
1.195 daniel 2607: len = 0;
1.223 veillard 2608: while (IS_CHAR(cur) && /* checked */
1.135 daniel 2609: ((cur != '>') ||
2610: (r != '-') || (q != '-'))) {
1.195 daniel 2611: if ((r == '-') && (q == '-') && (len > 1)) {
1.230 veillard 2612: ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
1.55 daniel 2613: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2614: ctxt->sax->error(ctxt->userData,
1.38 daniel 2615: "Comment must not contain '--' (double-hyphen)`\n");
1.59 daniel 2616: ctxt->wellFormed = 0;
1.180 daniel 2617: ctxt->disableSAX = 1;
1.59 daniel 2618: }
1.152 daniel 2619: if (len + 5 >= size) {
1.135 daniel 2620: size *= 2;
1.204 veillard 2621: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 2622: if (buf == NULL) {
1.241 veillard 2623: xmlGenericError(xmlGenericErrorContext,
2624: "realloc of %d byte failed\n", size);
1.140 daniel 2625: ctxt->instate = state;
1.135 daniel 2626: return;
2627: }
2628: }
1.152 daniel 2629: COPY_BUF(ql,buf,len,q);
1.135 daniel 2630: q = r;
1.152 daniel 2631: ql = rl;
1.135 daniel 2632: r = cur;
1.152 daniel 2633: rl = l;
1.223 veillard 2634:
2635: count++;
2636: if (count > 50) {
2637: GROW;
2638: count = 0;
2639: }
1.152 daniel 2640: NEXTL(l);
2641: cur = CUR_CHAR(l);
1.135 daniel 2642: if (cur == 0) {
2643: SHRINK;
2644: GROW;
1.152 daniel 2645: cur = CUR_CHAR(l);
1.135 daniel 2646: }
1.3 veillard 2647: }
1.135 daniel 2648: buf[len] = 0;
2649: if (!IS_CHAR(cur)) {
1.230 veillard 2650: ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
1.55 daniel 2651: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2652: ctxt->sax->error(ctxt->userData,
1.135 daniel 2653: "Comment not terminated \n<!--%.50s\n", buf);
1.59 daniel 2654: ctxt->wellFormed = 0;
1.180 daniel 2655: ctxt->disableSAX = 1;
1.178 daniel 2656: xmlFree(buf);
1.3 veillard 2657: } else {
1.187 daniel 2658: if (input != ctxt->input) {
1.230 veillard 2659: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 2660: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2661: ctxt->sax->error(ctxt->userData,
2662: "Comment doesn't start and stop in the same entity\n");
2663: ctxt->wellFormed = 0;
2664: ctxt->disableSAX = 1;
2665: }
1.40 daniel 2666: NEXT;
1.171 daniel 2667: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2668: (!ctxt->disableSAX))
1.135 daniel 2669: ctxt->sax->comment(ctxt->userData, buf);
2670: xmlFree(buf);
1.3 veillard 2671: }
1.140 daniel 2672: ctxt->instate = state;
1.3 veillard 2673: }
2674:
1.50 daniel 2675: /**
2676: * xmlParsePITarget:
2677: * @ctxt: an XML parser context
2678: *
2679: * parse the name of a PI
1.22 daniel 2680: *
2681: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1.68 daniel 2682: *
2683: * Returns the PITarget name or NULL
1.22 daniel 2684: */
2685:
1.123 daniel 2686: xmlChar *
1.55 daniel 2687: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1.123 daniel 2688: xmlChar *name;
1.22 daniel 2689:
2690: name = xmlParseName(ctxt);
1.139 daniel 2691: if ((name != NULL) &&
1.22 daniel 2692: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 2693: ((name[1] == 'm') || (name[1] == 'M')) &&
2694: ((name[2] == 'l') || (name[2] == 'L'))) {
1.139 daniel 2695: int i;
1.177 daniel 2696: if ((name[0] == 'x') && (name[1] == 'm') &&
2697: (name[2] == 'l') && (name[3] == 0)) {
1.230 veillard 2698: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
1.151 daniel 2699: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2700: ctxt->sax->error(ctxt->userData,
2701: "XML declaration allowed only at the start of the document\n");
2702: ctxt->wellFormed = 0;
1.180 daniel 2703: ctxt->disableSAX = 1;
1.151 daniel 2704: return(name);
2705: } else if (name[3] == 0) {
1.230 veillard 2706: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
1.151 daniel 2707: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2708: ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2709: ctxt->wellFormed = 0;
1.180 daniel 2710: ctxt->disableSAX = 1;
1.151 daniel 2711: return(name);
2712: }
1.139 daniel 2713: for (i = 0;;i++) {
2714: if (xmlW3CPIs[i] == NULL) break;
1.236 veillard 2715: if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
1.139 daniel 2716: return(name);
2717: }
2718: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
1.230 veillard 2719: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
1.139 daniel 2720: ctxt->sax->warning(ctxt->userData,
1.122 daniel 2721: "xmlParsePItarget: invalid name prefix 'xml'\n");
2722: }
1.22 daniel 2723: }
2724: return(name);
2725: }
2726:
1.50 daniel 2727: /**
2728: * xmlParsePI:
2729: * @ctxt: an XML parser context
2730: *
2731: * parse an XML Processing Instruction.
1.22 daniel 2732: *
2733: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.68 daniel 2734: *
1.69 daniel 2735: * The processing is transferred to SAX once parsed.
1.3 veillard 2736: */
2737:
1.55 daniel 2738: void
2739: xmlParsePI(xmlParserCtxtPtr ctxt) {
1.135 daniel 2740: xmlChar *buf = NULL;
2741: int len = 0;
1.140 daniel 2742: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 2743: int cur, l;
1.123 daniel 2744: xmlChar *target;
1.140 daniel 2745: xmlParserInputState state;
1.223 veillard 2746: int count = 0;
1.22 daniel 2747:
1.152 daniel 2748: if ((RAW == '<') && (NXT(1) == '?')) {
1.187 daniel 2749: xmlParserInputPtr input = ctxt->input;
1.140 daniel 2750: state = ctxt->instate;
2751: ctxt->instate = XML_PARSER_PI;
1.3 veillard 2752: /*
2753: * this is a Processing Instruction.
2754: */
1.40 daniel 2755: SKIP(2);
1.91 daniel 2756: SHRINK;
1.3 veillard 2757:
2758: /*
1.22 daniel 2759: * Parse the target name and check for special support like
2760: * namespace.
1.3 veillard 2761: */
1.22 daniel 2762: target = xmlParsePITarget(ctxt);
2763: if (target != NULL) {
1.156 daniel 2764: if ((RAW == '?') && (NXT(1) == '>')) {
1.187 daniel 2765: if (input != ctxt->input) {
1.230 veillard 2766: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 2767: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2768: ctxt->sax->error(ctxt->userData,
2769: "PI declaration doesn't start and stop in the same entity\n");
2770: ctxt->wellFormed = 0;
2771: ctxt->disableSAX = 1;
2772: }
1.156 daniel 2773: SKIP(2);
2774:
2775: /*
2776: * SAX: PI detected.
2777: */
1.171 daniel 2778: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.156 daniel 2779: (ctxt->sax->processingInstruction != NULL))
2780: ctxt->sax->processingInstruction(ctxt->userData,
2781: target, NULL);
2782: ctxt->instate = state;
1.170 daniel 2783: xmlFree(target);
1.156 daniel 2784: return;
2785: }
1.135 daniel 2786: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2787: if (buf == NULL) {
1.241 veillard 2788: xmlGenericError(xmlGenericErrorContext,
2789: "malloc of %d byte failed\n", size);
1.140 daniel 2790: ctxt->instate = state;
1.135 daniel 2791: return;
2792: }
2793: cur = CUR;
2794: if (!IS_BLANK(cur)) {
1.230 veillard 2795: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.114 daniel 2796: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2797: ctxt->sax->error(ctxt->userData,
2798: "xmlParsePI: PI %s space expected\n", target);
2799: ctxt->wellFormed = 0;
1.180 daniel 2800: ctxt->disableSAX = 1;
1.114 daniel 2801: }
2802: SKIP_BLANKS;
1.152 daniel 2803: cur = CUR_CHAR(l);
1.223 veillard 2804: while (IS_CHAR(cur) && /* checked */
1.135 daniel 2805: ((cur != '?') || (NXT(1) != '>'))) {
1.152 daniel 2806: if (len + 5 >= size) {
1.135 daniel 2807: size *= 2;
1.204 veillard 2808: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 2809: if (buf == NULL) {
1.241 veillard 2810: xmlGenericError(xmlGenericErrorContext,
2811: "realloc of %d byte failed\n", size);
1.140 daniel 2812: ctxt->instate = state;
1.135 daniel 2813: return;
2814: }
1.223 veillard 2815: }
2816: count++;
2817: if (count > 50) {
2818: GROW;
2819: count = 0;
1.135 daniel 2820: }
1.152 daniel 2821: COPY_BUF(l,buf,len,cur);
2822: NEXTL(l);
2823: cur = CUR_CHAR(l);
1.135 daniel 2824: if (cur == 0) {
2825: SHRINK;
2826: GROW;
1.152 daniel 2827: cur = CUR_CHAR(l);
1.135 daniel 2828: }
2829: }
2830: buf[len] = 0;
1.152 daniel 2831: if (cur != '?') {
1.230 veillard 2832: ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
1.72 daniel 2833: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2834: ctxt->sax->error(ctxt->userData,
1.72 daniel 2835: "xmlParsePI: PI %s never end ...\n", target);
2836: ctxt->wellFormed = 0;
1.180 daniel 2837: ctxt->disableSAX = 1;
1.22 daniel 2838: } else {
1.187 daniel 2839: if (input != ctxt->input) {
1.230 veillard 2840: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 2841: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2842: ctxt->sax->error(ctxt->userData,
2843: "PI declaration doesn't start and stop in the same entity\n");
2844: ctxt->wellFormed = 0;
2845: ctxt->disableSAX = 1;
2846: }
1.72 daniel 2847: SKIP(2);
1.44 daniel 2848:
1.72 daniel 2849: /*
2850: * SAX: PI detected.
2851: */
1.171 daniel 2852: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.72 daniel 2853: (ctxt->sax->processingInstruction != NULL))
1.99 daniel 2854: ctxt->sax->processingInstruction(ctxt->userData,
1.135 daniel 2855: target, buf);
1.22 daniel 2856: }
1.135 daniel 2857: xmlFree(buf);
1.119 daniel 2858: xmlFree(target);
1.3 veillard 2859: } else {
1.230 veillard 2860: ctxt->errNo = XML_ERR_PI_NOT_STARTED;
1.55 daniel 2861: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.99 daniel 2862: ctxt->sax->error(ctxt->userData,
2863: "xmlParsePI : no target name\n");
1.59 daniel 2864: ctxt->wellFormed = 0;
1.180 daniel 2865: ctxt->disableSAX = 1;
1.22 daniel 2866: }
1.140 daniel 2867: ctxt->instate = state;
1.22 daniel 2868: }
2869: }
2870:
1.50 daniel 2871: /**
2872: * xmlParseNotationDecl:
2873: * @ctxt: an XML parser context
2874: *
2875: * parse a notation declaration
1.22 daniel 2876: *
2877: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
2878: *
2879: * Hence there is actually 3 choices:
2880: * 'PUBLIC' S PubidLiteral
2881: * 'PUBLIC' S PubidLiteral S SystemLiteral
2882: * and 'SYSTEM' S SystemLiteral
1.50 daniel 2883: *
1.67 daniel 2884: * See the NOTE on xmlParseExternalID().
1.22 daniel 2885: */
2886:
1.55 daniel 2887: void
2888: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 2889: xmlChar *name;
2890: xmlChar *Pubid;
2891: xmlChar *Systemid;
1.22 daniel 2892:
1.152 daniel 2893: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 2894: (NXT(2) == 'N') && (NXT(3) == 'O') &&
2895: (NXT(4) == 'T') && (NXT(5) == 'A') &&
2896: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1.67 daniel 2897: (NXT(8) == 'O') && (NXT(9) == 'N')) {
1.187 daniel 2898: xmlParserInputPtr input = ctxt->input;
1.91 daniel 2899: SHRINK;
1.40 daniel 2900: SKIP(10);
1.67 daniel 2901: if (!IS_BLANK(CUR)) {
1.230 veillard 2902: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 2903: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2904: ctxt->sax->error(ctxt->userData,
2905: "Space required after '<!NOTATION'\n");
1.67 daniel 2906: ctxt->wellFormed = 0;
1.180 daniel 2907: ctxt->disableSAX = 1;
1.67 daniel 2908: return;
2909: }
2910: SKIP_BLANKS;
1.22 daniel 2911:
2912: name = xmlParseName(ctxt);
2913: if (name == NULL) {
1.230 veillard 2914: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.55 daniel 2915: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2916: ctxt->sax->error(ctxt->userData,
2917: "NOTATION: Name expected here\n");
1.67 daniel 2918: ctxt->wellFormed = 0;
1.180 daniel 2919: ctxt->disableSAX = 1;
1.67 daniel 2920: return;
2921: }
2922: if (!IS_BLANK(CUR)) {
1.230 veillard 2923: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 2924: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2925: ctxt->sax->error(ctxt->userData,
1.67 daniel 2926: "Space required after the NOTATION name'\n");
1.59 daniel 2927: ctxt->wellFormed = 0;
1.180 daniel 2928: ctxt->disableSAX = 1;
1.22 daniel 2929: return;
2930: }
1.42 daniel 2931: SKIP_BLANKS;
1.67 daniel 2932:
1.22 daniel 2933: /*
1.67 daniel 2934: * Parse the IDs.
1.22 daniel 2935: */
1.160 daniel 2936: Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
1.67 daniel 2937: SKIP_BLANKS;
2938:
1.152 daniel 2939: if (RAW == '>') {
1.187 daniel 2940: if (input != ctxt->input) {
1.230 veillard 2941: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 2942: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2943: ctxt->sax->error(ctxt->userData,
2944: "Notation declaration doesn't start and stop in the same entity\n");
2945: ctxt->wellFormed = 0;
2946: ctxt->disableSAX = 1;
2947: }
1.40 daniel 2948: NEXT;
1.171 daniel 2949: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
2950: (ctxt->sax->notationDecl != NULL))
1.74 daniel 2951: ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
1.67 daniel 2952: } else {
1.230 veillard 2953: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.67 daniel 2954: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2955: ctxt->sax->error(ctxt->userData,
1.67 daniel 2956: "'>' required to close NOTATION declaration\n");
2957: ctxt->wellFormed = 0;
1.180 daniel 2958: ctxt->disableSAX = 1;
1.67 daniel 2959: }
1.119 daniel 2960: xmlFree(name);
2961: if (Systemid != NULL) xmlFree(Systemid);
2962: if (Pubid != NULL) xmlFree(Pubid);
1.22 daniel 2963: }
2964: }
2965:
1.50 daniel 2966: /**
2967: * xmlParseEntityDecl:
2968: * @ctxt: an XML parser context
2969: *
2970: * parse <!ENTITY declarations
1.22 daniel 2971: *
2972: * [70] EntityDecl ::= GEDecl | PEDecl
2973: *
2974: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
2975: *
2976: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
2977: *
2978: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
2979: *
2980: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 2981: *
2982: * [76] NDataDecl ::= S 'NDATA' S Name
1.99 daniel 2983: *
2984: * [ VC: Notation Declared ]
1.116 daniel 2985: * The Name must match the declared name of a notation.
1.22 daniel 2986: */
2987:
1.55 daniel 2988: void
2989: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 2990: xmlChar *name = NULL;
2991: xmlChar *value = NULL;
2992: xmlChar *URI = NULL, *literal = NULL;
2993: xmlChar *ndata = NULL;
1.39 daniel 2994: int isParameter = 0;
1.123 daniel 2995: xmlChar *orig = NULL;
1.22 daniel 2996:
1.94 daniel 2997: GROW;
1.152 daniel 2998: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 2999: (NXT(2) == 'E') && (NXT(3) == 'N') &&
3000: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1.59 daniel 3001: (NXT(6) == 'T') && (NXT(7) == 'Y')) {
1.187 daniel 3002: xmlParserInputPtr input = ctxt->input;
1.96 daniel 3003: ctxt->instate = XML_PARSER_ENTITY_DECL;
1.91 daniel 3004: SHRINK;
1.40 daniel 3005: SKIP(8);
1.59 daniel 3006: if (!IS_BLANK(CUR)) {
1.230 veillard 3007: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3008: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3009: ctxt->sax->error(ctxt->userData,
3010: "Space required after '<!ENTITY'\n");
1.59 daniel 3011: ctxt->wellFormed = 0;
1.180 daniel 3012: ctxt->disableSAX = 1;
1.59 daniel 3013: }
3014: SKIP_BLANKS;
1.40 daniel 3015:
1.152 daniel 3016: if (RAW == '%') {
1.40 daniel 3017: NEXT;
1.59 daniel 3018: if (!IS_BLANK(CUR)) {
1.230 veillard 3019: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3020: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3021: ctxt->sax->error(ctxt->userData,
3022: "Space required after '%'\n");
1.59 daniel 3023: ctxt->wellFormed = 0;
1.180 daniel 3024: ctxt->disableSAX = 1;
1.59 daniel 3025: }
1.42 daniel 3026: SKIP_BLANKS;
1.39 daniel 3027: isParameter = 1;
1.22 daniel 3028: }
3029:
3030: name = xmlParseName(ctxt);
1.24 daniel 3031: if (name == NULL) {
1.230 veillard 3032: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 3033: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3034: ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
1.59 daniel 3035: ctxt->wellFormed = 0;
1.180 daniel 3036: ctxt->disableSAX = 1;
1.24 daniel 3037: return;
3038: }
1.59 daniel 3039: if (!IS_BLANK(CUR)) {
1.230 veillard 3040: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3041: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3042: ctxt->sax->error(ctxt->userData,
1.59 daniel 3043: "Space required after the entity name\n");
3044: ctxt->wellFormed = 0;
1.180 daniel 3045: ctxt->disableSAX = 1;
1.59 daniel 3046: }
1.42 daniel 3047: SKIP_BLANKS;
1.24 daniel 3048:
1.22 daniel 3049: /*
1.68 daniel 3050: * handle the various case of definitions...
1.22 daniel 3051: */
1.39 daniel 3052: if (isParameter) {
1.225 veillard 3053: if ((RAW == '"') || (RAW == '\'')) {
1.78 daniel 3054: value = xmlParseEntityValue(ctxt, &orig);
1.39 daniel 3055: if (value) {
1.171 daniel 3056: if ((ctxt->sax != NULL) &&
3057: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3058: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3059: XML_INTERNAL_PARAMETER_ENTITY,
3060: NULL, NULL, value);
3061: }
1.225 veillard 3062: } else {
1.67 daniel 3063: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 3064: if ((URI == NULL) && (literal == NULL)) {
1.230 veillard 3065: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
1.169 daniel 3066: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3067: ctxt->sax->error(ctxt->userData,
3068: "Entity value required\n");
3069: ctxt->wellFormed = 0;
1.180 daniel 3070: ctxt->disableSAX = 1;
1.169 daniel 3071: }
1.39 daniel 3072: if (URI) {
1.193 daniel 3073: xmlURIPtr uri;
3074:
3075: uri = xmlParseURI((const char *) URI);
3076: if (uri == NULL) {
1.230 veillard 3077: ctxt->errNo = XML_ERR_INVALID_URI;
1.193 daniel 3078: if ((ctxt->sax != NULL) &&
3079: (!ctxt->disableSAX) &&
3080: (ctxt->sax->error != NULL))
3081: ctxt->sax->error(ctxt->userData,
3082: "Invalid URI: %s\n", URI);
3083: ctxt->wellFormed = 0;
3084: } else {
3085: if (uri->fragment != NULL) {
1.230 veillard 3086: ctxt->errNo = XML_ERR_URI_FRAGMENT;
1.193 daniel 3087: if ((ctxt->sax != NULL) &&
3088: (!ctxt->disableSAX) &&
3089: (ctxt->sax->error != NULL))
3090: ctxt->sax->error(ctxt->userData,
3091: "Fragment not allowed: %s\n", URI);
3092: ctxt->wellFormed = 0;
3093: } else {
3094: if ((ctxt->sax != NULL) &&
3095: (!ctxt->disableSAX) &&
3096: (ctxt->sax->entityDecl != NULL))
3097: ctxt->sax->entityDecl(ctxt->userData, name,
3098: XML_EXTERNAL_PARAMETER_ENTITY,
3099: literal, URI, NULL);
3100: }
3101: xmlFreeURI(uri);
3102: }
1.39 daniel 3103: }
1.24 daniel 3104: }
3105: } else {
1.152 daniel 3106: if ((RAW == '"') || (RAW == '\'')) {
1.78 daniel 3107: value = xmlParseEntityValue(ctxt, &orig);
1.171 daniel 3108: if ((ctxt->sax != NULL) &&
3109: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3110: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3111: XML_INTERNAL_GENERAL_ENTITY,
3112: NULL, NULL, value);
3113: } else {
1.67 daniel 3114: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 3115: if ((URI == NULL) && (literal == NULL)) {
1.230 veillard 3116: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
1.169 daniel 3117: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3118: ctxt->sax->error(ctxt->userData,
3119: "Entity value required\n");
3120: ctxt->wellFormed = 0;
1.180 daniel 3121: ctxt->disableSAX = 1;
1.169 daniel 3122: }
1.193 daniel 3123: if (URI) {
3124: xmlURIPtr uri;
3125:
3126: uri = xmlParseURI((const char *)URI);
3127: if (uri == NULL) {
1.230 veillard 3128: ctxt->errNo = XML_ERR_INVALID_URI;
1.193 daniel 3129: if ((ctxt->sax != NULL) &&
3130: (!ctxt->disableSAX) &&
3131: (ctxt->sax->error != NULL))
3132: ctxt->sax->error(ctxt->userData,
3133: "Invalid URI: %s\n", URI);
3134: ctxt->wellFormed = 0;
3135: } else {
3136: if (uri->fragment != NULL) {
1.230 veillard 3137: ctxt->errNo = XML_ERR_URI_FRAGMENT;
1.193 daniel 3138: if ((ctxt->sax != NULL) &&
3139: (!ctxt->disableSAX) &&
3140: (ctxt->sax->error != NULL))
3141: ctxt->sax->error(ctxt->userData,
3142: "Fragment not allowed: %s\n", URI);
3143: ctxt->wellFormed = 0;
3144: }
3145: xmlFreeURI(uri);
3146: }
3147: }
1.152 daniel 3148: if ((RAW != '>') && (!IS_BLANK(CUR))) {
1.230 veillard 3149: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3150: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3151: ctxt->sax->error(ctxt->userData,
1.59 daniel 3152: "Space required before 'NDATA'\n");
3153: ctxt->wellFormed = 0;
1.180 daniel 3154: ctxt->disableSAX = 1;
1.59 daniel 3155: }
1.42 daniel 3156: SKIP_BLANKS;
1.152 daniel 3157: if ((RAW == 'N') && (NXT(1) == 'D') &&
1.40 daniel 3158: (NXT(2) == 'A') && (NXT(3) == 'T') &&
3159: (NXT(4) == 'A')) {
3160: SKIP(5);
1.59 daniel 3161: if (!IS_BLANK(CUR)) {
1.230 veillard 3162: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3163: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3164: ctxt->sax->error(ctxt->userData,
1.59 daniel 3165: "Space required after 'NDATA'\n");
3166: ctxt->wellFormed = 0;
1.180 daniel 3167: ctxt->disableSAX = 1;
1.59 daniel 3168: }
1.42 daniel 3169: SKIP_BLANKS;
1.24 daniel 3170: ndata = xmlParseName(ctxt);
1.171 daniel 3171: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.116 daniel 3172: (ctxt->sax->unparsedEntityDecl != NULL))
3173: ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
1.39 daniel 3174: literal, URI, ndata);
3175: } else {
1.171 daniel 3176: if ((ctxt->sax != NULL) &&
3177: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3178: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3179: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3180: literal, URI, NULL);
1.24 daniel 3181: }
3182: }
3183: }
1.42 daniel 3184: SKIP_BLANKS;
1.152 daniel 3185: if (RAW != '>') {
1.230 veillard 3186: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.55 daniel 3187: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3188: ctxt->sax->error(ctxt->userData,
1.31 daniel 3189: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.59 daniel 3190: ctxt->wellFormed = 0;
1.180 daniel 3191: ctxt->disableSAX = 1;
1.187 daniel 3192: } else {
3193: if (input != ctxt->input) {
1.230 veillard 3194: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 3195: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3196: ctxt->sax->error(ctxt->userData,
3197: "Entity declaration doesn't start and stop in the same entity\n");
3198: ctxt->wellFormed = 0;
3199: ctxt->disableSAX = 1;
3200: }
1.40 daniel 3201: NEXT;
1.187 daniel 3202: }
1.78 daniel 3203: if (orig != NULL) {
3204: /*
1.98 daniel 3205: * Ugly mechanism to save the raw entity value.
1.78 daniel 3206: */
3207: xmlEntityPtr cur = NULL;
3208:
1.98 daniel 3209: if (isParameter) {
3210: if ((ctxt->sax != NULL) &&
3211: (ctxt->sax->getParameterEntity != NULL))
1.120 daniel 3212: cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.98 daniel 3213: } else {
3214: if ((ctxt->sax != NULL) &&
3215: (ctxt->sax->getEntity != NULL))
1.120 daniel 3216: cur = ctxt->sax->getEntity(ctxt->userData, name);
1.98 daniel 3217: }
3218: if (cur != NULL) {
3219: if (cur->orig != NULL)
1.119 daniel 3220: xmlFree(orig);
1.98 daniel 3221: else
3222: cur->orig = orig;
3223: } else
1.119 daniel 3224: xmlFree(orig);
1.78 daniel 3225: }
1.119 daniel 3226: if (name != NULL) xmlFree(name);
3227: if (value != NULL) xmlFree(value);
3228: if (URI != NULL) xmlFree(URI);
3229: if (literal != NULL) xmlFree(literal);
3230: if (ndata != NULL) xmlFree(ndata);
1.22 daniel 3231: }
3232: }
3233:
1.50 daniel 3234: /**
1.59 daniel 3235: * xmlParseDefaultDecl:
3236: * @ctxt: an XML parser context
3237: * @value: Receive a possible fixed default value for the attribute
3238: *
3239: * Parse an attribute default declaration
3240: *
3241: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3242: *
1.99 daniel 3243: * [ VC: Required Attribute ]
1.117 daniel 3244: * if the default declaration is the keyword #REQUIRED, then the
3245: * attribute must be specified for all elements of the type in the
3246: * attribute-list declaration.
1.99 daniel 3247: *
3248: * [ VC: Attribute Default Legal ]
1.102 daniel 3249: * The declared default value must meet the lexical constraints of
3250: * the declared attribute type c.f. xmlValidateAttributeDecl()
1.99 daniel 3251: *
3252: * [ VC: Fixed Attribute Default ]
1.117 daniel 3253: * if an attribute has a default value declared with the #FIXED
3254: * keyword, instances of that attribute must match the default value.
1.99 daniel 3255: *
3256: * [ WFC: No < in Attribute Values ]
3257: * handled in xmlParseAttValue()
3258: *
1.59 daniel 3259: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3260: * or XML_ATTRIBUTE_FIXED.
3261: */
3262:
3263: int
1.123 daniel 3264: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
1.59 daniel 3265: int val;
1.123 daniel 3266: xmlChar *ret;
1.59 daniel 3267:
3268: *value = NULL;
1.152 daniel 3269: if ((RAW == '#') && (NXT(1) == 'R') &&
1.59 daniel 3270: (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3271: (NXT(4) == 'U') && (NXT(5) == 'I') &&
3272: (NXT(6) == 'R') && (NXT(7) == 'E') &&
3273: (NXT(8) == 'D')) {
3274: SKIP(9);
3275: return(XML_ATTRIBUTE_REQUIRED);
3276: }
1.152 daniel 3277: if ((RAW == '#') && (NXT(1) == 'I') &&
1.59 daniel 3278: (NXT(2) == 'M') && (NXT(3) == 'P') &&
3279: (NXT(4) == 'L') && (NXT(5) == 'I') &&
3280: (NXT(6) == 'E') && (NXT(7) == 'D')) {
3281: SKIP(8);
3282: return(XML_ATTRIBUTE_IMPLIED);
3283: }
3284: val = XML_ATTRIBUTE_NONE;
1.152 daniel 3285: if ((RAW == '#') && (NXT(1) == 'F') &&
1.59 daniel 3286: (NXT(2) == 'I') && (NXT(3) == 'X') &&
3287: (NXT(4) == 'E') && (NXT(5) == 'D')) {
3288: SKIP(6);
3289: val = XML_ATTRIBUTE_FIXED;
3290: if (!IS_BLANK(CUR)) {
1.230 veillard 3291: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3292: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3293: ctxt->sax->error(ctxt->userData,
3294: "Space required after '#FIXED'\n");
1.59 daniel 3295: ctxt->wellFormed = 0;
1.180 daniel 3296: ctxt->disableSAX = 1;
1.59 daniel 3297: }
3298: SKIP_BLANKS;
3299: }
3300: ret = xmlParseAttValue(ctxt);
1.96 daniel 3301: ctxt->instate = XML_PARSER_DTD;
1.59 daniel 3302: if (ret == NULL) {
3303: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3304: ctxt->sax->error(ctxt->userData,
1.59 daniel 3305: "Attribute default value declaration error\n");
3306: ctxt->wellFormed = 0;
1.180 daniel 3307: ctxt->disableSAX = 1;
1.59 daniel 3308: } else
3309: *value = ret;
3310: return(val);
3311: }
3312:
3313: /**
1.66 daniel 3314: * xmlParseNotationType:
3315: * @ctxt: an XML parser context
3316: *
3317: * parse an Notation attribute type.
3318: *
1.99 daniel 3319: * Note: the leading 'NOTATION' S part has already being parsed...
3320: *
1.66 daniel 3321: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3322: *
1.99 daniel 3323: * [ VC: Notation Attributes ]
1.117 daniel 3324: * Values of this type must match one of the notation names included
1.99 daniel 3325: * in the declaration; all notation names in the declaration must be declared.
1.66 daniel 3326: *
3327: * Returns: the notation attribute tree built while parsing
3328: */
3329:
3330: xmlEnumerationPtr
3331: xmlParseNotationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 3332: xmlChar *name;
1.66 daniel 3333: xmlEnumerationPtr ret = NULL, last = NULL, cur;
3334:
1.152 daniel 3335: if (RAW != '(') {
1.230 veillard 3336: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.66 daniel 3337: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3338: ctxt->sax->error(ctxt->userData,
3339: "'(' required to start 'NOTATION'\n");
1.66 daniel 3340: ctxt->wellFormed = 0;
1.180 daniel 3341: ctxt->disableSAX = 1;
1.66 daniel 3342: return(NULL);
3343: }
1.91 daniel 3344: SHRINK;
1.66 daniel 3345: do {
3346: NEXT;
3347: SKIP_BLANKS;
3348: name = xmlParseName(ctxt);
3349: if (name == NULL) {
1.230 veillard 3350: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.66 daniel 3351: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3352: ctxt->sax->error(ctxt->userData,
1.66 daniel 3353: "Name expected in NOTATION declaration\n");
3354: ctxt->wellFormed = 0;
1.180 daniel 3355: ctxt->disableSAX = 1;
1.66 daniel 3356: return(ret);
3357: }
3358: cur = xmlCreateEnumeration(name);
1.119 daniel 3359: xmlFree(name);
1.66 daniel 3360: if (cur == NULL) return(ret);
3361: if (last == NULL) ret = last = cur;
3362: else {
3363: last->next = cur;
3364: last = cur;
3365: }
3366: SKIP_BLANKS;
1.152 daniel 3367: } while (RAW == '|');
3368: if (RAW != ')') {
1.230 veillard 3369: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.66 daniel 3370: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3371: ctxt->sax->error(ctxt->userData,
1.66 daniel 3372: "')' required to finish NOTATION declaration\n");
3373: ctxt->wellFormed = 0;
1.180 daniel 3374: ctxt->disableSAX = 1;
1.170 daniel 3375: if ((last != NULL) && (last != ret))
3376: xmlFreeEnumeration(last);
1.66 daniel 3377: return(ret);
3378: }
3379: NEXT;
3380: return(ret);
3381: }
3382:
3383: /**
3384: * xmlParseEnumerationType:
3385: * @ctxt: an XML parser context
3386: *
3387: * parse an Enumeration attribute type.
3388: *
3389: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3390: *
1.99 daniel 3391: * [ VC: Enumeration ]
1.117 daniel 3392: * Values of this type must match one of the Nmtoken tokens in
1.99 daniel 3393: * the declaration
3394: *
1.66 daniel 3395: * Returns: the enumeration attribute tree built while parsing
3396: */
3397:
3398: xmlEnumerationPtr
3399: xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 3400: xmlChar *name;
1.66 daniel 3401: xmlEnumerationPtr ret = NULL, last = NULL, cur;
3402:
1.152 daniel 3403: if (RAW != '(') {
1.230 veillard 3404: ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
1.66 daniel 3405: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3406: ctxt->sax->error(ctxt->userData,
1.66 daniel 3407: "'(' required to start ATTLIST enumeration\n");
3408: ctxt->wellFormed = 0;
1.180 daniel 3409: ctxt->disableSAX = 1;
1.66 daniel 3410: return(NULL);
3411: }
1.91 daniel 3412: SHRINK;
1.66 daniel 3413: do {
3414: NEXT;
3415: SKIP_BLANKS;
3416: name = xmlParseNmtoken(ctxt);
3417: if (name == NULL) {
1.230 veillard 3418: ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
1.66 daniel 3419: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3420: ctxt->sax->error(ctxt->userData,
1.66 daniel 3421: "NmToken expected in ATTLIST enumeration\n");
3422: ctxt->wellFormed = 0;
1.180 daniel 3423: ctxt->disableSAX = 1;
1.66 daniel 3424: return(ret);
3425: }
3426: cur = xmlCreateEnumeration(name);
1.119 daniel 3427: xmlFree(name);
1.66 daniel 3428: if (cur == NULL) return(ret);
3429: if (last == NULL) ret = last = cur;
3430: else {
3431: last->next = cur;
3432: last = cur;
3433: }
3434: SKIP_BLANKS;
1.152 daniel 3435: } while (RAW == '|');
3436: if (RAW != ')') {
1.230 veillard 3437: ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
1.66 daniel 3438: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3439: ctxt->sax->error(ctxt->userData,
1.66 daniel 3440: "')' required to finish ATTLIST enumeration\n");
3441: ctxt->wellFormed = 0;
1.180 daniel 3442: ctxt->disableSAX = 1;
1.66 daniel 3443: return(ret);
3444: }
3445: NEXT;
3446: return(ret);
3447: }
3448:
3449: /**
1.50 daniel 3450: * xmlParseEnumeratedType:
3451: * @ctxt: an XML parser context
1.66 daniel 3452: * @tree: the enumeration tree built while parsing
1.50 daniel 3453: *
1.66 daniel 3454: * parse an Enumerated attribute type.
1.22 daniel 3455: *
3456: * [57] EnumeratedType ::= NotationType | Enumeration
3457: *
3458: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3459: *
1.50 daniel 3460: *
1.66 daniel 3461: * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
1.22 daniel 3462: */
3463:
1.66 daniel 3464: int
3465: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.152 daniel 3466: if ((RAW == 'N') && (NXT(1) == 'O') &&
1.66 daniel 3467: (NXT(2) == 'T') && (NXT(3) == 'A') &&
3468: (NXT(4) == 'T') && (NXT(5) == 'I') &&
3469: (NXT(6) == 'O') && (NXT(7) == 'N')) {
3470: SKIP(8);
3471: if (!IS_BLANK(CUR)) {
1.230 veillard 3472: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.66 daniel 3473: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3474: ctxt->sax->error(ctxt->userData,
3475: "Space required after 'NOTATION'\n");
1.66 daniel 3476: ctxt->wellFormed = 0;
1.180 daniel 3477: ctxt->disableSAX = 1;
1.66 daniel 3478: return(0);
3479: }
3480: SKIP_BLANKS;
3481: *tree = xmlParseNotationType(ctxt);
3482: if (*tree == NULL) return(0);
3483: return(XML_ATTRIBUTE_NOTATION);
3484: }
3485: *tree = xmlParseEnumerationType(ctxt);
3486: if (*tree == NULL) return(0);
3487: return(XML_ATTRIBUTE_ENUMERATION);
1.22 daniel 3488: }
3489:
1.50 daniel 3490: /**
3491: * xmlParseAttributeType:
3492: * @ctxt: an XML parser context
1.66 daniel 3493: * @tree: the enumeration tree built while parsing
1.50 daniel 3494: *
1.59 daniel 3495: * parse the Attribute list def for an element
1.22 daniel 3496: *
3497: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3498: *
3499: * [55] StringType ::= 'CDATA'
3500: *
3501: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3502: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1.50 daniel 3503: *
1.102 daniel 3504: * Validity constraints for attribute values syntax are checked in
3505: * xmlValidateAttributeValue()
3506: *
1.99 daniel 3507: * [ VC: ID ]
1.117 daniel 3508: * Values of type ID must match the Name production. A name must not
1.99 daniel 3509: * appear more than once in an XML document as a value of this type;
3510: * i.e., ID values must uniquely identify the elements which bear them.
3511: *
3512: * [ VC: One ID per Element Type ]
1.117 daniel 3513: * No element type may have more than one ID attribute specified.
1.99 daniel 3514: *
3515: * [ VC: ID Attribute Default ]
1.117 daniel 3516: * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
1.99 daniel 3517: *
3518: * [ VC: IDREF ]
1.102 daniel 3519: * Values of type IDREF must match the Name production, and values
1.140 daniel 3520: * of type IDREFS must match Names; each IDREF Name must match the value
1.117 daniel 3521: * of an ID attribute on some element in the XML document; i.e. IDREF
1.99 daniel 3522: * values must match the value of some ID attribute.
3523: *
3524: * [ VC: Entity Name ]
1.102 daniel 3525: * Values of type ENTITY must match the Name production, values
1.140 daniel 3526: * of type ENTITIES must match Names; each Entity Name must match the
1.117 daniel 3527: * name of an unparsed entity declared in the DTD.
1.99 daniel 3528: *
3529: * [ VC: Name Token ]
1.102 daniel 3530: * Values of type NMTOKEN must match the Nmtoken production; values
1.99 daniel 3531: * of type NMTOKENS must match Nmtokens.
3532: *
1.69 daniel 3533: * Returns the attribute type
1.22 daniel 3534: */
1.59 daniel 3535: int
1.66 daniel 3536: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.91 daniel 3537: SHRINK;
1.152 daniel 3538: if ((RAW == 'C') && (NXT(1) == 'D') &&
1.40 daniel 3539: (NXT(2) == 'A') && (NXT(3) == 'T') &&
3540: (NXT(4) == 'A')) {
3541: SKIP(5);
1.66 daniel 3542: return(XML_ATTRIBUTE_CDATA);
1.152 daniel 3543: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.40 daniel 3544: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.97 daniel 3545: (NXT(4) == 'F') && (NXT(5) == 'S')) {
3546: SKIP(6);
3547: return(XML_ATTRIBUTE_IDREFS);
1.152 daniel 3548: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.97 daniel 3549: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.40 daniel 3550: (NXT(4) == 'F')) {
3551: SKIP(5);
1.59 daniel 3552: return(XML_ATTRIBUTE_IDREF);
1.152 daniel 3553: } else if ((RAW == 'I') && (NXT(1) == 'D')) {
1.66 daniel 3554: SKIP(2);
3555: return(XML_ATTRIBUTE_ID);
1.152 daniel 3556: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 3557: (NXT(2) == 'T') && (NXT(3) == 'I') &&
3558: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3559: SKIP(6);
1.59 daniel 3560: return(XML_ATTRIBUTE_ENTITY);
1.152 daniel 3561: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 3562: (NXT(2) == 'T') && (NXT(3) == 'I') &&
3563: (NXT(4) == 'T') && (NXT(5) == 'I') &&
3564: (NXT(6) == 'E') && (NXT(7) == 'S')) {
3565: SKIP(8);
1.59 daniel 3566: return(XML_ATTRIBUTE_ENTITIES);
1.152 daniel 3567: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.40 daniel 3568: (NXT(2) == 'T') && (NXT(3) == 'O') &&
3569: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.66 daniel 3570: (NXT(6) == 'N') && (NXT(7) == 'S')) {
3571: SKIP(8);
3572: return(XML_ATTRIBUTE_NMTOKENS);
1.152 daniel 3573: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.66 daniel 3574: (NXT(2) == 'T') && (NXT(3) == 'O') &&
3575: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.40 daniel 3576: (NXT(6) == 'N')) {
3577: SKIP(7);
1.59 daniel 3578: return(XML_ATTRIBUTE_NMTOKEN);
1.22 daniel 3579: }
1.66 daniel 3580: return(xmlParseEnumeratedType(ctxt, tree));
1.22 daniel 3581: }
3582:
1.50 daniel 3583: /**
3584: * xmlParseAttributeListDecl:
3585: * @ctxt: an XML parser context
3586: *
3587: * : parse the Attribute list def for an element
1.22 daniel 3588: *
3589: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3590: *
3591: * [53] AttDef ::= S Name S AttType S DefaultDecl
1.50 daniel 3592: *
1.22 daniel 3593: */
1.55 daniel 3594: void
3595: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 3596: xmlChar *elemName;
3597: xmlChar *attrName;
1.103 daniel 3598: xmlEnumerationPtr tree;
1.22 daniel 3599:
1.152 daniel 3600: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 3601: (NXT(2) == 'A') && (NXT(3) == 'T') &&
3602: (NXT(4) == 'T') && (NXT(5) == 'L') &&
3603: (NXT(6) == 'I') && (NXT(7) == 'S') &&
1.59 daniel 3604: (NXT(8) == 'T')) {
1.187 daniel 3605: xmlParserInputPtr input = ctxt->input;
3606:
1.40 daniel 3607: SKIP(9);
1.59 daniel 3608: if (!IS_BLANK(CUR)) {
1.230 veillard 3609: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3610: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3611: ctxt->sax->error(ctxt->userData,
3612: "Space required after '<!ATTLIST'\n");
1.59 daniel 3613: ctxt->wellFormed = 0;
1.180 daniel 3614: ctxt->disableSAX = 1;
1.59 daniel 3615: }
1.42 daniel 3616: SKIP_BLANKS;
1.59 daniel 3617: elemName = xmlParseName(ctxt);
3618: if (elemName == NULL) {
1.230 veillard 3619: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 3620: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3621: ctxt->sax->error(ctxt->userData,
3622: "ATTLIST: no name for Element\n");
1.59 daniel 3623: ctxt->wellFormed = 0;
1.180 daniel 3624: ctxt->disableSAX = 1;
1.22 daniel 3625: return;
3626: }
1.42 daniel 3627: SKIP_BLANKS;
1.220 veillard 3628: GROW;
1.152 daniel 3629: while (RAW != '>') {
1.123 daniel 3630: const xmlChar *check = CUR_PTR;
1.59 daniel 3631: int type;
3632: int def;
1.123 daniel 3633: xmlChar *defaultValue = NULL;
1.59 daniel 3634:
1.220 veillard 3635: GROW;
1.103 daniel 3636: tree = NULL;
1.59 daniel 3637: attrName = xmlParseName(ctxt);
3638: if (attrName == NULL) {
1.230 veillard 3639: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 3640: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3641: ctxt->sax->error(ctxt->userData,
3642: "ATTLIST: no name for Attribute\n");
1.59 daniel 3643: ctxt->wellFormed = 0;
1.180 daniel 3644: ctxt->disableSAX = 1;
1.59 daniel 3645: break;
3646: }
1.97 daniel 3647: GROW;
1.59 daniel 3648: if (!IS_BLANK(CUR)) {
1.230 veillard 3649: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3650: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3651: ctxt->sax->error(ctxt->userData,
1.59 daniel 3652: "Space required after the attribute name\n");
3653: ctxt->wellFormed = 0;
1.180 daniel 3654: ctxt->disableSAX = 1;
1.170 daniel 3655: if (attrName != NULL)
3656: xmlFree(attrName);
3657: if (defaultValue != NULL)
3658: xmlFree(defaultValue);
1.59 daniel 3659: break;
3660: }
3661: SKIP_BLANKS;
3662:
1.66 daniel 3663: type = xmlParseAttributeType(ctxt, &tree);
1.170 daniel 3664: if (type <= 0) {
3665: if (attrName != NULL)
3666: xmlFree(attrName);
3667: if (defaultValue != NULL)
3668: xmlFree(defaultValue);
3669: break;
3670: }
1.22 daniel 3671:
1.97 daniel 3672: GROW;
1.59 daniel 3673: if (!IS_BLANK(CUR)) {
1.230 veillard 3674: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3675: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3676: ctxt->sax->error(ctxt->userData,
1.59 daniel 3677: "Space required after the attribute type\n");
3678: ctxt->wellFormed = 0;
1.180 daniel 3679: ctxt->disableSAX = 1;
1.170 daniel 3680: if (attrName != NULL)
3681: xmlFree(attrName);
3682: if (defaultValue != NULL)
3683: xmlFree(defaultValue);
3684: if (tree != NULL)
3685: xmlFreeEnumeration(tree);
1.59 daniel 3686: break;
3687: }
1.42 daniel 3688: SKIP_BLANKS;
1.59 daniel 3689:
3690: def = xmlParseDefaultDecl(ctxt, &defaultValue);
1.170 daniel 3691: if (def <= 0) {
3692: if (attrName != NULL)
3693: xmlFree(attrName);
3694: if (defaultValue != NULL)
3695: xmlFree(defaultValue);
3696: if (tree != NULL)
3697: xmlFreeEnumeration(tree);
3698: break;
3699: }
1.59 daniel 3700:
1.97 daniel 3701: GROW;
1.152 daniel 3702: if (RAW != '>') {
1.59 daniel 3703: if (!IS_BLANK(CUR)) {
1.230 veillard 3704: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3705: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3706: ctxt->sax->error(ctxt->userData,
1.59 daniel 3707: "Space required after the attribute default value\n");
3708: ctxt->wellFormed = 0;
1.180 daniel 3709: ctxt->disableSAX = 1;
1.170 daniel 3710: if (attrName != NULL)
3711: xmlFree(attrName);
3712: if (defaultValue != NULL)
3713: xmlFree(defaultValue);
3714: if (tree != NULL)
3715: xmlFreeEnumeration(tree);
1.59 daniel 3716: break;
3717: }
3718: SKIP_BLANKS;
3719: }
1.40 daniel 3720: if (check == CUR_PTR) {
1.230 veillard 3721: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 3722: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3723: ctxt->sax->error(ctxt->userData,
1.59 daniel 3724: "xmlParseAttributeListDecl: detected internal error\n");
1.170 daniel 3725: if (attrName != NULL)
3726: xmlFree(attrName);
3727: if (defaultValue != NULL)
3728: xmlFree(defaultValue);
3729: if (tree != NULL)
3730: xmlFreeEnumeration(tree);
1.22 daniel 3731: break;
3732: }
1.171 daniel 3733: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3734: (ctxt->sax->attributeDecl != NULL))
1.74 daniel 3735: ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
1.66 daniel 3736: type, def, defaultValue, tree);
1.59 daniel 3737: if (attrName != NULL)
1.119 daniel 3738: xmlFree(attrName);
1.59 daniel 3739: if (defaultValue != NULL)
1.119 daniel 3740: xmlFree(defaultValue);
1.97 daniel 3741: GROW;
1.22 daniel 3742: }
1.187 daniel 3743: if (RAW == '>') {
3744: if (input != ctxt->input) {
1.230 veillard 3745: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 3746: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3747: ctxt->sax->error(ctxt->userData,
3748: "Attribute list declaration doesn't start and stop in the same entity\n");
3749: ctxt->wellFormed = 0;
3750: ctxt->disableSAX = 1;
3751: }
1.40 daniel 3752: NEXT;
1.187 daniel 3753: }
1.22 daniel 3754:
1.119 daniel 3755: xmlFree(elemName);
1.22 daniel 3756: }
3757: }
3758:
1.50 daniel 3759: /**
1.61 daniel 3760: * xmlParseElementMixedContentDecl:
3761: * @ctxt: an XML parser context
3762: *
3763: * parse the declaration for a Mixed Element content
3764: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3765: *
3766: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
3767: * '(' S? '#PCDATA' S? ')'
3768: *
1.99 daniel 3769: * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
3770: *
3771: * [ VC: No Duplicate Types ]
1.117 daniel 3772: * The same name must not appear more than once in a single
3773: * mixed-content declaration.
1.99 daniel 3774: *
1.61 daniel 3775: * returns: the list of the xmlElementContentPtr describing the element choices
3776: */
3777: xmlElementContentPtr
1.62 daniel 3778: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
1.64 daniel 3779: xmlElementContentPtr ret = NULL, cur = NULL, n;
1.123 daniel 3780: xmlChar *elem = NULL;
1.61 daniel 3781:
1.97 daniel 3782: GROW;
1.152 daniel 3783: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 3784: (NXT(2) == 'C') && (NXT(3) == 'D') &&
3785: (NXT(4) == 'A') && (NXT(5) == 'T') &&
3786: (NXT(6) == 'A')) {
3787: SKIP(7);
3788: SKIP_BLANKS;
1.91 daniel 3789: SHRINK;
1.152 daniel 3790: if (RAW == ')') {
1.187 daniel 3791: ctxt->entity = ctxt->input;
1.63 daniel 3792: NEXT;
3793: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
1.152 daniel 3794: if (RAW == '*') {
1.136 daniel 3795: ret->ocur = XML_ELEMENT_CONTENT_MULT;
3796: NEXT;
3797: }
1.63 daniel 3798: return(ret);
3799: }
1.152 daniel 3800: if ((RAW == '(') || (RAW == '|')) {
1.61 daniel 3801: ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3802: if (ret == NULL) return(NULL);
1.99 daniel 3803: }
1.152 daniel 3804: while (RAW == '|') {
1.64 daniel 3805: NEXT;
1.61 daniel 3806: if (elem == NULL) {
3807: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3808: if (ret == NULL) return(NULL);
3809: ret->c1 = cur;
1.64 daniel 3810: cur = ret;
1.61 daniel 3811: } else {
1.64 daniel 3812: n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3813: if (n == NULL) return(NULL);
3814: n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
3815: cur->c2 = n;
3816: cur = n;
1.119 daniel 3817: xmlFree(elem);
1.61 daniel 3818: }
3819: SKIP_BLANKS;
3820: elem = xmlParseName(ctxt);
3821: if (elem == NULL) {
1.230 veillard 3822: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.61 daniel 3823: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3824: ctxt->sax->error(ctxt->userData,
1.61 daniel 3825: "xmlParseElementMixedContentDecl : Name expected\n");
3826: ctxt->wellFormed = 0;
1.180 daniel 3827: ctxt->disableSAX = 1;
1.61 daniel 3828: xmlFreeElementContent(cur);
3829: return(NULL);
3830: }
3831: SKIP_BLANKS;
1.97 daniel 3832: GROW;
1.61 daniel 3833: }
1.152 daniel 3834: if ((RAW == ')') && (NXT(1) == '*')) {
1.66 daniel 3835: if (elem != NULL) {
1.61 daniel 3836: cur->c2 = xmlNewElementContent(elem,
3837: XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 3838: xmlFree(elem);
1.66 daniel 3839: }
1.65 daniel 3840: ret->ocur = XML_ELEMENT_CONTENT_MULT;
1.187 daniel 3841: ctxt->entity = ctxt->input;
1.64 daniel 3842: SKIP(2);
1.61 daniel 3843: } else {
1.119 daniel 3844: if (elem != NULL) xmlFree(elem);
1.230 veillard 3845: xmlFreeElementContent(ret);
3846: ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
1.61 daniel 3847: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3848: ctxt->sax->error(ctxt->userData,
1.63 daniel 3849: "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
1.61 daniel 3850: ctxt->wellFormed = 0;
1.180 daniel 3851: ctxt->disableSAX = 1;
1.61 daniel 3852: return(NULL);
3853: }
3854:
3855: } else {
1.230 veillard 3856: ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
1.61 daniel 3857: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3858: ctxt->sax->error(ctxt->userData,
1.61 daniel 3859: "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
3860: ctxt->wellFormed = 0;
1.180 daniel 3861: ctxt->disableSAX = 1;
1.61 daniel 3862: }
3863: return(ret);
3864: }
3865:
3866: /**
3867: * xmlParseElementChildrenContentDecl:
1.50 daniel 3868: * @ctxt: an XML parser context
3869: *
1.61 daniel 3870: * parse the declaration for a Mixed Element content
3871: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
1.22 daniel 3872: *
1.61 daniel 3873: *
1.22 daniel 3874: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
3875: *
3876: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
3877: *
3878: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
3879: *
3880: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
3881: *
1.99 daniel 3882: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
3883: * TODO Parameter-entity replacement text must be properly nested
3884: * with parenthetized groups. That is to say, if either of the
3885: * opening or closing parentheses in a choice, seq, or Mixed
3886: * construct is contained in the replacement text for a parameter
3887: * entity, both must be contained in the same replacement text. For
3888: * interoperability, if a parameter-entity reference appears in a
3889: * choice, seq, or Mixed construct, its replacement text should not
3890: * be empty, and neither the first nor last non-blank character of
3891: * the replacement text should be a connector (| or ,).
3892: *
1.62 daniel 3893: * returns: the tree of xmlElementContentPtr describing the element
1.61 daniel 3894: * hierarchy.
3895: */
3896: xmlElementContentPtr
1.246 veillard 3897: #ifdef VMS
3898: xmlParseElementChildrenContentD
3899: #else
3900: xmlParseElementChildrenContentDecl
3901: #endif
3902: (xmlParserCtxtPtr ctxt) {
1.63 daniel 3903: xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
1.123 daniel 3904: xmlChar *elem;
3905: xmlChar type = 0;
1.62 daniel 3906:
3907: SKIP_BLANKS;
1.94 daniel 3908: GROW;
1.152 daniel 3909: if (RAW == '(') {
1.63 daniel 3910: /* Recurse on first child */
1.62 daniel 3911: NEXT;
3912: SKIP_BLANKS;
3913: cur = ret = xmlParseElementChildrenContentDecl(ctxt);
3914: SKIP_BLANKS;
1.101 daniel 3915: GROW;
1.62 daniel 3916: } else {
3917: elem = xmlParseName(ctxt);
3918: if (elem == NULL) {
1.230 veillard 3919: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 3920: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3921: ctxt->sax->error(ctxt->userData,
1.62 daniel 3922: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
3923: ctxt->wellFormed = 0;
1.180 daniel 3924: ctxt->disableSAX = 1;
1.62 daniel 3925: return(NULL);
3926: }
3927: cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.101 daniel 3928: GROW;
1.152 daniel 3929: if (RAW == '?') {
1.104 daniel 3930: cur->ocur = XML_ELEMENT_CONTENT_OPT;
1.62 daniel 3931: NEXT;
1.152 daniel 3932: } else if (RAW == '*') {
1.104 daniel 3933: cur->ocur = XML_ELEMENT_CONTENT_MULT;
1.62 daniel 3934: NEXT;
1.152 daniel 3935: } else if (RAW == '+') {
1.104 daniel 3936: cur->ocur = XML_ELEMENT_CONTENT_PLUS;
1.62 daniel 3937: NEXT;
3938: } else {
1.104 daniel 3939: cur->ocur = XML_ELEMENT_CONTENT_ONCE;
1.62 daniel 3940: }
1.119 daniel 3941: xmlFree(elem);
1.101 daniel 3942: GROW;
1.62 daniel 3943: }
3944: SKIP_BLANKS;
1.91 daniel 3945: SHRINK;
1.152 daniel 3946: while (RAW != ')') {
1.63 daniel 3947: /*
3948: * Each loop we parse one separator and one element.
3949: */
1.152 daniel 3950: if (RAW == ',') {
1.62 daniel 3951: if (type == 0) type = CUR;
3952:
3953: /*
3954: * Detect "Name | Name , Name" error
3955: */
3956: else if (type != CUR) {
1.230 veillard 3957: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 3958: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3959: ctxt->sax->error(ctxt->userData,
1.62 daniel 3960: "xmlParseElementChildrenContentDecl : '%c' expected\n",
3961: type);
3962: ctxt->wellFormed = 0;
1.180 daniel 3963: ctxt->disableSAX = 1;
1.170 daniel 3964: if ((op != NULL) && (op != ret))
3965: xmlFreeElementContent(op);
1.211 veillard 3966: if ((last != NULL) && (last != ret) &&
3967: (last != ret->c1) && (last != ret->c2))
1.170 daniel 3968: xmlFreeElementContent(last);
3969: if (ret != NULL)
3970: xmlFreeElementContent(ret);
1.62 daniel 3971: return(NULL);
3972: }
1.64 daniel 3973: NEXT;
1.62 daniel 3974:
1.63 daniel 3975: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
3976: if (op == NULL) {
3977: xmlFreeElementContent(ret);
3978: return(NULL);
3979: }
3980: if (last == NULL) {
3981: op->c1 = ret;
1.65 daniel 3982: ret = cur = op;
1.63 daniel 3983: } else {
3984: cur->c2 = op;
3985: op->c1 = last;
3986: cur =op;
1.65 daniel 3987: last = NULL;
1.63 daniel 3988: }
1.152 daniel 3989: } else if (RAW == '|') {
1.62 daniel 3990: if (type == 0) type = CUR;
3991:
3992: /*
1.63 daniel 3993: * Detect "Name , Name | Name" error
1.62 daniel 3994: */
3995: else if (type != CUR) {
1.230 veillard 3996: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 3997: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3998: ctxt->sax->error(ctxt->userData,
1.62 daniel 3999: "xmlParseElementChildrenContentDecl : '%c' expected\n",
4000: type);
4001: ctxt->wellFormed = 0;
1.180 daniel 4002: ctxt->disableSAX = 1;
1.211 veillard 4003: if ((op != NULL) && (op != ret) && (op != last))
1.170 daniel 4004: xmlFreeElementContent(op);
1.211 veillard 4005: if ((last != NULL) && (last != ret) &&
4006: (last != ret->c1) && (last != ret->c2))
1.170 daniel 4007: xmlFreeElementContent(last);
4008: if (ret != NULL)
4009: xmlFreeElementContent(ret);
1.62 daniel 4010: return(NULL);
4011: }
1.64 daniel 4012: NEXT;
1.62 daniel 4013:
1.63 daniel 4014: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4015: if (op == NULL) {
1.170 daniel 4016: if ((op != NULL) && (op != ret))
4017: xmlFreeElementContent(op);
1.211 veillard 4018: if ((last != NULL) && (last != ret) &&
4019: (last != ret->c1) && (last != ret->c2))
1.170 daniel 4020: xmlFreeElementContent(last);
4021: if (ret != NULL)
4022: xmlFreeElementContent(ret);
1.63 daniel 4023: return(NULL);
4024: }
4025: if (last == NULL) {
4026: op->c1 = ret;
1.65 daniel 4027: ret = cur = op;
1.63 daniel 4028: } else {
4029: cur->c2 = op;
4030: op->c1 = last;
4031: cur =op;
1.65 daniel 4032: last = NULL;
1.63 daniel 4033: }
1.62 daniel 4034: } else {
1.230 veillard 4035: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
1.62 daniel 4036: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4037: ctxt->sax->error(ctxt->userData,
1.62 daniel 4038: "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4039: ctxt->wellFormed = 0;
1.180 daniel 4040: ctxt->disableSAX = 1;
1.170 daniel 4041: if ((op != NULL) && (op != ret))
4042: xmlFreeElementContent(op);
1.211 veillard 4043: if ((last != NULL) && (last != ret) &&
4044: (last != ret->c1) && (last != ret->c2))
1.170 daniel 4045: xmlFreeElementContent(last);
4046: if (ret != NULL)
4047: xmlFreeElementContent(ret);
1.62 daniel 4048: return(NULL);
4049: }
1.101 daniel 4050: GROW;
1.62 daniel 4051: SKIP_BLANKS;
1.101 daniel 4052: GROW;
1.152 daniel 4053: if (RAW == '(') {
1.63 daniel 4054: /* Recurse on second child */
1.62 daniel 4055: NEXT;
4056: SKIP_BLANKS;
1.65 daniel 4057: last = xmlParseElementChildrenContentDecl(ctxt);
1.62 daniel 4058: SKIP_BLANKS;
4059: } else {
4060: elem = xmlParseName(ctxt);
4061: if (elem == NULL) {
1.230 veillard 4062: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 4063: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4064: ctxt->sax->error(ctxt->userData,
1.122 daniel 4065: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.62 daniel 4066: ctxt->wellFormed = 0;
1.180 daniel 4067: ctxt->disableSAX = 1;
1.170 daniel 4068: if ((op != NULL) && (op != ret))
4069: xmlFreeElementContent(op);
1.211 veillard 4070: if ((last != NULL) && (last != ret) &&
4071: (last != ret->c1) && (last != ret->c2))
1.170 daniel 4072: xmlFreeElementContent(last);
4073: if (ret != NULL)
4074: xmlFreeElementContent(ret);
1.62 daniel 4075: return(NULL);
4076: }
1.65 daniel 4077: last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 4078: xmlFree(elem);
1.152 daniel 4079: if (RAW == '?') {
1.105 daniel 4080: last->ocur = XML_ELEMENT_CONTENT_OPT;
4081: NEXT;
1.152 daniel 4082: } else if (RAW == '*') {
1.105 daniel 4083: last->ocur = XML_ELEMENT_CONTENT_MULT;
4084: NEXT;
1.152 daniel 4085: } else if (RAW == '+') {
1.105 daniel 4086: last->ocur = XML_ELEMENT_CONTENT_PLUS;
4087: NEXT;
4088: } else {
4089: last->ocur = XML_ELEMENT_CONTENT_ONCE;
4090: }
1.63 daniel 4091: }
4092: SKIP_BLANKS;
1.97 daniel 4093: GROW;
1.64 daniel 4094: }
1.65 daniel 4095: if ((cur != NULL) && (last != NULL)) {
4096: cur->c2 = last;
1.62 daniel 4097: }
1.187 daniel 4098: ctxt->entity = ctxt->input;
1.62 daniel 4099: NEXT;
1.152 daniel 4100: if (RAW == '?') {
1.62 daniel 4101: ret->ocur = XML_ELEMENT_CONTENT_OPT;
4102: NEXT;
1.152 daniel 4103: } else if (RAW == '*') {
1.62 daniel 4104: ret->ocur = XML_ELEMENT_CONTENT_MULT;
4105: NEXT;
1.152 daniel 4106: } else if (RAW == '+') {
1.62 daniel 4107: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
4108: NEXT;
4109: }
4110: return(ret);
1.61 daniel 4111: }
4112:
4113: /**
4114: * xmlParseElementContentDecl:
4115: * @ctxt: an XML parser context
4116: * @name: the name of the element being defined.
4117: * @result: the Element Content pointer will be stored here if any
1.22 daniel 4118: *
1.61 daniel 4119: * parse the declaration for an Element content either Mixed or Children,
4120: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4121: *
4122: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1.50 daniel 4123: *
1.61 daniel 4124: * returns: the type of element content XML_ELEMENT_TYPE_xxx
1.22 daniel 4125: */
4126:
1.61 daniel 4127: int
1.123 daniel 4128: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
1.61 daniel 4129: xmlElementContentPtr *result) {
4130:
4131: xmlElementContentPtr tree = NULL;
1.187 daniel 4132: xmlParserInputPtr input = ctxt->input;
1.61 daniel 4133: int res;
4134:
4135: *result = NULL;
4136:
1.152 daniel 4137: if (RAW != '(') {
1.230 veillard 4138: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.61 daniel 4139: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4140: ctxt->sax->error(ctxt->userData,
1.61 daniel 4141: "xmlParseElementContentDecl : '(' expected\n");
4142: ctxt->wellFormed = 0;
1.180 daniel 4143: ctxt->disableSAX = 1;
1.61 daniel 4144: return(-1);
4145: }
4146: NEXT;
1.97 daniel 4147: GROW;
1.61 daniel 4148: SKIP_BLANKS;
1.152 daniel 4149: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 4150: (NXT(2) == 'C') && (NXT(3) == 'D') &&
4151: (NXT(4) == 'A') && (NXT(5) == 'T') &&
4152: (NXT(6) == 'A')) {
1.62 daniel 4153: tree = xmlParseElementMixedContentDecl(ctxt);
1.61 daniel 4154: res = XML_ELEMENT_TYPE_MIXED;
4155: } else {
1.62 daniel 4156: tree = xmlParseElementChildrenContentDecl(ctxt);
1.61 daniel 4157: res = XML_ELEMENT_TYPE_ELEMENT;
4158: }
1.187 daniel 4159: if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
1.230 veillard 4160: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 4161: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4162: ctxt->sax->error(ctxt->userData,
4163: "Element content declaration doesn't start and stop in the same entity\n");
4164: ctxt->wellFormed = 0;
4165: ctxt->disableSAX = 1;
4166: }
1.61 daniel 4167: SKIP_BLANKS;
1.63 daniel 4168: *result = tree;
1.61 daniel 4169: return(res);
1.22 daniel 4170: }
4171:
1.50 daniel 4172: /**
4173: * xmlParseElementDecl:
4174: * @ctxt: an XML parser context
4175: *
4176: * parse an Element declaration.
1.22 daniel 4177: *
4178: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4179: *
1.99 daniel 4180: * [ VC: Unique Element Type Declaration ]
1.117 daniel 4181: * No element type may be declared more than once
1.69 daniel 4182: *
4183: * Returns the type of the element, or -1 in case of error
1.22 daniel 4184: */
1.59 daniel 4185: int
1.55 daniel 4186: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4187: xmlChar *name;
1.59 daniel 4188: int ret = -1;
1.61 daniel 4189: xmlElementContentPtr content = NULL;
1.22 daniel 4190:
1.97 daniel 4191: GROW;
1.152 daniel 4192: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4193: (NXT(2) == 'E') && (NXT(3) == 'L') &&
4194: (NXT(4) == 'E') && (NXT(5) == 'M') &&
4195: (NXT(6) == 'E') && (NXT(7) == 'N') &&
1.59 daniel 4196: (NXT(8) == 'T')) {
1.187 daniel 4197: xmlParserInputPtr input = ctxt->input;
4198:
1.40 daniel 4199: SKIP(9);
1.59 daniel 4200: if (!IS_BLANK(CUR)) {
1.230 veillard 4201: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4202: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4203: ctxt->sax->error(ctxt->userData,
1.59 daniel 4204: "Space required after 'ELEMENT'\n");
4205: ctxt->wellFormed = 0;
1.180 daniel 4206: ctxt->disableSAX = 1;
1.59 daniel 4207: }
1.42 daniel 4208: SKIP_BLANKS;
1.22 daniel 4209: name = xmlParseName(ctxt);
4210: if (name == NULL) {
1.230 veillard 4211: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 4212: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4213: ctxt->sax->error(ctxt->userData,
1.59 daniel 4214: "xmlParseElementDecl: no name for Element\n");
4215: ctxt->wellFormed = 0;
1.180 daniel 4216: ctxt->disableSAX = 1;
1.59 daniel 4217: return(-1);
4218: }
4219: if (!IS_BLANK(CUR)) {
1.230 veillard 4220: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4221: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4222: ctxt->sax->error(ctxt->userData,
1.59 daniel 4223: "Space required after the element name\n");
4224: ctxt->wellFormed = 0;
1.180 daniel 4225: ctxt->disableSAX = 1;
1.22 daniel 4226: }
1.42 daniel 4227: SKIP_BLANKS;
1.152 daniel 4228: if ((RAW == 'E') && (NXT(1) == 'M') &&
1.40 daniel 4229: (NXT(2) == 'P') && (NXT(3) == 'T') &&
4230: (NXT(4) == 'Y')) {
4231: SKIP(5);
1.22 daniel 4232: /*
4233: * Element must always be empty.
4234: */
1.59 daniel 4235: ret = XML_ELEMENT_TYPE_EMPTY;
1.152 daniel 4236: } else if ((RAW == 'A') && (NXT(1) == 'N') &&
1.40 daniel 4237: (NXT(2) == 'Y')) {
4238: SKIP(3);
1.22 daniel 4239: /*
4240: * Element is a generic container.
4241: */
1.59 daniel 4242: ret = XML_ELEMENT_TYPE_ANY;
1.152 daniel 4243: } else if (RAW == '(') {
1.61 daniel 4244: ret = xmlParseElementContentDecl(ctxt, name, &content);
1.22 daniel 4245: } else {
1.98 daniel 4246: /*
4247: * [ WFC: PEs in Internal Subset ] error handling.
4248: */
1.152 daniel 4249: if ((RAW == '%') && (ctxt->external == 0) &&
1.98 daniel 4250: (ctxt->inputNr == 1)) {
1.230 veillard 4251: ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
1.98 daniel 4252: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4253: ctxt->sax->error(ctxt->userData,
4254: "PEReference: forbidden within markup decl in internal subset\n");
4255: } else {
1.230 veillard 4256: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.98 daniel 4257: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4258: ctxt->sax->error(ctxt->userData,
4259: "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4260: }
1.61 daniel 4261: ctxt->wellFormed = 0;
1.180 daniel 4262: ctxt->disableSAX = 1;
1.119 daniel 4263: if (name != NULL) xmlFree(name);
1.61 daniel 4264: return(-1);
1.22 daniel 4265: }
1.142 daniel 4266:
4267: SKIP_BLANKS;
4268: /*
4269: * Pop-up of finished entities.
4270: */
1.152 daniel 4271: while ((RAW == 0) && (ctxt->inputNr > 1))
1.142 daniel 4272: xmlPopInput(ctxt);
1.42 daniel 4273: SKIP_BLANKS;
1.142 daniel 4274:
1.152 daniel 4275: if (RAW != '>') {
1.230 veillard 4276: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.55 daniel 4277: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4278: ctxt->sax->error(ctxt->userData,
1.31 daniel 4279: "xmlParseElementDecl: expected '>' at the end\n");
1.59 daniel 4280: ctxt->wellFormed = 0;
1.180 daniel 4281: ctxt->disableSAX = 1;
1.61 daniel 4282: } else {
1.187 daniel 4283: if (input != ctxt->input) {
1.230 veillard 4284: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 4285: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4286: ctxt->sax->error(ctxt->userData,
4287: "Element declaration doesn't start and stop in the same entity\n");
4288: ctxt->wellFormed = 0;
4289: ctxt->disableSAX = 1;
4290: }
4291:
1.40 daniel 4292: NEXT;
1.171 daniel 4293: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4294: (ctxt->sax->elementDecl != NULL))
1.76 daniel 4295: ctxt->sax->elementDecl(ctxt->userData, name, ret,
4296: content);
1.61 daniel 4297: }
1.84 daniel 4298: if (content != NULL) {
4299: xmlFreeElementContent(content);
4300: }
1.61 daniel 4301: if (name != NULL) {
1.119 daniel 4302: xmlFree(name);
1.61 daniel 4303: }
1.22 daniel 4304: }
1.59 daniel 4305: return(ret);
1.22 daniel 4306: }
4307:
1.50 daniel 4308: /**
4309: * xmlParseMarkupDecl:
4310: * @ctxt: an XML parser context
4311: *
4312: * parse Markup declarations
1.22 daniel 4313: *
4314: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4315: * NotationDecl | PI | Comment
4316: *
1.98 daniel 4317: * [ VC: Proper Declaration/PE Nesting ]
1.229 veillard 4318: * Parameter-entity replacement text must be properly nested with
1.98 daniel 4319: * markup declarations. That is to say, if either the first character
4320: * or the last character of a markup declaration (markupdecl above) is
4321: * contained in the replacement text for a parameter-entity reference,
4322: * both must be contained in the same replacement text.
4323: *
4324: * [ WFC: PEs in Internal Subset ]
4325: * In the internal DTD subset, parameter-entity references can occur
4326: * only where markup declarations can occur, not within markup declarations.
4327: * (This does not apply to references that occur in external parameter
4328: * entities or to the external subset.)
1.22 daniel 4329: */
1.55 daniel 4330: void
4331: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1.94 daniel 4332: GROW;
1.22 daniel 4333: xmlParseElementDecl(ctxt);
4334: xmlParseAttributeListDecl(ctxt);
4335: xmlParseEntityDecl(ctxt);
4336: xmlParseNotationDecl(ctxt);
4337: xmlParsePI(ctxt);
1.114 daniel 4338: xmlParseComment(ctxt);
1.98 daniel 4339: /*
4340: * This is only for internal subset. On external entities,
4341: * the replacement is done before parsing stage
4342: */
4343: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4344: xmlParsePEReference(ctxt);
1.97 daniel 4345: ctxt->instate = XML_PARSER_DTD;
1.22 daniel 4346: }
4347:
1.50 daniel 4348: /**
1.76 daniel 4349: * xmlParseTextDecl:
4350: * @ctxt: an XML parser context
4351: *
4352: * parse an XML declaration header for external entities
4353: *
4354: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
1.176 daniel 4355: *
4356: * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
1.76 daniel 4357: */
4358:
1.172 daniel 4359: void
1.76 daniel 4360: xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4361: xmlChar *version;
1.76 daniel 4362:
4363: /*
4364: * We know that '<?xml' is here.
4365: */
1.193 daniel 4366: if ((RAW == '<') && (NXT(1) == '?') &&
4367: (NXT(2) == 'x') && (NXT(3) == 'm') &&
4368: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4369: SKIP(5);
4370: } else {
1.230 veillard 4371: ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
1.193 daniel 4372: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4373: ctxt->sax->error(ctxt->userData,
4374: "Text declaration '<?xml' required\n");
4375: ctxt->wellFormed = 0;
4376: ctxt->disableSAX = 1;
4377:
4378: return;
4379: }
1.76 daniel 4380:
4381: if (!IS_BLANK(CUR)) {
1.230 veillard 4382: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 4383: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4384: ctxt->sax->error(ctxt->userData,
4385: "Space needed after '<?xml'\n");
1.76 daniel 4386: ctxt->wellFormed = 0;
1.180 daniel 4387: ctxt->disableSAX = 1;
1.76 daniel 4388: }
4389: SKIP_BLANKS;
4390:
4391: /*
4392: * We may have the VersionInfo here.
4393: */
4394: version = xmlParseVersionInfo(ctxt);
4395: if (version == NULL)
4396: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.172 daniel 4397: ctxt->input->version = version;
1.76 daniel 4398:
4399: /*
4400: * We must have the encoding declaration
4401: */
4402: if (!IS_BLANK(CUR)) {
1.230 veillard 4403: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 4404: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4405: ctxt->sax->error(ctxt->userData, "Space needed here\n");
1.76 daniel 4406: ctxt->wellFormed = 0;
1.180 daniel 4407: ctxt->disableSAX = 1;
1.76 daniel 4408: }
1.195 daniel 4409: xmlParseEncodingDecl(ctxt);
1.193 daniel 4410: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4411: /*
4412: * The XML REC instructs us to stop parsing right here
4413: */
4414: return;
4415: }
1.76 daniel 4416:
4417: SKIP_BLANKS;
1.152 daniel 4418: if ((RAW == '?') && (NXT(1) == '>')) {
1.76 daniel 4419: SKIP(2);
1.152 daniel 4420: } else if (RAW == '>') {
1.76 daniel 4421: /* Deprecated old WD ... */
1.230 veillard 4422: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 4423: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4424: ctxt->sax->error(ctxt->userData,
4425: "XML declaration must end-up with '?>'\n");
1.76 daniel 4426: ctxt->wellFormed = 0;
1.180 daniel 4427: ctxt->disableSAX = 1;
1.76 daniel 4428: NEXT;
4429: } else {
1.230 veillard 4430: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 4431: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4432: ctxt->sax->error(ctxt->userData,
4433: "parsing XML declaration: '?>' expected\n");
1.76 daniel 4434: ctxt->wellFormed = 0;
1.180 daniel 4435: ctxt->disableSAX = 1;
1.76 daniel 4436: MOVETO_ENDTAG(CUR_PTR);
4437: NEXT;
4438: }
4439: }
4440:
4441: /*
4442: * xmlParseConditionalSections
4443: * @ctxt: an XML parser context
4444: *
4445: * [61] conditionalSect ::= includeSect | ignoreSect
4446: * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4447: * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4448: * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4449: * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4450: */
4451:
4452: void
4453: xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
1.165 daniel 4454: SKIP(3);
4455: SKIP_BLANKS;
1.168 daniel 4456: if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4457: (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4458: (NXT(6) == 'E')) {
1.165 daniel 4459: SKIP(7);
1.168 daniel 4460: SKIP_BLANKS;
4461: if (RAW != '[') {
1.230 veillard 4462: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
1.168 daniel 4463: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4464: ctxt->sax->error(ctxt->userData,
4465: "XML conditional section '[' expected\n");
4466: ctxt->wellFormed = 0;
1.180 daniel 4467: ctxt->disableSAX = 1;
1.168 daniel 4468: } else {
4469: NEXT;
4470: }
1.220 veillard 4471: if (xmlParserDebugEntities) {
4472: if ((ctxt->input != NULL) && (ctxt->input->filename))
1.241 veillard 4473: xmlGenericError(xmlGenericErrorContext,
4474: "%s(%d): ", ctxt->input->filename,
1.220 veillard 4475: ctxt->input->line);
1.241 veillard 4476: xmlGenericError(xmlGenericErrorContext,
4477: "Entering INCLUDE Conditional Section\n");
1.220 veillard 4478: }
4479:
1.165 daniel 4480: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4481: (NXT(2) != '>'))) {
4482: const xmlChar *check = CUR_PTR;
4483: int cons = ctxt->input->consumed;
4484: int tok = ctxt->token;
4485:
4486: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4487: xmlParseConditionalSections(ctxt);
4488: } else if (IS_BLANK(CUR)) {
4489: NEXT;
4490: } else if (RAW == '%') {
4491: xmlParsePEReference(ctxt);
4492: } else
4493: xmlParseMarkupDecl(ctxt);
4494:
4495: /*
4496: * Pop-up of finished entities.
4497: */
4498: while ((RAW == 0) && (ctxt->inputNr > 1))
4499: xmlPopInput(ctxt);
4500:
4501: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4502: (tok == ctxt->token)) {
1.230 veillard 4503: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.165 daniel 4504: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4505: ctxt->sax->error(ctxt->userData,
4506: "Content error in the external subset\n");
4507: ctxt->wellFormed = 0;
1.180 daniel 4508: ctxt->disableSAX = 1;
1.165 daniel 4509: break;
4510: }
4511: }
1.220 veillard 4512: if (xmlParserDebugEntities) {
4513: if ((ctxt->input != NULL) && (ctxt->input->filename))
1.241 veillard 4514: xmlGenericError(xmlGenericErrorContext,
4515: "%s(%d): ", ctxt->input->filename,
1.220 veillard 4516: ctxt->input->line);
1.241 veillard 4517: xmlGenericError(xmlGenericErrorContext,
4518: "Leaving INCLUDE Conditional Section\n");
1.220 veillard 4519: }
4520:
1.168 daniel 4521: } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4522: (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
1.171 daniel 4523: int state;
1.245 veillard 4524: int instate;
4525: int depth = 0;
1.171 daniel 4526:
1.168 daniel 4527: SKIP(6);
4528: SKIP_BLANKS;
4529: if (RAW != '[') {
1.230 veillard 4530: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
1.168 daniel 4531: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4532: ctxt->sax->error(ctxt->userData,
4533: "XML conditional section '[' expected\n");
4534: ctxt->wellFormed = 0;
1.180 daniel 4535: ctxt->disableSAX = 1;
1.168 daniel 4536: } else {
4537: NEXT;
4538: }
1.220 veillard 4539: if (xmlParserDebugEntities) {
4540: if ((ctxt->input != NULL) && (ctxt->input->filename))
1.241 veillard 4541: xmlGenericError(xmlGenericErrorContext,
4542: "%s(%d): ", ctxt->input->filename,
1.220 veillard 4543: ctxt->input->line);
1.241 veillard 4544: xmlGenericError(xmlGenericErrorContext,
4545: "Entering IGNORE Conditional Section\n");
1.220 veillard 4546: }
1.171 daniel 4547:
1.143 daniel 4548: /*
1.171 daniel 4549: * Parse up to the end of the conditionnal section
4550: * But disable SAX event generating DTD building in the meantime
1.143 daniel 4551: */
1.171 daniel 4552: state = ctxt->disableSAX;
1.245 veillard 4553: instate = ctxt->instate;
1.220 veillard 4554: ctxt->disableSAX = 1;
1.245 veillard 4555: ctxt->instate = XML_PARSER_IGNORE;
1.171 daniel 4556:
1.245 veillard 4557: while (depth >= 0) {
4558: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4559: depth++;
4560: SKIP(3);
4561: continue;
4562: }
4563: if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4564: if (--depth >= 0) SKIP(3);
4565: continue;
4566: }
4567: NEXT;
4568: continue;
4569: }
1.171 daniel 4570:
1.245 veillard 4571: ctxt->disableSAX = state;
4572: ctxt->instate = instate;
1.143 daniel 4573:
1.220 veillard 4574: if (xmlParserDebugEntities) {
4575: if ((ctxt->input != NULL) && (ctxt->input->filename))
1.241 veillard 4576: xmlGenericError(xmlGenericErrorContext,
4577: "%s(%d): ", ctxt->input->filename,
1.220 veillard 4578: ctxt->input->line);
1.241 veillard 4579: xmlGenericError(xmlGenericErrorContext,
4580: "Leaving IGNORE Conditional Section\n");
1.220 veillard 4581: }
4582:
1.168 daniel 4583: } else {
1.230 veillard 4584: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
1.168 daniel 4585: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4586: ctxt->sax->error(ctxt->userData,
4587: "XML conditional section INCLUDE or IGNORE keyword expected\n");
4588: ctxt->wellFormed = 0;
1.180 daniel 4589: ctxt->disableSAX = 1;
1.143 daniel 4590: }
4591:
1.152 daniel 4592: if (RAW == 0)
1.143 daniel 4593: SHRINK;
4594:
1.152 daniel 4595: if (RAW == 0) {
1.230 veillard 4596: ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
1.76 daniel 4597: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4598: ctxt->sax->error(ctxt->userData,
4599: "XML conditional section not closed\n");
4600: ctxt->wellFormed = 0;
1.180 daniel 4601: ctxt->disableSAX = 1;
1.143 daniel 4602: } else {
4603: SKIP(3);
1.76 daniel 4604: }
4605: }
4606:
4607: /**
1.124 daniel 4608: * xmlParseExternalSubset:
1.76 daniel 4609: * @ctxt: an XML parser context
1.124 daniel 4610: * @ExternalID: the external identifier
4611: * @SystemID: the system identifier (or URL)
1.76 daniel 4612: *
4613: * parse Markup declarations from an external subset
4614: *
4615: * [30] extSubset ::= textDecl? extSubsetDecl
4616: *
4617: * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
4618: */
4619: void
1.123 daniel 4620: xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
4621: const xmlChar *SystemID) {
1.132 daniel 4622: GROW;
1.152 daniel 4623: if ((RAW == '<') && (NXT(1) == '?') &&
1.76 daniel 4624: (NXT(2) == 'x') && (NXT(3) == 'm') &&
4625: (NXT(4) == 'l')) {
1.172 daniel 4626: xmlParseTextDecl(ctxt);
1.193 daniel 4627: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4628: /*
4629: * The XML REC instructs us to stop parsing right here
4630: */
4631: ctxt->instate = XML_PARSER_EOF;
4632: return;
4633: }
1.76 daniel 4634: }
1.79 daniel 4635: if (ctxt->myDoc == NULL) {
1.116 daniel 4636: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
1.79 daniel 4637: }
4638: if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4639: xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4640:
1.96 daniel 4641: ctxt->instate = XML_PARSER_DTD;
1.101 daniel 4642: ctxt->external = 1;
1.152 daniel 4643: while (((RAW == '<') && (NXT(1) == '?')) ||
4644: ((RAW == '<') && (NXT(1) == '!')) ||
1.164 daniel 4645: IS_BLANK(CUR)) {
1.123 daniel 4646: const xmlChar *check = CUR_PTR;
1.115 daniel 4647: int cons = ctxt->input->consumed;
1.164 daniel 4648: int tok = ctxt->token;
1.115 daniel 4649:
1.221 veillard 4650: GROW;
1.152 daniel 4651: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
1.76 daniel 4652: xmlParseConditionalSections(ctxt);
4653: } else if (IS_BLANK(CUR)) {
4654: NEXT;
1.152 daniel 4655: } else if (RAW == '%') {
1.76 daniel 4656: xmlParsePEReference(ctxt);
4657: } else
4658: xmlParseMarkupDecl(ctxt);
1.77 daniel 4659:
4660: /*
4661: * Pop-up of finished entities.
4662: */
1.166 daniel 4663: while ((RAW == 0) && (ctxt->inputNr > 1))
1.77 daniel 4664: xmlPopInput(ctxt);
4665:
1.164 daniel 4666: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4667: (tok == ctxt->token)) {
1.230 veillard 4668: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.115 daniel 4669: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4670: ctxt->sax->error(ctxt->userData,
4671: "Content error in the external subset\n");
4672: ctxt->wellFormed = 0;
1.180 daniel 4673: ctxt->disableSAX = 1;
1.115 daniel 4674: break;
4675: }
1.76 daniel 4676: }
4677:
1.152 daniel 4678: if (RAW != 0) {
1.230 veillard 4679: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.76 daniel 4680: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4681: ctxt->sax->error(ctxt->userData,
4682: "Extra content at the end of the document\n");
4683: ctxt->wellFormed = 0;
1.180 daniel 4684: ctxt->disableSAX = 1;
1.76 daniel 4685: }
4686:
4687: }
4688:
4689: /**
1.77 daniel 4690: * xmlParseReference:
4691: * @ctxt: an XML parser context
4692: *
4693: * parse and handle entity references in content, depending on the SAX
4694: * interface, this may end-up in a call to character() if this is a
1.79 daniel 4695: * CharRef, a predefined entity, if there is no reference() callback.
4696: * or if the parser was asked to switch to that mode.
1.77 daniel 4697: *
4698: * [67] Reference ::= EntityRef | CharRef
4699: */
4700: void
4701: xmlParseReference(xmlParserCtxtPtr ctxt) {
4702: xmlEntityPtr ent;
1.123 daniel 4703: xmlChar *val;
1.152 daniel 4704: if (RAW != '&') return;
1.77 daniel 4705:
4706: if (NXT(1) == '#') {
1.152 daniel 4707: int i = 0;
1.153 daniel 4708: xmlChar out[10];
4709: int hex = NXT(2);
1.77 daniel 4710: int val = xmlParseCharRef(ctxt);
1.152 daniel 4711:
1.198 daniel 4712: if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
1.153 daniel 4713: /*
4714: * So we are using non-UTF-8 buffers
4715: * Check that the char fit on 8bits, if not
4716: * generate a CharRef.
4717: */
4718: if (val <= 0xFF) {
4719: out[0] = val;
4720: out[1] = 0;
1.171 daniel 4721: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4722: (!ctxt->disableSAX))
1.153 daniel 4723: ctxt->sax->characters(ctxt->userData, out, 1);
4724: } else {
4725: if ((hex == 'x') || (hex == 'X'))
4726: sprintf((char *)out, "#x%X", val);
4727: else
4728: sprintf((char *)out, "#%d", val);
1.171 daniel 4729: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4730: (!ctxt->disableSAX))
1.153 daniel 4731: ctxt->sax->reference(ctxt->userData, out);
4732: }
4733: } else {
4734: /*
4735: * Just encode the value in UTF-8
4736: */
4737: COPY_BUF(0 ,out, i, val);
4738: out[i] = 0;
1.171 daniel 4739: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4740: (!ctxt->disableSAX))
1.153 daniel 4741: ctxt->sax->characters(ctxt->userData, out, i);
4742: }
1.77 daniel 4743: } else {
4744: ent = xmlParseEntityRef(ctxt);
4745: if (ent == NULL) return;
4746: if ((ent->name != NULL) &&
1.159 daniel 4747: (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
1.180 daniel 4748: xmlNodePtr list = NULL;
4749: int ret;
4750:
4751:
4752: /*
4753: * The first reference to the entity trigger a parsing phase
4754: * where the ent->children is filled with the result from
4755: * the parsing.
4756: */
4757: if (ent->children == NULL) {
4758: xmlChar *value;
4759: value = ent->content;
4760:
4761: /*
4762: * Check that this entity is well formed
4763: */
4764: if ((value != NULL) &&
4765: (value[1] == 0) && (value[0] == '<') &&
1.236 veillard 4766: (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
1.180 daniel 4767: /*
1.222 veillard 4768: * DONE: get definite answer on this !!!
1.180 daniel 4769: * Lots of entity decls are used to declare a single
4770: * char
4771: * <!ENTITY lt "<">
4772: * Which seems to be valid since
4773: * 2.4: The ampersand character (&) and the left angle
4774: * bracket (<) may appear in their literal form only
4775: * when used ... They are also legal within the literal
4776: * entity value of an internal entity declaration;i
4777: * see "4.3.2 Well-Formed Parsed Entities".
4778: * IMHO 2.4 and 4.3.2 are directly in contradiction.
4779: * Looking at the OASIS test suite and James Clark
4780: * tests, this is broken. However the XML REC uses
4781: * it. Is the XML REC not well-formed ????
4782: * This is a hack to avoid this problem
1.222 veillard 4783: *
4784: * ANSWER: since lt gt amp .. are already defined,
4785: * this is a redefinition and hence the fact that the
4786: * contentis not well balanced is not a Wf error, this
4787: * is lousy but acceptable.
1.180 daniel 4788: */
4789: list = xmlNewDocText(ctxt->myDoc, value);
4790: if (list != NULL) {
4791: if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4792: (ent->children == NULL)) {
4793: ent->children = list;
4794: ent->last = list;
4795: list->parent = (xmlNodePtr) ent;
4796: } else {
4797: xmlFreeNodeList(list);
4798: }
4799: } else if (list != NULL) {
4800: xmlFreeNodeList(list);
4801: }
1.181 daniel 4802: } else {
1.180 daniel 4803: /*
4804: * 4.3.2: An internal general parsed entity is well-formed
4805: * if its replacement text matches the production labeled
4806: * content.
4807: */
1.185 daniel 4808: if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
4809: ctxt->depth++;
1.180 daniel 4810: ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
1.185 daniel 4811: ctxt->sax, NULL, ctxt->depth,
4812: value, &list);
4813: ctxt->depth--;
4814: } else if (ent->etype ==
4815: XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
4816: ctxt->depth++;
1.180 daniel 4817: ret = xmlParseExternalEntity(ctxt->myDoc,
1.185 daniel 4818: ctxt->sax, NULL, ctxt->depth,
1.228 veillard 4819: ent->URI, ent->ExternalID, &list);
1.185 daniel 4820: ctxt->depth--;
4821: } else {
1.180 daniel 4822: ret = -1;
4823: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4824: ctxt->sax->error(ctxt->userData,
4825: "Internal: invalid entity type\n");
4826: }
1.185 daniel 4827: if (ret == XML_ERR_ENTITY_LOOP) {
1.230 veillard 4828: ctxt->errNo = XML_ERR_ENTITY_LOOP;
1.185 daniel 4829: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4830: ctxt->sax->error(ctxt->userData,
4831: "Detected entity reference loop\n");
4832: ctxt->wellFormed = 0;
4833: ctxt->disableSAX = 1;
4834: } else if ((ret == 0) && (list != NULL)) {
1.180 daniel 4835: if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4836: (ent->children == NULL)) {
4837: ent->children = list;
4838: while (list != NULL) {
4839: list->parent = (xmlNodePtr) ent;
4840: if (list->next == NULL)
4841: ent->last = list;
4842: list = list->next;
4843: }
4844: } else {
4845: xmlFreeNodeList(list);
4846: }
4847: } else if (ret > 0) {
1.230 veillard 4848: ctxt->errNo = ret;
1.180 daniel 4849: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4850: ctxt->sax->error(ctxt->userData,
4851: "Entity value required\n");
4852: ctxt->wellFormed = 0;
4853: ctxt->disableSAX = 1;
4854: } else if (list != NULL) {
4855: xmlFreeNodeList(list);
4856: }
4857: }
4858: }
1.113 daniel 4859: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
1.171 daniel 4860: (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
1.113 daniel 4861: /*
4862: * Create a node.
4863: */
4864: ctxt->sax->reference(ctxt->userData, ent->name);
4865: return;
4866: } else if (ctxt->replaceEntities) {
1.222 veillard 4867: if ((ctxt->node != NULL) && (ent->children != NULL)) {
4868: /*
4869: * Seems we are generating the DOM content, do
4870: * a simple tree copy
4871: */
4872: xmlNodePtr new;
4873: new = xmlCopyNodeList(ent->children);
4874:
4875: xmlAddChildList(ctxt->node, new);
4876: /*
4877: * This is to avoid a nasty side effect, see
4878: * characters() in SAX.c
4879: */
4880: ctxt->nodemem = 0;
4881: ctxt->nodelen = 0;
4882: return;
4883: } else {
4884: /*
4885: * Probably running in SAX mode
4886: */
4887: xmlParserInputPtr input;
1.79 daniel 4888:
1.222 veillard 4889: input = xmlNewEntityInputStream(ctxt, ent);
4890: xmlPushInput(ctxt, input);
4891: if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
4892: (RAW == '<') && (NXT(1) == '?') &&
4893: (NXT(2) == 'x') && (NXT(3) == 'm') &&
4894: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4895: xmlParseTextDecl(ctxt);
4896: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4897: /*
4898: * The XML REC instructs us to stop parsing right here
4899: */
4900: ctxt->instate = XML_PARSER_EOF;
4901: return;
4902: }
4903: if (input->standalone == 1) {
1.230 veillard 4904: ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
1.222 veillard 4905: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4906: ctxt->sax->error(ctxt->userData,
4907: "external parsed entities cannot be standalone\n");
4908: ctxt->wellFormed = 0;
4909: ctxt->disableSAX = 1;
4910: }
1.167 daniel 4911: }
1.222 veillard 4912: return;
1.167 daniel 4913: }
1.113 daniel 4914: }
1.222 veillard 4915: } else {
4916: val = ent->content;
4917: if (val == NULL) return;
4918: /*
4919: * inline the entity.
4920: */
4921: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4922: (!ctxt->disableSAX))
4923: ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
1.77 daniel 4924: }
4925: }
1.24 daniel 4926: }
4927:
1.50 daniel 4928: /**
4929: * xmlParseEntityRef:
4930: * @ctxt: an XML parser context
4931: *
4932: * parse ENTITY references declarations
1.24 daniel 4933: *
4934: * [68] EntityRef ::= '&' Name ';'
1.68 daniel 4935: *
1.98 daniel 4936: * [ WFC: Entity Declared ]
4937: * In a document without any DTD, a document with only an internal DTD
4938: * subset which contains no parameter entity references, or a document
4939: * with "standalone='yes'", the Name given in the entity reference
4940: * must match that in an entity declaration, except that well-formed
4941: * documents need not declare any of the following entities: amp, lt,
4942: * gt, apos, quot. The declaration of a parameter entity must precede
4943: * any reference to it. Similarly, the declaration of a general entity
4944: * must precede any reference to it which appears in a default value in an
4945: * attribute-list declaration. Note that if entities are declared in the
4946: * external subset or in external parameter entities, a non-validating
4947: * processor is not obligated to read and process their declarations;
4948: * for such documents, the rule that an entity must be declared is a
4949: * well-formedness constraint only if standalone='yes'.
4950: *
4951: * [ WFC: Parsed Entity ]
4952: * An entity reference must not contain the name of an unparsed entity
4953: *
1.77 daniel 4954: * Returns the xmlEntityPtr if found, or NULL otherwise.
1.24 daniel 4955: */
1.77 daniel 4956: xmlEntityPtr
1.55 daniel 4957: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1.123 daniel 4958: xmlChar *name;
1.72 daniel 4959: xmlEntityPtr ent = NULL;
1.24 daniel 4960:
1.91 daniel 4961: GROW;
1.111 daniel 4962:
1.152 daniel 4963: if (RAW == '&') {
1.40 daniel 4964: NEXT;
1.24 daniel 4965: name = xmlParseName(ctxt);
4966: if (name == NULL) {
1.230 veillard 4967: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 4968: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 4969: ctxt->sax->error(ctxt->userData,
4970: "xmlParseEntityRef: no name\n");
1.59 daniel 4971: ctxt->wellFormed = 0;
1.180 daniel 4972: ctxt->disableSAX = 1;
1.24 daniel 4973: } else {
1.152 daniel 4974: if (RAW == ';') {
1.40 daniel 4975: NEXT;
1.24 daniel 4976: /*
1.77 daniel 4977: * Ask first SAX for entity resolution, otherwise try the
4978: * predefined set.
4979: */
4980: if (ctxt->sax != NULL) {
4981: if (ctxt->sax->getEntity != NULL)
4982: ent = ctxt->sax->getEntity(ctxt->userData, name);
4983: if (ent == NULL)
4984: ent = xmlGetPredefinedEntity(name);
4985: }
4986: /*
1.98 daniel 4987: * [ WFC: Entity Declared ]
4988: * In a document without any DTD, a document with only an
4989: * internal DTD subset which contains no parameter entity
4990: * references, or a document with "standalone='yes'", the
4991: * Name given in the entity reference must match that in an
4992: * entity declaration, except that well-formed documents
4993: * need not declare any of the following entities: amp, lt,
4994: * gt, apos, quot.
4995: * The declaration of a parameter entity must precede any
4996: * reference to it.
4997: * Similarly, the declaration of a general entity must
4998: * precede any reference to it which appears in a default
4999: * value in an attribute-list declaration. Note that if
5000: * entities are declared in the external subset or in
5001: * external parameter entities, a non-validating processor
5002: * is not obligated to read and process their declarations;
5003: * for such documents, the rule that an entity must be
5004: * declared is a well-formedness constraint only if
5005: * standalone='yes'.
1.59 daniel 5006: */
1.77 daniel 5007: if (ent == NULL) {
1.98 daniel 5008: if ((ctxt->standalone == 1) ||
5009: ((ctxt->hasExternalSubset == 0) &&
5010: (ctxt->hasPErefs == 0))) {
1.230 veillard 5011: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.98 daniel 5012: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 5013: ctxt->sax->error(ctxt->userData,
5014: "Entity '%s' not defined\n", name);
5015: ctxt->wellFormed = 0;
1.180 daniel 5016: ctxt->disableSAX = 1;
1.77 daniel 5017: } else {
1.230 veillard 5018: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
1.98 daniel 5019: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5020: ctxt->sax->warning(ctxt->userData,
5021: "Entity '%s' not defined\n", name);
1.59 daniel 5022: }
1.77 daniel 5023: }
1.59 daniel 5024:
5025: /*
1.98 daniel 5026: * [ WFC: Parsed Entity ]
5027: * An entity reference must not contain the name of an
5028: * unparsed entity
5029: */
1.159 daniel 5030: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.230 veillard 5031: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 5032: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5033: ctxt->sax->error(ctxt->userData,
5034: "Entity reference to unparsed entity %s\n", name);
5035: ctxt->wellFormed = 0;
1.180 daniel 5036: ctxt->disableSAX = 1;
1.98 daniel 5037: }
5038:
5039: /*
5040: * [ WFC: No External Entity References ]
5041: * Attribute values cannot contain direct or indirect
5042: * entity references to external entities.
5043: */
5044: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 5045: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.230 veillard 5046: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
1.98 daniel 5047: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5048: ctxt->sax->error(ctxt->userData,
5049: "Attribute references external entity '%s'\n", name);
5050: ctxt->wellFormed = 0;
1.180 daniel 5051: ctxt->disableSAX = 1;
1.98 daniel 5052: }
5053: /*
5054: * [ WFC: No < in Attribute Values ]
5055: * The replacement text of any entity referred to directly or
5056: * indirectly in an attribute value (other than "<") must
5057: * not contain a <.
1.59 daniel 5058: */
1.98 daniel 5059: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.116 daniel 5060: (ent != NULL) &&
1.236 veillard 5061: (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
1.98 daniel 5062: (ent->content != NULL) &&
5063: (xmlStrchr(ent->content, '<'))) {
1.230 veillard 5064: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.98 daniel 5065: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5066: ctxt->sax->error(ctxt->userData,
5067: "'<' in entity '%s' is not allowed in attributes values\n", name);
5068: ctxt->wellFormed = 0;
1.180 daniel 5069: ctxt->disableSAX = 1;
1.98 daniel 5070: }
5071:
5072: /*
5073: * Internal check, no parameter entities here ...
5074: */
5075: else {
1.159 daniel 5076: switch (ent->etype) {
1.59 daniel 5077: case XML_INTERNAL_PARAMETER_ENTITY:
5078: case XML_EXTERNAL_PARAMETER_ENTITY:
1.230 veillard 5079: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
1.59 daniel 5080: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5081: ctxt->sax->error(ctxt->userData,
1.59 daniel 5082: "Attempt to reference the parameter entity '%s'\n", name);
5083: ctxt->wellFormed = 0;
1.180 daniel 5084: ctxt->disableSAX = 1;
5085: break;
5086: default:
1.59 daniel 5087: break;
5088: }
5089: }
5090:
5091: /*
1.98 daniel 5092: * [ WFC: No Recursion ]
1.229 veillard 5093: * A parsed entity must not contain a recursive reference
1.117 daniel 5094: * to itself, either directly or indirectly.
1.229 veillard 5095: * Done somewhere else
1.59 daniel 5096: */
1.77 daniel 5097:
1.24 daniel 5098: } else {
1.230 veillard 5099: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.55 daniel 5100: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5101: ctxt->sax->error(ctxt->userData,
1.59 daniel 5102: "xmlParseEntityRef: expecting ';'\n");
5103: ctxt->wellFormed = 0;
1.180 daniel 5104: ctxt->disableSAX = 1;
1.24 daniel 5105: }
1.119 daniel 5106: xmlFree(name);
1.24 daniel 5107: }
5108: }
1.77 daniel 5109: return(ent);
1.24 daniel 5110: }
1.229 veillard 5111:
1.135 daniel 5112: /**
5113: * xmlParseStringEntityRef:
5114: * @ctxt: an XML parser context
5115: * @str: a pointer to an index in the string
5116: *
5117: * parse ENTITY references declarations, but this version parses it from
5118: * a string value.
5119: *
5120: * [68] EntityRef ::= '&' Name ';'
5121: *
5122: * [ WFC: Entity Declared ]
5123: * In a document without any DTD, a document with only an internal DTD
5124: * subset which contains no parameter entity references, or a document
5125: * with "standalone='yes'", the Name given in the entity reference
5126: * must match that in an entity declaration, except that well-formed
5127: * documents need not declare any of the following entities: amp, lt,
5128: * gt, apos, quot. The declaration of a parameter entity must precede
5129: * any reference to it. Similarly, the declaration of a general entity
5130: * must precede any reference to it which appears in a default value in an
5131: * attribute-list declaration. Note that if entities are declared in the
5132: * external subset or in external parameter entities, a non-validating
5133: * processor is not obligated to read and process their declarations;
5134: * for such documents, the rule that an entity must be declared is a
5135: * well-formedness constraint only if standalone='yes'.
5136: *
5137: * [ WFC: Parsed Entity ]
5138: * An entity reference must not contain the name of an unparsed entity
5139: *
5140: * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5141: * is updated to the current location in the string.
5142: */
5143: xmlEntityPtr
5144: xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5145: xmlChar *name;
5146: const xmlChar *ptr;
5147: xmlChar cur;
5148: xmlEntityPtr ent = NULL;
5149:
1.156 daniel 5150: if ((str == NULL) || (*str == NULL))
5151: return(NULL);
1.135 daniel 5152: ptr = *str;
5153: cur = *ptr;
5154: if (cur == '&') {
5155: ptr++;
5156: cur = *ptr;
5157: name = xmlParseStringName(ctxt, &ptr);
5158: if (name == NULL) {
1.230 veillard 5159: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.135 daniel 5160: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5161: ctxt->sax->error(ctxt->userData,
5162: "xmlParseEntityRef: no name\n");
5163: ctxt->wellFormed = 0;
1.180 daniel 5164: ctxt->disableSAX = 1;
1.135 daniel 5165: } else {
1.185 daniel 5166: if (*ptr == ';') {
5167: ptr++;
1.135 daniel 5168: /*
5169: * Ask first SAX for entity resolution, otherwise try the
5170: * predefined set.
5171: */
5172: if (ctxt->sax != NULL) {
5173: if (ctxt->sax->getEntity != NULL)
5174: ent = ctxt->sax->getEntity(ctxt->userData, name);
5175: if (ent == NULL)
5176: ent = xmlGetPredefinedEntity(name);
5177: }
5178: /*
5179: * [ WFC: Entity Declared ]
5180: * In a document without any DTD, a document with only an
5181: * internal DTD subset which contains no parameter entity
5182: * references, or a document with "standalone='yes'", the
5183: * Name given in the entity reference must match that in an
5184: * entity declaration, except that well-formed documents
5185: * need not declare any of the following entities: amp, lt,
5186: * gt, apos, quot.
5187: * The declaration of a parameter entity must precede any
5188: * reference to it.
5189: * Similarly, the declaration of a general entity must
5190: * precede any reference to it which appears in a default
5191: * value in an attribute-list declaration. Note that if
5192: * entities are declared in the external subset or in
5193: * external parameter entities, a non-validating processor
5194: * is not obligated to read and process their declarations;
5195: * for such documents, the rule that an entity must be
5196: * declared is a well-formedness constraint only if
5197: * standalone='yes'.
5198: */
5199: if (ent == NULL) {
5200: if ((ctxt->standalone == 1) ||
5201: ((ctxt->hasExternalSubset == 0) &&
5202: (ctxt->hasPErefs == 0))) {
1.230 veillard 5203: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.135 daniel 5204: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5205: ctxt->sax->error(ctxt->userData,
5206: "Entity '%s' not defined\n", name);
5207: ctxt->wellFormed = 0;
1.180 daniel 5208: ctxt->disableSAX = 1;
1.135 daniel 5209: } else {
1.230 veillard 5210: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
1.135 daniel 5211: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5212: ctxt->sax->warning(ctxt->userData,
5213: "Entity '%s' not defined\n", name);
5214: }
5215: }
5216:
5217: /*
5218: * [ WFC: Parsed Entity ]
5219: * An entity reference must not contain the name of an
5220: * unparsed entity
5221: */
1.159 daniel 5222: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.230 veillard 5223: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.135 daniel 5224: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5225: ctxt->sax->error(ctxt->userData,
5226: "Entity reference to unparsed entity %s\n", name);
5227: ctxt->wellFormed = 0;
1.180 daniel 5228: ctxt->disableSAX = 1;
1.135 daniel 5229: }
5230:
5231: /*
5232: * [ WFC: No External Entity References ]
5233: * Attribute values cannot contain direct or indirect
5234: * entity references to external entities.
5235: */
5236: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 5237: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.230 veillard 5238: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
1.135 daniel 5239: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5240: ctxt->sax->error(ctxt->userData,
5241: "Attribute references external entity '%s'\n", name);
5242: ctxt->wellFormed = 0;
1.180 daniel 5243: ctxt->disableSAX = 1;
1.135 daniel 5244: }
5245: /*
5246: * [ WFC: No < in Attribute Values ]
5247: * The replacement text of any entity referred to directly or
5248: * indirectly in an attribute value (other than "<") must
5249: * not contain a <.
5250: */
5251: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5252: (ent != NULL) &&
1.236 veillard 5253: (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
1.135 daniel 5254: (ent->content != NULL) &&
5255: (xmlStrchr(ent->content, '<'))) {
1.230 veillard 5256: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.135 daniel 5257: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5258: ctxt->sax->error(ctxt->userData,
5259: "'<' in entity '%s' is not allowed in attributes values\n", name);
5260: ctxt->wellFormed = 0;
1.180 daniel 5261: ctxt->disableSAX = 1;
1.135 daniel 5262: }
5263:
5264: /*
5265: * Internal check, no parameter entities here ...
5266: */
5267: else {
1.159 daniel 5268: switch (ent->etype) {
1.135 daniel 5269: case XML_INTERNAL_PARAMETER_ENTITY:
5270: case XML_EXTERNAL_PARAMETER_ENTITY:
1.230 veillard 5271: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
1.135 daniel 5272: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5273: ctxt->sax->error(ctxt->userData,
5274: "Attempt to reference the parameter entity '%s'\n", name);
5275: ctxt->wellFormed = 0;
1.180 daniel 5276: ctxt->disableSAX = 1;
5277: break;
5278: default:
1.135 daniel 5279: break;
5280: }
5281: }
5282:
5283: /*
5284: * [ WFC: No Recursion ]
1.229 veillard 5285: * A parsed entity must not contain a recursive reference
1.135 daniel 5286: * to itself, either directly or indirectly.
1.229 veillard 5287: * Done somewhwere else
1.135 daniel 5288: */
5289:
5290: } else {
1.230 veillard 5291: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.135 daniel 5292: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5293: ctxt->sax->error(ctxt->userData,
5294: "xmlParseEntityRef: expecting ';'\n");
5295: ctxt->wellFormed = 0;
1.180 daniel 5296: ctxt->disableSAX = 1;
1.135 daniel 5297: }
5298: xmlFree(name);
5299: }
5300: }
1.185 daniel 5301: *str = ptr;
1.135 daniel 5302: return(ent);
5303: }
1.24 daniel 5304:
1.50 daniel 5305: /**
5306: * xmlParsePEReference:
5307: * @ctxt: an XML parser context
5308: *
5309: * parse PEReference declarations
1.77 daniel 5310: * The entity content is handled directly by pushing it's content as
5311: * a new input stream.
1.22 daniel 5312: *
5313: * [69] PEReference ::= '%' Name ';'
1.68 daniel 5314: *
1.98 daniel 5315: * [ WFC: No Recursion ]
1.229 veillard 5316: * A parsed entity must not contain a recursive
1.98 daniel 5317: * reference to itself, either directly or indirectly.
5318: *
5319: * [ WFC: Entity Declared ]
5320: * In a document without any DTD, a document with only an internal DTD
5321: * subset which contains no parameter entity references, or a document
5322: * with "standalone='yes'", ... ... The declaration of a parameter
5323: * entity must precede any reference to it...
5324: *
5325: * [ VC: Entity Declared ]
5326: * In a document with an external subset or external parameter entities
5327: * with "standalone='no'", ... ... The declaration of a parameter entity
5328: * must precede any reference to it...
5329: *
5330: * [ WFC: In DTD ]
5331: * Parameter-entity references may only appear in the DTD.
5332: * NOTE: misleading but this is handled.
1.22 daniel 5333: */
1.77 daniel 5334: void
1.55 daniel 5335: xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 5336: xmlChar *name;
1.72 daniel 5337: xmlEntityPtr entity = NULL;
1.50 daniel 5338: xmlParserInputPtr input;
1.22 daniel 5339:
1.152 daniel 5340: if (RAW == '%') {
1.40 daniel 5341: NEXT;
1.22 daniel 5342: name = xmlParseName(ctxt);
5343: if (name == NULL) {
1.230 veillard 5344: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 5345: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5346: ctxt->sax->error(ctxt->userData,
5347: "xmlParsePEReference: no name\n");
1.59 daniel 5348: ctxt->wellFormed = 0;
1.180 daniel 5349: ctxt->disableSAX = 1;
1.22 daniel 5350: } else {
1.152 daniel 5351: if (RAW == ';') {
1.40 daniel 5352: NEXT;
1.98 daniel 5353: if ((ctxt->sax != NULL) &&
5354: (ctxt->sax->getParameterEntity != NULL))
5355: entity = ctxt->sax->getParameterEntity(ctxt->userData,
5356: name);
1.45 daniel 5357: if (entity == NULL) {
1.98 daniel 5358: /*
5359: * [ WFC: Entity Declared ]
5360: * In a document without any DTD, a document with only an
5361: * internal DTD subset which contains no parameter entity
5362: * references, or a document with "standalone='yes'", ...
5363: * ... The declaration of a parameter entity must precede
5364: * any reference to it...
5365: */
5366: if ((ctxt->standalone == 1) ||
5367: ((ctxt->hasExternalSubset == 0) &&
5368: (ctxt->hasPErefs == 0))) {
1.230 veillard 5369: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.220 veillard 5370: if ((!ctxt->disableSAX) &&
5371: (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 5372: ctxt->sax->error(ctxt->userData,
5373: "PEReference: %%%s; not found\n", name);
5374: ctxt->wellFormed = 0;
1.180 daniel 5375: ctxt->disableSAX = 1;
1.98 daniel 5376: } else {
5377: /*
5378: * [ VC: Entity Declared ]
5379: * In a document with an external subset or external
5380: * parameter entities with "standalone='no'", ...
5381: * ... The declaration of a parameter entity must precede
5382: * any reference to it...
5383: */
1.220 veillard 5384: if ((!ctxt->disableSAX) &&
5385: (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1.98 daniel 5386: ctxt->sax->warning(ctxt->userData,
5387: "PEReference: %%%s; not found\n", name);
5388: ctxt->valid = 0;
5389: }
1.50 daniel 5390: } else {
1.98 daniel 5391: /*
5392: * Internal checking in case the entity quest barfed
5393: */
1.159 daniel 5394: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5395: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.98 daniel 5396: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5397: ctxt->sax->warning(ctxt->userData,
5398: "Internal: %%%s; is not a parameter entity\n", name);
5399: } else {
1.164 daniel 5400: /*
5401: * TODO !!!
5402: * handle the extra spaces added before and after
5403: * c.f. http://www.w3.org/TR/REC-xml#as-PE
5404: */
1.98 daniel 5405: input = xmlNewEntityInputStream(ctxt, entity);
5406: xmlPushInput(ctxt, input);
1.164 daniel 5407: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5408: (RAW == '<') && (NXT(1) == '?') &&
5409: (NXT(2) == 'x') && (NXT(3) == 'm') &&
5410: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 5411: xmlParseTextDecl(ctxt);
1.193 daniel 5412: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5413: /*
5414: * The XML REC instructs us to stop parsing
5415: * right here
5416: */
5417: ctxt->instate = XML_PARSER_EOF;
5418: xmlFree(name);
5419: return;
5420: }
1.164 daniel 5421: }
5422: if (ctxt->token == 0)
5423: ctxt->token = ' ';
1.98 daniel 5424: }
1.45 daniel 5425: }
1.98 daniel 5426: ctxt->hasPErefs = 1;
1.22 daniel 5427: } else {
1.230 veillard 5428: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.55 daniel 5429: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5430: ctxt->sax->error(ctxt->userData,
1.59 daniel 5431: "xmlParsePEReference: expecting ';'\n");
5432: ctxt->wellFormed = 0;
1.180 daniel 5433: ctxt->disableSAX = 1;
1.22 daniel 5434: }
1.119 daniel 5435: xmlFree(name);
1.3 veillard 5436: }
5437: }
5438: }
5439:
1.50 daniel 5440: /**
1.135 daniel 5441: * xmlParseStringPEReference:
5442: * @ctxt: an XML parser context
5443: * @str: a pointer to an index in the string
5444: *
5445: * parse PEReference declarations
5446: *
5447: * [69] PEReference ::= '%' Name ';'
5448: *
5449: * [ WFC: No Recursion ]
1.229 veillard 5450: * A parsed entity must not contain a recursive
1.135 daniel 5451: * reference to itself, either directly or indirectly.
5452: *
5453: * [ WFC: Entity Declared ]
5454: * In a document without any DTD, a document with only an internal DTD
5455: * subset which contains no parameter entity references, or a document
5456: * with "standalone='yes'", ... ... The declaration of a parameter
5457: * entity must precede any reference to it...
5458: *
5459: * [ VC: Entity Declared ]
5460: * In a document with an external subset or external parameter entities
5461: * with "standalone='no'", ... ... The declaration of a parameter entity
5462: * must precede any reference to it...
5463: *
5464: * [ WFC: In DTD ]
5465: * Parameter-entity references may only appear in the DTD.
5466: * NOTE: misleading but this is handled.
5467: *
5468: * Returns the string of the entity content.
5469: * str is updated to the current value of the index
5470: */
5471: xmlEntityPtr
5472: xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5473: const xmlChar *ptr;
5474: xmlChar cur;
5475: xmlChar *name;
5476: xmlEntityPtr entity = NULL;
5477:
5478: if ((str == NULL) || (*str == NULL)) return(NULL);
5479: ptr = *str;
5480: cur = *ptr;
5481: if (cur == '%') {
5482: ptr++;
5483: cur = *ptr;
5484: name = xmlParseStringName(ctxt, &ptr);
5485: if (name == NULL) {
1.230 veillard 5486: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.135 daniel 5487: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5488: ctxt->sax->error(ctxt->userData,
5489: "xmlParseStringPEReference: no name\n");
5490: ctxt->wellFormed = 0;
1.180 daniel 5491: ctxt->disableSAX = 1;
1.135 daniel 5492: } else {
5493: cur = *ptr;
5494: if (cur == ';') {
5495: ptr++;
5496: cur = *ptr;
5497: if ((ctxt->sax != NULL) &&
5498: (ctxt->sax->getParameterEntity != NULL))
5499: entity = ctxt->sax->getParameterEntity(ctxt->userData,
5500: name);
5501: if (entity == NULL) {
5502: /*
5503: * [ WFC: Entity Declared ]
5504: * In a document without any DTD, a document with only an
5505: * internal DTD subset which contains no parameter entity
5506: * references, or a document with "standalone='yes'", ...
5507: * ... The declaration of a parameter entity must precede
5508: * any reference to it...
5509: */
5510: if ((ctxt->standalone == 1) ||
5511: ((ctxt->hasExternalSubset == 0) &&
5512: (ctxt->hasPErefs == 0))) {
1.230 veillard 5513: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.135 daniel 5514: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5515: ctxt->sax->error(ctxt->userData,
5516: "PEReference: %%%s; not found\n", name);
5517: ctxt->wellFormed = 0;
1.180 daniel 5518: ctxt->disableSAX = 1;
1.135 daniel 5519: } else {
5520: /*
5521: * [ VC: Entity Declared ]
5522: * In a document with an external subset or external
5523: * parameter entities with "standalone='no'", ...
5524: * ... The declaration of a parameter entity must
5525: * precede any reference to it...
5526: */
5527: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5528: ctxt->sax->warning(ctxt->userData,
5529: "PEReference: %%%s; not found\n", name);
5530: ctxt->valid = 0;
5531: }
5532: } else {
5533: /*
5534: * Internal checking in case the entity quest barfed
5535: */
1.159 daniel 5536: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5537: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.135 daniel 5538: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5539: ctxt->sax->warning(ctxt->userData,
5540: "Internal: %%%s; is not a parameter entity\n", name);
5541: }
5542: }
5543: ctxt->hasPErefs = 1;
5544: } else {
1.230 veillard 5545: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.135 daniel 5546: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5547: ctxt->sax->error(ctxt->userData,
5548: "xmlParseStringPEReference: expecting ';'\n");
5549: ctxt->wellFormed = 0;
1.180 daniel 5550: ctxt->disableSAX = 1;
1.135 daniel 5551: }
5552: xmlFree(name);
5553: }
5554: }
5555: *str = ptr;
5556: return(entity);
5557: }
5558:
5559: /**
1.181 daniel 5560: * xmlParseDocTypeDecl:
1.50 daniel 5561: * @ctxt: an XML parser context
5562: *
5563: * parse a DOCTYPE declaration
1.21 daniel 5564: *
1.22 daniel 5565: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
5566: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.98 daniel 5567: *
5568: * [ VC: Root Element Type ]
1.99 daniel 5569: * The Name in the document type declaration must match the element
1.98 daniel 5570: * type of the root element.
1.21 daniel 5571: */
5572:
1.55 daniel 5573: void
5574: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.165 daniel 5575: xmlChar *name = NULL;
1.123 daniel 5576: xmlChar *ExternalID = NULL;
5577: xmlChar *URI = NULL;
1.21 daniel 5578:
5579: /*
5580: * We know that '<!DOCTYPE' has been detected.
5581: */
1.40 daniel 5582: SKIP(9);
1.21 daniel 5583:
1.42 daniel 5584: SKIP_BLANKS;
1.21 daniel 5585:
5586: /*
5587: * Parse the DOCTYPE name.
5588: */
5589: name = xmlParseName(ctxt);
5590: if (name == NULL) {
1.230 veillard 5591: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 5592: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5593: ctxt->sax->error(ctxt->userData,
5594: "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.59 daniel 5595: ctxt->wellFormed = 0;
1.180 daniel 5596: ctxt->disableSAX = 1;
1.21 daniel 5597: }
1.165 daniel 5598: ctxt->intSubName = name;
1.21 daniel 5599:
1.42 daniel 5600: SKIP_BLANKS;
1.21 daniel 5601:
5602: /*
1.22 daniel 5603: * Check for SystemID and ExternalID
5604: */
1.67 daniel 5605: URI = xmlParseExternalID(ctxt, &ExternalID, 1);
1.98 daniel 5606:
5607: if ((URI != NULL) || (ExternalID != NULL)) {
5608: ctxt->hasExternalSubset = 1;
5609: }
1.165 daniel 5610: ctxt->extSubURI = URI;
5611: ctxt->extSubSystem = ExternalID;
1.98 daniel 5612:
1.42 daniel 5613: SKIP_BLANKS;
1.36 daniel 5614:
1.76 daniel 5615: /*
1.165 daniel 5616: * Create and update the internal subset.
1.76 daniel 5617: */
1.171 daniel 5618: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
5619: (!ctxt->disableSAX))
1.74 daniel 5620: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
1.22 daniel 5621:
5622: /*
1.140 daniel 5623: * Is there any internal subset declarations ?
5624: * they are handled separately in xmlParseInternalSubset()
5625: */
1.152 daniel 5626: if (RAW == '[')
1.140 daniel 5627: return;
5628:
5629: /*
5630: * We should be at the end of the DOCTYPE declaration.
5631: */
1.152 daniel 5632: if (RAW != '>') {
1.230 veillard 5633: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
1.140 daniel 5634: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5635: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5636: ctxt->wellFormed = 0;
1.180 daniel 5637: ctxt->disableSAX = 1;
1.140 daniel 5638: }
5639: NEXT;
5640: }
5641:
5642: /**
1.181 daniel 5643: * xmlParseInternalsubset:
1.140 daniel 5644: * @ctxt: an XML parser context
5645: *
5646: * parse the internal subset declaration
5647: *
5648: * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5649: */
5650:
5651: void
5652: xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
5653: /*
1.22 daniel 5654: * Is there any DTD definition ?
5655: */
1.152 daniel 5656: if (RAW == '[') {
1.96 daniel 5657: ctxt->instate = XML_PARSER_DTD;
1.40 daniel 5658: NEXT;
1.22 daniel 5659: /*
5660: * Parse the succession of Markup declarations and
5661: * PEReferences.
5662: * Subsequence (markupdecl | PEReference | S)*
5663: */
1.152 daniel 5664: while (RAW != ']') {
1.123 daniel 5665: const xmlChar *check = CUR_PTR;
1.115 daniel 5666: int cons = ctxt->input->consumed;
1.22 daniel 5667:
1.42 daniel 5668: SKIP_BLANKS;
1.22 daniel 5669: xmlParseMarkupDecl(ctxt);
1.50 daniel 5670: xmlParsePEReference(ctxt);
1.22 daniel 5671:
1.115 daniel 5672: /*
5673: * Pop-up of finished entities.
5674: */
1.152 daniel 5675: while ((RAW == 0) && (ctxt->inputNr > 1))
1.115 daniel 5676: xmlPopInput(ctxt);
5677:
1.118 daniel 5678: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
1.230 veillard 5679: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 5680: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5681: ctxt->sax->error(ctxt->userData,
1.140 daniel 5682: "xmlParseInternalSubset: error detected in Markup declaration\n");
1.59 daniel 5683: ctxt->wellFormed = 0;
1.180 daniel 5684: ctxt->disableSAX = 1;
1.22 daniel 5685: break;
5686: }
5687: }
1.209 veillard 5688: if (RAW == ']') {
5689: NEXT;
5690: SKIP_BLANKS;
5691: }
1.22 daniel 5692: }
5693:
5694: /*
5695: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 5696: */
1.152 daniel 5697: if (RAW != '>') {
1.230 veillard 5698: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
1.55 daniel 5699: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5700: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
1.59 daniel 5701: ctxt->wellFormed = 0;
1.180 daniel 5702: ctxt->disableSAX = 1;
1.21 daniel 5703: }
1.40 daniel 5704: NEXT;
1.21 daniel 5705: }
5706:
1.50 daniel 5707: /**
5708: * xmlParseAttribute:
5709: * @ctxt: an XML parser context
1.123 daniel 5710: * @value: a xmlChar ** used to store the value of the attribute
1.50 daniel 5711: *
5712: * parse an attribute
1.3 veillard 5713: *
1.22 daniel 5714: * [41] Attribute ::= Name Eq AttValue
5715: *
1.98 daniel 5716: * [ WFC: No External Entity References ]
5717: * Attribute values cannot contain direct or indirect entity references
5718: * to external entities.
5719: *
5720: * [ WFC: No < in Attribute Values ]
5721: * The replacement text of any entity referred to directly or indirectly in
5722: * an attribute value (other than "<") must not contain a <.
5723: *
5724: * [ VC: Attribute Value Type ]
1.117 daniel 5725: * The attribute must have been declared; the value must be of the type
1.99 daniel 5726: * declared for it.
1.98 daniel 5727: *
1.22 daniel 5728: * [25] Eq ::= S? '=' S?
5729: *
1.29 daniel 5730: * With namespace:
5731: *
5732: * [NS 11] Attribute ::= QName Eq AttValue
1.43 daniel 5733: *
5734: * Also the case QName == xmlns:??? is handled independently as a namespace
5735: * definition.
1.69 daniel 5736: *
1.72 daniel 5737: * Returns the attribute name, and the value in *value.
1.3 veillard 5738: */
5739:
1.123 daniel 5740: xmlChar *
5741: xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
5742: xmlChar *name, *val;
1.3 veillard 5743:
1.72 daniel 5744: *value = NULL;
5745: name = xmlParseName(ctxt);
1.22 daniel 5746: if (name == NULL) {
1.230 veillard 5747: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 5748: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5749: ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
1.59 daniel 5750: ctxt->wellFormed = 0;
1.180 daniel 5751: ctxt->disableSAX = 1;
1.52 daniel 5752: return(NULL);
1.3 veillard 5753: }
5754:
5755: /*
1.29 daniel 5756: * read the value
1.3 veillard 5757: */
1.42 daniel 5758: SKIP_BLANKS;
1.152 daniel 5759: if (RAW == '=') {
1.40 daniel 5760: NEXT;
1.42 daniel 5761: SKIP_BLANKS;
1.72 daniel 5762: val = xmlParseAttValue(ctxt);
1.96 daniel 5763: ctxt->instate = XML_PARSER_CONTENT;
1.29 daniel 5764: } else {
1.230 veillard 5765: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
1.55 daniel 5766: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5767: ctxt->sax->error(ctxt->userData,
1.59 daniel 5768: "Specification mandate value for attribute %s\n", name);
5769: ctxt->wellFormed = 0;
1.180 daniel 5770: ctxt->disableSAX = 1;
1.170 daniel 5771: xmlFree(name);
1.52 daniel 5772: return(NULL);
1.43 daniel 5773: }
5774:
1.172 daniel 5775: /*
5776: * Check that xml:lang conforms to the specification
1.222 veillard 5777: * No more registered as an error, just generate a warning now
5778: * since this was deprecated in XML second edition
1.172 daniel 5779: */
1.236 veillard 5780: if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
1.172 daniel 5781: if (!xmlCheckLanguageID(val)) {
1.222 veillard 5782: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5783: ctxt->sax->warning(ctxt->userData,
5784: "Malformed value for xml:lang : %s\n", val);
1.172 daniel 5785: }
5786: }
5787:
1.176 daniel 5788: /*
5789: * Check that xml:space conforms to the specification
5790: */
1.236 veillard 5791: if (xmlStrEqual(name, BAD_CAST "xml:space")) {
5792: if (xmlStrEqual(val, BAD_CAST "default"))
1.176 daniel 5793: *(ctxt->space) = 0;
1.236 veillard 5794: else if (xmlStrEqual(val, BAD_CAST "preserve"))
1.176 daniel 5795: *(ctxt->space) = 1;
5796: else {
1.230 veillard 5797: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
1.176 daniel 5798: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5799: ctxt->sax->error(ctxt->userData,
5800: "Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
5801: val);
5802: ctxt->wellFormed = 0;
1.180 daniel 5803: ctxt->disableSAX = 1;
1.176 daniel 5804: }
5805: }
5806:
1.72 daniel 5807: *value = val;
5808: return(name);
1.3 veillard 5809: }
5810:
1.50 daniel 5811: /**
5812: * xmlParseStartTag:
5813: * @ctxt: an XML parser context
5814: *
5815: * parse a start of tag either for rule element or
5816: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 5817: *
5818: * [40] STag ::= '<' Name (S Attribute)* S? '>'
5819: *
1.98 daniel 5820: * [ WFC: Unique Att Spec ]
5821: * No attribute name may appear more than once in the same start-tag or
5822: * empty-element tag.
5823: *
1.29 daniel 5824: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
5825: *
1.98 daniel 5826: * [ WFC: Unique Att Spec ]
5827: * No attribute name may appear more than once in the same start-tag or
5828: * empty-element tag.
5829: *
1.29 daniel 5830: * With namespace:
5831: *
5832: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
5833: *
5834: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.83 daniel 5835: *
1.192 daniel 5836: * Returns the element name parsed
1.2 veillard 5837: */
5838:
1.123 daniel 5839: xmlChar *
1.69 daniel 5840: xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 5841: xmlChar *name;
5842: xmlChar *attname;
5843: xmlChar *attvalue;
5844: const xmlChar **atts = NULL;
1.72 daniel 5845: int nbatts = 0;
5846: int maxatts = 0;
5847: int i;
1.2 veillard 5848:
1.152 daniel 5849: if (RAW != '<') return(NULL);
1.40 daniel 5850: NEXT;
1.3 veillard 5851:
1.72 daniel 5852: name = xmlParseName(ctxt);
1.59 daniel 5853: if (name == NULL) {
1.230 veillard 5854: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5855: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5856: ctxt->sax->error(ctxt->userData,
1.59 daniel 5857: "xmlParseStartTag: invalid element name\n");
5858: ctxt->wellFormed = 0;
1.180 daniel 5859: ctxt->disableSAX = 1;
1.83 daniel 5860: return(NULL);
1.50 daniel 5861: }
5862:
5863: /*
1.3 veillard 5864: * Now parse the attributes, it ends up with the ending
5865: *
5866: * (S Attribute)* S?
5867: */
1.42 daniel 5868: SKIP_BLANKS;
1.91 daniel 5869: GROW;
1.168 daniel 5870:
1.153 daniel 5871: while ((IS_CHAR(RAW)) &&
1.152 daniel 5872: (RAW != '>') &&
5873: ((RAW != '/') || (NXT(1) != '>'))) {
1.123 daniel 5874: const xmlChar *q = CUR_PTR;
1.91 daniel 5875: int cons = ctxt->input->consumed;
1.29 daniel 5876:
1.72 daniel 5877: attname = xmlParseAttribute(ctxt, &attvalue);
5878: if ((attname != NULL) && (attvalue != NULL)) {
5879: /*
1.98 daniel 5880: * [ WFC: Unique Att Spec ]
5881: * No attribute name may appear more than once in the same
5882: * start-tag or empty-element tag.
1.72 daniel 5883: */
5884: for (i = 0; i < nbatts;i += 2) {
1.236 veillard 5885: if (xmlStrEqual(atts[i], attname)) {
1.230 veillard 5886: ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
1.72 daniel 5887: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 5888: ctxt->sax->error(ctxt->userData,
5889: "Attribute %s redefined\n",
5890: attname);
1.72 daniel 5891: ctxt->wellFormed = 0;
1.180 daniel 5892: ctxt->disableSAX = 1;
1.119 daniel 5893: xmlFree(attname);
5894: xmlFree(attvalue);
1.98 daniel 5895: goto failed;
1.72 daniel 5896: }
5897: }
5898:
5899: /*
5900: * Add the pair to atts
5901: */
5902: if (atts == NULL) {
5903: maxatts = 10;
1.123 daniel 5904: atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
1.72 daniel 5905: if (atts == NULL) {
1.241 veillard 5906: xmlGenericError(xmlGenericErrorContext,
5907: "malloc of %ld byte failed\n",
1.123 daniel 5908: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 5909: return(NULL);
1.72 daniel 5910: }
1.127 daniel 5911: } else if (nbatts + 4 > maxatts) {
1.72 daniel 5912: maxatts *= 2;
1.233 veillard 5913: atts = (const xmlChar **) xmlRealloc((void *) atts,
5914: maxatts * sizeof(xmlChar *));
1.72 daniel 5915: if (atts == NULL) {
1.241 veillard 5916: xmlGenericError(xmlGenericErrorContext,
5917: "realloc of %ld byte failed\n",
1.123 daniel 5918: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 5919: return(NULL);
1.72 daniel 5920: }
5921: }
5922: atts[nbatts++] = attname;
5923: atts[nbatts++] = attvalue;
5924: atts[nbatts] = NULL;
5925: atts[nbatts + 1] = NULL;
1.176 daniel 5926: } else {
5927: if (attname != NULL)
5928: xmlFree(attname);
5929: if (attvalue != NULL)
5930: xmlFree(attvalue);
1.72 daniel 5931: }
5932:
1.116 daniel 5933: failed:
1.168 daniel 5934:
5935: if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
5936: break;
5937: if (!IS_BLANK(RAW)) {
1.230 veillard 5938: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.168 daniel 5939: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5940: ctxt->sax->error(ctxt->userData,
5941: "attributes construct error\n");
5942: ctxt->wellFormed = 0;
1.180 daniel 5943: ctxt->disableSAX = 1;
1.168 daniel 5944: }
1.42 daniel 5945: SKIP_BLANKS;
1.91 daniel 5946: if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
1.230 veillard 5947: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 5948: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5949: ctxt->sax->error(ctxt->userData,
1.31 daniel 5950: "xmlParseStartTag: problem parsing attributes\n");
1.59 daniel 5951: ctxt->wellFormed = 0;
1.180 daniel 5952: ctxt->disableSAX = 1;
1.29 daniel 5953: break;
1.3 veillard 5954: }
1.91 daniel 5955: GROW;
1.3 veillard 5956: }
5957:
1.43 daniel 5958: /*
1.72 daniel 5959: * SAX: Start of Element !
1.43 daniel 5960: */
1.171 daniel 5961: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
5962: (!ctxt->disableSAX))
1.74 daniel 5963: ctxt->sax->startElement(ctxt->userData, name, atts);
1.43 daniel 5964:
1.72 daniel 5965: if (atts != NULL) {
1.123 daniel 5966: for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
1.233 veillard 5967: xmlFree((void *) atts);
1.72 daniel 5968: }
1.83 daniel 5969: return(name);
1.3 veillard 5970: }
5971:
1.50 daniel 5972: /**
5973: * xmlParseEndTag:
5974: * @ctxt: an XML parser context
5975: *
5976: * parse an end of tag
1.27 daniel 5977: *
5978: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 5979: *
5980: * With namespace
5981: *
1.72 daniel 5982: * [NS 9] ETag ::= '</' QName S? '>'
1.7 veillard 5983: */
5984:
1.55 daniel 5985: void
1.140 daniel 5986: xmlParseEndTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 5987: xmlChar *name;
1.140 daniel 5988: xmlChar *oldname;
1.7 veillard 5989:
1.91 daniel 5990: GROW;
1.152 daniel 5991: if ((RAW != '<') || (NXT(1) != '/')) {
1.230 veillard 5992: ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
1.55 daniel 5993: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5994: ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
1.59 daniel 5995: ctxt->wellFormed = 0;
1.180 daniel 5996: ctxt->disableSAX = 1;
1.27 daniel 5997: return;
5998: }
1.40 daniel 5999: SKIP(2);
1.7 veillard 6000:
1.72 daniel 6001: name = xmlParseName(ctxt);
1.7 veillard 6002:
6003: /*
6004: * We should definitely be at the ending "S? '>'" part
6005: */
1.91 daniel 6006: GROW;
1.42 daniel 6007: SKIP_BLANKS;
1.153 daniel 6008: if ((!IS_CHAR(RAW)) || (RAW != '>')) {
1.230 veillard 6009: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.55 daniel 6010: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6011: ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
1.59 daniel 6012: ctxt->wellFormed = 0;
1.180 daniel 6013: ctxt->disableSAX = 1;
1.7 veillard 6014: } else
1.40 daniel 6015: NEXT;
1.7 veillard 6016:
1.72 daniel 6017: /*
1.98 daniel 6018: * [ WFC: Element Type Match ]
6019: * The Name in an element's end-tag must match the element type in the
6020: * start-tag.
6021: *
1.83 daniel 6022: */
1.147 daniel 6023: if ((name == NULL) || (ctxt->name == NULL) ||
1.236 veillard 6024: (!xmlStrEqual(name, ctxt->name))) {
1.230 veillard 6025: ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
1.147 daniel 6026: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6027: if ((name != NULL) && (ctxt->name != NULL)) {
6028: ctxt->sax->error(ctxt->userData,
6029: "Opening and ending tag mismatch: %s and %s\n",
6030: ctxt->name, name);
6031: } else if (ctxt->name != NULL) {
6032: ctxt->sax->error(ctxt->userData,
6033: "Ending tag eror for: %s\n", ctxt->name);
6034: } else {
6035: ctxt->sax->error(ctxt->userData,
6036: "Ending tag error: internal error ???\n");
6037: }
1.122 daniel 6038:
1.147 daniel 6039: }
1.83 daniel 6040: ctxt->wellFormed = 0;
1.180 daniel 6041: ctxt->disableSAX = 1;
1.83 daniel 6042: }
6043:
6044: /*
1.72 daniel 6045: * SAX: End of Tag
6046: */
1.171 daniel 6047: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6048: (!ctxt->disableSAX))
1.74 daniel 6049: ctxt->sax->endElement(ctxt->userData, name);
1.72 daniel 6050:
6051: if (name != NULL)
1.119 daniel 6052: xmlFree(name);
1.140 daniel 6053: oldname = namePop(ctxt);
1.176 daniel 6054: spacePop(ctxt);
1.140 daniel 6055: if (oldname != NULL) {
6056: #ifdef DEBUG_STACK
1.241 veillard 6057: xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
1.140 daniel 6058: #endif
6059: xmlFree(oldname);
6060: }
1.7 veillard 6061: return;
6062: }
6063:
1.50 daniel 6064: /**
6065: * xmlParseCDSect:
6066: * @ctxt: an XML parser context
6067: *
6068: * Parse escaped pure raw content.
1.29 daniel 6069: *
6070: * [18] CDSect ::= CDStart CData CDEnd
6071: *
6072: * [19] CDStart ::= '<![CDATA['
6073: *
6074: * [20] Data ::= (Char* - (Char* ']]>' Char*))
6075: *
6076: * [21] CDEnd ::= ']]>'
1.3 veillard 6077: */
1.55 daniel 6078: void
6079: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.135 daniel 6080: xmlChar *buf = NULL;
6081: int len = 0;
1.140 daniel 6082: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 6083: int r, rl;
6084: int s, sl;
6085: int cur, l;
1.234 veillard 6086: int count = 0;
1.3 veillard 6087:
1.106 daniel 6088: if ((NXT(0) == '<') && (NXT(1) == '!') &&
1.40 daniel 6089: (NXT(2) == '[') && (NXT(3) == 'C') &&
6090: (NXT(4) == 'D') && (NXT(5) == 'A') &&
6091: (NXT(6) == 'T') && (NXT(7) == 'A') &&
6092: (NXT(8) == '[')) {
6093: SKIP(9);
1.29 daniel 6094: } else
1.45 daniel 6095: return;
1.109 daniel 6096:
6097: ctxt->instate = XML_PARSER_CDATA_SECTION;
1.152 daniel 6098: r = CUR_CHAR(rl);
6099: if (!IS_CHAR(r)) {
1.230 veillard 6100: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.55 daniel 6101: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6102: ctxt->sax->error(ctxt->userData,
1.135 daniel 6103: "CData section not finished\n");
1.59 daniel 6104: ctxt->wellFormed = 0;
1.180 daniel 6105: ctxt->disableSAX = 1;
1.109 daniel 6106: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 6107: return;
1.3 veillard 6108: }
1.152 daniel 6109: NEXTL(rl);
6110: s = CUR_CHAR(sl);
6111: if (!IS_CHAR(s)) {
1.230 veillard 6112: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.55 daniel 6113: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6114: ctxt->sax->error(ctxt->userData,
1.135 daniel 6115: "CData section not finished\n");
1.59 daniel 6116: ctxt->wellFormed = 0;
1.180 daniel 6117: ctxt->disableSAX = 1;
1.109 daniel 6118: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 6119: return;
1.3 veillard 6120: }
1.152 daniel 6121: NEXTL(sl);
6122: cur = CUR_CHAR(l);
1.135 daniel 6123: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6124: if (buf == NULL) {
1.241 veillard 6125: xmlGenericError(xmlGenericErrorContext,
6126: "malloc of %d byte failed\n", size);
1.135 daniel 6127: return;
6128: }
1.108 veillard 6129: while (IS_CHAR(cur) &&
1.110 daniel 6130: ((r != ']') || (s != ']') || (cur != '>'))) {
1.152 daniel 6131: if (len + 5 >= size) {
1.135 daniel 6132: size *= 2;
1.204 veillard 6133: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 6134: if (buf == NULL) {
1.241 veillard 6135: xmlGenericError(xmlGenericErrorContext,
6136: "realloc of %d byte failed\n", size);
1.135 daniel 6137: return;
6138: }
6139: }
1.152 daniel 6140: COPY_BUF(rl,buf,len,r);
1.110 daniel 6141: r = s;
1.152 daniel 6142: rl = sl;
1.110 daniel 6143: s = cur;
1.152 daniel 6144: sl = l;
1.234 veillard 6145: count++;
6146: if (count > 50) {
6147: GROW;
6148: count = 0;
6149: }
1.152 daniel 6150: NEXTL(l);
6151: cur = CUR_CHAR(l);
1.3 veillard 6152: }
1.135 daniel 6153: buf[len] = 0;
1.109 daniel 6154: ctxt->instate = XML_PARSER_CONTENT;
1.152 daniel 6155: if (cur != '>') {
1.230 veillard 6156: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.55 daniel 6157: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6158: ctxt->sax->error(ctxt->userData,
1.135 daniel 6159: "CData section not finished\n%.50s\n", buf);
1.59 daniel 6160: ctxt->wellFormed = 0;
1.180 daniel 6161: ctxt->disableSAX = 1;
1.135 daniel 6162: xmlFree(buf);
1.45 daniel 6163: return;
1.3 veillard 6164: }
1.152 daniel 6165: NEXTL(l);
1.16 daniel 6166:
1.45 daniel 6167: /*
1.135 daniel 6168: * Ok the buffer is to be consumed as cdata.
1.45 daniel 6169: */
1.171 daniel 6170: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.107 daniel 6171: if (ctxt->sax->cdataBlock != NULL)
1.135 daniel 6172: ctxt->sax->cdataBlock(ctxt->userData, buf, len);
1.45 daniel 6173: }
1.135 daniel 6174: xmlFree(buf);
1.2 veillard 6175: }
6176:
1.50 daniel 6177: /**
6178: * xmlParseContent:
6179: * @ctxt: an XML parser context
6180: *
6181: * Parse a content:
1.2 veillard 6182: *
1.27 daniel 6183: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 6184: */
6185:
1.55 daniel 6186: void
6187: xmlParseContent(xmlParserCtxtPtr ctxt) {
1.97 daniel 6188: GROW;
1.176 daniel 6189: while (((RAW != 0) || (ctxt->token != 0)) &&
6190: ((RAW != '<') || (NXT(1) != '/'))) {
1.123 daniel 6191: const xmlChar *test = CUR_PTR;
1.91 daniel 6192: int cons = ctxt->input->consumed;
1.123 daniel 6193: xmlChar tok = ctxt->token;
1.27 daniel 6194:
6195: /*
1.152 daniel 6196: * Handle possible processed charrefs.
6197: */
6198: if (ctxt->token != 0) {
6199: xmlParseCharData(ctxt, 0);
6200: }
6201: /*
1.27 daniel 6202: * First case : a Processing Instruction.
6203: */
1.152 daniel 6204: else if ((RAW == '<') && (NXT(1) == '?')) {
1.27 daniel 6205: xmlParsePI(ctxt);
6206: }
1.72 daniel 6207:
1.27 daniel 6208: /*
6209: * Second case : a CDSection
6210: */
1.152 daniel 6211: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 6212: (NXT(2) == '[') && (NXT(3) == 'C') &&
6213: (NXT(4) == 'D') && (NXT(5) == 'A') &&
6214: (NXT(6) == 'T') && (NXT(7) == 'A') &&
6215: (NXT(8) == '[')) {
1.45 daniel 6216: xmlParseCDSect(ctxt);
1.27 daniel 6217: }
1.72 daniel 6218:
1.27 daniel 6219: /*
6220: * Third case : a comment
6221: */
1.152 daniel 6222: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 6223: (NXT(2) == '-') && (NXT(3) == '-')) {
1.114 daniel 6224: xmlParseComment(ctxt);
1.97 daniel 6225: ctxt->instate = XML_PARSER_CONTENT;
1.27 daniel 6226: }
1.72 daniel 6227:
1.27 daniel 6228: /*
6229: * Fourth case : a sub-element.
6230: */
1.152 daniel 6231: else if (RAW == '<') {
1.72 daniel 6232: xmlParseElement(ctxt);
1.45 daniel 6233: }
1.72 daniel 6234:
1.45 daniel 6235: /*
1.50 daniel 6236: * Fifth case : a reference. If if has not been resolved,
6237: * parsing returns it's Name, create the node
1.45 daniel 6238: */
1.97 daniel 6239:
1.152 daniel 6240: else if (RAW == '&') {
1.77 daniel 6241: xmlParseReference(ctxt);
1.27 daniel 6242: }
1.72 daniel 6243:
1.27 daniel 6244: /*
6245: * Last case, text. Note that References are handled directly.
6246: */
6247: else {
1.45 daniel 6248: xmlParseCharData(ctxt, 0);
1.3 veillard 6249: }
1.14 veillard 6250:
1.91 daniel 6251: GROW;
1.14 veillard 6252: /*
1.45 daniel 6253: * Pop-up of finished entities.
1.14 veillard 6254: */
1.152 daniel 6255: while ((RAW == 0) && (ctxt->inputNr > 1))
1.69 daniel 6256: xmlPopInput(ctxt);
1.135 daniel 6257: SHRINK;
1.45 daniel 6258:
1.113 daniel 6259: if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6260: (tok == ctxt->token)) {
1.230 veillard 6261: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 6262: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6263: ctxt->sax->error(ctxt->userData,
1.59 daniel 6264: "detected an error in element content\n");
6265: ctxt->wellFormed = 0;
1.180 daniel 6266: ctxt->disableSAX = 1;
1.224 veillard 6267: ctxt->instate = XML_PARSER_EOF;
1.29 daniel 6268: break;
6269: }
1.3 veillard 6270: }
1.2 veillard 6271: }
6272:
1.50 daniel 6273: /**
6274: * xmlParseElement:
6275: * @ctxt: an XML parser context
6276: *
6277: * parse an XML element, this is highly recursive
1.26 daniel 6278: *
6279: * [39] element ::= EmptyElemTag | STag content ETag
6280: *
1.98 daniel 6281: * [ WFC: Element Type Match ]
6282: * The Name in an element's end-tag must match the element type in the
6283: * start-tag.
6284: *
6285: * [ VC: Element Valid ]
1.117 daniel 6286: * An element is valid if there is a declaration matching elementdecl
1.99 daniel 6287: * where the Name matches the element type and one of the following holds:
6288: * - The declaration matches EMPTY and the element has no content.
6289: * - The declaration matches children and the sequence of child elements
6290: * belongs to the language generated by the regular expression in the
6291: * content model, with optional white space (characters matching the
6292: * nonterminal S) between each pair of child elements.
6293: * - The declaration matches Mixed and the content consists of character
6294: * data and child elements whose types match names in the content model.
6295: * - The declaration matches ANY, and the types of any child elements have
6296: * been declared.
1.2 veillard 6297: */
1.26 daniel 6298:
1.72 daniel 6299: void
1.69 daniel 6300: xmlParseElement(xmlParserCtxtPtr ctxt) {
1.123 daniel 6301: const xmlChar *openTag = CUR_PTR;
6302: xmlChar *name;
1.140 daniel 6303: xmlChar *oldname;
1.32 daniel 6304: xmlParserNodeInfo node_info;
1.118 daniel 6305: xmlNodePtr ret;
1.2 veillard 6306:
1.32 daniel 6307: /* Capture start position */
1.118 daniel 6308: if (ctxt->record_info) {
6309: node_info.begin_pos = ctxt->input->consumed +
6310: (CUR_PTR - ctxt->input->base);
6311: node_info.begin_line = ctxt->input->line;
6312: }
1.32 daniel 6313:
1.176 daniel 6314: if (ctxt->spaceNr == 0)
6315: spacePush(ctxt, -1);
6316: else
6317: spacePush(ctxt, *ctxt->space);
6318:
1.83 daniel 6319: name = xmlParseStartTag(ctxt);
6320: if (name == NULL) {
1.176 daniel 6321: spacePop(ctxt);
1.83 daniel 6322: return;
6323: }
1.140 daniel 6324: namePush(ctxt, name);
1.118 daniel 6325: ret = ctxt->node;
1.2 veillard 6326:
6327: /*
1.99 daniel 6328: * [ VC: Root Element Type ]
6329: * The Name in the document type declaration must match the element
6330: * type of the root element.
6331: */
1.105 daniel 6332: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 6333: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.102 daniel 6334: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
1.99 daniel 6335:
6336: /*
1.2 veillard 6337: * Check for an Empty Element.
6338: */
1.152 daniel 6339: if ((RAW == '/') && (NXT(1) == '>')) {
1.40 daniel 6340: SKIP(2);
1.171 daniel 6341: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6342: (!ctxt->disableSAX))
1.83 daniel 6343: ctxt->sax->endElement(ctxt->userData, name);
1.140 daniel 6344: oldname = namePop(ctxt);
1.176 daniel 6345: spacePop(ctxt);
1.140 daniel 6346: if (oldname != NULL) {
6347: #ifdef DEBUG_STACK
1.241 veillard 6348: xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
1.140 daniel 6349: #endif
6350: xmlFree(oldname);
1.211 veillard 6351: }
6352: if ( ret != NULL && ctxt->record_info ) {
6353: node_info.end_pos = ctxt->input->consumed +
6354: (CUR_PTR - ctxt->input->base);
6355: node_info.end_line = ctxt->input->line;
6356: node_info.node = ret;
6357: xmlParserAddNodeInfo(ctxt, &node_info);
1.140 daniel 6358: }
1.72 daniel 6359: return;
1.2 veillard 6360: }
1.152 daniel 6361: if (RAW == '>') {
1.91 daniel 6362: NEXT;
6363: } else {
1.230 veillard 6364: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.55 daniel 6365: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6366: ctxt->sax->error(ctxt->userData,
6367: "Couldn't find end of Start Tag\n%.30s\n",
1.57 daniel 6368: openTag);
1.59 daniel 6369: ctxt->wellFormed = 0;
1.180 daniel 6370: ctxt->disableSAX = 1;
1.45 daniel 6371:
6372: /*
6373: * end of parsing of this node.
6374: */
6375: nodePop(ctxt);
1.140 daniel 6376: oldname = namePop(ctxt);
1.176 daniel 6377: spacePop(ctxt);
1.140 daniel 6378: if (oldname != NULL) {
6379: #ifdef DEBUG_STACK
1.241 veillard 6380: xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
1.140 daniel 6381: #endif
6382: xmlFree(oldname);
6383: }
1.118 daniel 6384:
6385: /*
6386: * Capture end position and add node
6387: */
6388: if ( ret != NULL && ctxt->record_info ) {
6389: node_info.end_pos = ctxt->input->consumed +
6390: (CUR_PTR - ctxt->input->base);
6391: node_info.end_line = ctxt->input->line;
6392: node_info.node = ret;
6393: xmlParserAddNodeInfo(ctxt, &node_info);
6394: }
1.72 daniel 6395: return;
1.2 veillard 6396: }
6397:
6398: /*
6399: * Parse the content of the element:
6400: */
1.45 daniel 6401: xmlParseContent(ctxt);
1.153 daniel 6402: if (!IS_CHAR(RAW)) {
1.230 veillard 6403: ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
1.55 daniel 6404: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6405: ctxt->sax->error(ctxt->userData,
1.57 daniel 6406: "Premature end of data in tag %.30s\n", openTag);
1.59 daniel 6407: ctxt->wellFormed = 0;
1.180 daniel 6408: ctxt->disableSAX = 1;
1.45 daniel 6409:
6410: /*
6411: * end of parsing of this node.
6412: */
6413: nodePop(ctxt);
1.140 daniel 6414: oldname = namePop(ctxt);
1.176 daniel 6415: spacePop(ctxt);
1.140 daniel 6416: if (oldname != NULL) {
6417: #ifdef DEBUG_STACK
1.241 veillard 6418: xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
1.140 daniel 6419: #endif
6420: xmlFree(oldname);
6421: }
1.72 daniel 6422: return;
1.2 veillard 6423: }
6424:
6425: /*
1.27 daniel 6426: * parse the end of tag: '</' should be here.
1.2 veillard 6427: */
1.140 daniel 6428: xmlParseEndTag(ctxt);
1.118 daniel 6429:
6430: /*
6431: * Capture end position and add node
6432: */
6433: if ( ret != NULL && ctxt->record_info ) {
6434: node_info.end_pos = ctxt->input->consumed +
6435: (CUR_PTR - ctxt->input->base);
6436: node_info.end_line = ctxt->input->line;
6437: node_info.node = ret;
6438: xmlParserAddNodeInfo(ctxt, &node_info);
6439: }
1.2 veillard 6440: }
6441:
1.50 daniel 6442: /**
6443: * xmlParseVersionNum:
6444: * @ctxt: an XML parser context
6445: *
6446: * parse the XML version value.
1.29 daniel 6447: *
6448: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
1.68 daniel 6449: *
6450: * Returns the string giving the XML version number, or NULL
1.29 daniel 6451: */
1.123 daniel 6452: xmlChar *
1.55 daniel 6453: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.135 daniel 6454: xmlChar *buf = NULL;
6455: int len = 0;
6456: int size = 10;
6457: xmlChar cur;
1.29 daniel 6458:
1.135 daniel 6459: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6460: if (buf == NULL) {
1.241 veillard 6461: xmlGenericError(xmlGenericErrorContext,
6462: "malloc of %d byte failed\n", size);
1.135 daniel 6463: return(NULL);
6464: }
6465: cur = CUR;
1.152 daniel 6466: while (((cur >= 'a') && (cur <= 'z')) ||
6467: ((cur >= 'A') && (cur <= 'Z')) ||
6468: ((cur >= '0') && (cur <= '9')) ||
6469: (cur == '_') || (cur == '.') ||
6470: (cur == ':') || (cur == '-')) {
1.135 daniel 6471: if (len + 1 >= size) {
6472: size *= 2;
1.204 veillard 6473: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 6474: if (buf == NULL) {
1.241 veillard 6475: xmlGenericError(xmlGenericErrorContext,
6476: "realloc of %d byte failed\n", size);
1.135 daniel 6477: return(NULL);
6478: }
6479: }
6480: buf[len++] = cur;
6481: NEXT;
6482: cur=CUR;
6483: }
6484: buf[len] = 0;
6485: return(buf);
1.29 daniel 6486: }
6487:
1.50 daniel 6488: /**
6489: * xmlParseVersionInfo:
6490: * @ctxt: an XML parser context
6491: *
6492: * parse the XML version.
1.29 daniel 6493: *
6494: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6495: *
6496: * [25] Eq ::= S? '=' S?
1.50 daniel 6497: *
1.68 daniel 6498: * Returns the version string, e.g. "1.0"
1.29 daniel 6499: */
6500:
1.123 daniel 6501: xmlChar *
1.55 daniel 6502: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
1.123 daniel 6503: xmlChar *version = NULL;
6504: const xmlChar *q;
1.29 daniel 6505:
1.152 daniel 6506: if ((RAW == 'v') && (NXT(1) == 'e') &&
1.40 daniel 6507: (NXT(2) == 'r') && (NXT(3) == 's') &&
6508: (NXT(4) == 'i') && (NXT(5) == 'o') &&
6509: (NXT(6) == 'n')) {
6510: SKIP(7);
1.42 daniel 6511: SKIP_BLANKS;
1.152 daniel 6512: if (RAW != '=') {
1.230 veillard 6513: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.55 daniel 6514: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6515: ctxt->sax->error(ctxt->userData,
6516: "xmlParseVersionInfo : expected '='\n");
1.59 daniel 6517: ctxt->wellFormed = 0;
1.180 daniel 6518: ctxt->disableSAX = 1;
1.31 daniel 6519: return(NULL);
6520: }
1.40 daniel 6521: NEXT;
1.42 daniel 6522: SKIP_BLANKS;
1.152 daniel 6523: if (RAW == '"') {
1.40 daniel 6524: NEXT;
6525: q = CUR_PTR;
1.29 daniel 6526: version = xmlParseVersionNum(ctxt);
1.152 daniel 6527: if (RAW != '"') {
1.230 veillard 6528: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 6529: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6530: ctxt->sax->error(ctxt->userData,
6531: "String not closed\n%.50s\n", q);
1.59 daniel 6532: ctxt->wellFormed = 0;
1.180 daniel 6533: ctxt->disableSAX = 1;
1.55 daniel 6534: } else
1.40 daniel 6535: NEXT;
1.152 daniel 6536: } else if (RAW == '\''){
1.40 daniel 6537: NEXT;
6538: q = CUR_PTR;
1.29 daniel 6539: version = xmlParseVersionNum(ctxt);
1.152 daniel 6540: if (RAW != '\'') {
1.230 veillard 6541: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 6542: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6543: ctxt->sax->error(ctxt->userData,
6544: "String not closed\n%.50s\n", q);
1.59 daniel 6545: ctxt->wellFormed = 0;
1.180 daniel 6546: ctxt->disableSAX = 1;
1.55 daniel 6547: } else
1.40 daniel 6548: NEXT;
1.31 daniel 6549: } else {
1.230 veillard 6550: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.55 daniel 6551: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6552: ctxt->sax->error(ctxt->userData,
1.59 daniel 6553: "xmlParseVersionInfo : expected ' or \"\n");
1.122 daniel 6554: ctxt->wellFormed = 0;
1.180 daniel 6555: ctxt->disableSAX = 1;
1.29 daniel 6556: }
6557: }
6558: return(version);
6559: }
6560:
1.50 daniel 6561: /**
6562: * xmlParseEncName:
6563: * @ctxt: an XML parser context
6564: *
6565: * parse the XML encoding name
1.29 daniel 6566: *
6567: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1.50 daniel 6568: *
1.68 daniel 6569: * Returns the encoding name value or NULL
1.29 daniel 6570: */
1.123 daniel 6571: xmlChar *
1.55 daniel 6572: xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.135 daniel 6573: xmlChar *buf = NULL;
6574: int len = 0;
6575: int size = 10;
6576: xmlChar cur;
1.29 daniel 6577:
1.135 daniel 6578: cur = CUR;
6579: if (((cur >= 'a') && (cur <= 'z')) ||
6580: ((cur >= 'A') && (cur <= 'Z'))) {
6581: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6582: if (buf == NULL) {
1.241 veillard 6583: xmlGenericError(xmlGenericErrorContext,
6584: "malloc of %d byte failed\n", size);
1.135 daniel 6585: return(NULL);
6586: }
6587:
6588: buf[len++] = cur;
1.40 daniel 6589: NEXT;
1.135 daniel 6590: cur = CUR;
1.152 daniel 6591: while (((cur >= 'a') && (cur <= 'z')) ||
6592: ((cur >= 'A') && (cur <= 'Z')) ||
6593: ((cur >= '0') && (cur <= '9')) ||
6594: (cur == '.') || (cur == '_') ||
6595: (cur == '-')) {
1.135 daniel 6596: if (len + 1 >= size) {
6597: size *= 2;
1.204 veillard 6598: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 6599: if (buf == NULL) {
1.241 veillard 6600: xmlGenericError(xmlGenericErrorContext,
6601: "realloc of %d byte failed\n", size);
1.135 daniel 6602: return(NULL);
6603: }
6604: }
6605: buf[len++] = cur;
6606: NEXT;
6607: cur = CUR;
6608: if (cur == 0) {
6609: SHRINK;
6610: GROW;
6611: cur = CUR;
6612: }
6613: }
6614: buf[len] = 0;
1.29 daniel 6615: } else {
1.230 veillard 6616: ctxt->errNo = XML_ERR_ENCODING_NAME;
1.55 daniel 6617: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6618: ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
1.59 daniel 6619: ctxt->wellFormed = 0;
1.180 daniel 6620: ctxt->disableSAX = 1;
1.29 daniel 6621: }
1.135 daniel 6622: return(buf);
1.29 daniel 6623: }
6624:
1.50 daniel 6625: /**
6626: * xmlParseEncodingDecl:
6627: * @ctxt: an XML parser context
6628: *
6629: * parse the XML encoding declaration
1.29 daniel 6630: *
6631: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
1.50 daniel 6632: *
1.229 veillard 6633: * this setups the conversion filters.
1.50 daniel 6634: *
1.68 daniel 6635: * Returns the encoding value or NULL
1.29 daniel 6636: */
6637:
1.123 daniel 6638: xmlChar *
1.55 daniel 6639: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 6640: xmlChar *encoding = NULL;
6641: const xmlChar *q;
1.29 daniel 6642:
1.42 daniel 6643: SKIP_BLANKS;
1.152 daniel 6644: if ((RAW == 'e') && (NXT(1) == 'n') &&
1.40 daniel 6645: (NXT(2) == 'c') && (NXT(3) == 'o') &&
6646: (NXT(4) == 'd') && (NXT(5) == 'i') &&
6647: (NXT(6) == 'n') && (NXT(7) == 'g')) {
6648: SKIP(8);
1.42 daniel 6649: SKIP_BLANKS;
1.152 daniel 6650: if (RAW != '=') {
1.230 veillard 6651: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.55 daniel 6652: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6653: ctxt->sax->error(ctxt->userData,
6654: "xmlParseEncodingDecl : expected '='\n");
1.59 daniel 6655: ctxt->wellFormed = 0;
1.180 daniel 6656: ctxt->disableSAX = 1;
1.31 daniel 6657: return(NULL);
6658: }
1.40 daniel 6659: NEXT;
1.42 daniel 6660: SKIP_BLANKS;
1.152 daniel 6661: if (RAW == '"') {
1.40 daniel 6662: NEXT;
6663: q = CUR_PTR;
1.29 daniel 6664: encoding = xmlParseEncName(ctxt);
1.152 daniel 6665: if (RAW != '"') {
1.230 veillard 6666: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 6667: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6668: ctxt->sax->error(ctxt->userData,
6669: "String not closed\n%.50s\n", q);
1.59 daniel 6670: ctxt->wellFormed = 0;
1.180 daniel 6671: ctxt->disableSAX = 1;
1.55 daniel 6672: } else
1.40 daniel 6673: NEXT;
1.152 daniel 6674: } else if (RAW == '\''){
1.40 daniel 6675: NEXT;
6676: q = CUR_PTR;
1.29 daniel 6677: encoding = xmlParseEncName(ctxt);
1.152 daniel 6678: if (RAW != '\'') {
1.230 veillard 6679: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 6680: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6681: ctxt->sax->error(ctxt->userData,
6682: "String not closed\n%.50s\n", q);
1.59 daniel 6683: ctxt->wellFormed = 0;
1.180 daniel 6684: ctxt->disableSAX = 1;
1.55 daniel 6685: } else
1.40 daniel 6686: NEXT;
1.152 daniel 6687: } else if (RAW == '"'){
1.230 veillard 6688: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.55 daniel 6689: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6690: ctxt->sax->error(ctxt->userData,
1.59 daniel 6691: "xmlParseEncodingDecl : expected ' or \"\n");
6692: ctxt->wellFormed = 0;
1.180 daniel 6693: ctxt->disableSAX = 1;
1.29 daniel 6694: }
1.193 daniel 6695: if (encoding != NULL) {
6696: xmlCharEncoding enc;
6697: xmlCharEncodingHandlerPtr handler;
6698:
1.195 daniel 6699: if (ctxt->input->encoding != NULL)
6700: xmlFree((xmlChar *) ctxt->input->encoding);
6701: ctxt->input->encoding = encoding;
6702:
1.193 daniel 6703: enc = xmlParseCharEncoding((const char *) encoding);
6704: /*
6705: * registered set of known encodings
6706: */
6707: if (enc != XML_CHAR_ENCODING_ERROR) {
6708: xmlSwitchEncoding(ctxt, enc);
6709: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6710: xmlFree(encoding);
6711: return(NULL);
6712: }
6713: } else {
6714: /*
6715: * fallback for unknown encodings
6716: */
6717: handler = xmlFindCharEncodingHandler((const char *) encoding);
6718: if (handler != NULL) {
6719: xmlSwitchToEncoding(ctxt, handler);
6720: } else {
6721: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.208 veillard 6722: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6723: ctxt->sax->error(ctxt->userData,
6724: "Unsupported encoding %s\n", encoding);
1.193 daniel 6725: return(NULL);
6726: }
6727: }
6728: }
1.29 daniel 6729: }
6730: return(encoding);
6731: }
6732:
1.50 daniel 6733: /**
6734: * xmlParseSDDecl:
6735: * @ctxt: an XML parser context
6736: *
6737: * parse the XML standalone declaration
1.29 daniel 6738: *
6739: * [32] SDDecl ::= S 'standalone' Eq
6740: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
1.99 daniel 6741: *
6742: * [ VC: Standalone Document Declaration ]
6743: * TODO The standalone document declaration must have the value "no"
6744: * if any external markup declarations contain declarations of:
6745: * - attributes with default values, if elements to which these
6746: * attributes apply appear in the document without specifications
6747: * of values for these attributes, or
6748: * - entities (other than amp, lt, gt, apos, quot), if references
6749: * to those entities appear in the document, or
6750: * - attributes with values subject to normalization, where the
6751: * attribute appears in the document with a value which will change
6752: * as a result of normalization, or
6753: * - element types with element content, if white space occurs directly
6754: * within any instance of those types.
1.68 daniel 6755: *
6756: * Returns 1 if standalone, 0 otherwise
1.29 daniel 6757: */
6758:
1.55 daniel 6759: int
6760: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 6761: int standalone = -1;
6762:
1.42 daniel 6763: SKIP_BLANKS;
1.152 daniel 6764: if ((RAW == 's') && (NXT(1) == 't') &&
1.40 daniel 6765: (NXT(2) == 'a') && (NXT(3) == 'n') &&
6766: (NXT(4) == 'd') && (NXT(5) == 'a') &&
6767: (NXT(6) == 'l') && (NXT(7) == 'o') &&
6768: (NXT(8) == 'n') && (NXT(9) == 'e')) {
6769: SKIP(10);
1.81 daniel 6770: SKIP_BLANKS;
1.152 daniel 6771: if (RAW != '=') {
1.230 veillard 6772: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.55 daniel 6773: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6774: ctxt->sax->error(ctxt->userData,
1.59 daniel 6775: "XML standalone declaration : expected '='\n");
6776: ctxt->wellFormed = 0;
1.180 daniel 6777: ctxt->disableSAX = 1;
1.32 daniel 6778: return(standalone);
6779: }
1.40 daniel 6780: NEXT;
1.42 daniel 6781: SKIP_BLANKS;
1.152 daniel 6782: if (RAW == '\''){
1.40 daniel 6783: NEXT;
1.152 daniel 6784: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 6785: standalone = 0;
1.40 daniel 6786: SKIP(2);
1.152 daniel 6787: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 6788: (NXT(2) == 's')) {
1.29 daniel 6789: standalone = 1;
1.40 daniel 6790: SKIP(3);
1.29 daniel 6791: } else {
1.230 veillard 6792: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.55 daniel 6793: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6794: ctxt->sax->error(ctxt->userData,
6795: "standalone accepts only 'yes' or 'no'\n");
1.59 daniel 6796: ctxt->wellFormed = 0;
1.180 daniel 6797: ctxt->disableSAX = 1;
1.29 daniel 6798: }
1.152 daniel 6799: if (RAW != '\'') {
1.230 veillard 6800: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 6801: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6802: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 6803: ctxt->wellFormed = 0;
1.180 daniel 6804: ctxt->disableSAX = 1;
1.55 daniel 6805: } else
1.40 daniel 6806: NEXT;
1.152 daniel 6807: } else if (RAW == '"'){
1.40 daniel 6808: NEXT;
1.152 daniel 6809: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 6810: standalone = 0;
1.40 daniel 6811: SKIP(2);
1.152 daniel 6812: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 6813: (NXT(2) == 's')) {
1.29 daniel 6814: standalone = 1;
1.40 daniel 6815: SKIP(3);
1.29 daniel 6816: } else {
1.230 veillard 6817: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.55 daniel 6818: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6819: ctxt->sax->error(ctxt->userData,
1.59 daniel 6820: "standalone accepts only 'yes' or 'no'\n");
6821: ctxt->wellFormed = 0;
1.180 daniel 6822: ctxt->disableSAX = 1;
1.29 daniel 6823: }
1.152 daniel 6824: if (RAW != '"') {
1.230 veillard 6825: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 6826: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6827: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 6828: ctxt->wellFormed = 0;
1.180 daniel 6829: ctxt->disableSAX = 1;
1.55 daniel 6830: } else
1.40 daniel 6831: NEXT;
1.37 daniel 6832: } else {
1.230 veillard 6833: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.55 daniel 6834: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6835: ctxt->sax->error(ctxt->userData,
6836: "Standalone value not found\n");
1.59 daniel 6837: ctxt->wellFormed = 0;
1.180 daniel 6838: ctxt->disableSAX = 1;
1.37 daniel 6839: }
1.29 daniel 6840: }
6841: return(standalone);
6842: }
6843:
1.50 daniel 6844: /**
6845: * xmlParseXMLDecl:
6846: * @ctxt: an XML parser context
6847: *
6848: * parse an XML declaration header
1.29 daniel 6849: *
6850: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 6851: */
6852:
1.55 daniel 6853: void
6854: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 6855: xmlChar *version;
1.1 veillard 6856:
6857: /*
1.19 daniel 6858: * We know that '<?xml' is here.
1.1 veillard 6859: */
1.40 daniel 6860: SKIP(5);
1.1 veillard 6861:
1.153 daniel 6862: if (!IS_BLANK(RAW)) {
1.230 veillard 6863: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 6864: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6865: ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
1.59 daniel 6866: ctxt->wellFormed = 0;
1.180 daniel 6867: ctxt->disableSAX = 1;
1.59 daniel 6868: }
1.42 daniel 6869: SKIP_BLANKS;
1.1 veillard 6870:
6871: /*
1.29 daniel 6872: * We should have the VersionInfo here.
1.1 veillard 6873: */
1.29 daniel 6874: version = xmlParseVersionInfo(ctxt);
6875: if (version == NULL)
1.45 daniel 6876: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.72 daniel 6877: ctxt->version = xmlStrdup(version);
1.119 daniel 6878: xmlFree(version);
1.29 daniel 6879:
6880: /*
6881: * We may have the encoding declaration
6882: */
1.153 daniel 6883: if (!IS_BLANK(RAW)) {
1.152 daniel 6884: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 6885: SKIP(2);
6886: return;
6887: }
1.230 veillard 6888: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 6889: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6890: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 6891: ctxt->wellFormed = 0;
1.180 daniel 6892: ctxt->disableSAX = 1;
1.59 daniel 6893: }
1.195 daniel 6894: xmlParseEncodingDecl(ctxt);
1.193 daniel 6895: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6896: /*
6897: * The XML REC instructs us to stop parsing right here
6898: */
6899: return;
6900: }
1.1 veillard 6901:
6902: /*
1.29 daniel 6903: * We may have the standalone status.
1.1 veillard 6904: */
1.164 daniel 6905: if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
1.152 daniel 6906: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 6907: SKIP(2);
6908: return;
6909: }
1.230 veillard 6910: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 6911: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6912: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 6913: ctxt->wellFormed = 0;
1.180 daniel 6914: ctxt->disableSAX = 1;
1.59 daniel 6915: }
6916: SKIP_BLANKS;
1.167 daniel 6917: ctxt->input->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 6918:
1.42 daniel 6919: SKIP_BLANKS;
1.152 daniel 6920: if ((RAW == '?') && (NXT(1) == '>')) {
1.40 daniel 6921: SKIP(2);
1.152 daniel 6922: } else if (RAW == '>') {
1.31 daniel 6923: /* Deprecated old WD ... */
1.230 veillard 6924: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.55 daniel 6925: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6926: ctxt->sax->error(ctxt->userData,
6927: "XML declaration must end-up with '?>'\n");
1.59 daniel 6928: ctxt->wellFormed = 0;
1.180 daniel 6929: ctxt->disableSAX = 1;
1.40 daniel 6930: NEXT;
1.29 daniel 6931: } else {
1.230 veillard 6932: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.55 daniel 6933: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6934: ctxt->sax->error(ctxt->userData,
6935: "parsing XML declaration: '?>' expected\n");
1.59 daniel 6936: ctxt->wellFormed = 0;
1.180 daniel 6937: ctxt->disableSAX = 1;
1.40 daniel 6938: MOVETO_ENDTAG(CUR_PTR);
6939: NEXT;
1.29 daniel 6940: }
1.1 veillard 6941: }
6942:
1.50 daniel 6943: /**
6944: * xmlParseMisc:
6945: * @ctxt: an XML parser context
6946: *
6947: * parse an XML Misc* optionnal field.
1.21 daniel 6948: *
1.22 daniel 6949: * [27] Misc ::= Comment | PI | S
1.1 veillard 6950: */
6951:
1.55 daniel 6952: void
6953: xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.152 daniel 6954: while (((RAW == '<') && (NXT(1) == '?')) ||
6955: ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 6956: (NXT(2) == '-') && (NXT(3) == '-')) ||
6957: IS_BLANK(CUR)) {
1.152 daniel 6958: if ((RAW == '<') && (NXT(1) == '?')) {
1.16 daniel 6959: xmlParsePI(ctxt);
1.40 daniel 6960: } else if (IS_BLANK(CUR)) {
6961: NEXT;
1.1 veillard 6962: } else
1.114 daniel 6963: xmlParseComment(ctxt);
1.1 veillard 6964: }
6965: }
6966:
1.50 daniel 6967: /**
1.181 daniel 6968: * xmlParseDocument:
1.50 daniel 6969: * @ctxt: an XML parser context
6970: *
6971: * parse an XML document (and build a tree if using the standard SAX
6972: * interface).
1.21 daniel 6973: *
1.22 daniel 6974: * [1] document ::= prolog element Misc*
1.29 daniel 6975: *
6976: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.50 daniel 6977: *
1.68 daniel 6978: * Returns 0, -1 in case of error. the parser context is augmented
1.50 daniel 6979: * as a result of the parsing.
1.1 veillard 6980: */
6981:
1.55 daniel 6982: int
6983: xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.156 daniel 6984: xmlChar start[4];
6985: xmlCharEncoding enc;
6986:
1.235 veillard 6987: xmlInitParser();
1.45 daniel 6988:
1.91 daniel 6989: GROW;
6990:
1.14 veillard 6991: /*
1.44 daniel 6992: * SAX: beginning of the document processing.
6993: */
1.72 daniel 6994: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
1.74 daniel 6995: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
1.44 daniel 6996:
1.156 daniel 6997: /*
6998: * Get the 4 first bytes and decode the charset
6999: * if enc != XML_CHAR_ENCODING_NONE
7000: * plug some encoding conversion routines.
7001: */
7002: start[0] = RAW;
7003: start[1] = NXT(1);
7004: start[2] = NXT(2);
7005: start[3] = NXT(3);
7006: enc = xmlDetectCharEncoding(start, 4);
7007: if (enc != XML_CHAR_ENCODING_NONE) {
7008: xmlSwitchEncoding(ctxt, enc);
7009: }
7010:
1.1 veillard 7011:
1.59 daniel 7012: if (CUR == 0) {
1.230 veillard 7013: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 7014: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7015: ctxt->sax->error(ctxt->userData, "Document is empty\n");
1.59 daniel 7016: ctxt->wellFormed = 0;
1.180 daniel 7017: ctxt->disableSAX = 1;
1.59 daniel 7018: }
1.1 veillard 7019:
7020: /*
7021: * Check for the XMLDecl in the Prolog.
7022: */
1.91 daniel 7023: GROW;
1.152 daniel 7024: if ((RAW == '<') && (NXT(1) == '?') &&
1.40 daniel 7025: (NXT(2) == 'x') && (NXT(3) == 'm') &&
1.142 daniel 7026: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.196 daniel 7027:
7028: /*
7029: * Note that we will switch encoding on the fly.
7030: */
1.19 daniel 7031: xmlParseXMLDecl(ctxt);
1.193 daniel 7032: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7033: /*
7034: * The XML REC instructs us to stop parsing right here
7035: */
7036: return(-1);
7037: }
1.167 daniel 7038: ctxt->standalone = ctxt->input->standalone;
1.42 daniel 7039: SKIP_BLANKS;
1.1 veillard 7040: } else {
1.72 daniel 7041: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.1 veillard 7042: }
1.171 daniel 7043: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
1.74 daniel 7044: ctxt->sax->startDocument(ctxt->userData);
1.1 veillard 7045:
7046: /*
7047: * The Misc part of the Prolog
7048: */
1.91 daniel 7049: GROW;
1.16 daniel 7050: xmlParseMisc(ctxt);
1.1 veillard 7051:
7052: /*
1.29 daniel 7053: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 7054: * (doctypedecl Misc*)?
7055: */
1.91 daniel 7056: GROW;
1.152 daniel 7057: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7058: (NXT(2) == 'D') && (NXT(3) == 'O') &&
7059: (NXT(4) == 'C') && (NXT(5) == 'T') &&
7060: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7061: (NXT(8) == 'E')) {
1.165 daniel 7062:
1.166 daniel 7063: ctxt->inSubset = 1;
1.22 daniel 7064: xmlParseDocTypeDecl(ctxt);
1.152 daniel 7065: if (RAW == '[') {
1.140 daniel 7066: ctxt->instate = XML_PARSER_DTD;
7067: xmlParseInternalSubset(ctxt);
7068: }
1.165 daniel 7069:
7070: /*
7071: * Create and update the external subset.
7072: */
1.166 daniel 7073: ctxt->inSubset = 2;
1.171 daniel 7074: if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7075: (!ctxt->disableSAX))
1.165 daniel 7076: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7077: ctxt->extSubSystem, ctxt->extSubURI);
1.166 daniel 7078: ctxt->inSubset = 0;
1.165 daniel 7079:
7080:
1.96 daniel 7081: ctxt->instate = XML_PARSER_PROLOG;
1.22 daniel 7082: xmlParseMisc(ctxt);
1.21 daniel 7083: }
7084:
7085: /*
7086: * Time to start parsing the tree itself
1.1 veillard 7087: */
1.91 daniel 7088: GROW;
1.152 daniel 7089: if (RAW != '<') {
1.230 veillard 7090: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 7091: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7092: ctxt->sax->error(ctxt->userData,
1.151 daniel 7093: "Start tag expected, '<' not found\n");
1.59 daniel 7094: ctxt->wellFormed = 0;
1.180 daniel 7095: ctxt->disableSAX = 1;
1.140 daniel 7096: ctxt->instate = XML_PARSER_EOF;
7097: } else {
7098: ctxt->instate = XML_PARSER_CONTENT;
7099: xmlParseElement(ctxt);
7100: ctxt->instate = XML_PARSER_EPILOG;
7101:
7102:
7103: /*
7104: * The Misc part at the end
7105: */
7106: xmlParseMisc(ctxt);
7107:
1.152 daniel 7108: if (RAW != 0) {
1.230 veillard 7109: ctxt->errNo = XML_ERR_DOCUMENT_END;
1.140 daniel 7110: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7111: ctxt->sax->error(ctxt->userData,
7112: "Extra content at the end of the document\n");
7113: ctxt->wellFormed = 0;
1.180 daniel 7114: ctxt->disableSAX = 1;
1.140 daniel 7115: }
7116: ctxt->instate = XML_PARSER_EOF;
1.59 daniel 7117: }
7118:
1.44 daniel 7119: /*
7120: * SAX: end of the document processing.
7121: */
1.171 daniel 7122: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7123: (!ctxt->disableSAX))
1.74 daniel 7124: ctxt->sax->endDocument(ctxt->userData);
1.151 daniel 7125:
1.59 daniel 7126: if (! ctxt->wellFormed) return(-1);
1.16 daniel 7127: return(0);
7128: }
7129:
1.229 veillard 7130: /**
7131: * xmlParseExtParsedEnt:
7132: * @ctxt: an XML parser context
7133: *
7134: * parse a genreral parsed entity
7135: * An external general parsed entity is well-formed if it matches the
7136: * production labeled extParsedEnt.
7137: *
7138: * [78] extParsedEnt ::= TextDecl? content
7139: *
7140: * Returns 0, -1 in case of error. the parser context is augmented
7141: * as a result of the parsing.
7142: */
7143:
7144: int
7145: xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7146: xmlChar start[4];
7147: xmlCharEncoding enc;
7148:
7149: xmlDefaultSAXHandlerInit();
7150:
7151: GROW;
7152:
7153: /*
7154: * SAX: beginning of the document processing.
7155: */
7156: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7157: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7158:
7159: /*
7160: * Get the 4 first bytes and decode the charset
7161: * if enc != XML_CHAR_ENCODING_NONE
7162: * plug some encoding conversion routines.
7163: */
7164: start[0] = RAW;
7165: start[1] = NXT(1);
7166: start[2] = NXT(2);
7167: start[3] = NXT(3);
7168: enc = xmlDetectCharEncoding(start, 4);
7169: if (enc != XML_CHAR_ENCODING_NONE) {
7170: xmlSwitchEncoding(ctxt, enc);
7171: }
7172:
7173:
7174: if (CUR == 0) {
1.230 veillard 7175: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.229 veillard 7176: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7177: ctxt->sax->error(ctxt->userData, "Document is empty\n");
7178: ctxt->wellFormed = 0;
7179: ctxt->disableSAX = 1;
7180: }
7181:
7182: /*
7183: * Check for the XMLDecl in the Prolog.
7184: */
7185: GROW;
7186: if ((RAW == '<') && (NXT(1) == '?') &&
7187: (NXT(2) == 'x') && (NXT(3) == 'm') &&
7188: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7189:
7190: /*
7191: * Note that we will switch encoding on the fly.
7192: */
7193: xmlParseXMLDecl(ctxt);
7194: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7195: /*
7196: * The XML REC instructs us to stop parsing right here
7197: */
7198: return(-1);
7199: }
7200: SKIP_BLANKS;
7201: } else {
7202: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7203: }
7204: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7205: ctxt->sax->startDocument(ctxt->userData);
7206:
7207: /*
7208: * Doing validity checking on chunk doesn't make sense
7209: */
7210: ctxt->instate = XML_PARSER_CONTENT;
7211: ctxt->validate = 0;
7212: ctxt->depth = 0;
7213:
7214: xmlParseContent(ctxt);
7215:
7216: if ((RAW == '<') && (NXT(1) == '/')) {
1.230 veillard 7217: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.229 veillard 7218: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7219: ctxt->sax->error(ctxt->userData,
7220: "chunk is not well balanced\n");
7221: ctxt->wellFormed = 0;
7222: ctxt->disableSAX = 1;
7223: } else if (RAW != 0) {
1.230 veillard 7224: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
1.229 veillard 7225: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7226: ctxt->sax->error(ctxt->userData,
7227: "extra content at the end of well balanced chunk\n");
7228: ctxt->wellFormed = 0;
7229: ctxt->disableSAX = 1;
7230: }
7231:
7232: /*
7233: * SAX: end of the document processing.
7234: */
7235: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7236: (!ctxt->disableSAX))
7237: ctxt->sax->endDocument(ctxt->userData);
7238:
7239: if (! ctxt->wellFormed) return(-1);
7240: return(0);
7241: }
7242:
1.98 daniel 7243: /************************************************************************
7244: * *
1.128 daniel 7245: * Progressive parsing interfaces *
7246: * *
7247: ************************************************************************/
7248:
7249: /**
7250: * xmlParseLookupSequence:
7251: * @ctxt: an XML parser context
7252: * @first: the first char to lookup
1.140 daniel 7253: * @next: the next char to lookup or zero
7254: * @third: the next char to lookup or zero
1.128 daniel 7255: *
1.140 daniel 7256: * Try to find if a sequence (first, next, third) or just (first next) or
7257: * (first) is available in the input stream.
7258: * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7259: * to avoid rescanning sequences of bytes, it DOES change the state of the
7260: * parser, do not use liberally.
1.128 daniel 7261: *
1.140 daniel 7262: * Returns the index to the current parsing point if the full sequence
7263: * is available, -1 otherwise.
1.128 daniel 7264: */
7265: int
1.140 daniel 7266: xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7267: xmlChar next, xmlChar third) {
7268: int base, len;
7269: xmlParserInputPtr in;
7270: const xmlChar *buf;
7271:
7272: in = ctxt->input;
7273: if (in == NULL) return(-1);
7274: base = in->cur - in->base;
7275: if (base < 0) return(-1);
7276: if (ctxt->checkIndex > base)
7277: base = ctxt->checkIndex;
7278: if (in->buf == NULL) {
7279: buf = in->base;
7280: len = in->length;
7281: } else {
7282: buf = in->buf->buffer->content;
7283: len = in->buf->buffer->use;
7284: }
7285: /* take into account the sequence length */
7286: if (third) len -= 2;
7287: else if (next) len --;
7288: for (;base < len;base++) {
7289: if (buf[base] == first) {
7290: if (third != 0) {
7291: if ((buf[base + 1] != next) ||
7292: (buf[base + 2] != third)) continue;
7293: } else if (next != 0) {
7294: if (buf[base + 1] != next) continue;
7295: }
7296: ctxt->checkIndex = 0;
7297: #ifdef DEBUG_PUSH
7298: if (next == 0)
1.241 veillard 7299: xmlGenericError(xmlGenericErrorContext,
7300: "PP: lookup '%c' found at %d\n",
1.140 daniel 7301: first, base);
7302: else if (third == 0)
1.241 veillard 7303: xmlGenericError(xmlGenericErrorContext,
7304: "PP: lookup '%c%c' found at %d\n",
1.140 daniel 7305: first, next, base);
7306: else
1.241 veillard 7307: xmlGenericError(xmlGenericErrorContext,
7308: "PP: lookup '%c%c%c' found at %d\n",
1.140 daniel 7309: first, next, third, base);
7310: #endif
7311: return(base - (in->cur - in->base));
7312: }
7313: }
7314: ctxt->checkIndex = base;
7315: #ifdef DEBUG_PUSH
7316: if (next == 0)
1.241 veillard 7317: xmlGenericError(xmlGenericErrorContext,
7318: "PP: lookup '%c' failed\n", first);
1.140 daniel 7319: else if (third == 0)
1.241 veillard 7320: xmlGenericError(xmlGenericErrorContext,
7321: "PP: lookup '%c%c' failed\n", first, next);
1.140 daniel 7322: else
1.241 veillard 7323: xmlGenericError(xmlGenericErrorContext,
7324: "PP: lookup '%c%c%c' failed\n", first, next, third);
1.140 daniel 7325: #endif
7326: return(-1);
1.128 daniel 7327: }
7328:
7329: /**
1.143 daniel 7330: * xmlParseTryOrFinish:
1.128 daniel 7331: * @ctxt: an XML parser context
1.143 daniel 7332: * @terminate: last chunk indicator
1.128 daniel 7333: *
7334: * Try to progress on parsing
7335: *
7336: * Returns zero if no parsing was possible
7337: */
7338: int
1.143 daniel 7339: xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
1.128 daniel 7340: int ret = 0;
1.140 daniel 7341: int avail;
7342: xmlChar cur, next;
7343:
7344: #ifdef DEBUG_PUSH
7345: switch (ctxt->instate) {
7346: case XML_PARSER_EOF:
1.241 veillard 7347: xmlGenericError(xmlGenericErrorContext,
7348: "PP: try EOF\n"); break;
1.140 daniel 7349: case XML_PARSER_START:
1.241 veillard 7350: xmlGenericError(xmlGenericErrorContext,
7351: "PP: try START\n"); break;
1.140 daniel 7352: case XML_PARSER_MISC:
1.241 veillard 7353: xmlGenericError(xmlGenericErrorContext,
7354: "PP: try MISC\n");break;
1.140 daniel 7355: case XML_PARSER_COMMENT:
1.241 veillard 7356: xmlGenericError(xmlGenericErrorContext,
7357: "PP: try COMMENT\n");break;
1.140 daniel 7358: case XML_PARSER_PROLOG:
1.241 veillard 7359: xmlGenericError(xmlGenericErrorContext,
7360: "PP: try PROLOG\n");break;
1.140 daniel 7361: case XML_PARSER_START_TAG:
1.241 veillard 7362: xmlGenericError(xmlGenericErrorContext,
7363: "PP: try START_TAG\n");break;
1.140 daniel 7364: case XML_PARSER_CONTENT:
1.241 veillard 7365: xmlGenericError(xmlGenericErrorContext,
7366: "PP: try CONTENT\n");break;
1.140 daniel 7367: case XML_PARSER_CDATA_SECTION:
1.241 veillard 7368: xmlGenericError(xmlGenericErrorContext,
7369: "PP: try CDATA_SECTION\n");break;
1.140 daniel 7370: case XML_PARSER_END_TAG:
1.241 veillard 7371: xmlGenericError(xmlGenericErrorContext,
7372: "PP: try END_TAG\n");break;
1.140 daniel 7373: case XML_PARSER_ENTITY_DECL:
1.241 veillard 7374: xmlGenericError(xmlGenericErrorContext,
7375: "PP: try ENTITY_DECL\n");break;
1.140 daniel 7376: case XML_PARSER_ENTITY_VALUE:
1.241 veillard 7377: xmlGenericError(xmlGenericErrorContext,
7378: "PP: try ENTITY_VALUE\n");break;
1.140 daniel 7379: case XML_PARSER_ATTRIBUTE_VALUE:
1.241 veillard 7380: xmlGenericError(xmlGenericErrorContext,
7381: "PP: try ATTRIBUTE_VALUE\n");break;
1.140 daniel 7382: case XML_PARSER_DTD:
1.241 veillard 7383: xmlGenericError(xmlGenericErrorContext,
7384: "PP: try DTD\n");break;
1.140 daniel 7385: case XML_PARSER_EPILOG:
1.241 veillard 7386: xmlGenericError(xmlGenericErrorContext,
7387: "PP: try EPILOG\n");break;
1.140 daniel 7388: case XML_PARSER_PI:
1.241 veillard 7389: xmlGenericError(xmlGenericErrorContext,
7390: "PP: try PI\n");break;
1.245 veillard 7391: case XML_PARSER_IGNORE:
7392: xmlGenericError(xmlGenericErrorContext,
7393: "PP: try IGNORE\n");break;
1.140 daniel 7394: }
7395: #endif
1.128 daniel 7396:
7397: while (1) {
1.140 daniel 7398: /*
7399: * Pop-up of finished entities.
7400: */
1.152 daniel 7401: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 7402: xmlPopInput(ctxt);
7403:
1.184 daniel 7404: if (ctxt->input ==NULL) break;
7405: if (ctxt->input->buf == NULL)
7406: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7407: else
1.184 daniel 7408: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7409: if (avail < 1)
7410: goto done;
1.128 daniel 7411: switch (ctxt->instate) {
7412: case XML_PARSER_EOF:
1.140 daniel 7413: /*
7414: * Document parsing is done !
7415: */
7416: goto done;
7417: case XML_PARSER_START:
7418: /*
7419: * Very first chars read from the document flow.
7420: */
1.184 daniel 7421: cur = ctxt->input->cur[0];
1.140 daniel 7422: if (IS_BLANK(cur)) {
7423: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7424: ctxt->sax->setDocumentLocator(ctxt->userData,
7425: &xmlDefaultSAXLocator);
1.230 veillard 7426: ctxt->errNo = XML_ERR_DOCUMENT_START;
1.140 daniel 7427: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7428: ctxt->sax->error(ctxt->userData,
7429: "Extra spaces at the beginning of the document are not allowed\n");
7430: ctxt->wellFormed = 0;
1.180 daniel 7431: ctxt->disableSAX = 1;
1.140 daniel 7432: SKIP_BLANKS;
7433: ret++;
1.184 daniel 7434: if (ctxt->input->buf == NULL)
7435: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7436: else
1.184 daniel 7437: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7438: }
7439: if (avail < 2)
7440: goto done;
7441:
1.184 daniel 7442: cur = ctxt->input->cur[0];
7443: next = ctxt->input->cur[1];
1.140 daniel 7444: if (cur == 0) {
7445: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7446: ctxt->sax->setDocumentLocator(ctxt->userData,
7447: &xmlDefaultSAXLocator);
1.230 veillard 7448: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.140 daniel 7449: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7450: ctxt->sax->error(ctxt->userData, "Document is empty\n");
7451: ctxt->wellFormed = 0;
1.180 daniel 7452: ctxt->disableSAX = 1;
1.140 daniel 7453: ctxt->instate = XML_PARSER_EOF;
7454: #ifdef DEBUG_PUSH
1.241 veillard 7455: xmlGenericError(xmlGenericErrorContext,
7456: "PP: entering EOF\n");
1.140 daniel 7457: #endif
7458: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7459: ctxt->sax->endDocument(ctxt->userData);
7460: goto done;
7461: }
7462: if ((cur == '<') && (next == '?')) {
7463: /* PI or XML decl */
7464: if (avail < 5) return(ret);
1.143 daniel 7465: if ((!terminate) &&
7466: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 7467: return(ret);
7468: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7469: ctxt->sax->setDocumentLocator(ctxt->userData,
7470: &xmlDefaultSAXLocator);
1.184 daniel 7471: if ((ctxt->input->cur[2] == 'x') &&
7472: (ctxt->input->cur[3] == 'm') &&
7473: (ctxt->input->cur[4] == 'l') &&
7474: (IS_BLANK(ctxt->input->cur[5]))) {
1.140 daniel 7475: ret += 5;
7476: #ifdef DEBUG_PUSH
1.241 veillard 7477: xmlGenericError(xmlGenericErrorContext,
7478: "PP: Parsing XML Decl\n");
1.140 daniel 7479: #endif
7480: xmlParseXMLDecl(ctxt);
1.193 daniel 7481: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7482: /*
7483: * The XML REC instructs us to stop parsing right
7484: * here
7485: */
7486: ctxt->instate = XML_PARSER_EOF;
7487: return(0);
7488: }
1.167 daniel 7489: ctxt->standalone = ctxt->input->standalone;
1.164 daniel 7490: if ((ctxt->encoding == NULL) &&
7491: (ctxt->input->encoding != NULL))
7492: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1.171 daniel 7493: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7494: (!ctxt->disableSAX))
1.140 daniel 7495: ctxt->sax->startDocument(ctxt->userData);
7496: ctxt->instate = XML_PARSER_MISC;
7497: #ifdef DEBUG_PUSH
1.241 veillard 7498: xmlGenericError(xmlGenericErrorContext,
7499: "PP: entering MISC\n");
1.140 daniel 7500: #endif
7501: } else {
7502: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 daniel 7503: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7504: (!ctxt->disableSAX))
1.140 daniel 7505: ctxt->sax->startDocument(ctxt->userData);
7506: ctxt->instate = XML_PARSER_MISC;
7507: #ifdef DEBUG_PUSH
1.241 veillard 7508: xmlGenericError(xmlGenericErrorContext,
7509: "PP: entering MISC\n");
1.140 daniel 7510: #endif
7511: }
7512: } else {
7513: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7514: ctxt->sax->setDocumentLocator(ctxt->userData,
7515: &xmlDefaultSAXLocator);
7516: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 daniel 7517: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7518: (!ctxt->disableSAX))
1.140 daniel 7519: ctxt->sax->startDocument(ctxt->userData);
7520: ctxt->instate = XML_PARSER_MISC;
7521: #ifdef DEBUG_PUSH
1.241 veillard 7522: xmlGenericError(xmlGenericErrorContext,
7523: "PP: entering MISC\n");
1.140 daniel 7524: #endif
7525: }
7526: break;
7527: case XML_PARSER_MISC:
7528: SKIP_BLANKS;
1.184 daniel 7529: if (ctxt->input->buf == NULL)
7530: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7531: else
1.184 daniel 7532: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7533: if (avail < 2)
7534: goto done;
1.184 daniel 7535: cur = ctxt->input->cur[0];
7536: next = ctxt->input->cur[1];
1.140 daniel 7537: if ((cur == '<') && (next == '?')) {
1.143 daniel 7538: if ((!terminate) &&
7539: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 7540: goto done;
7541: #ifdef DEBUG_PUSH
1.241 veillard 7542: xmlGenericError(xmlGenericErrorContext,
7543: "PP: Parsing PI\n");
1.140 daniel 7544: #endif
7545: xmlParsePI(ctxt);
7546: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 7547: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 7548: if ((!terminate) &&
7549: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 7550: goto done;
7551: #ifdef DEBUG_PUSH
1.241 veillard 7552: xmlGenericError(xmlGenericErrorContext,
7553: "PP: Parsing Comment\n");
1.140 daniel 7554: #endif
7555: xmlParseComment(ctxt);
7556: ctxt->instate = XML_PARSER_MISC;
7557: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 7558: (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
7559: (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
7560: (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
7561: (ctxt->input->cur[8] == 'E')) {
1.143 daniel 7562: if ((!terminate) &&
7563: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 7564: goto done;
7565: #ifdef DEBUG_PUSH
1.241 veillard 7566: xmlGenericError(xmlGenericErrorContext,
7567: "PP: Parsing internal subset\n");
1.140 daniel 7568: #endif
1.166 daniel 7569: ctxt->inSubset = 1;
1.140 daniel 7570: xmlParseDocTypeDecl(ctxt);
1.152 daniel 7571: if (RAW == '[') {
1.140 daniel 7572: ctxt->instate = XML_PARSER_DTD;
7573: #ifdef DEBUG_PUSH
1.241 veillard 7574: xmlGenericError(xmlGenericErrorContext,
7575: "PP: entering DTD\n");
1.140 daniel 7576: #endif
7577: } else {
1.166 daniel 7578: /*
7579: * Create and update the external subset.
7580: */
7581: ctxt->inSubset = 2;
1.171 daniel 7582: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 7583: (ctxt->sax->externalSubset != NULL))
7584: ctxt->sax->externalSubset(ctxt->userData,
7585: ctxt->intSubName, ctxt->extSubSystem,
7586: ctxt->extSubURI);
7587: ctxt->inSubset = 0;
1.140 daniel 7588: ctxt->instate = XML_PARSER_PROLOG;
7589: #ifdef DEBUG_PUSH
1.241 veillard 7590: xmlGenericError(xmlGenericErrorContext,
7591: "PP: entering PROLOG\n");
1.140 daniel 7592: #endif
7593: }
7594: } else if ((cur == '<') && (next == '!') &&
7595: (avail < 9)) {
7596: goto done;
7597: } else {
7598: ctxt->instate = XML_PARSER_START_TAG;
7599: #ifdef DEBUG_PUSH
1.241 veillard 7600: xmlGenericError(xmlGenericErrorContext,
7601: "PP: entering START_TAG\n");
1.140 daniel 7602: #endif
7603: }
7604: break;
1.245 veillard 7605: case XML_PARSER_IGNORE:
7606: xmlGenericError(xmlGenericErrorContext,
7607: "PP: internal error, state == IGNORE");
7608: ctxt->instate = XML_PARSER_DTD;
7609: #ifdef DEBUG_PUSH
7610: xmlGenericError(xmlGenericErrorContext,
7611: "PP: entering DTD\n");
7612: #endif
7613: break;
1.128 daniel 7614: case XML_PARSER_PROLOG:
1.140 daniel 7615: SKIP_BLANKS;
1.184 daniel 7616: if (ctxt->input->buf == NULL)
7617: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7618: else
1.184 daniel 7619: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7620: if (avail < 2)
7621: goto done;
1.184 daniel 7622: cur = ctxt->input->cur[0];
7623: next = ctxt->input->cur[1];
1.140 daniel 7624: if ((cur == '<') && (next == '?')) {
1.143 daniel 7625: if ((!terminate) &&
7626: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 7627: goto done;
7628: #ifdef DEBUG_PUSH
1.241 veillard 7629: xmlGenericError(xmlGenericErrorContext,
7630: "PP: Parsing PI\n");
1.140 daniel 7631: #endif
7632: xmlParsePI(ctxt);
7633: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 7634: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 7635: if ((!terminate) &&
7636: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 7637: goto done;
7638: #ifdef DEBUG_PUSH
1.241 veillard 7639: xmlGenericError(xmlGenericErrorContext,
7640: "PP: Parsing Comment\n");
1.140 daniel 7641: #endif
7642: xmlParseComment(ctxt);
7643: ctxt->instate = XML_PARSER_PROLOG;
7644: } else if ((cur == '<') && (next == '!') &&
7645: (avail < 4)) {
7646: goto done;
7647: } else {
7648: ctxt->instate = XML_PARSER_START_TAG;
7649: #ifdef DEBUG_PUSH
1.241 veillard 7650: xmlGenericError(xmlGenericErrorContext,
7651: "PP: entering START_TAG\n");
1.140 daniel 7652: #endif
7653: }
7654: break;
7655: case XML_PARSER_EPILOG:
7656: SKIP_BLANKS;
1.184 daniel 7657: if (ctxt->input->buf == NULL)
7658: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7659: else
1.184 daniel 7660: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7661: if (avail < 2)
7662: goto done;
1.184 daniel 7663: cur = ctxt->input->cur[0];
7664: next = ctxt->input->cur[1];
1.140 daniel 7665: if ((cur == '<') && (next == '?')) {
1.143 daniel 7666: if ((!terminate) &&
7667: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 7668: goto done;
7669: #ifdef DEBUG_PUSH
1.241 veillard 7670: xmlGenericError(xmlGenericErrorContext,
7671: "PP: Parsing PI\n");
1.140 daniel 7672: #endif
7673: xmlParsePI(ctxt);
7674: ctxt->instate = XML_PARSER_EPILOG;
7675: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 7676: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 7677: if ((!terminate) &&
7678: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 7679: goto done;
7680: #ifdef DEBUG_PUSH
1.241 veillard 7681: xmlGenericError(xmlGenericErrorContext,
7682: "PP: Parsing Comment\n");
1.140 daniel 7683: #endif
7684: xmlParseComment(ctxt);
7685: ctxt->instate = XML_PARSER_EPILOG;
7686: } else if ((cur == '<') && (next == '!') &&
7687: (avail < 4)) {
7688: goto done;
7689: } else {
1.230 veillard 7690: ctxt->errNo = XML_ERR_DOCUMENT_END;
1.140 daniel 7691: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7692: ctxt->sax->error(ctxt->userData,
7693: "Extra content at the end of the document\n");
7694: ctxt->wellFormed = 0;
1.180 daniel 7695: ctxt->disableSAX = 1;
1.140 daniel 7696: ctxt->instate = XML_PARSER_EOF;
7697: #ifdef DEBUG_PUSH
1.241 veillard 7698: xmlGenericError(xmlGenericErrorContext,
7699: "PP: entering EOF\n");
1.140 daniel 7700: #endif
1.171 daniel 7701: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7702: (!ctxt->disableSAX))
1.140 daniel 7703: ctxt->sax->endDocument(ctxt->userData);
7704: goto done;
7705: }
7706: break;
7707: case XML_PARSER_START_TAG: {
7708: xmlChar *name, *oldname;
7709:
1.184 daniel 7710: if ((avail < 2) && (ctxt->inputNr == 1))
1.140 daniel 7711: goto done;
1.184 daniel 7712: cur = ctxt->input->cur[0];
1.140 daniel 7713: if (cur != '<') {
1.230 veillard 7714: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.140 daniel 7715: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7716: ctxt->sax->error(ctxt->userData,
7717: "Start tag expect, '<' not found\n");
7718: ctxt->wellFormed = 0;
1.180 daniel 7719: ctxt->disableSAX = 1;
1.140 daniel 7720: ctxt->instate = XML_PARSER_EOF;
7721: #ifdef DEBUG_PUSH
1.241 veillard 7722: xmlGenericError(xmlGenericErrorContext,
7723: "PP: entering EOF\n");
1.140 daniel 7724: #endif
1.171 daniel 7725: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7726: (!ctxt->disableSAX))
1.140 daniel 7727: ctxt->sax->endDocument(ctxt->userData);
7728: goto done;
7729: }
1.143 daniel 7730: if ((!terminate) &&
7731: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 7732: goto done;
1.176 daniel 7733: if (ctxt->spaceNr == 0)
7734: spacePush(ctxt, -1);
7735: else
7736: spacePush(ctxt, *ctxt->space);
1.140 daniel 7737: name = xmlParseStartTag(ctxt);
7738: if (name == NULL) {
1.176 daniel 7739: spacePop(ctxt);
1.140 daniel 7740: ctxt->instate = XML_PARSER_EOF;
7741: #ifdef DEBUG_PUSH
1.241 veillard 7742: xmlGenericError(xmlGenericErrorContext,
7743: "PP: entering EOF\n");
1.140 daniel 7744: #endif
1.171 daniel 7745: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7746: (!ctxt->disableSAX))
1.140 daniel 7747: ctxt->sax->endDocument(ctxt->userData);
7748: goto done;
7749: }
7750: namePush(ctxt, xmlStrdup(name));
7751:
7752: /*
7753: * [ VC: Root Element Type ]
7754: * The Name in the document type declaration must match
7755: * the element type of the root element.
7756: */
7757: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 7758: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.140 daniel 7759: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7760:
7761: /*
7762: * Check for an Empty Element.
7763: */
1.152 daniel 7764: if ((RAW == '/') && (NXT(1) == '>')) {
1.140 daniel 7765: SKIP(2);
1.171 daniel 7766: if ((ctxt->sax != NULL) &&
7767: (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
1.140 daniel 7768: ctxt->sax->endElement(ctxt->userData, name);
7769: xmlFree(name);
7770: oldname = namePop(ctxt);
1.176 daniel 7771: spacePop(ctxt);
1.140 daniel 7772: if (oldname != NULL) {
7773: #ifdef DEBUG_STACK
1.241 veillard 7774: xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
1.140 daniel 7775: #endif
7776: xmlFree(oldname);
7777: }
7778: if (ctxt->name == NULL) {
7779: ctxt->instate = XML_PARSER_EPILOG;
7780: #ifdef DEBUG_PUSH
1.241 veillard 7781: xmlGenericError(xmlGenericErrorContext,
7782: "PP: entering EPILOG\n");
1.140 daniel 7783: #endif
7784: } else {
7785: ctxt->instate = XML_PARSER_CONTENT;
7786: #ifdef DEBUG_PUSH
1.241 veillard 7787: xmlGenericError(xmlGenericErrorContext,
7788: "PP: entering CONTENT\n");
1.140 daniel 7789: #endif
7790: }
7791: break;
7792: }
1.152 daniel 7793: if (RAW == '>') {
1.140 daniel 7794: NEXT;
7795: } else {
1.230 veillard 7796: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.140 daniel 7797: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7798: ctxt->sax->error(ctxt->userData,
7799: "Couldn't find end of Start Tag %s\n",
7800: name);
7801: ctxt->wellFormed = 0;
1.180 daniel 7802: ctxt->disableSAX = 1;
1.140 daniel 7803:
7804: /*
7805: * end of parsing of this node.
7806: */
7807: nodePop(ctxt);
7808: oldname = namePop(ctxt);
1.176 daniel 7809: spacePop(ctxt);
1.140 daniel 7810: if (oldname != NULL) {
7811: #ifdef DEBUG_STACK
1.241 veillard 7812: xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
1.140 daniel 7813: #endif
7814: xmlFree(oldname);
7815: }
7816: }
7817: xmlFree(name);
7818: ctxt->instate = XML_PARSER_CONTENT;
7819: #ifdef DEBUG_PUSH
1.241 veillard 7820: xmlGenericError(xmlGenericErrorContext,
7821: "PP: entering CONTENT\n");
1.140 daniel 7822: #endif
7823: break;
7824: }
1.224 veillard 7825: case XML_PARSER_CONTENT: {
7826: const xmlChar *test;
7827: int cons;
7828: xmlChar tok;
7829:
1.140 daniel 7830: /*
7831: * Handle preparsed entities and charRef
7832: */
7833: if (ctxt->token != 0) {
7834: xmlChar cur[2] = { 0 , 0 } ;
7835:
7836: cur[0] = (xmlChar) ctxt->token;
1.171 daniel 7837: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
7838: (ctxt->sax->characters != NULL))
1.140 daniel 7839: ctxt->sax->characters(ctxt->userData, cur, 1);
7840: ctxt->token = 0;
7841: }
1.184 daniel 7842: if ((avail < 2) && (ctxt->inputNr == 1))
1.140 daniel 7843: goto done;
1.184 daniel 7844: cur = ctxt->input->cur[0];
7845: next = ctxt->input->cur[1];
1.224 veillard 7846:
7847: test = CUR_PTR;
7848: cons = ctxt->input->consumed;
7849: tok = ctxt->token;
1.140 daniel 7850: if ((cur == '<') && (next == '?')) {
1.143 daniel 7851: if ((!terminate) &&
7852: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 7853: goto done;
7854: #ifdef DEBUG_PUSH
1.241 veillard 7855: xmlGenericError(xmlGenericErrorContext,
7856: "PP: Parsing PI\n");
1.140 daniel 7857: #endif
7858: xmlParsePI(ctxt);
7859: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 7860: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 7861: if ((!terminate) &&
7862: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 7863: goto done;
7864: #ifdef DEBUG_PUSH
1.241 veillard 7865: xmlGenericError(xmlGenericErrorContext,
7866: "PP: Parsing Comment\n");
1.140 daniel 7867: #endif
7868: xmlParseComment(ctxt);
7869: ctxt->instate = XML_PARSER_CONTENT;
1.184 daniel 7870: } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
7871: (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
7872: (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
7873: (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
7874: (ctxt->input->cur[8] == '[')) {
1.140 daniel 7875: SKIP(9);
7876: ctxt->instate = XML_PARSER_CDATA_SECTION;
7877: #ifdef DEBUG_PUSH
1.241 veillard 7878: xmlGenericError(xmlGenericErrorContext,
7879: "PP: entering CDATA_SECTION\n");
1.140 daniel 7880: #endif
7881: break;
7882: } else if ((cur == '<') && (next == '!') &&
7883: (avail < 9)) {
7884: goto done;
7885: } else if ((cur == '<') && (next == '/')) {
7886: ctxt->instate = XML_PARSER_END_TAG;
7887: #ifdef DEBUG_PUSH
1.241 veillard 7888: xmlGenericError(xmlGenericErrorContext,
7889: "PP: entering END_TAG\n");
1.140 daniel 7890: #endif
7891: break;
7892: } else if (cur == '<') {
7893: ctxt->instate = XML_PARSER_START_TAG;
7894: #ifdef DEBUG_PUSH
1.241 veillard 7895: xmlGenericError(xmlGenericErrorContext,
7896: "PP: entering START_TAG\n");
1.140 daniel 7897: #endif
7898: break;
7899: } else if (cur == '&') {
1.143 daniel 7900: if ((!terminate) &&
7901: (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
1.140 daniel 7902: goto done;
7903: #ifdef DEBUG_PUSH
1.241 veillard 7904: xmlGenericError(xmlGenericErrorContext,
7905: "PP: Parsing Reference\n");
1.140 daniel 7906: #endif
7907: xmlParseReference(ctxt);
7908: } else {
1.156 daniel 7909: /* TODO Avoid the extra copy, handle directly !!! */
1.140 daniel 7910: /*
1.181 daniel 7911: * Goal of the following test is:
1.140 daniel 7912: * - minimize calls to the SAX 'character' callback
7913: * when they are mergeable
7914: * - handle an problem for isBlank when we only parse
7915: * a sequence of blank chars and the next one is
7916: * not available to check against '<' presence.
7917: * - tries to homogenize the differences in SAX
7918: * callbacks beween the push and pull versions
7919: * of the parser.
7920: */
7921: if ((ctxt->inputNr == 1) &&
7922: (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
1.143 daniel 7923: if ((!terminate) &&
7924: (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
1.140 daniel 7925: goto done;
7926: }
7927: ctxt->checkIndex = 0;
7928: #ifdef DEBUG_PUSH
1.241 veillard 7929: xmlGenericError(xmlGenericErrorContext,
7930: "PP: Parsing char data\n");
1.140 daniel 7931: #endif
7932: xmlParseCharData(ctxt, 0);
7933: }
7934: /*
7935: * Pop-up of finished entities.
7936: */
1.152 daniel 7937: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 7938: xmlPopInput(ctxt);
1.224 veillard 7939: if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
7940: (tok == ctxt->token)) {
1.230 veillard 7941: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.224 veillard 7942: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7943: ctxt->sax->error(ctxt->userData,
7944: "detected an error in element content\n");
7945: ctxt->wellFormed = 0;
7946: ctxt->disableSAX = 1;
7947: ctxt->instate = XML_PARSER_EOF;
7948: break;
7949: }
1.140 daniel 7950: break;
1.224 veillard 7951: }
1.140 daniel 7952: case XML_PARSER_CDATA_SECTION: {
7953: /*
7954: * The Push mode need to have the SAX callback for
7955: * cdataBlock merge back contiguous callbacks.
7956: */
7957: int base;
7958:
7959: base = xmlParseLookupSequence(ctxt, ']', ']', '>');
7960: if (base < 0) {
7961: if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
1.171 daniel 7962: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.140 daniel 7963: if (ctxt->sax->cdataBlock != NULL)
1.184 daniel 7964: ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
1.140 daniel 7965: XML_PARSER_BIG_BUFFER_SIZE);
7966: }
7967: SKIP(XML_PARSER_BIG_BUFFER_SIZE);
7968: ctxt->checkIndex = 0;
7969: }
7970: goto done;
7971: } else {
1.171 daniel 7972: if ((ctxt->sax != NULL) && (base > 0) &&
7973: (!ctxt->disableSAX)) {
1.140 daniel 7974: if (ctxt->sax->cdataBlock != NULL)
7975: ctxt->sax->cdataBlock(ctxt->userData,
1.184 daniel 7976: ctxt->input->cur, base);
1.140 daniel 7977: }
7978: SKIP(base + 3);
7979: ctxt->checkIndex = 0;
7980: ctxt->instate = XML_PARSER_CONTENT;
7981: #ifdef DEBUG_PUSH
1.241 veillard 7982: xmlGenericError(xmlGenericErrorContext,
7983: "PP: entering CONTENT\n");
1.140 daniel 7984: #endif
7985: }
7986: break;
7987: }
1.141 daniel 7988: case XML_PARSER_END_TAG:
1.140 daniel 7989: if (avail < 2)
7990: goto done;
1.143 daniel 7991: if ((!terminate) &&
7992: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 7993: goto done;
7994: xmlParseEndTag(ctxt);
7995: if (ctxt->name == NULL) {
7996: ctxt->instate = XML_PARSER_EPILOG;
7997: #ifdef DEBUG_PUSH
1.241 veillard 7998: xmlGenericError(xmlGenericErrorContext,
7999: "PP: entering EPILOG\n");
1.140 daniel 8000: #endif
8001: } else {
8002: ctxt->instate = XML_PARSER_CONTENT;
8003: #ifdef DEBUG_PUSH
1.241 veillard 8004: xmlGenericError(xmlGenericErrorContext,
8005: "PP: entering CONTENT\n");
1.140 daniel 8006: #endif
8007: }
8008: break;
8009: case XML_PARSER_DTD: {
8010: /*
8011: * Sorry but progressive parsing of the internal subset
8012: * is not expected to be supported. We first check that
8013: * the full content of the internal subset is available and
8014: * the parsing is launched only at that point.
8015: * Internal subset ends up with "']' S? '>'" in an unescaped
8016: * section and not in a ']]>' sequence which are conditional
8017: * sections (whoever argued to keep that crap in XML deserve
8018: * a place in hell !).
8019: */
8020: int base, i;
8021: xmlChar *buf;
8022: xmlChar quote = 0;
8023:
1.184 daniel 8024: base = ctxt->input->cur - ctxt->input->base;
1.140 daniel 8025: if (base < 0) return(0);
8026: if (ctxt->checkIndex > base)
8027: base = ctxt->checkIndex;
1.184 daniel 8028: buf = ctxt->input->buf->buffer->content;
1.202 daniel 8029: for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8030: base++) {
1.140 daniel 8031: if (quote != 0) {
8032: if (buf[base] == quote)
8033: quote = 0;
8034: continue;
8035: }
8036: if (buf[base] == '"') {
8037: quote = '"';
8038: continue;
8039: }
8040: if (buf[base] == '\'') {
8041: quote = '\'';
8042: continue;
8043: }
8044: if (buf[base] == ']') {
1.202 daniel 8045: if ((unsigned int) base +1 >=
8046: ctxt->input->buf->buffer->use)
1.140 daniel 8047: break;
8048: if (buf[base + 1] == ']') {
8049: /* conditional crap, skip both ']' ! */
8050: base++;
8051: continue;
8052: }
1.202 daniel 8053: for (i = 0;
8054: (unsigned int) base + i < ctxt->input->buf->buffer->use;
8055: i++) {
1.140 daniel 8056: if (buf[base + i] == '>')
8057: goto found_end_int_subset;
8058: }
8059: break;
8060: }
8061: }
8062: /*
8063: * We didn't find the end of the Internal subset
8064: */
8065: if (quote == 0)
8066: ctxt->checkIndex = base;
8067: #ifdef DEBUG_PUSH
8068: if (next == 0)
1.241 veillard 8069: xmlGenericError(xmlGenericErrorContext,
8070: "PP: lookup of int subset end filed\n");
1.140 daniel 8071: #endif
8072: goto done;
8073:
8074: found_end_int_subset:
8075: xmlParseInternalSubset(ctxt);
1.166 daniel 8076: ctxt->inSubset = 2;
1.171 daniel 8077: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 8078: (ctxt->sax->externalSubset != NULL))
8079: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8080: ctxt->extSubSystem, ctxt->extSubURI);
8081: ctxt->inSubset = 0;
1.140 daniel 8082: ctxt->instate = XML_PARSER_PROLOG;
8083: ctxt->checkIndex = 0;
8084: #ifdef DEBUG_PUSH
1.241 veillard 8085: xmlGenericError(xmlGenericErrorContext,
8086: "PP: entering PROLOG\n");
1.140 daniel 8087: #endif
8088: break;
8089: }
8090: case XML_PARSER_COMMENT:
1.241 veillard 8091: xmlGenericError(xmlGenericErrorContext,
8092: "PP: internal error, state == COMMENT\n");
1.140 daniel 8093: ctxt->instate = XML_PARSER_CONTENT;
8094: #ifdef DEBUG_PUSH
1.241 veillard 8095: xmlGenericError(xmlGenericErrorContext,
8096: "PP: entering CONTENT\n");
1.140 daniel 8097: #endif
8098: break;
8099: case XML_PARSER_PI:
1.241 veillard 8100: xmlGenericError(xmlGenericErrorContext,
8101: "PP: internal error, state == PI\n");
1.140 daniel 8102: ctxt->instate = XML_PARSER_CONTENT;
8103: #ifdef DEBUG_PUSH
1.241 veillard 8104: xmlGenericError(xmlGenericErrorContext,
8105: "PP: entering CONTENT\n");
1.140 daniel 8106: #endif
8107: break;
1.128 daniel 8108: case XML_PARSER_ENTITY_DECL:
1.241 veillard 8109: xmlGenericError(xmlGenericErrorContext,
8110: "PP: internal error, state == ENTITY_DECL\n");
1.140 daniel 8111: ctxt->instate = XML_PARSER_DTD;
8112: #ifdef DEBUG_PUSH
1.241 veillard 8113: xmlGenericError(xmlGenericErrorContext,
8114: "PP: entering DTD\n");
1.140 daniel 8115: #endif
8116: break;
1.128 daniel 8117: case XML_PARSER_ENTITY_VALUE:
1.241 veillard 8118: xmlGenericError(xmlGenericErrorContext,
8119: "PP: internal error, state == ENTITY_VALUE\n");
1.140 daniel 8120: ctxt->instate = XML_PARSER_CONTENT;
8121: #ifdef DEBUG_PUSH
1.241 veillard 8122: xmlGenericError(xmlGenericErrorContext,
8123: "PP: entering DTD\n");
1.140 daniel 8124: #endif
8125: break;
1.128 daniel 8126: case XML_PARSER_ATTRIBUTE_VALUE:
1.241 veillard 8127: xmlGenericError(xmlGenericErrorContext,
8128: "PP: internal error, state == ATTRIBUTE_VALUE\n");
1.168 daniel 8129: ctxt->instate = XML_PARSER_START_TAG;
8130: #ifdef DEBUG_PUSH
1.241 veillard 8131: xmlGenericError(xmlGenericErrorContext,
8132: "PP: entering START_TAG\n");
1.168 daniel 8133: #endif
8134: break;
8135: case XML_PARSER_SYSTEM_LITERAL:
1.241 veillard 8136: xmlGenericError(xmlGenericErrorContext,
8137: "PP: internal error, state == SYSTEM_LITERAL\n");
1.140 daniel 8138: ctxt->instate = XML_PARSER_START_TAG;
8139: #ifdef DEBUG_PUSH
1.241 veillard 8140: xmlGenericError(xmlGenericErrorContext,
8141: "PP: entering START_TAG\n");
1.140 daniel 8142: #endif
8143: break;
1.128 daniel 8144: }
8145: }
1.140 daniel 8146: done:
8147: #ifdef DEBUG_PUSH
1.241 veillard 8148: xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
1.140 daniel 8149: #endif
1.128 daniel 8150: return(ret);
8151: }
8152:
8153: /**
1.143 daniel 8154: * xmlParseTry:
8155: * @ctxt: an XML parser context
8156: *
8157: * Try to progress on parsing
8158: *
8159: * Returns zero if no parsing was possible
8160: */
8161: int
8162: xmlParseTry(xmlParserCtxtPtr ctxt) {
8163: return(xmlParseTryOrFinish(ctxt, 0));
8164: }
8165:
8166: /**
1.128 daniel 8167: * xmlParseChunk:
8168: * @ctxt: an XML parser context
8169: * @chunk: an char array
8170: * @size: the size in byte of the chunk
8171: * @terminate: last chunk indicator
8172: *
8173: * Parse a Chunk of memory
8174: *
8175: * Returns zero if no error, the xmlParserErrors otherwise.
8176: */
1.140 daniel 8177: int
1.128 daniel 8178: xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8179: int terminate) {
1.132 daniel 8180: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
1.140 daniel 8181: (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8182: int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8183: int cur = ctxt->input->cur - ctxt->input->base;
8184:
1.132 daniel 8185: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
1.140 daniel 8186: ctxt->input->base = ctxt->input->buf->buffer->content + base;
8187: ctxt->input->cur = ctxt->input->base + cur;
8188: #ifdef DEBUG_PUSH
1.241 veillard 8189: xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
1.140 daniel 8190: #endif
8191:
1.150 daniel 8192: if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8193: xmlParseTryOrFinish(ctxt, terminate);
1.247 veillard 8194: } else if (ctxt->instate != XML_PARSER_EOF) {
8195: if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8196: xmlParserInputBufferPtr in = ctxt->input->buf;
8197: if ((in->encoder != NULL) && (in->buffer != NULL) &&
8198: (in->raw != NULL)) {
8199: int nbchars;
8200:
8201: nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8202: if (nbchars < 0) {
8203: xmlGenericError(xmlGenericErrorContext,
8204: "xmlParseChunk: encoder error\n");
8205: return(XML_ERR_INVALID_ENCODING);
8206: }
8207: }
8208: }
8209: }
8210: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 8211: if (terminate) {
1.151 daniel 8212: /*
8213: * Check for termination
8214: */
1.140 daniel 8215: if ((ctxt->instate != XML_PARSER_EOF) &&
8216: (ctxt->instate != XML_PARSER_EPILOG)) {
1.230 veillard 8217: ctxt->errNo = XML_ERR_DOCUMENT_END;
1.140 daniel 8218: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8219: ctxt->sax->error(ctxt->userData,
8220: "Extra content at the end of the document\n");
8221: ctxt->wellFormed = 0;
1.180 daniel 8222: ctxt->disableSAX = 1;
1.140 daniel 8223: }
8224: if (ctxt->instate != XML_PARSER_EOF) {
1.171 daniel 8225: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8226: (!ctxt->disableSAX))
1.140 daniel 8227: ctxt->sax->endDocument(ctxt->userData);
8228: }
8229: ctxt->instate = XML_PARSER_EOF;
1.128 daniel 8230: }
8231: return((xmlParserErrors) ctxt->errNo);
8232: }
8233:
8234: /************************************************************************
8235: * *
1.98 daniel 8236: * I/O front end functions to the parser *
8237: * *
8238: ************************************************************************/
1.201 daniel 8239:
8240: /**
1.229 veillard 8241: * xmlStopParser:
1.201 daniel 8242: * @ctxt: an XML parser context
8243: *
8244: * Blocks further parser processing
8245: */
8246: void
8247: xmlStopParser(xmlParserCtxtPtr ctxt) {
8248: ctxt->instate = XML_PARSER_EOF;
8249: if (ctxt->input != NULL)
8250: ctxt->input->cur = BAD_CAST"";
8251: }
1.98 daniel 8252:
1.50 daniel 8253: /**
1.181 daniel 8254: * xmlCreatePushParserCtxt:
1.140 daniel 8255: * @sax: a SAX handler
8256: * @user_data: The user data returned on SAX callbacks
8257: * @chunk: a pointer to an array of chars
8258: * @size: number of chars in the array
8259: * @filename: an optional file name or URI
8260: *
8261: * Create a parser context for using the XML parser in push mode
8262: * To allow content encoding detection, @size should be >= 4
8263: * The value of @filename is used for fetching external entities
8264: * and error/warning reports.
8265: *
8266: * Returns the new parser context or NULL
8267: */
8268: xmlParserCtxtPtr
8269: xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8270: const char *chunk, int size, const char *filename) {
8271: xmlParserCtxtPtr ctxt;
8272: xmlParserInputPtr inputStream;
8273: xmlParserInputBufferPtr buf;
8274: xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8275:
8276: /*
1.156 daniel 8277: * plug some encoding conversion routines
1.140 daniel 8278: */
8279: if ((chunk != NULL) && (size >= 4))
1.156 daniel 8280: enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
1.140 daniel 8281:
8282: buf = xmlAllocParserInputBuffer(enc);
8283: if (buf == NULL) return(NULL);
8284:
8285: ctxt = xmlNewParserCtxt();
8286: if (ctxt == NULL) {
8287: xmlFree(buf);
8288: return(NULL);
8289: }
8290: if (sax != NULL) {
8291: if (ctxt->sax != &xmlDefaultSAXHandler)
8292: xmlFree(ctxt->sax);
8293: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8294: if (ctxt->sax == NULL) {
8295: xmlFree(buf);
8296: xmlFree(ctxt);
8297: return(NULL);
8298: }
8299: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8300: if (user_data != NULL)
8301: ctxt->userData = user_data;
8302: }
8303: if (filename == NULL) {
8304: ctxt->directory = NULL;
8305: } else {
8306: ctxt->directory = xmlParserGetDirectory(filename);
8307: }
8308:
8309: inputStream = xmlNewInputStream(ctxt);
8310: if (inputStream == NULL) {
8311: xmlFreeParserCtxt(ctxt);
8312: return(NULL);
8313: }
8314:
8315: if (filename == NULL)
8316: inputStream->filename = NULL;
8317: else
8318: inputStream->filename = xmlMemStrdup(filename);
8319: inputStream->buf = buf;
8320: inputStream->base = inputStream->buf->buffer->content;
8321: inputStream->cur = inputStream->buf->buffer->content;
1.156 daniel 8322: if (enc != XML_CHAR_ENCODING_NONE) {
8323: xmlSwitchEncoding(ctxt, enc);
8324: }
1.140 daniel 8325:
8326: inputPush(ctxt, inputStream);
8327:
8328: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8329: (ctxt->input->buf != NULL)) {
8330: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8331: #ifdef DEBUG_PUSH
1.241 veillard 8332: xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
1.140 daniel 8333: #endif
8334: }
1.190 daniel 8335:
8336: return(ctxt);
8337: }
8338:
8339: /**
8340: * xmlCreateIOParserCtxt:
8341: * @sax: a SAX handler
8342: * @user_data: The user data returned on SAX callbacks
8343: * @ioread: an I/O read function
8344: * @ioclose: an I/O close function
8345: * @ioctx: an I/O handler
8346: * @enc: the charset encoding if known
8347: *
8348: * Create a parser context for using the XML parser with an existing
8349: * I/O stream
8350: *
8351: * Returns the new parser context or NULL
8352: */
8353: xmlParserCtxtPtr
8354: xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8355: xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8356: void *ioctx, xmlCharEncoding enc) {
8357: xmlParserCtxtPtr ctxt;
8358: xmlParserInputPtr inputStream;
8359: xmlParserInputBufferPtr buf;
8360:
8361: buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8362: if (buf == NULL) return(NULL);
8363:
8364: ctxt = xmlNewParserCtxt();
8365: if (ctxt == NULL) {
8366: xmlFree(buf);
8367: return(NULL);
8368: }
8369: if (sax != NULL) {
8370: if (ctxt->sax != &xmlDefaultSAXHandler)
8371: xmlFree(ctxt->sax);
8372: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8373: if (ctxt->sax == NULL) {
8374: xmlFree(buf);
8375: xmlFree(ctxt);
8376: return(NULL);
8377: }
8378: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8379: if (user_data != NULL)
8380: ctxt->userData = user_data;
8381: }
8382:
1.229 veillard 8383: inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8384: if (inputStream == NULL) {
8385: xmlFreeParserCtxt(ctxt);
8386: return(NULL);
1.74 daniel 8387: }
1.229 veillard 8388: inputPush(ctxt, inputStream);
1.69 daniel 8389:
1.229 veillard 8390: return(ctxt);
1.1 veillard 8391: }
8392:
1.229 veillard 8393: /************************************************************************
8394: * *
8395: * Front ends when parsing a Dtd *
8396: * *
8397: ************************************************************************/
1.76 daniel 8398:
8399: /**
1.242 veillard 8400: * xmlIOParseDTD:
8401: * @sax: the SAX handler block or NULL
8402: * @input: an Input Buffer
8403: * @enc: the charset encoding if known
8404: *
8405: * Load and parse a DTD
8406: *
8407: * Returns the resulting xmlDtdPtr or NULL in case of error.
1.243 veillard 8408: * @input will be freed at parsing end.
1.242 veillard 8409: */
8410:
8411: xmlDtdPtr
8412: xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
8413: xmlCharEncoding enc) {
8414: xmlDtdPtr ret = NULL;
8415: xmlParserCtxtPtr ctxt;
8416: xmlParserInputPtr pinput = NULL;
8417:
8418: if (input == NULL)
8419: return(NULL);
8420:
8421: ctxt = xmlNewParserCtxt();
8422: if (ctxt == NULL) {
8423: return(NULL);
8424: }
8425:
8426: /*
8427: * Set-up the SAX context
8428: */
8429: if (sax != NULL) {
8430: if (ctxt->sax != NULL)
8431: xmlFree(ctxt->sax);
8432: ctxt->sax = sax;
8433: ctxt->userData = NULL;
8434: }
8435:
8436: /*
8437: * generate a parser input from the I/O handler
8438: */
8439:
8440: pinput = xmlNewIOInputStream(ctxt, input, enc);
8441: if (pinput == NULL) {
8442: if (sax != NULL) ctxt->sax = NULL;
8443: xmlFreeParserCtxt(ctxt);
8444: return(NULL);
8445: }
8446:
8447: /*
8448: * plug some encoding conversion routines here.
8449: */
8450: xmlPushInput(ctxt, pinput);
8451:
8452: pinput->filename = NULL;
8453: pinput->line = 1;
8454: pinput->col = 1;
8455: pinput->base = ctxt->input->cur;
8456: pinput->cur = ctxt->input->cur;
8457: pinput->free = NULL;
8458:
8459: /*
8460: * let's parse that entity knowing it's an external subset.
8461: */
8462: ctxt->inSubset = 2;
8463: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8464: ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8465: BAD_CAST "none", BAD_CAST "none");
8466: xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
8467:
8468: if (ctxt->myDoc != NULL) {
8469: if (ctxt->wellFormed) {
8470: ret = ctxt->myDoc->extSubset;
8471: ctxt->myDoc->extSubset = NULL;
8472: } else {
8473: ret = NULL;
8474: }
8475: xmlFreeDoc(ctxt->myDoc);
8476: ctxt->myDoc = NULL;
8477: }
8478: if (sax != NULL) ctxt->sax = NULL;
8479: xmlFreeParserCtxt(ctxt);
8480:
8481: return(ret);
8482: }
8483:
8484: /**
1.181 daniel 8485: * xmlSAXParseDTD:
1.76 daniel 8486: * @sax: the SAX handler block
8487: * @ExternalID: a NAME* containing the External ID of the DTD
8488: * @SystemID: a NAME* containing the URL to the DTD
8489: *
8490: * Load and parse an external subset.
8491: *
8492: * Returns the resulting xmlDtdPtr or NULL in case of error.
8493: */
8494:
8495: xmlDtdPtr
1.123 daniel 8496: xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8497: const xmlChar *SystemID) {
1.76 daniel 8498: xmlDtdPtr ret = NULL;
8499: xmlParserCtxtPtr ctxt;
1.83 daniel 8500: xmlParserInputPtr input = NULL;
1.76 daniel 8501: xmlCharEncoding enc;
8502:
8503: if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
8504:
1.97 daniel 8505: ctxt = xmlNewParserCtxt();
1.76 daniel 8506: if (ctxt == NULL) {
8507: return(NULL);
8508: }
8509:
8510: /*
8511: * Set-up the SAX context
8512: */
8513: if (sax != NULL) {
1.93 veillard 8514: if (ctxt->sax != NULL)
1.119 daniel 8515: xmlFree(ctxt->sax);
1.76 daniel 8516: ctxt->sax = sax;
8517: ctxt->userData = NULL;
8518: }
8519:
8520: /*
8521: * Ask the Entity resolver to load the damn thing
8522: */
8523:
8524: if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
8525: input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
8526: if (input == NULL) {
1.86 daniel 8527: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 8528: xmlFreeParserCtxt(ctxt);
8529: return(NULL);
8530: }
8531:
8532: /*
1.156 daniel 8533: * plug some encoding conversion routines here.
1.76 daniel 8534: */
8535: xmlPushInput(ctxt, input);
1.156 daniel 8536: enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
1.76 daniel 8537: xmlSwitchEncoding(ctxt, enc);
8538:
1.95 veillard 8539: if (input->filename == NULL)
1.156 daniel 8540: input->filename = (char *) xmlStrdup(SystemID);
1.76 daniel 8541: input->line = 1;
8542: input->col = 1;
8543: input->base = ctxt->input->cur;
8544: input->cur = ctxt->input->cur;
8545: input->free = NULL;
8546:
8547: /*
8548: * let's parse that entity knowing it's an external subset.
8549: */
1.191 daniel 8550: ctxt->inSubset = 2;
8551: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8552: ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8553: ExternalID, SystemID);
1.79 daniel 8554: xmlParseExternalSubset(ctxt, ExternalID, SystemID);
1.76 daniel 8555:
8556: if (ctxt->myDoc != NULL) {
8557: if (ctxt->wellFormed) {
1.191 daniel 8558: ret = ctxt->myDoc->extSubset;
8559: ctxt->myDoc->extSubset = NULL;
1.76 daniel 8560: } else {
8561: ret = NULL;
8562: }
8563: xmlFreeDoc(ctxt->myDoc);
8564: ctxt->myDoc = NULL;
8565: }
1.86 daniel 8566: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 8567: xmlFreeParserCtxt(ctxt);
8568:
8569: return(ret);
8570: }
8571:
8572: /**
1.181 daniel 8573: * xmlParseDTD:
1.76 daniel 8574: * @ExternalID: a NAME* containing the External ID of the DTD
8575: * @SystemID: a NAME* containing the URL to the DTD
8576: *
8577: * Load and parse an external subset.
8578: *
8579: * Returns the resulting xmlDtdPtr or NULL in case of error.
8580: */
8581:
8582: xmlDtdPtr
1.123 daniel 8583: xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
1.76 daniel 8584: return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
1.59 daniel 8585: }
8586:
1.229 veillard 8587: /************************************************************************
8588: * *
8589: * Front ends when parsing an Entity *
8590: * *
8591: ************************************************************************/
8592:
1.59 daniel 8593: /**
1.181 daniel 8594: * xmlSAXParseBalancedChunk:
1.144 daniel 8595: * @ctx: an XML parser context (possibly NULL)
8596: * @sax: the SAX handler bloc (possibly NULL)
8597: * @user_data: The user data returned on SAX callbacks (possibly NULL)
8598: * @input: a parser input stream
8599: * @enc: the encoding
8600: *
8601: * Parse a well-balanced chunk of an XML document
8602: * The user has to provide SAX callback block whose routines will be
8603: * called by the parser
8604: * The allowed sequence for the Well Balanced Chunk is the one defined by
8605: * the content production in the XML grammar:
8606: *
8607: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8608: *
1.176 daniel 8609: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
1.144 daniel 8610: * the error code otherwise
8611: */
8612:
8613: int
8614: xmlSAXParseBalancedChunk(xmlParserCtxtPtr ctx, xmlSAXHandlerPtr sax,
8615: void *user_data, xmlParserInputPtr input,
8616: xmlCharEncoding enc) {
8617: xmlParserCtxtPtr ctxt;
8618: int ret;
8619:
8620: if (input == NULL) return(-1);
8621:
8622: if (ctx != NULL)
8623: ctxt = ctx;
8624: else {
8625: ctxt = xmlNewParserCtxt();
8626: if (ctxt == NULL)
8627: return(-1);
8628: if (sax == NULL)
8629: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8630: }
8631:
8632: /*
8633: * Set-up the SAX context
8634: */
8635: if (sax != NULL) {
8636: if (ctxt->sax != NULL)
8637: xmlFree(ctxt->sax);
8638: ctxt->sax = sax;
8639: ctxt->userData = user_data;
8640: }
8641:
8642: /*
8643: * plug some encoding conversion routines here.
8644: */
8645: xmlPushInput(ctxt, input);
8646: if (enc != XML_CHAR_ENCODING_NONE)
8647: xmlSwitchEncoding(ctxt, enc);
8648:
8649: /*
8650: * let's parse that entity knowing it's an external subset.
8651: */
8652: xmlParseContent(ctxt);
8653: ret = ctxt->errNo;
8654:
8655: if (ctx == NULL) {
8656: if (sax != NULL)
8657: ctxt->sax = NULL;
8658: else
8659: xmlFreeDoc(ctxt->myDoc);
8660: xmlFreeParserCtxt(ctxt);
8661: }
8662: return(ret);
8663: }
8664:
8665: /**
1.213 veillard 8666: * xmlParseCtxtExternalEntity:
8667: * @ctx: the existing parsing context
8668: * @URL: the URL for the entity to load
8669: * @ID: the System ID for the entity to load
8670: * @list: the return value for the set of parsed nodes
8671: *
8672: * Parse an external general entity within an existing parsing context
8673: * An external general parsed entity is well-formed if it matches the
8674: * production labeled extParsedEnt.
8675: *
8676: * [78] extParsedEnt ::= TextDecl? content
8677: *
8678: * Returns 0 if the entity is well formed, -1 in case of args problem and
8679: * the parser error code otherwise
8680: */
8681:
8682: int
8683: xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
8684: const xmlChar *ID, xmlNodePtr *list) {
8685: xmlParserCtxtPtr ctxt;
8686: xmlDocPtr newDoc;
8687: xmlSAXHandlerPtr oldsax = NULL;
8688: int ret = 0;
8689:
8690: if (ctx->depth > 40) {
8691: return(XML_ERR_ENTITY_LOOP);
8692: }
8693:
8694: if (list != NULL)
8695: *list = NULL;
8696: if ((URL == NULL) && (ID == NULL))
8697: return(-1);
8698: if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
8699: return(-1);
8700:
8701:
1.228 veillard 8702: ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
1.213 veillard 8703: if (ctxt == NULL) return(-1);
8704: ctxt->userData = ctxt;
8705: oldsax = ctxt->sax;
8706: ctxt->sax = ctx->sax;
8707: newDoc = xmlNewDoc(BAD_CAST "1.0");
8708: if (newDoc == NULL) {
8709: xmlFreeParserCtxt(ctxt);
8710: return(-1);
8711: }
8712: if (ctx->myDoc != NULL) {
8713: newDoc->intSubset = ctx->myDoc->intSubset;
8714: newDoc->extSubset = ctx->myDoc->extSubset;
8715: }
8716: if (ctx->myDoc->URL != NULL) {
8717: newDoc->URL = xmlStrdup(ctx->myDoc->URL);
8718: }
8719: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8720: if (newDoc->children == NULL) {
8721: ctxt->sax = oldsax;
8722: xmlFreeParserCtxt(ctxt);
8723: newDoc->intSubset = NULL;
8724: newDoc->extSubset = NULL;
8725: xmlFreeDoc(newDoc);
8726: return(-1);
8727: }
8728: nodePush(ctxt, newDoc->children);
8729: if (ctx->myDoc == NULL) {
8730: ctxt->myDoc = newDoc;
8731: } else {
8732: ctxt->myDoc = ctx->myDoc;
8733: newDoc->children->doc = ctx->myDoc;
8734: }
8735:
8736: /*
8737: * Parse a possible text declaration first
8738: */
8739: GROW;
8740: if ((RAW == '<') && (NXT(1) == '?') &&
8741: (NXT(2) == 'x') && (NXT(3) == 'm') &&
8742: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8743: xmlParseTextDecl(ctxt);
8744: }
8745:
8746: /*
8747: * Doing validity checking on chunk doesn't make sense
8748: */
8749: ctxt->instate = XML_PARSER_CONTENT;
8750: ctxt->validate = ctx->validate;
8751: ctxt->depth = ctx->depth + 1;
8752: ctxt->replaceEntities = ctx->replaceEntities;
8753: if (ctxt->validate) {
8754: ctxt->vctxt.error = ctx->vctxt.error;
8755: ctxt->vctxt.warning = ctx->vctxt.warning;
8756: /* Allocate the Node stack */
8757: ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
1.228 veillard 8758: if (ctxt->vctxt.nodeTab == NULL) {
1.241 veillard 8759: xmlGenericError(xmlGenericErrorContext,
8760: "xmlParseCtxtExternalEntity: out of memory\n");
1.228 veillard 8761: ctxt->validate = 0;
8762: ctxt->vctxt.error = NULL;
8763: ctxt->vctxt.warning = NULL;
8764: } else {
8765: ctxt->vctxt.nodeNr = 0;
8766: ctxt->vctxt.nodeMax = 4;
8767: ctxt->vctxt.node = NULL;
8768: }
1.213 veillard 8769: } else {
8770: ctxt->vctxt.error = NULL;
8771: ctxt->vctxt.warning = NULL;
8772: }
8773:
8774: xmlParseContent(ctxt);
8775:
8776: if ((RAW == '<') && (NXT(1) == '/')) {
1.230 veillard 8777: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.213 veillard 8778: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8779: ctxt->sax->error(ctxt->userData,
8780: "chunk is not well balanced\n");
8781: ctxt->wellFormed = 0;
8782: ctxt->disableSAX = 1;
8783: } else if (RAW != 0) {
1.230 veillard 8784: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
1.213 veillard 8785: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8786: ctxt->sax->error(ctxt->userData,
8787: "extra content at the end of well balanced chunk\n");
8788: ctxt->wellFormed = 0;
8789: ctxt->disableSAX = 1;
8790: }
8791: if (ctxt->node != newDoc->children) {
1.230 veillard 8792: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.213 veillard 8793: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8794: ctxt->sax->error(ctxt->userData,
8795: "chunk is not well balanced\n");
8796: ctxt->wellFormed = 0;
8797: ctxt->disableSAX = 1;
8798: }
8799:
8800: if (!ctxt->wellFormed) {
8801: if (ctxt->errNo == 0)
8802: ret = 1;
8803: else
8804: ret = ctxt->errNo;
8805: } else {
8806: if (list != NULL) {
8807: xmlNodePtr cur;
8808:
8809: /*
8810: * Return the newly created nodeset after unlinking it from
8811: * they pseudo parent.
8812: */
8813: cur = newDoc->children->children;
8814: *list = cur;
8815: while (cur != NULL) {
8816: cur->parent = NULL;
8817: cur = cur->next;
8818: }
8819: newDoc->children->children = NULL;
8820: }
8821: ret = 0;
8822: }
8823: ctxt->sax = oldsax;
8824: xmlFreeParserCtxt(ctxt);
8825: newDoc->intSubset = NULL;
8826: newDoc->extSubset = NULL;
8827: xmlFreeDoc(newDoc);
8828:
8829: return(ret);
8830: }
8831:
8832: /**
1.181 daniel 8833: * xmlParseExternalEntity:
8834: * @doc: the document the chunk pertains to
8835: * @sax: the SAX handler bloc (possibly NULL)
8836: * @user_data: The user data returned on SAX callbacks (possibly NULL)
1.185 daniel 8837: * @depth: Used for loop detection, use 0
1.181 daniel 8838: * @URL: the URL for the entity to load
8839: * @ID: the System ID for the entity to load
8840: * @list: the return value for the set of parsed nodes
8841: *
8842: * Parse an external general entity
8843: * An external general parsed entity is well-formed if it matches the
8844: * production labeled extParsedEnt.
8845: *
8846: * [78] extParsedEnt ::= TextDecl? content
8847: *
8848: * Returns 0 if the entity is well formed, -1 in case of args problem and
8849: * the parser error code otherwise
8850: */
8851:
8852: int
8853: xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
1.185 daniel 8854: int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
1.181 daniel 8855: xmlParserCtxtPtr ctxt;
8856: xmlDocPtr newDoc;
8857: xmlSAXHandlerPtr oldsax = NULL;
8858: int ret = 0;
8859:
1.185 daniel 8860: if (depth > 40) {
8861: return(XML_ERR_ENTITY_LOOP);
8862: }
8863:
8864:
1.181 daniel 8865:
8866: if (list != NULL)
8867: *list = NULL;
8868: if ((URL == NULL) && (ID == NULL))
1.213 veillard 8869: return(-1);
8870: if (doc == NULL) /* @@ relax but check for dereferences */
1.181 daniel 8871: return(-1);
8872:
8873:
1.228 veillard 8874: ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
1.181 daniel 8875: if (ctxt == NULL) return(-1);
8876: ctxt->userData = ctxt;
8877: if (sax != NULL) {
8878: oldsax = ctxt->sax;
8879: ctxt->sax = sax;
8880: if (user_data != NULL)
8881: ctxt->userData = user_data;
8882: }
8883: newDoc = xmlNewDoc(BAD_CAST "1.0");
8884: if (newDoc == NULL) {
8885: xmlFreeParserCtxt(ctxt);
8886: return(-1);
8887: }
8888: if (doc != NULL) {
8889: newDoc->intSubset = doc->intSubset;
8890: newDoc->extSubset = doc->extSubset;
8891: }
8892: if (doc->URL != NULL) {
8893: newDoc->URL = xmlStrdup(doc->URL);
8894: }
8895: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8896: if (newDoc->children == NULL) {
8897: if (sax != NULL)
8898: ctxt->sax = oldsax;
8899: xmlFreeParserCtxt(ctxt);
8900: newDoc->intSubset = NULL;
8901: newDoc->extSubset = NULL;
8902: xmlFreeDoc(newDoc);
8903: return(-1);
8904: }
8905: nodePush(ctxt, newDoc->children);
8906: if (doc == NULL) {
8907: ctxt->myDoc = newDoc;
8908: } else {
8909: ctxt->myDoc = doc;
8910: newDoc->children->doc = doc;
8911: }
8912:
8913: /*
8914: * Parse a possible text declaration first
8915: */
8916: GROW;
8917: if ((RAW == '<') && (NXT(1) == '?') &&
8918: (NXT(2) == 'x') && (NXT(3) == 'm') &&
8919: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8920: xmlParseTextDecl(ctxt);
8921: }
8922:
8923: /*
8924: * Doing validity checking on chunk doesn't make sense
8925: */
8926: ctxt->instate = XML_PARSER_CONTENT;
8927: ctxt->validate = 0;
1.185 daniel 8928: ctxt->depth = depth;
1.181 daniel 8929:
8930: xmlParseContent(ctxt);
8931:
8932: if ((RAW == '<') && (NXT(1) == '/')) {
1.230 veillard 8933: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.181 daniel 8934: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8935: ctxt->sax->error(ctxt->userData,
8936: "chunk is not well balanced\n");
8937: ctxt->wellFormed = 0;
8938: ctxt->disableSAX = 1;
8939: } else if (RAW != 0) {
1.230 veillard 8940: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
1.181 daniel 8941: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8942: ctxt->sax->error(ctxt->userData,
8943: "extra content at the end of well balanced chunk\n");
8944: ctxt->wellFormed = 0;
8945: ctxt->disableSAX = 1;
8946: }
8947: if (ctxt->node != newDoc->children) {
1.230 veillard 8948: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.181 daniel 8949: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8950: ctxt->sax->error(ctxt->userData,
8951: "chunk is not well balanced\n");
8952: ctxt->wellFormed = 0;
8953: ctxt->disableSAX = 1;
8954: }
8955:
8956: if (!ctxt->wellFormed) {
8957: if (ctxt->errNo == 0)
8958: ret = 1;
8959: else
8960: ret = ctxt->errNo;
8961: } else {
8962: if (list != NULL) {
8963: xmlNodePtr cur;
8964:
8965: /*
8966: * Return the newly created nodeset after unlinking it from
8967: * they pseudo parent.
8968: */
8969: cur = newDoc->children->children;
8970: *list = cur;
8971: while (cur != NULL) {
8972: cur->parent = NULL;
8973: cur = cur->next;
8974: }
8975: newDoc->children->children = NULL;
8976: }
8977: ret = 0;
8978: }
8979: if (sax != NULL)
8980: ctxt->sax = oldsax;
8981: xmlFreeParserCtxt(ctxt);
8982: newDoc->intSubset = NULL;
8983: newDoc->extSubset = NULL;
8984: xmlFreeDoc(newDoc);
8985:
8986: return(ret);
8987: }
8988:
8989: /**
8990: * xmlParseBalancedChunk:
1.176 daniel 8991: * @doc: the document the chunk pertains to
8992: * @sax: the SAX handler bloc (possibly NULL)
8993: * @user_data: The user data returned on SAX callbacks (possibly NULL)
1.185 daniel 8994: * @depth: Used for loop detection, use 0
1.176 daniel 8995: * @string: the input string in UTF8 or ISO-Latin (zero terminated)
8996: * @list: the return value for the set of parsed nodes
8997: *
8998: * Parse a well-balanced chunk of an XML document
8999: * called by the parser
9000: * The allowed sequence for the Well Balanced Chunk is the one defined by
9001: * the content production in the XML grammar:
1.144 daniel 9002: *
1.175 daniel 9003: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9004: *
1.176 daniel 9005: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9006: * the parser error code otherwise
1.144 daniel 9007: */
9008:
1.175 daniel 9009: int
9010: xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
1.185 daniel 9011: void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
1.176 daniel 9012: xmlParserCtxtPtr ctxt;
1.175 daniel 9013: xmlDocPtr newDoc;
1.181 daniel 9014: xmlSAXHandlerPtr oldsax = NULL;
1.175 daniel 9015: int size;
1.176 daniel 9016: int ret = 0;
1.175 daniel 9017:
1.185 daniel 9018: if (depth > 40) {
9019: return(XML_ERR_ENTITY_LOOP);
9020: }
9021:
1.175 daniel 9022:
1.176 daniel 9023: if (list != NULL)
9024: *list = NULL;
9025: if (string == NULL)
9026: return(-1);
9027:
9028: size = xmlStrlen(string);
9029:
1.183 daniel 9030: ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
1.176 daniel 9031: if (ctxt == NULL) return(-1);
9032: ctxt->userData = ctxt;
1.175 daniel 9033: if (sax != NULL) {
1.176 daniel 9034: oldsax = ctxt->sax;
9035: ctxt->sax = sax;
9036: if (user_data != NULL)
9037: ctxt->userData = user_data;
1.175 daniel 9038: }
9039: newDoc = xmlNewDoc(BAD_CAST "1.0");
1.176 daniel 9040: if (newDoc == NULL) {
9041: xmlFreeParserCtxt(ctxt);
9042: return(-1);
9043: }
1.175 daniel 9044: if (doc != NULL) {
9045: newDoc->intSubset = doc->intSubset;
9046: newDoc->extSubset = doc->extSubset;
9047: }
1.176 daniel 9048: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9049: if (newDoc->children == NULL) {
9050: if (sax != NULL)
9051: ctxt->sax = oldsax;
9052: xmlFreeParserCtxt(ctxt);
9053: newDoc->intSubset = NULL;
9054: newDoc->extSubset = NULL;
9055: xmlFreeDoc(newDoc);
9056: return(-1);
9057: }
9058: nodePush(ctxt, newDoc->children);
9059: if (doc == NULL) {
9060: ctxt->myDoc = newDoc;
9061: } else {
9062: ctxt->myDoc = doc;
9063: newDoc->children->doc = doc;
9064: }
9065: ctxt->instate = XML_PARSER_CONTENT;
1.185 daniel 9066: ctxt->depth = depth;
1.176 daniel 9067:
9068: /*
9069: * Doing validity checking on chunk doesn't make sense
9070: */
9071: ctxt->validate = 0;
9072:
1.175 daniel 9073: xmlParseContent(ctxt);
1.176 daniel 9074:
9075: if ((RAW == '<') && (NXT(1) == '/')) {
1.230 veillard 9076: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.176 daniel 9077: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9078: ctxt->sax->error(ctxt->userData,
9079: "chunk is not well balanced\n");
9080: ctxt->wellFormed = 0;
1.180 daniel 9081: ctxt->disableSAX = 1;
1.176 daniel 9082: } else if (RAW != 0) {
1.230 veillard 9083: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
1.176 daniel 9084: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9085: ctxt->sax->error(ctxt->userData,
9086: "extra content at the end of well balanced chunk\n");
9087: ctxt->wellFormed = 0;
1.180 daniel 9088: ctxt->disableSAX = 1;
1.176 daniel 9089: }
9090: if (ctxt->node != newDoc->children) {
1.230 veillard 9091: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.176 daniel 9092: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9093: ctxt->sax->error(ctxt->userData,
9094: "chunk is not well balanced\n");
9095: ctxt->wellFormed = 0;
1.180 daniel 9096: ctxt->disableSAX = 1;
1.176 daniel 9097: }
1.175 daniel 9098:
1.176 daniel 9099: if (!ctxt->wellFormed) {
9100: if (ctxt->errNo == 0)
9101: ret = 1;
9102: else
9103: ret = ctxt->errNo;
9104: } else {
9105: if (list != NULL) {
9106: xmlNodePtr cur;
1.175 daniel 9107:
1.176 daniel 9108: /*
9109: * Return the newly created nodeset after unlinking it from
9110: * they pseudo parent.
9111: */
9112: cur = newDoc->children->children;
9113: *list = cur;
9114: while (cur != NULL) {
9115: cur->parent = NULL;
9116: cur = cur->next;
9117: }
9118: newDoc->children->children = NULL;
9119: }
9120: ret = 0;
1.175 daniel 9121: }
1.176 daniel 9122: if (sax != NULL)
9123: ctxt->sax = oldsax;
1.175 daniel 9124: xmlFreeParserCtxt(ctxt);
9125: newDoc->intSubset = NULL;
9126: newDoc->extSubset = NULL;
1.176 daniel 9127: xmlFreeDoc(newDoc);
1.175 daniel 9128:
1.176 daniel 9129: return(ret);
1.144 daniel 9130: }
9131:
9132: /**
1.229 veillard 9133: * xmlSAXParseEntity:
9134: * @sax: the SAX handler block
9135: * @filename: the filename
9136: *
9137: * parse an XML external entity out of context and build a tree.
9138: * It use the given SAX function block to handle the parsing callback.
9139: * If sax is NULL, fallback to the default DOM tree building routines.
9140: *
9141: * [78] extParsedEnt ::= TextDecl? content
9142: *
9143: * This correspond to a "Well Balanced" chunk
1.144 daniel 9144: *
1.229 veillard 9145: * Returns the resulting document tree
1.144 daniel 9146: */
9147:
1.229 veillard 9148: xmlDocPtr
9149: xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9150: xmlDocPtr ret;
9151: xmlParserCtxtPtr ctxt;
9152: char *directory = NULL;
9153:
9154: ctxt = xmlCreateFileParserCtxt(filename);
9155: if (ctxt == NULL) {
9156: return(NULL);
9157: }
9158: if (sax != NULL) {
9159: if (ctxt->sax != NULL)
9160: xmlFree(ctxt->sax);
9161: ctxt->sax = sax;
9162: ctxt->userData = NULL;
9163: }
9164:
9165: if ((ctxt->directory == NULL) && (directory == NULL))
9166: directory = xmlParserGetDirectory(filename);
9167:
9168: xmlParseExtParsedEnt(ctxt);
9169:
9170: if (ctxt->wellFormed)
9171: ret = ctxt->myDoc;
9172: else {
9173: ret = NULL;
9174: xmlFreeDoc(ctxt->myDoc);
9175: ctxt->myDoc = NULL;
9176: }
9177: if (sax != NULL)
9178: ctxt->sax = NULL;
9179: xmlFreeParserCtxt(ctxt);
9180:
9181: return(ret);
1.144 daniel 9182: }
9183:
9184: /**
1.229 veillard 9185: * xmlParseEntity:
9186: * @filename: the filename
9187: *
9188: * parse an XML external entity out of context and build a tree.
9189: *
9190: * [78] extParsedEnt ::= TextDecl? content
9191: *
9192: * This correspond to a "Well Balanced" chunk
1.59 daniel 9193: *
1.68 daniel 9194: * Returns the resulting document tree
1.59 daniel 9195: */
9196:
1.69 daniel 9197: xmlDocPtr
1.229 veillard 9198: xmlParseEntity(const char *filename) {
9199: return(xmlSAXParseEntity(NULL, filename));
1.55 daniel 9200: }
9201:
9202: /**
1.181 daniel 9203: * xmlCreateEntityParserCtxt:
9204: * @URL: the entity URL
9205: * @ID: the entity PUBLIC ID
9206: * @base: a posible base for the target URI
9207: *
9208: * Create a parser context for an external entity
9209: * Automatic support for ZLIB/Compress compressed document is provided
9210: * by default if found at compile-time.
9211: *
9212: * Returns the new parser context or NULL
9213: */
9214: xmlParserCtxtPtr
9215: xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9216: const xmlChar *base) {
9217: xmlParserCtxtPtr ctxt;
9218: xmlParserInputPtr inputStream;
9219: char *directory = NULL;
1.210 veillard 9220: xmlChar *uri;
9221:
1.181 daniel 9222: ctxt = xmlNewParserCtxt();
9223: if (ctxt == NULL) {
9224: return(NULL);
9225: }
9226:
1.210 veillard 9227: uri = xmlBuildURI(URL, base);
9228:
9229: if (uri == NULL) {
9230: inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9231: if (inputStream == NULL) {
9232: xmlFreeParserCtxt(ctxt);
9233: return(NULL);
9234: }
9235:
9236: inputPush(ctxt, inputStream);
9237:
9238: if ((ctxt->directory == NULL) && (directory == NULL))
9239: directory = xmlParserGetDirectory((char *)URL);
9240: if ((ctxt->directory == NULL) && (directory != NULL))
9241: ctxt->directory = directory;
9242: } else {
9243: inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9244: if (inputStream == NULL) {
1.248 veillard 9245: xmlFree(uri);
1.210 veillard 9246: xmlFreeParserCtxt(ctxt);
9247: return(NULL);
9248: }
1.181 daniel 9249:
1.210 veillard 9250: inputPush(ctxt, inputStream);
1.181 daniel 9251:
1.210 veillard 9252: if ((ctxt->directory == NULL) && (directory == NULL))
9253: directory = xmlParserGetDirectory((char *)uri);
9254: if ((ctxt->directory == NULL) && (directory != NULL))
9255: ctxt->directory = directory;
9256: xmlFree(uri);
9257: }
1.181 daniel 9258:
9259: return(ctxt);
9260: }
9261:
1.229 veillard 9262: /************************************************************************
9263: * *
9264: * Front ends when parsing from a file *
9265: * *
9266: ************************************************************************/
9267:
1.181 daniel 9268: /**
9269: * xmlCreateFileParserCtxt:
1.50 daniel 9270: * @filename: the filename
9271: *
1.69 daniel 9272: * Create a parser context for a file content.
9273: * Automatic support for ZLIB/Compress compressed document is provided
9274: * by default if found at compile-time.
1.50 daniel 9275: *
1.69 daniel 9276: * Returns the new parser context or NULL
1.9 httpng 9277: */
1.69 daniel 9278: xmlParserCtxtPtr
9279: xmlCreateFileParserCtxt(const char *filename)
9280: {
9281: xmlParserCtxtPtr ctxt;
1.40 daniel 9282: xmlParserInputPtr inputStream;
1.91 daniel 9283: xmlParserInputBufferPtr buf;
1.111 daniel 9284: char *directory = NULL;
1.9 httpng 9285:
1.91 daniel 9286: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1.228 veillard 9287: if (buf == NULL) {
9288: return(NULL);
9289: }
1.9 httpng 9290:
1.97 daniel 9291: ctxt = xmlNewParserCtxt();
1.16 daniel 9292: if (ctxt == NULL) {
1.228 veillard 9293: if (xmlDefaultSAXHandler.error != NULL) {
9294: xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9295: }
1.16 daniel 9296: return(NULL);
9297: }
1.97 daniel 9298:
1.96 daniel 9299: inputStream = xmlNewInputStream(ctxt);
1.40 daniel 9300: if (inputStream == NULL) {
1.97 daniel 9301: xmlFreeParserCtxt(ctxt);
1.40 daniel 9302: return(NULL);
9303: }
9304:
1.119 daniel 9305: inputStream->filename = xmlMemStrdup(filename);
1.91 daniel 9306: inputStream->buf = buf;
9307: inputStream->base = inputStream->buf->buffer->content;
9308: inputStream->cur = inputStream->buf->buffer->content;
1.16 daniel 9309:
1.40 daniel 9310: inputPush(ctxt, inputStream);
1.110 daniel 9311: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 9312: directory = xmlParserGetDirectory(filename);
9313: if ((ctxt->directory == NULL) && (directory != NULL))
1.110 daniel 9314: ctxt->directory = directory;
1.106 daniel 9315:
1.69 daniel 9316: return(ctxt);
9317: }
9318:
9319: /**
1.181 daniel 9320: * xmlSAXParseFile:
1.69 daniel 9321: * @sax: the SAX handler block
9322: * @filename: the filename
9323: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9324: * documents
9325: *
9326: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9327: * compressed document is provided by default if found at compile-time.
9328: * It use the given SAX function block to handle the parsing callback.
9329: * If sax is NULL, fallback to the default DOM tree building routines.
9330: *
9331: * Returns the resulting document tree
9332: */
9333:
1.79 daniel 9334: xmlDocPtr
9335: xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
1.69 daniel 9336: int recovery) {
9337: xmlDocPtr ret;
1.229 veillard 9338: xmlParserCtxtPtr ctxt;
9339: char *directory = NULL;
9340:
9341: ctxt = xmlCreateFileParserCtxt(filename);
9342: if (ctxt == NULL) {
9343: return(NULL);
9344: }
9345: if (sax != NULL) {
9346: if (ctxt->sax != NULL)
9347: xmlFree(ctxt->sax);
9348: ctxt->sax = sax;
9349: ctxt->userData = NULL;
9350: }
9351:
9352: if ((ctxt->directory == NULL) && (directory == NULL))
9353: directory = xmlParserGetDirectory(filename);
9354: if ((ctxt->directory == NULL) && (directory != NULL))
9355: ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9356:
9357: xmlParseDocument(ctxt);
9358:
9359: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9360: else {
9361: ret = NULL;
9362: xmlFreeDoc(ctxt->myDoc);
9363: ctxt->myDoc = NULL;
9364: }
9365: if (sax != NULL)
9366: ctxt->sax = NULL;
9367: xmlFreeParserCtxt(ctxt);
9368:
9369: return(ret);
9370: }
9371:
9372: /**
9373: * xmlRecoverDoc:
9374: * @cur: a pointer to an array of xmlChar
9375: *
9376: * parse an XML in-memory document and build a tree.
9377: * In the case the document is not Well Formed, a tree is built anyway
9378: *
9379: * Returns the resulting document tree
9380: */
9381:
9382: xmlDocPtr
9383: xmlRecoverDoc(xmlChar *cur) {
9384: return(xmlSAXParseDoc(NULL, cur, 1));
9385: }
9386:
9387: /**
9388: * xmlParseFile:
9389: * @filename: the filename
9390: *
9391: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9392: * compressed document is provided by default if found at compile-time.
9393: *
9394: * Returns the resulting document tree
9395: */
9396:
9397: xmlDocPtr
9398: xmlParseFile(const char *filename) {
9399: return(xmlSAXParseFile(NULL, filename, 0));
9400: }
9401:
9402: /**
9403: * xmlRecoverFile:
9404: * @filename: the filename
9405: *
9406: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9407: * compressed document is provided by default if found at compile-time.
9408: * In the case the document is not Well Formed, a tree is built anyway
9409: *
9410: * Returns the resulting document tree
9411: */
9412:
9413: xmlDocPtr
9414: xmlRecoverFile(const char *filename) {
9415: return(xmlSAXParseFile(NULL, filename, 1));
9416: }
9417:
9418:
9419: /**
9420: * xmlSetupParserForBuffer:
9421: * @ctxt: an XML parser context
9422: * @buffer: a xmlChar * buffer
9423: * @filename: a file name
9424: *
9425: * Setup the parser context to parse a new buffer; Clears any prior
9426: * contents from the parser context. The buffer parameter must not be
9427: * NULL, but the filename parameter can be
9428: */
9429: void
9430: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9431: const char* filename)
9432: {
9433: xmlParserInputPtr input;
9434:
9435: input = xmlNewInputStream(ctxt);
9436: if (input == NULL) {
9437: perror("malloc");
9438: xmlFree(ctxt);
9439: return;
9440: }
9441:
9442: xmlClearParserCtxt(ctxt);
9443: if (filename != NULL)
9444: input->filename = xmlMemStrdup(filename);
9445: input->base = buffer;
9446: input->cur = buffer;
9447: inputPush(ctxt, input);
9448: }
9449:
9450: /**
9451: * xmlSAXUserParseFile:
9452: * @sax: a SAX handler
9453: * @user_data: The user data returned on SAX callbacks
9454: * @filename: a file name
9455: *
9456: * parse an XML file and call the given SAX handler routines.
9457: * Automatic support for ZLIB/Compress compressed document is provided
9458: *
9459: * Returns 0 in case of success or a error number otherwise
9460: */
9461: int
9462: xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9463: const char *filename) {
9464: int ret = 0;
9465: xmlParserCtxtPtr ctxt;
9466:
9467: ctxt = xmlCreateFileParserCtxt(filename);
9468: if (ctxt == NULL) return -1;
9469: if (ctxt->sax != &xmlDefaultSAXHandler)
9470: xmlFree(ctxt->sax);
9471: ctxt->sax = sax;
9472: if (user_data != NULL)
9473: ctxt->userData = user_data;
9474:
1.16 daniel 9475: xmlParseDocument(ctxt);
1.229 veillard 9476:
9477: if (ctxt->wellFormed)
9478: ret = 0;
1.59 daniel 9479: else {
1.229 veillard 9480: if (ctxt->errNo != 0)
9481: ret = ctxt->errNo;
9482: else
9483: ret = -1;
1.59 daniel 9484: }
1.86 daniel 9485: if (sax != NULL)
1.229 veillard 9486: ctxt->sax = NULL;
1.69 daniel 9487: xmlFreeParserCtxt(ctxt);
1.20 daniel 9488:
1.229 veillard 9489: return ret;
1.20 daniel 9490: }
9491:
1.229 veillard 9492: /************************************************************************
9493: * *
9494: * Front ends when parsing from memory *
9495: * *
9496: ************************************************************************/
1.32 daniel 9497:
1.50 daniel 9498: /**
1.181 daniel 9499: * xmlCreateMemoryParserCtxt:
1.229 veillard 9500: * @buffer: a pointer to a char array
9501: * @size: the size of the array
1.50 daniel 9502: *
1.69 daniel 9503: * Create a parser context for an XML in-memory document.
1.50 daniel 9504: *
1.69 daniel 9505: * Returns the new parser context or NULL
1.20 daniel 9506: */
1.69 daniel 9507: xmlParserCtxtPtr
9508: xmlCreateMemoryParserCtxt(char *buffer, int size) {
1.20 daniel 9509: xmlParserCtxtPtr ctxt;
1.40 daniel 9510: xmlParserInputPtr input;
1.209 veillard 9511: xmlParserInputBufferPtr buf;
1.40 daniel 9512:
1.229 veillard 9513: if (buffer == NULL)
9514: return(NULL);
9515: if (size <= 0)
1.181 daniel 9516: return(NULL);
1.40 daniel 9517:
1.97 daniel 9518: ctxt = xmlNewParserCtxt();
1.181 daniel 9519: if (ctxt == NULL)
1.20 daniel 9520: return(NULL);
1.97 daniel 9521:
1.209 veillard 9522: buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
9523: if (buf == NULL) return(NULL);
9524:
1.96 daniel 9525: input = xmlNewInputStream(ctxt);
1.40 daniel 9526: if (input == NULL) {
1.97 daniel 9527: xmlFreeParserCtxt(ctxt);
1.40 daniel 9528: return(NULL);
9529: }
1.20 daniel 9530:
1.40 daniel 9531: input->filename = NULL;
1.209 veillard 9532: input->buf = buf;
9533: input->base = input->buf->buffer->content;
9534: input->cur = input->buf->buffer->content;
1.20 daniel 9535:
1.40 daniel 9536: inputPush(ctxt, input);
1.69 daniel 9537: return(ctxt);
9538: }
9539:
9540: /**
1.181 daniel 9541: * xmlSAXParseMemory:
1.69 daniel 9542: * @sax: the SAX handler block
9543: * @buffer: an pointer to a char array
1.127 daniel 9544: * @size: the size of the array
9545: * @recovery: work in recovery mode, i.e. tries to read not Well Formed
1.69 daniel 9546: * documents
9547: *
9548: * parse an XML in-memory block and use the given SAX function block
9549: * to handle the parsing callback. If sax is NULL, fallback to the default
9550: * DOM tree building routines.
9551: *
9552: * Returns the resulting document tree
9553: */
9554: xmlDocPtr
9555: xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
9556: xmlDocPtr ret;
9557: xmlParserCtxtPtr ctxt;
9558:
9559: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9560: if (ctxt == NULL) return(NULL);
1.74 daniel 9561: if (sax != NULL) {
9562: ctxt->sax = sax;
9563: ctxt->userData = NULL;
9564: }
1.20 daniel 9565:
9566: xmlParseDocument(ctxt);
1.40 daniel 9567:
1.72 daniel 9568: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 9569: else {
9570: ret = NULL;
1.72 daniel 9571: xmlFreeDoc(ctxt->myDoc);
9572: ctxt->myDoc = NULL;
1.59 daniel 9573: }
1.86 daniel 9574: if (sax != NULL)
9575: ctxt->sax = NULL;
1.69 daniel 9576: xmlFreeParserCtxt(ctxt);
1.16 daniel 9577:
1.9 httpng 9578: return(ret);
1.17 daniel 9579: }
9580:
1.55 daniel 9581: /**
1.181 daniel 9582: * xmlParseMemory:
1.68 daniel 9583: * @buffer: an pointer to a char array
1.55 daniel 9584: * @size: the size of the array
9585: *
9586: * parse an XML in-memory block and build a tree.
9587: *
1.68 daniel 9588: * Returns the resulting document tree
1.55 daniel 9589: */
9590:
9591: xmlDocPtr xmlParseMemory(char *buffer, int size) {
1.59 daniel 9592: return(xmlSAXParseMemory(NULL, buffer, size, 0));
9593: }
9594:
9595: /**
1.181 daniel 9596: * xmlRecoverMemory:
1.68 daniel 9597: * @buffer: an pointer to a char array
1.59 daniel 9598: * @size: the size of the array
9599: *
9600: * parse an XML in-memory block and build a tree.
9601: * In the case the document is not Well Formed, a tree is built anyway
9602: *
1.68 daniel 9603: * Returns the resulting document tree
1.59 daniel 9604: */
9605:
9606: xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
9607: return(xmlSAXParseMemory(NULL, buffer, size, 1));
1.17 daniel 9608: }
9609:
1.123 daniel 9610: /**
9611: * xmlSAXUserParseMemory:
9612: * @sax: a SAX handler
9613: * @user_data: The user data returned on SAX callbacks
9614: * @buffer: an in-memory XML document input
1.127 daniel 9615: * @size: the length of the XML document in bytes
1.123 daniel 9616: *
9617: * A better SAX parsing routine.
9618: * parse an XML in-memory buffer and call the given SAX handler routines.
9619: *
9620: * Returns 0 in case of success or a error number otherwise
9621: */
9622: int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
9623: char *buffer, int size) {
9624: int ret = 0;
9625: xmlParserCtxtPtr ctxt;
1.218 veillard 9626: xmlSAXHandlerPtr oldsax = NULL;
1.123 daniel 9627:
9628: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9629: if (ctxt == NULL) return -1;
1.216 veillard 9630: if (sax != NULL) {
9631: oldsax = ctxt->sax;
9632: ctxt->sax = sax;
9633: }
1.123 daniel 9634: ctxt->userData = user_data;
9635:
9636: xmlParseDocument(ctxt);
9637:
9638: if (ctxt->wellFormed)
9639: ret = 0;
9640: else {
9641: if (ctxt->errNo != 0)
9642: ret = ctxt->errNo;
9643: else
9644: ret = -1;
9645: }
1.216 veillard 9646: if (sax != NULL) {
9647: ctxt->sax = oldsax;
9648: }
1.123 daniel 9649: xmlFreeParserCtxt(ctxt);
9650:
9651: return ret;
9652: }
9653:
1.132 daniel 9654: /**
1.229 veillard 9655: * xmlCreateDocParserCtxt:
9656: * @cur: a pointer to an array of xmlChar
9657: *
9658: * Creates a parser context for an XML in-memory document.
1.132 daniel 9659: *
1.229 veillard 9660: * Returns the new parser context or NULL
1.132 daniel 9661: */
1.229 veillard 9662: xmlParserCtxtPtr
9663: xmlCreateDocParserCtxt(xmlChar *cur) {
9664: int len;
1.132 daniel 9665:
1.229 veillard 9666: if (cur == NULL)
9667: return(NULL);
9668: len = xmlStrlen(cur);
9669: return(xmlCreateMemoryParserCtxt((char *)cur, len));
1.132 daniel 9670: }
1.98 daniel 9671:
1.50 daniel 9672: /**
1.229 veillard 9673: * xmlSAXParseDoc:
9674: * @sax: the SAX handler block
9675: * @cur: a pointer to an array of xmlChar
9676: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9677: * documents
1.50 daniel 9678: *
1.229 veillard 9679: * parse an XML in-memory document and build a tree.
9680: * It use the given SAX function block to handle the parsing callback.
9681: * If sax is NULL, fallback to the default DOM tree building routines.
1.50 daniel 9682: *
1.229 veillard 9683: * Returns the resulting document tree
1.32 daniel 9684: */
9685:
1.229 veillard 9686: xmlDocPtr
9687: xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
9688: xmlDocPtr ret;
9689: xmlParserCtxtPtr ctxt;
9690:
9691: if (cur == NULL) return(NULL);
1.32 daniel 9692:
9693:
1.229 veillard 9694: ctxt = xmlCreateDocParserCtxt(cur);
9695: if (ctxt == NULL) return(NULL);
9696: if (sax != NULL) {
9697: ctxt->sax = sax;
9698: ctxt->userData = NULL;
9699: }
1.32 daniel 9700:
1.229 veillard 9701: xmlParseDocument(ctxt);
9702: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9703: else {
9704: ret = NULL;
9705: xmlFreeDoc(ctxt->myDoc);
9706: ctxt->myDoc = NULL;
9707: }
9708: if (sax != NULL)
9709: ctxt->sax = NULL;
9710: xmlFreeParserCtxt(ctxt);
9711:
9712: return(ret);
1.32 daniel 9713: }
9714:
1.50 daniel 9715: /**
1.229 veillard 9716: * xmlParseDoc:
9717: * @cur: a pointer to an array of xmlChar
1.50 daniel 9718: *
1.229 veillard 9719: * parse an XML in-memory document and build a tree.
1.50 daniel 9720: *
1.229 veillard 9721: * Returns the resulting document tree
1.32 daniel 9722: */
9723:
1.229 veillard 9724: xmlDocPtr
9725: xmlParseDoc(xmlChar *cur) {
9726: return(xmlSAXParseDoc(NULL, cur, 0));
9727: }
1.32 daniel 9728:
9729:
1.229 veillard 9730: /************************************************************************
9731: * *
9732: * Miscellaneous *
9733: * *
9734: ************************************************************************/
1.32 daniel 9735:
1.237 veillard 9736: #ifdef LIBXML_XPATH_ENABLED
9737: #include <libxml/xpath.h>
9738: #endif
9739:
1.235 veillard 9740: static int xmlParserInitialized = 0;
9741:
9742: /**
9743: * xmlInitParser:
9744: *
9745: * Initialization function for the XML parser.
9746: * This is not reentrant. Call once before processing in case of
9747: * use in multithreaded programs.
9748: */
9749:
9750: void
9751: xmlInitParser(void) {
9752: if (xmlParserInitialized) return;
9753:
9754: xmlInitCharEncodingHandlers();
9755: xmlInitializePredefinedEntities();
9756: xmlDefaultSAXHandlerInit();
1.237 veillard 9757: xmlRegisterDefaultInputCallbacks();
9758: xmlRegisterDefaultOutputCallbacks();
1.235 veillard 9759: #ifdef LIBXML_HTML_ENABLED
9760: htmlInitAutoClose();
9761: htmlDefaultSAXHandlerInit();
1.237 veillard 9762: #endif
9763: #ifdef LIBXML_XPATH_ENABLED
9764: xmlXPathInit();
1.235 veillard 9765: #endif
9766: xmlParserInitialized = 1;
9767: }
9768:
1.50 daniel 9769: /**
1.229 veillard 9770: * xmlCleanupParser:
1.50 daniel 9771: *
1.229 veillard 9772: * Cleanup function for the XML parser. It tries to reclaim all
9773: * parsing related global memory allocated for the parser processing.
9774: * It doesn't deallocate any document related memory. Calling this
9775: * function should not prevent reusing the parser.
1.32 daniel 9776: */
1.229 veillard 9777:
1.55 daniel 9778: void
1.229 veillard 9779: xmlCleanupParser(void) {
1.235 veillard 9780: xmlParserInitialized = 0;
1.229 veillard 9781: xmlCleanupCharEncodingHandlers();
9782: xmlCleanupPredefinedEntities();
1.32 daniel 9783: }
1.220 veillard 9784:
9785: /**
9786: * xmlPedanticParserDefault:
9787: * @val: int 0 or 1
9788: *
9789: * Set and return the previous value for enabling pedantic warnings.
9790: *
9791: * Returns the last value for 0 for no substitution, 1 for substitution.
9792: */
9793:
9794: int
9795: xmlPedanticParserDefault(int val) {
9796: int old = xmlPedanticParserDefaultValue;
9797:
9798: xmlPedanticParserDefaultValue = val;
9799: return(old);
9800: }
1.98 daniel 9801:
9802: /**
1.181 daniel 9803: * xmlSubstituteEntitiesDefault:
1.98 daniel 9804: * @val: int 0 or 1
9805: *
9806: * Set and return the previous value for default entity support.
9807: * Initially the parser always keep entity references instead of substituting
9808: * entity values in the output. This function has to be used to change the
9809: * default parser behaviour
9810: * SAX::subtituteEntities() has to be used for changing that on a file by
9811: * file basis.
9812: *
9813: * Returns the last value for 0 for no substitution, 1 for substitution.
9814: */
9815:
9816: int
9817: xmlSubstituteEntitiesDefault(int val) {
9818: int old = xmlSubstituteEntitiesDefaultValue;
9819:
9820: xmlSubstituteEntitiesDefaultValue = val;
1.180 daniel 9821: return(old);
9822: }
9823:
9824: /**
9825: * xmlKeepBlanksDefault:
9826: * @val: int 0 or 1
9827: *
9828: * Set and return the previous value for default blanks text nodes support.
9829: * The 1.x version of the parser used an heuristic to try to detect
9830: * ignorable white spaces. As a result the SAX callback was generating
9831: * ignorableWhitespace() callbacks instead of characters() one, and when
9832: * using the DOM output text nodes containing those blanks were not generated.
9833: * The 2.x and later version will switch to the XML standard way and
9834: * ignorableWhitespace() are only generated when running the parser in
9835: * validating mode and when the current element doesn't allow CDATA or
9836: * mixed content.
9837: * This function is provided as a way to force the standard behaviour
9838: * on 1.X libs and to switch back to the old mode for compatibility when
9839: * running 1.X client code on 2.X . Upgrade of 1.X code should be done
9840: * by using xmlIsBlankNode() commodity function to detect the "empty"
9841: * nodes generated.
9842: * This value also affect autogeneration of indentation when saving code
9843: * if blanks sections are kept, indentation is not generated.
9844: *
9845: * Returns the last value for 0 for no substitution, 1 for substitution.
9846: */
9847:
9848: int
9849: xmlKeepBlanksDefault(int val) {
9850: int old = xmlKeepBlanksDefaultValue;
9851:
9852: xmlKeepBlanksDefaultValue = val;
9853: xmlIndentTreeOutput = !val;
1.98 daniel 9854: return(old);
9855: }
1.77 daniel 9856:
Webmaster