Annotation of XML/parser.c, revision 1.246
1.1 veillard 1: /*
1.229 veillard 2: * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3: * implemented on top of the SAX interfaces
1.15 veillard 4: *
1.222 veillard 5: * References:
6: * The XML specification:
7: * http://www.w3.org/TR/REC-xml
8: * Original 1.0 version:
9: * http://www.w3.org/TR/1998/REC-xml-19980210
10: * XML second edition working draft
11: * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12: *
1.229 veillard 13: * Okay this is a big file, the parser core is around 7000 lines, then it
14: * is followed by the progressive parser top routines, then the various
15: * high level APIs to call the parser and a few miscellaneous functions.
16: * A number of helper functions and deprecated ones have been moved to
17: * parserInternals.c to reduce this file size.
18: * As much as possible the functions are associated with their relative
19: * production in the XML specification. A few productions defining the
20: * different ranges of character are actually implanted either in
21: * parserInternals.h or parserInternals.c
22: * The DOM tree build is realized from the default SAX callbacks in
23: * the module SAX.c.
24: * The routines doing the validation checks are in valid.c and called either
25: * from the SAX callbacks or as standalone functions using a preparsed
26: * document.
27: *
1.15 veillard 28: * See Copyright for the status of this software.
29: *
1.60 daniel 30: * Daniel.Veillard@w3.org
1.246 ! veillard 31: *
! 32: * 14 Nov 2000 ht - truncated definitions of xmlSubstituteEntitiesDefaultValue
! 33: * and xmlDoValidityCheckingDefaultValue for VMS
1.1 veillard 34: */
35:
1.26 daniel 36: #ifdef WIN32
1.138 daniel 37: #include "win32config.h"
1.226 veillard 38: #define XML_DIR_SEP '\\'
1.26 daniel 39: #else
1.121 daniel 40: #include "config.h"
1.226 veillard 41: #define XML_DIR_SEP '/'
1.26 daniel 42: #endif
1.121 daniel 43:
1.1 veillard 44: #include <stdio.h>
1.238 veillard 45: #include <stdlib.h>
1.204 veillard 46: #include <string.h>
1.238 veillard 47: #include <libxml/xmlmemory.h>
48: #include <libxml/tree.h>
49: #include <libxml/parser.h>
50: #include <libxml/parserInternals.h>
51: #include <libxml/valid.h>
52: #include <libxml/entities.h>
53: #include <libxml/xmlerror.h>
54: #include <libxml/encoding.h>
55: #include <libxml/xmlIO.h>
56: #include <libxml/uri.h>
57:
1.121 daniel 58: #ifdef HAVE_CTYPE_H
1.1 veillard 59: #include <ctype.h>
1.121 daniel 60: #endif
61: #ifdef HAVE_STDLIB_H
1.50 daniel 62: #include <stdlib.h>
1.121 daniel 63: #endif
64: #ifdef HAVE_SYS_STAT_H
1.9 httpng 65: #include <sys/stat.h>
1.121 daniel 66: #endif
1.9 httpng 67: #ifdef HAVE_FCNTL_H
68: #include <fcntl.h>
69: #endif
1.10 httpng 70: #ifdef HAVE_UNISTD_H
71: #include <unistd.h>
72: #endif
1.20 daniel 73: #ifdef HAVE_ZLIB_H
74: #include <zlib.h>
75: #endif
1.1 veillard 76:
77:
/*
 * Sizes, in xmlChar, used for the temporary buffers of the parser:
 * the initial allocation of large buffers, and the safety margin kept
 * free at the end of a buffer before it is grown.
 */
#define XML_PARSER_BIG_BUFFER_SIZE	1000
#define XML_PARSER_BUFFER_SIZE		100

/*
 * Global default values used when parsing.
 */
int xmlGetWarningsDefaultValue = 1;
int xmlParserDebugEntities = 0;

#if defined(VMS)
/*
 * On VMS the two variables below are defined under names truncated to
 * 31 characters; the macros keep the full names usable in this file.
 */
int xmlSubstituteEntitiesDefaultVal = 0;
#define xmlSubstituteEntitiesDefaultValue xmlSubstituteEntitiesDefaultVal
int xmlDoValidityCheckingDefaultVal = 0;
#define xmlDoValidityCheckingDefaultValue xmlDoValidityCheckingDefaultVal
#else /* !VMS */
int xmlSubstituteEntitiesDefaultValue = 0;
int xmlDoValidityCheckingDefaultValue = 0;
#endif /* VMS */

int xmlPedanticParserDefaultValue = 0;
int xmlKeepBlanksDefaultValue = 1;
1.86 daniel 97:
/*
 * NULL-terminated list of the processing instruction targets starting
 * with "xml" that are allowed by W3C specifications.
 */
const char *xmlW3CPIs[] = { "xml-stylesheet", NULL };
1.91 daniel 106:
1.229 veillard 107: /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
1.151 daniel 108: void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
109: xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
110: const xmlChar **str);
1.91 daniel 111:
112:
1.45 daniel 113: /************************************************************************
114: * *
115: * Parser stacks related functions and macros *
116: * *
117: ************************************************************************/
1.79 daniel 118:
1.135 daniel 119: xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
120: const xmlChar ** str);
1.79 daniel 121:
1.1 veillard 122: /*
1.40 daniel 123: * Generic function for accessing stacks in the Parser Context
1.1 veillard 124: */
125:
1.140 daniel 126: #define PUSH_AND_POP(scope, type, name) \
127: scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
1.31 daniel 128: if (ctxt->name##Nr >= ctxt->name##Max) { \
129: ctxt->name##Max *= 2; \
1.204 veillard 130: ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
1.40 daniel 131: ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
132: if (ctxt->name##Tab == NULL) { \
1.241 veillard 133: xmlGenericError(xmlGenericErrorContext, \
134: "realloc failed !\n"); \
1.145 daniel 135: return(0); \
1.31 daniel 136: } \
137: } \
1.40 daniel 138: ctxt->name##Tab[ctxt->name##Nr] = value; \
139: ctxt->name = value; \
140: return(ctxt->name##Nr++); \
1.31 daniel 141: } \
1.140 daniel 142: scope type name##Pop(xmlParserCtxtPtr ctxt) { \
1.69 daniel 143: type ret; \
1.40 daniel 144: if (ctxt->name##Nr <= 0) return(0); \
145: ctxt->name##Nr--; \
1.50 daniel 146: if (ctxt->name##Nr > 0) \
147: ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
148: else \
149: ctxt->name = NULL; \
1.69 daniel 150: ret = ctxt->name##Tab[ctxt->name##Nr]; \
151: ctxt->name##Tab[ctxt->name##Nr] = 0; \
152: return(ret); \
1.31 daniel 153: } \
154:
1.229 veillard 155: /*
156: * Those macros actually generate the functions
157: */
1.140 daniel 158: PUSH_AND_POP(extern, xmlParserInputPtr, input)
159: PUSH_AND_POP(extern, xmlNodePtr, node)
160: PUSH_AND_POP(extern, xmlChar*, name)
1.40 daniel 161:
1.176 daniel 162: int spacePush(xmlParserCtxtPtr ctxt, int val) {
163: if (ctxt->spaceNr >= ctxt->spaceMax) {
164: ctxt->spaceMax *= 2;
1.204 veillard 165: ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1.176 daniel 166: ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
167: if (ctxt->spaceTab == NULL) {
1.241 veillard 168: xmlGenericError(xmlGenericErrorContext,
169: "realloc failed !\n");
1.176 daniel 170: return(0);
171: }
172: }
173: ctxt->spaceTab[ctxt->spaceNr] = val;
174: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
175: return(ctxt->spaceNr++);
176: }
177:
178: int spacePop(xmlParserCtxtPtr ctxt) {
179: int ret;
180: if (ctxt->spaceNr <= 0) return(0);
181: ctxt->spaceNr--;
182: if (ctxt->spaceNr > 0)
183: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
184: else
185: ctxt->space = NULL;
186: ret = ctxt->spaceTab[ctxt->spaceNr];
187: ctxt->spaceTab[ctxt->spaceNr] = -1;
188: return(ret);
189: }
190:
1.55 daniel 191: /*
192: * Macros for accessing the content. Those should be used only by the parser,
193: * and not exported.
194: *
1.229 veillard 195: * Dirty macros, i.e. one often need to make assumption on the context to
196: * use them
1.55 daniel 197: *
1.123 daniel 198: * CUR_PTR return the current pointer to the xmlChar to be parsed.
1.151 daniel 199: * To be used with extreme caution since operations consuming
200: * characters may move the input buffer to a different location !
1.123 daniel 201: * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1.151 daniel 202: * This should be used internally by the parser
1.55 daniel 203: * only to compare to ASCII values otherwise it would break when
204: * running with UTF-8 encoding.
1.229 veillard 205: * RAW same as CUR but in the input buffer, bypass any token
206: * extraction that may have been done
1.123 daniel 207: * NXT(n) returns the n'th next xmlChar. Same as CUR, it should be used only
1.55 daniel 208: * to compare on ASCII based substring.
1.123 daniel 209: * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1.55 daniel 210: * strings within the parser.
211: *
1.77 daniel 212: * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1.55 daniel 213: *
214: * NEXT Skip to the next character, this does the proper decoding
215: * in UTF-8 mode. It also pop-up unfinished entities on the fly.
1.229 veillard 216: * NEXTL(l) Skip l xmlChars in the input buffer
217: * CUR_CHAR(l) returns the current unicode character (int), set l
218: * to the number of xmlChars used for the encoding [0-5].
219: * CUR_SCHAR same but operate on a string instead of the context
220: * COPY_BUF copy the current unicode char to the target buffer, increment
221: * the index
222: * GROW, SHRINK handling of input buffers
1.55 daniel 223: */
1.45 daniel 224:
/*
 * All the macros below expect a variable named ctxt (the parser context)
 * to be in scope at the point of use.
 */

/* current xmlChar of the input, -1 while ctxt->token is set */
#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
/* ctxt->token if it is set, the current xmlChar of the input otherwise */
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
/* xmlChar found val positions after the current one */
#define NXT(val) ctxt->input->cur[(val)]
/* pointer to the current position in the input buffer */
#define CUR_PTR ctxt->input->cur

/*
 * Advance by val xmlChars, handle a PE reference found at the new position
 * and pop the input if its end was reached and it cannot be grown.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    /* DEPR if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); */\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/* Shrink the input buffer, popping the input if it is exhausted. */
#define SHRINK do {							\
    xmlParserInputShrink(ctxt->input);					\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/* Grow the input buffer, popping the input if it is exhausted. */
#define GROW do {							\
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * Advance by l xmlChars, keeping the line/column counters up to date and
 * clearing ctxt->token; a PE reference found at the new position is handled.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->token = 0; ctxt->input->cur += (l);				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    /* DEPR if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); */\
  } while (0)

/* l must be an lvalue: its address is handed to the decoding function */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * Append the character v, encoded on l xmlChars, to buffer b at index i
 * and advance i accordingly. Wrapped in do/while(0) so that the macro
 * behaves as a single statement, e.g. in an unbraced if/else.
 */
#define COPY_BUF(l,b,i,v) do {						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyChar((l),&(b)[(i)],(v));			\
  } while (0)
1.151 daniel 272:
273: /**
1.229 veillard 274: * xmlSkipBlankChars:
1.151 daniel 275: * @ctxt: the XML parser context
276: *
1.229 veillard 277: * skip all blanks character found at that point in the input streams.
278: * It pops up finished entities in the process if allowable at that point.
279: *
280: * Returns the number of space chars skipped
1.151 daniel 281: */
1.55 daniel 282:
1.229 veillard 283: int
284: xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
285: int cur, res = 0;
1.201 daniel 286:
1.176 daniel 287: /*
1.229 veillard 288: * It's Okay to use CUR/NEXT here since all the blanks are on
289: * the ASCII range.
290: */
291: do {
292: cur = CUR;
293: while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
294: NEXT;
295: cur = CUR;
296: res++;
1.151 daniel 297: }
1.229 veillard 298: while ((cur == 0) && (ctxt->inputNr > 1) &&
299: (ctxt->instate != XML_PARSER_COMMENT)) {
1.168 daniel 300: xmlPopInput(ctxt);
1.229 veillard 301: cur = CUR;
302: }
1.222 veillard 303: /*
304: * Need to handle support of entities branching here
305: */
1.155 daniel 306: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1.229 veillard 307: /* DEPR if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); */
1.222 veillard 308: } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1.155 daniel 309: return(res);
1.152 daniel 310: }
311:
1.97 daniel 312: /************************************************************************
313: * *
1.229 veillard 314: * Commodity functions to handle entities *
1.97 daniel 315: * *
316: ************************************************************************/
1.40 daniel 317:
1.50 daniel 318: /**
319: * xmlPopInput:
320: * @ctxt: an XML parser context
321: *
1.40 daniel 322: * xmlPopInput: the current input pointed by ctxt->input came to an end
323: * pop it and return the next char.
1.45 daniel 324: *
1.123 daniel 325: * Returns the current xmlChar in the parser context
1.40 daniel 326: */
1.123 daniel 327: xmlChar
1.55 daniel 328: xmlPopInput(xmlParserCtxtPtr ctxt) {
1.40 daniel 329: if (ctxt->inputNr == 1) return(0); /* End of main Input */
1.220 veillard 330: if (xmlParserDebugEntities)
1.241 veillard 331: xmlGenericError(xmlGenericErrorContext,
332: "Popping input %d\n", ctxt->inputNr);
1.69 daniel 333: xmlFreeInputStream(inputPop(ctxt));
1.97 daniel 334: if ((*ctxt->input->cur == 0) &&
335: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
336: return(xmlPopInput(ctxt));
1.40 daniel 337: return(CUR);
338: }
339:
1.50 daniel 340: /**
1.229 veillard 341: * xmlPushInput:
1.174 daniel 342: * @ctxt: an XML parser context
1.229 veillard 343: * @input: an XML parser input fragment (entity, XML fragment ...).
1.174 daniel 344: *
1.229 veillard 345: * xmlPushInput: switch to a new input stream which is stacked on top
346: * of the previous one(s).
1.174 daniel 347: */
1.229 veillard 348: void
349: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
350: if (input == NULL) return;
1.174 daniel 351:
1.229 veillard 352: if (xmlParserDebugEntities) {
353: if ((ctxt->input != NULL) && (ctxt->input->filename))
1.241 veillard 354: xmlGenericError(xmlGenericErrorContext,
355: "%s(%d): ", ctxt->input->filename,
1.229 veillard 356: ctxt->input->line);
1.241 veillard 357: xmlGenericError(xmlGenericErrorContext,
358: "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1.229 veillard 359: }
360: inputPush(ctxt, input);
361: GROW;
1.174 daniel 362: }
1.97 daniel 363:
364: /**
365: * xmlParseCharRef:
366: * @ctxt: an XML parser context
367: *
368: * parse Reference declarations
369: *
370: * [66] CharRef ::= '&#' [0-9]+ ';' |
371: * '&#x' [0-9a-fA-F]+ ';'
372: *
1.98 daniel 373: * [ WFC: Legal Character ]
374: * Characters referred to using character references must match the
375: * production for Char.
376: *
1.135 daniel 377: * Returns the value parsed (as an int), 0 in case of error
1.77 daniel 378: */
1.97 daniel 379: int
380: xmlParseCharRef(xmlParserCtxtPtr ctxt) {
381: int val = 0;
1.222 veillard 382: int count = 0;
1.97 daniel 383:
1.111 daniel 384: if (ctxt->token != 0) {
385: val = ctxt->token;
386: ctxt->token = 0;
387: return(val);
388: }
1.222 veillard 389: /*
390: * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
391: */
1.152 daniel 392: if ((RAW == '&') && (NXT(1) == '#') &&
1.97 daniel 393: (NXT(2) == 'x')) {
394: SKIP(3);
1.222 veillard 395: GROW;
396: while (RAW != ';') { /* loop blocked by count */
397: if ((RAW >= '0') && (RAW <= '9') && (count < 20))
1.97 daniel 398: val = val * 16 + (CUR - '0');
1.222 veillard 399: else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1.97 daniel 400: val = val * 16 + (CUR - 'a') + 10;
1.222 veillard 401: else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1.97 daniel 402: val = val * 16 + (CUR - 'A') + 10;
403: else {
1.123 daniel 404: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1.97 daniel 405: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
406: ctxt->sax->error(ctxt->userData,
407: "xmlParseCharRef: invalid hexadecimal value\n");
408: ctxt->wellFormed = 0;
1.180 daniel 409: ctxt->disableSAX = 1;
1.97 daniel 410: val = 0;
411: break;
412: }
413: NEXT;
1.222 veillard 414: count++;
1.97 daniel 415: }
1.164 daniel 416: if (RAW == ';') {
417: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
418: ctxt->nbChars ++;
419: ctxt->input->cur++;
420: }
1.152 daniel 421: } else if ((RAW == '&') && (NXT(1) == '#')) {
1.97 daniel 422: SKIP(2);
1.222 veillard 423: GROW;
424: while (RAW != ';') { /* loop blocked by count */
425: if ((RAW >= '0') && (RAW <= '9') && (count < 20))
1.97 daniel 426: val = val * 10 + (CUR - '0');
427: else {
1.123 daniel 428: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1.97 daniel 429: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
430: ctxt->sax->error(ctxt->userData,
431: "xmlParseCharRef: invalid decimal value\n");
432: ctxt->wellFormed = 0;
1.180 daniel 433: ctxt->disableSAX = 1;
1.97 daniel 434: val = 0;
435: break;
436: }
437: NEXT;
1.222 veillard 438: count++;
1.97 daniel 439: }
1.164 daniel 440: if (RAW == ';') {
441: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
442: ctxt->nbChars ++;
443: ctxt->input->cur++;
444: }
1.97 daniel 445: } else {
1.123 daniel 446: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1.97 daniel 447: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.229 veillard 448: ctxt->sax->error(ctxt->userData,
449: "xmlParseCharRef: invalid value\n");
1.97 daniel 450: ctxt->wellFormed = 0;
1.180 daniel 451: ctxt->disableSAX = 1;
1.97 daniel 452: }
1.229 veillard 453:
454: /*
455: * [ WFC: Legal Character ]
456: * Characters referred to using character references must match the
457: * production for Char.
458: */
459: if (IS_CHAR(val)) {
460: return(val);
461: } else {
462: ctxt->errNo = XML_ERR_INVALID_CHAR;
1.97 daniel 463: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.229 veillard 464: ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
465: val);
1.97 daniel 466: ctxt->wellFormed = 0;
1.180 daniel 467: ctxt->disableSAX = 1;
1.97 daniel 468: }
1.229 veillard 469: return(0);
470: }
471:
472: /**
473: * xmlParseStringCharRef:
474: * @ctxt: an XML parser context
475: * @str: a pointer to an index in the string
476: *
477: * parse Reference declarations, variant parsing from a string rather
478: * than an an input flow.
479: *
480: * [66] CharRef ::= '&#' [0-9]+ ';' |
481: * '&#x' [0-9a-fA-F]+ ';'
482: *
483: * [ WFC: Legal Character ]
484: * Characters referred to using character references must match the
485: * production for Char.
486: *
487: * Returns the value parsed (as an int), 0 in case of error, str will be
488: * updated to the current value of the index
489: */
490: int
491: xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
492: const xmlChar *ptr;
493: xmlChar cur;
494: int val = 0;
1.98 daniel 495:
1.229 veillard 496: if ((str == NULL) || (*str == NULL)) return(0);
497: ptr = *str;
498: cur = *ptr;
499: if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
500: ptr += 3;
501: cur = *ptr;
502: while (cur != ';') { /* Non input consuming loop */
503: if ((cur >= '0') && (cur <= '9'))
504: val = val * 16 + (cur - '0');
505: else if ((cur >= 'a') && (cur <= 'f'))
506: val = val * 16 + (cur - 'a') + 10;
507: else if ((cur >= 'A') && (cur <= 'F'))
508: val = val * 16 + (cur - 'A') + 10;
509: else {
510: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
511: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
512: ctxt->sax->error(ctxt->userData,
513: "xmlParseStringCharRef: invalid hexadecimal value\n");
514: ctxt->wellFormed = 0;
515: ctxt->disableSAX = 1;
516: val = 0;
517: break;
518: }
519: ptr++;
520: cur = *ptr;
521: }
522: if (cur == ';')
523: ptr++;
524: } else if ((cur == '&') && (ptr[1] == '#')){
525: ptr += 2;
526: cur = *ptr;
527: while (cur != ';') { /* Non input consuming loops */
528: if ((cur >= '0') && (cur <= '9'))
529: val = val * 10 + (cur - '0');
530: else {
531: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
532: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
533: ctxt->sax->error(ctxt->userData,
534: "xmlParseStringCharRef: invalid decimal value\n");
535: ctxt->wellFormed = 0;
536: ctxt->disableSAX = 1;
537: val = 0;
538: break;
539: }
540: ptr++;
541: cur = *ptr;
542: }
543: if (cur == ';')
544: ptr++;
545: } else {
546: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1.97 daniel 547: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.229 veillard 548: ctxt->sax->error(ctxt->userData,
549: "xmlParseCharRef: invalid value\n");
1.97 daniel 550: ctxt->wellFormed = 0;
1.180 daniel 551: ctxt->disableSAX = 1;
1.229 veillard 552: return(0);
1.97 daniel 553: }
1.229 veillard 554: *str = ptr;
1.98 daniel 555:
556: /*
1.229 veillard 557: * [ WFC: Legal Character ]
558: * Characters referred to using character references must match the
559: * production for Char.
1.98 daniel 560: */
1.229 veillard 561: if (IS_CHAR(val)) {
562: return(val);
563: } else {
564: ctxt->errNo = XML_ERR_INVALID_CHAR;
1.98 daniel 565: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.229 veillard 566: ctxt->sax->error(ctxt->userData,
567: "CharRef: invalid xmlChar value %d\n", val);
1.98 daniel 568: ctxt->wellFormed = 0;
1.180 daniel 569: ctxt->disableSAX = 1;
1.98 daniel 570: }
1.229 veillard 571: return(0);
1.96 daniel 572: }
573:
574: /**
575: * xmlParserHandlePEReference:
576: * @ctxt: the parser context
577: *
578: * [69] PEReference ::= '%' Name ';'
579: *
1.98 daniel 580: * [ WFC: No Recursion ]
1.229 veillard 581: * A parsed entity must not contain a recursive
1.98 daniel 582: * reference to itself, either directly or indirectly.
583: *
584: * [ WFC: Entity Declared ]
585: * In a document without any DTD, a document with only an internal DTD
586: * subset which contains no parameter entity references, or a document
587: * with "standalone='yes'", ... ... The declaration of a parameter
588: * entity must precede any reference to it...
589: *
590: * [ VC: Entity Declared ]
591: * In a document with an external subset or external parameter entities
592: * with "standalone='no'", ... ... The declaration of a parameter entity
593: * must precede any reference to it...
594: *
595: * [ WFC: In DTD ]
596: * Parameter-entity references may only appear in the DTD.
597: * NOTE: misleading but this is handled.
598: *
599: * A PEReference may have been detected in the current input stream
1.96 daniel 600: * the handling is done accordingly to
601: * http://www.w3.org/TR/REC-xml#entproc
602: * i.e.
603: * - Included in literal in entity values
604: * - Included as Paraemeter Entity reference within DTDs
605: */
606: void
607: xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 608: xmlChar *name;
1.96 daniel 609: xmlEntityPtr entity = NULL;
610: xmlParserInputPtr input;
611:
1.126 daniel 612: if (ctxt->token != 0) {
613: return;
614: }
1.152 daniel 615: if (RAW != '%') return;
1.96 daniel 616: switch(ctxt->instate) {
1.109 daniel 617: case XML_PARSER_CDATA_SECTION:
618: return;
1.97 daniel 619: case XML_PARSER_COMMENT:
620: return;
1.140 daniel 621: case XML_PARSER_START_TAG:
622: return;
623: case XML_PARSER_END_TAG:
624: return;
1.96 daniel 625: case XML_PARSER_EOF:
1.123 daniel 626: ctxt->errNo = XML_ERR_PEREF_AT_EOF;
1.96 daniel 627: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
628: ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
629: ctxt->wellFormed = 0;
1.180 daniel 630: ctxt->disableSAX = 1;
1.96 daniel 631: return;
632: case XML_PARSER_PROLOG:
1.140 daniel 633: case XML_PARSER_START:
634: case XML_PARSER_MISC:
1.123 daniel 635: ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
1.96 daniel 636: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
637: ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
638: ctxt->wellFormed = 0;
1.180 daniel 639: ctxt->disableSAX = 1;
1.96 daniel 640: return;
1.97 daniel 641: case XML_PARSER_ENTITY_DECL:
1.96 daniel 642: case XML_PARSER_CONTENT:
643: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 644: case XML_PARSER_PI:
1.168 daniel 645: case XML_PARSER_SYSTEM_LITERAL:
1.96 daniel 646: /* we just ignore it there */
647: return;
648: case XML_PARSER_EPILOG:
1.123 daniel 649: ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
1.96 daniel 650: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.97 daniel 651: ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
1.96 daniel 652: ctxt->wellFormed = 0;
1.180 daniel 653: ctxt->disableSAX = 1;
1.96 daniel 654: return;
1.97 daniel 655: case XML_PARSER_ENTITY_VALUE:
656: /*
657: * NOTE: in the case of entity values, we don't do the
1.127 daniel 658: * substitution here since we need the literal
1.97 daniel 659: * entity value to be able to save the internal
660: * subset of the document.
1.222 veillard 661: * This will be handled by xmlStringDecodeEntities
1.97 daniel 662: */
663: return;
1.96 daniel 664: case XML_PARSER_DTD:
1.98 daniel 665: /*
666: * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
667: * In the internal DTD subset, parameter-entity references
668: * can occur only where markup declarations can occur, not
669: * within markup declarations.
670: * In that case this is handled in xmlParseMarkupDecl
671: */
672: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
673: return;
1.245 veillard 674: break;
675: case XML_PARSER_IGNORE:
676: return;
1.96 daniel 677: }
678:
679: NEXT;
680: name = xmlParseName(ctxt);
1.220 veillard 681: if (xmlParserDebugEntities)
1.241 veillard 682: xmlGenericError(xmlGenericErrorContext,
683: "PE Reference: %s\n", name);
1.96 daniel 684: if (name == NULL) {
1.123 daniel 685: ctxt->errNo = XML_ERR_PEREF_NO_NAME;
1.96 daniel 686: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
687: ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
688: ctxt->wellFormed = 0;
1.180 daniel 689: ctxt->disableSAX = 1;
1.96 daniel 690: } else {
1.152 daniel 691: if (RAW == ';') {
1.96 daniel 692: NEXT;
1.98 daniel 693: if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
694: entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.96 daniel 695: if (entity == NULL) {
1.98 daniel 696:
697: /*
698: * [ WFC: Entity Declared ]
699: * In a document without any DTD, a document with only an
700: * internal DTD subset which contains no parameter entity
701: * references, or a document with "standalone='yes'", ...
702: * ... The declaration of a parameter entity must precede
703: * any reference to it...
704: */
705: if ((ctxt->standalone == 1) ||
706: ((ctxt->hasExternalSubset == 0) &&
707: (ctxt->hasPErefs == 0))) {
708: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
709: ctxt->sax->error(ctxt->userData,
710: "PEReference: %%%s; not found\n", name);
711: ctxt->wellFormed = 0;
1.180 daniel 712: ctxt->disableSAX = 1;
1.98 daniel 713: } else {
714: /*
715: * [ VC: Entity Declared ]
716: * In a document with an external subset or external
717: * parameter entities with "standalone='no'", ...
718: * ... The declaration of a parameter entity must precede
719: * any reference to it...
720: */
1.220 veillard 721: if ((!ctxt->disableSAX) &&
722: (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
1.212 veillard 723: ctxt->vctxt.error(ctxt->vctxt.userData,
724: "PEReference: %%%s; not found\n", name);
1.220 veillard 725: } else if ((!ctxt->disableSAX) &&
726: (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1.98 daniel 727: ctxt->sax->warning(ctxt->userData,
728: "PEReference: %%%s; not found\n", name);
729: ctxt->valid = 0;
730: }
1.96 daniel 731: } else {
1.159 daniel 732: if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
733: (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
1.96 daniel 734: /*
1.229 veillard 735: * handle the extra spaces added before and after
1.96 daniel 736: * c.f. http://www.w3.org/TR/REC-xml#as-PE
1.229 veillard 737: * this is done independantly.
1.96 daniel 738: */
739: input = xmlNewEntityInputStream(ctxt, entity);
740: xmlPushInput(ctxt, input);
1.164 daniel 741: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
742: (RAW == '<') && (NXT(1) == '?') &&
743: (NXT(2) == 'x') && (NXT(3) == 'm') &&
744: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 745: xmlParseTextDecl(ctxt);
1.164 daniel 746: }
747: if (ctxt->token == 0)
748: ctxt->token = ' ';
1.96 daniel 749: } else {
750: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
751: ctxt->sax->error(ctxt->userData,
752: "xmlHandlePEReference: %s is not a parameter entity\n",
753: name);
754: ctxt->wellFormed = 0;
1.180 daniel 755: ctxt->disableSAX = 1;
1.96 daniel 756: }
757: }
758: } else {
1.123 daniel 759: ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
1.96 daniel 760: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
761: ctxt->sax->error(ctxt->userData,
762: "xmlHandlePEReference: expecting ';'\n");
763: ctxt->wellFormed = 0;
1.180 daniel 764: ctxt->disableSAX = 1;
1.96 daniel 765: }
1.119 daniel 766: xmlFree(name);
1.97 daniel 767: }
768: }
769:
/*
 * Macro used to grow the current buffer.
 *
 * It doubles buffer##_size and reallocates buffer accordingly. It must be
 * used in a function returning a pointer: if the reallocation fails, the
 * error is reported, the previous buffer is released (the caller cannot
 * do it anymore) and the enclosing function returns NULL.
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_grown;						\
    buffer##_size *= 2;							\
    buffer##_grown = (xmlChar *)					\
    		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (buffer##_grown == NULL) {					\
	perror("realloc failed");					\
	xmlFree(buffer);						\
	return(NULL);							\
    }									\
    buffer = buffer##_grown;						\
}
1.77 daniel 782:
783: /**
1.135 daniel 784: * xmlStringDecodeEntities:
785: * @ctxt: the parser context
786: * @str: the input string
787: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
788: * @end: an end marker xmlChar, 0 if none
789: * @end2: an end marker xmlChar, 0 if none
790: * @end3: an end marker xmlChar, 0 if none
791: *
1.222 veillard 792: * Takes an entity string content and processes it to do the adequate substitutions.
793: *
1.135 daniel 794: * [67] Reference ::= EntityRef | CharRef
795: *
796: * [69] PEReference ::= '%' Name ';'
797: *
798: * Returns A newly allocated string with the substitution done. The caller
799: * must deallocate it !
800: */
801: xmlChar *
802: xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
803: xmlChar end, xmlChar end2, xmlChar end3) {
804: xmlChar *buffer = NULL;
805: int buffer_size = 0;
806:
807: xmlChar *current = NULL;
808: xmlEntityPtr ent;
1.176 daniel 809: int c,l;
810: int nbchars = 0;
1.135 daniel 811:
1.211 veillard 812: if (str == NULL)
813: return(NULL);
814:
1.185 daniel 815: if (ctxt->depth > 40) {
1.230 veillard 816: ctxt->errNo = XML_ERR_ENTITY_LOOP;
1.185 daniel 817: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
818: ctxt->sax->error(ctxt->userData,
819: "Detected entity reference loop\n");
820: ctxt->wellFormed = 0;
821: ctxt->disableSAX = 1;
822: return(NULL);
823: }
824:
1.135 daniel 825: /*
826: * allocate a translation buffer.
827: */
1.140 daniel 828: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.135 daniel 829: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
830: if (buffer == NULL) {
831: perror("xmlDecodeEntities: malloc failed");
832: return(NULL);
833: }
834:
835: /*
836: * Ok loop until we reach one of the ending char or a size limit.
1.222 veillard 837: * we are operating on already parsed values.
1.135 daniel 838: */
1.176 daniel 839: c = CUR_SCHAR(str, l);
1.222 veillard 840: while ((c != 0) && (c != end) && /* non input consuming loop */
841: (c != end2) && (c != end3)) {
1.135 daniel 842:
1.176 daniel 843: if (c == 0) break;
844: if ((c == '&') && (str[1] == '#')) {
1.135 daniel 845: int val = xmlParseStringCharRef(ctxt, &str);
1.176 daniel 846: if (val != 0) {
847: COPY_BUF(0,buffer,nbchars,val);
848: }
849: } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1.220 veillard 850: if (xmlParserDebugEntities)
1.241 veillard 851: xmlGenericError(xmlGenericErrorContext,
852: "String decoding Entity Reference: %.30s\n",
1.220 veillard 853: str);
1.135 daniel 854: ent = xmlParseStringEntityRef(ctxt, &str);
1.222 veillard 855: if ((ent != NULL) &&
856: (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1.219 veillard 857: if (ent->content != NULL) {
858: COPY_BUF(0,buffer,nbchars,ent->content[0]);
859: } else {
860: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
861: ctxt->sax->error(ctxt->userData,
862: "internal error entity has no content\n");
863: }
864: } else if ((ent != NULL) && (ent->content != NULL)) {
1.185 daniel 865: xmlChar *rep;
866:
867: ctxt->depth++;
868: rep = xmlStringDecodeEntities(ctxt, ent->content, what,
869: 0, 0, 0);
870: ctxt->depth--;
871: if (rep != NULL) {
872: current = rep;
1.222 veillard 873: while (*current != 0) { /* non input consuming loop */
1.185 daniel 874: buffer[nbchars++] = *current++;
875: if (nbchars >
876: buffer_size - XML_PARSER_BUFFER_SIZE) {
877: growBuffer(buffer);
878: }
1.135 daniel 879: }
1.185 daniel 880: xmlFree(rep);
1.135 daniel 881: }
882: } else if (ent != NULL) {
883: int i = xmlStrlen(ent->name);
884: const xmlChar *cur = ent->name;
885:
1.176 daniel 886: buffer[nbchars++] = '&';
887: if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 888: growBuffer(buffer);
889: }
890: for (;i > 0;i--)
1.176 daniel 891: buffer[nbchars++] = *cur++;
892: buffer[nbchars++] = ';';
1.135 daniel 893: }
1.176 daniel 894: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1.220 veillard 895: if (xmlParserDebugEntities)
1.241 veillard 896: xmlGenericError(xmlGenericErrorContext,
897: "String decoding PE Reference: %.30s\n", str);
1.135 daniel 898: ent = xmlParseStringPEReference(ctxt, &str);
899: if (ent != NULL) {
1.185 daniel 900: xmlChar *rep;
901:
902: ctxt->depth++;
903: rep = xmlStringDecodeEntities(ctxt, ent->content, what,
904: 0, 0, 0);
905: ctxt->depth--;
906: if (rep != NULL) {
907: current = rep;
1.222 veillard 908: while (*current != 0) { /* non input consuming loop */
1.185 daniel 909: buffer[nbchars++] = *current++;
910: if (nbchars >
911: buffer_size - XML_PARSER_BUFFER_SIZE) {
912: growBuffer(buffer);
913: }
1.135 daniel 914: }
1.185 daniel 915: xmlFree(rep);
1.135 daniel 916: }
917: }
918: } else {
1.176 daniel 919: COPY_BUF(l,buffer,nbchars,c);
920: str += l;
921: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 922: growBuffer(buffer);
923: }
924: }
1.176 daniel 925: c = CUR_SCHAR(str, l);
1.135 daniel 926: }
1.229 veillard 927: buffer[nbchars++] = 0;
928: return(buffer);
1.172 daniel 929: }
930:
1.229 veillard 931:
932: /************************************************************************
933: * *
1.123 daniel 934: * Commodity functions to handle xmlChars *
1.28 daniel 935: * *
936: ************************************************************************/
937:
1.50 daniel 938: /**
939: * xmlStrndup:
1.123 daniel 940: * @cur: the input xmlChar *
1.50 daniel 941: * @len: the len of @cur
942: *
1.123 daniel 943: * a strndup for array of xmlChar's
1.68 daniel 944: *
1.123 daniel 945: * Returns a new xmlChar * or NULL
1.1 veillard 946: */
1.123 daniel 947: xmlChar *
948: xmlStrndup(const xmlChar *cur, int len) {
1.135 daniel 949: xmlChar *ret;
950:
951: if ((cur == NULL) || (len < 0)) return(NULL);
1.204 veillard 952: ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1.1 veillard 953: if (ret == NULL) {
1.241 veillard 954: xmlGenericError(xmlGenericErrorContext,
955: "malloc of %ld byte failed\n",
1.123 daniel 956: (len + 1) * (long)sizeof(xmlChar));
1.1 veillard 957: return(NULL);
958: }
1.123 daniel 959: memcpy(ret, cur, len * sizeof(xmlChar));
1.1 veillard 960: ret[len] = 0;
961: return(ret);
962: }
963:
1.50 daniel 964: /**
965: * xmlStrdup:
1.123 daniel 966: * @cur: the input xmlChar *
1.50 daniel 967: *
1.152 daniel 968: * a strdup for array of xmlChar's. Since they are supposed to be
969: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
970: * a termination mark of '0'.
1.68 daniel 971: *
1.123 daniel 972: * Returns a new xmlChar * or NULL
1.1 veillard 973: */
1.123 daniel 974: xmlChar *
975: xmlStrdup(const xmlChar *cur) {
976: const xmlChar *p = cur;
1.1 veillard 977:
1.135 daniel 978: if (cur == NULL) return(NULL);
1.222 veillard 979: while (*p != 0) p++; /* non input consuming */
1.1 veillard 980: return(xmlStrndup(cur, p - cur));
981: }
982:
1.50 daniel 983: /**
984: * xmlCharStrndup:
985: * @cur: the input char *
986: * @len: the len of @cur
987: *
1.123 daniel 988: * a strndup for char's to xmlChar's
1.68 daniel 989: *
1.123 daniel 990: * Returns a new xmlChar * or NULL
1.45 daniel 991: */
992:
1.123 daniel 993: xmlChar *
1.55 daniel 994: xmlCharStrndup(const char *cur, int len) {
1.45 daniel 995: int i;
1.135 daniel 996: xmlChar *ret;
997:
998: if ((cur == NULL) || (len < 0)) return(NULL);
1.204 veillard 999: ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1.45 daniel 1000: if (ret == NULL) {
1.241 veillard 1001: xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1.123 daniel 1002: (len + 1) * (long)sizeof(xmlChar));
1.45 daniel 1003: return(NULL);
1004: }
1005: for (i = 0;i < len;i++)
1.123 daniel 1006: ret[i] = (xmlChar) cur[i];
1.45 daniel 1007: ret[len] = 0;
1008: return(ret);
1009: }
1010:
1.50 daniel 1011: /**
1012: * xmlCharStrdup:
1013: * @cur: the input char *
1014: * @len: the len of @cur
1015: *
1.123 daniel 1016: * a strdup for char's to xmlChar's
1.68 daniel 1017: *
1.123 daniel 1018: * Returns a new xmlChar * or NULL
1.45 daniel 1019: */
1020:
1.123 daniel 1021: xmlChar *
1.55 daniel 1022: xmlCharStrdup(const char *cur) {
1.45 daniel 1023: const char *p = cur;
1024:
1.135 daniel 1025: if (cur == NULL) return(NULL);
1.222 veillard 1026: while (*p != '\0') p++; /* non input consuming */
1.45 daniel 1027: return(xmlCharStrndup(cur, p - cur));
1028: }
1029:
1.50 daniel 1030: /**
1031: * xmlStrcmp:
1.123 daniel 1032: * @str1: the first xmlChar *
1033: * @str2: the second xmlChar *
1.50 daniel 1034: *
1.123 daniel 1035: * a strcmp for xmlChar's
1.68 daniel 1036: *
1037: * Returns the integer result of the comparison
1.14 veillard 1038: */
1039:
1.55 daniel 1040: int
1.123 daniel 1041: xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1.14 veillard 1042: register int tmp;
1043:
1.229 veillard 1044: if (str1 == str2) return(0);
1.135 daniel 1045: if (str1 == NULL) return(-1);
1046: if (str2 == NULL) return(1);
1.14 veillard 1047: do {
1.232 veillard 1048: tmp = *str1++ - *str2;
1.14 veillard 1049: if (tmp != 0) return(tmp);
1.232 veillard 1050: } while (*str2++ != 0);
1051: return 0;
1.14 veillard 1052: }
1053:
1.50 daniel 1054: /**
1.236 veillard 1055: * xmlStrEqual:
1056: * @str1: the first xmlChar *
1057: * @str2: the second xmlChar *
1058: *
1059: * Check if both string are equal of have same content
1060: * Should be a bit more readable and faster than xmlStrEqual()
1061: *
1062: * Returns 1 if they are equal, 0 if they are different
1063: */
1064:
1065: int
1066: xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1067: if (str1 == str2) return(1);
1068: if (str1 == NULL) return(0);
1069: if (str2 == NULL) return(0);
1070: do {
1071: if (*str1++ != *str2) return(0);
1072: } while (*str2++);
1073: return(1);
1074: }
1075:
1076: /**
1.50 daniel 1077: * xmlStrncmp:
1.123 daniel 1078: * @str1: the first xmlChar *
1079: * @str2: the second xmlChar *
1.50 daniel 1080: * @len: the max comparison length
1081: *
1.123 daniel 1082: * a strncmp for xmlChar's
1.68 daniel 1083: *
1084: * Returns the integer result of the comparison
1.14 veillard 1085: */
1086:
1.55 daniel 1087: int
1.123 daniel 1088: xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1.14 veillard 1089: register int tmp;
1090:
1091: if (len <= 0) return(0);
1.232 veillard 1092: if (str1 == str2) return(0);
1.135 daniel 1093: if (str1 == NULL) return(-1);
1094: if (str2 == NULL) return(1);
1.14 veillard 1095: do {
1.232 veillard 1096: tmp = *str1++ - *str2;
1097: if (tmp != 0 || --len == 0) return(tmp);
1098: } while (*str2++ != 0);
1099: return 0;
1100: }
1101:
1102: static xmlChar casemap[256] = {
1103: 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1104: 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1105: 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1106: 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1107: 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1108: 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1109: 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1110: 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1111: 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1112: 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1113: 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1114: 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1115: 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1116: 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1117: 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1118: 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1119: 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1120: 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1121: 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1122: 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1123: 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1124: 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1125: 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1126: 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1127: 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1128: 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1129: 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1130: 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1131: 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1132: 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1133: 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1134: 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1135: };
1136:
1137: /**
1138: * xmlStrcasecmp:
1139: * @str1: the first xmlChar *
1140: * @str2: the second xmlChar *
1141: *
1142: * a strcasecmp for xmlChar's
1143: *
1144: * Returns the integer result of the comparison
1145: */
1146:
1147: int
1148: xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1149: register int tmp;
1150:
1151: if (str1 == str2) return(0);
1152: if (str1 == NULL) return(-1);
1153: if (str2 == NULL) return(1);
1154: do {
1155: tmp = casemap[*str1++] - casemap[*str2];
1.14 veillard 1156: if (tmp != 0) return(tmp);
1.232 veillard 1157: } while (*str2++ != 0);
1158: return 0;
1159: }
1160:
1161: /**
1162: * xmlStrncasecmp:
1163: * @str1: the first xmlChar *
1164: * @str2: the second xmlChar *
1165: * @len: the max comparison length
1166: *
1167: * a strncasecmp for xmlChar's
1168: *
1169: * Returns the integer result of the comparison
1170: */
1171:
1172: int
1173: xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1174: register int tmp;
1175:
1176: if (len <= 0) return(0);
1177: if (str1 == str2) return(0);
1178: if (str1 == NULL) return(-1);
1179: if (str2 == NULL) return(1);
1180: do {
1181: tmp = casemap[*str1++] - casemap[*str2];
1182: if (tmp != 0 || --len == 0) return(tmp);
1183: } while (*str2++ != 0);
1184: return 0;
1.14 veillard 1185: }
1186:
1.50 daniel 1187: /**
1188: * xmlStrchr:
1.123 daniel 1189: * @str: the xmlChar * array
1190: * @val: the xmlChar to search
1.50 daniel 1191: *
1.123 daniel 1192: * a strchr for xmlChar's
1.68 daniel 1193: *
1.123 daniel 1194: * Returns the xmlChar * for the first occurence or NULL.
1.14 veillard 1195: */
1196:
1.123 daniel 1197: const xmlChar *
1198: xmlStrchr(const xmlChar *str, xmlChar val) {
1.135 daniel 1199: if (str == NULL) return(NULL);
1.222 veillard 1200: while (*str != 0) { /* non input consuming */
1.123 daniel 1201: if (*str == val) return((xmlChar *) str);
1.14 veillard 1202: str++;
1203: }
1204: return(NULL);
1.89 daniel 1205: }
1206:
1207: /**
1208: * xmlStrstr:
1.123 daniel 1209: * @str: the xmlChar * array (haystack)
1210: * @val: the xmlChar to search (needle)
1.89 daniel 1211: *
1.123 daniel 1212: * a strstr for xmlChar's
1.89 daniel 1213: *
1.123 daniel 1214: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 1215: */
1216:
1.123 daniel 1217: const xmlChar *
1218: xmlStrstr(const xmlChar *str, xmlChar *val) {
1.89 daniel 1219: int n;
1220:
1221: if (str == NULL) return(NULL);
1222: if (val == NULL) return(NULL);
1223: n = xmlStrlen(val);
1224:
1225: if (n == 0) return(str);
1.222 veillard 1226: while (*str != 0) { /* non input consuming */
1.89 daniel 1227: if (*str == *val) {
1.123 daniel 1228: if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1.89 daniel 1229: }
1.232 veillard 1230: str++;
1231: }
1232: return(NULL);
1233: }
1234:
1235: /**
1236: * xmlStrcasestr:
1237: * @str: the xmlChar * array (haystack)
1238: * @val: the xmlChar to search (needle)
1239: *
1240: * a case-ignoring strstr for xmlChar's
1241: *
1242: * Returns the xmlChar * for the first occurence or NULL.
1243: */
1244:
1245: const xmlChar *
1246: xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1247: int n;
1248:
1249: if (str == NULL) return(NULL);
1250: if (val == NULL) return(NULL);
1251: n = xmlStrlen(val);
1252:
1253: if (n == 0) return(str);
1254: while (*str != 0) { /* non input consuming */
1255: if (casemap[*str] == casemap[*val])
1256: if (!xmlStrncasecmp(str, val, n)) return(str);
1.89 daniel 1257: str++;
1258: }
1259: return(NULL);
1260: }
1261:
1262: /**
1263: * xmlStrsub:
1.123 daniel 1264: * @str: the xmlChar * array (haystack)
1.89 daniel 1265: * @start: the index of the first char (zero based)
1266: * @len: the length of the substring
1267: *
1268: * Extract a substring of a given string
1269: *
1.123 daniel 1270: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 1271: */
1272:
1.123 daniel 1273: xmlChar *
1274: xmlStrsub(const xmlChar *str, int start, int len) {
1.89 daniel 1275: int i;
1276:
1277: if (str == NULL) return(NULL);
1278: if (start < 0) return(NULL);
1.90 daniel 1279: if (len < 0) return(NULL);
1.89 daniel 1280:
1281: for (i = 0;i < start;i++) {
1282: if (*str == 0) return(NULL);
1283: str++;
1284: }
1285: if (*str == 0) return(NULL);
1286: return(xmlStrndup(str, len));
1.14 veillard 1287: }
1.28 daniel 1288:
1.50 daniel 1289: /**
1290: * xmlStrlen:
1.123 daniel 1291: * @str: the xmlChar * array
1.50 daniel 1292: *
1.127 daniel 1293: * length of a xmlChar's string
1.68 daniel 1294: *
1.123 daniel 1295: * Returns the number of xmlChar contained in the ARRAY.
1.45 daniel 1296: */
1297:
1.55 daniel 1298: int
1.123 daniel 1299: xmlStrlen(const xmlChar *str) {
1.45 daniel 1300: int len = 0;
1301:
1302: if (str == NULL) return(0);
1.222 veillard 1303: while (*str != 0) { /* non input consuming */
1.45 daniel 1304: str++;
1305: len++;
1306: }
1307: return(len);
1308: }
1309:
1.50 daniel 1310: /**
1311: * xmlStrncat:
1.123 daniel 1312: * @cur: the original xmlChar * array
1313: * @add: the xmlChar * array added
1.50 daniel 1314: * @len: the length of @add
1315: *
1.123 daniel 1316: * a strncat for array of xmlChar's
1.68 daniel 1317: *
1.123 daniel 1318: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 1319: */
1320:
1.123 daniel 1321: xmlChar *
1322: xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1.45 daniel 1323: int size;
1.123 daniel 1324: xmlChar *ret;
1.45 daniel 1325:
1326: if ((add == NULL) || (len == 0))
1327: return(cur);
1328: if (cur == NULL)
1329: return(xmlStrndup(add, len));
1330:
1331: size = xmlStrlen(cur);
1.204 veillard 1332: ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1.45 daniel 1333: if (ret == NULL) {
1.241 veillard 1334: xmlGenericError(xmlGenericErrorContext,
1335: "xmlStrncat: realloc of %ld byte failed\n",
1.123 daniel 1336: (size + len + 1) * (long)sizeof(xmlChar));
1.45 daniel 1337: return(cur);
1338: }
1.123 daniel 1339: memcpy(&ret[size], add, len * sizeof(xmlChar));
1.45 daniel 1340: ret[size + len] = 0;
1341: return(ret);
1342: }
1343:
1.50 daniel 1344: /**
1345: * xmlStrcat:
1.123 daniel 1346: * @cur: the original xmlChar * array
1347: * @add: the xmlChar * array added
1.50 daniel 1348: *
1.152 daniel 1349: * a strcat for array of xmlChar's. Since they are supposed to be
1350: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1351: * a termination mark of '0'.
1.68 daniel 1352: *
1.123 daniel 1353: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 1354: */
1.123 daniel 1355: xmlChar *
1356: xmlStrcat(xmlChar *cur, const xmlChar *add) {
1357: const xmlChar *p = add;
1.45 daniel 1358:
1359: if (add == NULL) return(cur);
1360: if (cur == NULL)
1361: return(xmlStrdup(add));
1362:
1.222 veillard 1363: while (*p != 0) p++; /* non input consuming */
1.45 daniel 1364: return(xmlStrncat(cur, add, p - add));
1365: }
1366:
1367: /************************************************************************
1368: * *
1369: * Commodity functions, cleanup needed ? *
1370: * *
1371: ************************************************************************/
1372:
1.50 daniel 1373: /**
1374: * areBlanks:
1375: * @ctxt: an XML parser context
1.123 daniel 1376: * @str: a xmlChar *
1.50 daniel 1377: * @len: the size of @str
1378: *
1.45 daniel 1379: * Is this a sequence of blank chars that one can ignore ?
1.50 daniel 1380: *
1.68 daniel 1381: * Returns 1 if ignorable 0 otherwise.
1.45 daniel 1382: */
1383:
1.123 daniel 1384: static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1.104 daniel 1385: int i, ret;
1.45 daniel 1386: xmlNodePtr lastChild;
1387:
1.176 daniel 1388: /*
1389: * Check for xml:space value.
1390: */
1391: if (*(ctxt->space) == 1)
1392: return(0);
1393:
1394: /*
1395: * Check that the string is made of blanks
1396: */
1.45 daniel 1397: for (i = 0;i < len;i++)
1398: if (!(IS_BLANK(str[i]))) return(0);
1399:
1.176 daniel 1400: /*
1401: * Look if the element is mixed content in the Dtd if available
1402: */
1.104 daniel 1403: if (ctxt->myDoc != NULL) {
1404: ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1405: if (ret == 0) return(1);
1406: if (ret == 1) return(0);
1407: }
1.176 daniel 1408:
1.104 daniel 1409: /*
1.176 daniel 1410: * Otherwise, heuristic :-\
1.104 daniel 1411: */
1.179 daniel 1412: if (ctxt->keepBlanks)
1413: return(0);
1414: if (RAW != '<') return(0);
1415: if (ctxt->node == NULL) return(0);
1416: if ((ctxt->node->children == NULL) &&
1417: (RAW == '<') && (NXT(1) == '/')) return(0);
1418:
1.45 daniel 1419: lastChild = xmlGetLastChild(ctxt->node);
1420: if (lastChild == NULL) {
1421: if (ctxt->node->content != NULL) return(0);
1422: } else if (xmlNodeIsText(lastChild))
1423: return(0);
1.157 daniel 1424: else if ((ctxt->node->children != NULL) &&
1425: (xmlNodeIsText(ctxt->node->children)))
1.104 daniel 1426: return(0);
1.45 daniel 1427: return(1);
1428: }
1429:
/*
 * Forward declarations for recursive behaviour.
 */
1.77 daniel 1433: void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1434: void xmlParseReference(xmlParserCtxtPtr ctxt);
1.45 daniel 1435:
1.28 daniel 1436: /************************************************************************
1437: * *
1438: * Extra stuff for namespace support *
1439: * Relates to http://www.w3.org/TR/WD-xml-names *
1440: * *
1441: ************************************************************************/
1442:
1.50 daniel 1443: /**
1.72 daniel 1444: * xmlSplitQName:
1.162 daniel 1445: * @ctxt: an XML parser context
1.72 daniel 1446: * @name: an XML parser context
1.123 daniel 1447: * @prefix: a xmlChar **
1.72 daniel 1448: *
1.206 veillard 1449: * parse an UTF8 encoded XML qualified name string
1.72 daniel 1450: *
1451: * [NS 5] QName ::= (Prefix ':')? LocalPart
1452: *
1453: * [NS 6] Prefix ::= NCName
1454: *
1455: * [NS 7] LocalPart ::= NCName
1456: *
1.127 daniel 1457: * Returns the local part, and prefix is updated
1.72 daniel 1458: * to get the Prefix if any.
1459: */
1460:
1.123 daniel 1461: xmlChar *
1.162 daniel 1462: xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1463: xmlChar buf[XML_MAX_NAMELEN + 5];
1.222 veillard 1464: xmlChar *buffer = NULL;
1.162 daniel 1465: int len = 0;
1.222 veillard 1466: int max = XML_MAX_NAMELEN;
1.123 daniel 1467: xmlChar *ret = NULL;
1468: const xmlChar *cur = name;
1.206 veillard 1469: int c;
1.72 daniel 1470:
1471: *prefix = NULL;
1.113 daniel 1472:
1473: /* xml: prefix is not really a namespace */
1474: if ((cur[0] == 'x') && (cur[1] == 'm') &&
1475: (cur[2] == 'l') && (cur[3] == ':'))
1476: return(xmlStrdup(name));
1477:
1.162 daniel 1478: /* nasty but valid */
1479: if (cur[0] == ':')
1480: return(xmlStrdup(name));
1481:
1.206 veillard 1482: c = *cur++;
1.222 veillard 1483: while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1.206 veillard 1484: buf[len++] = c;
1485: c = *cur++;
1.162 daniel 1486: }
1.222 veillard 1487: if (len >= max) {
1488: /*
1489: * Okay someone managed to make a huge name, so he's ready to pay
1490: * for the processing speed.
1491: */
1492: max = len * 2;
1493:
1494: buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1495: if (buffer == NULL) {
1496: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1497: ctxt->sax->error(ctxt->userData,
1498: "xmlSplitQName: out of memory\n");
1499: return(NULL);
1500: }
1501: memcpy(buffer, buf, len);
1502: while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1503: if (len + 10 > max) {
1504: max *= 2;
1505: buffer = (xmlChar *) xmlRealloc(buffer,
1506: max * sizeof(xmlChar));
1507: if (buffer == NULL) {
1508: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1509: ctxt->sax->error(ctxt->userData,
1510: "xmlSplitQName: out of memory\n");
1511: return(NULL);
1512: }
1513: }
1514: buffer[len++] = c;
1515: c = *cur++;
1516: }
1517: buffer[len] = 0;
1518: }
1.72 daniel 1519:
1.222 veillard 1520: if (buffer == NULL)
1521: ret = xmlStrndup(buf, len);
1522: else {
1523: ret = buffer;
1524: buffer = NULL;
1525: max = XML_MAX_NAMELEN;
1526: }
1527:
1.72 daniel 1528:
1.162 daniel 1529: if (c == ':') {
1.206 veillard 1530: c = *cur++;
1531: if (c == 0) return(ret);
1.72 daniel 1532: *prefix = ret;
1.162 daniel 1533: len = 0;
1.72 daniel 1534:
1.222 veillard 1535: while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1.206 veillard 1536: buf[len++] = c;
1537: c = *cur++;
1.162 daniel 1538: }
1.222 veillard 1539: if (len >= max) {
1540: /*
1541: * Okay someone managed to make a huge name, so he's ready to pay
1542: * for the processing speed.
1543: */
1.229 veillard 1544: max = len * 2;
1545:
1546: buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1547: if (buffer == NULL) {
1.55 daniel 1548: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 1549: ctxt->sax->error(ctxt->userData,
1.229 veillard 1550: "xmlSplitQName: out of memory\n");
1551: return(NULL);
1552: }
1553: memcpy(buffer, buf, len);
1554: while (c != 0) { /* tested bigname2.xml */
1555: if (len + 10 > max) {
1556: max *= 2;
1557: buffer = (xmlChar *) xmlRealloc(buffer,
1558: max * sizeof(xmlChar));
1559: if (buffer == NULL) {
1560: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1561: ctxt->sax->error(ctxt->userData,
1562: "xmlSplitQName: out of memory\n");
1563: return(NULL);
1564: }
1565: }
1566: buffer[len++] = c;
1567: c = *cur++;
1.122 daniel 1568: }
1.229 veillard 1569: buffer[len] = 0;
1570: }
1571:
1572: if (buffer == NULL)
1573: ret = xmlStrndup(buf, len);
1574: else {
1575: ret = buffer;
1576: }
1.45 daniel 1577: }
1578:
1.229 veillard 1579: return(ret);
1.45 daniel 1580: }
1581:
1.28 daniel 1582: /************************************************************************
1583: * *
1584: * The parser itself *
1585: * Relates to http://www.w3.org/TR/REC-xml *
1586: * *
1587: ************************************************************************/
1.14 veillard 1588:
1.50 daniel 1589: /**
1590: * xmlParseName:
1591: * @ctxt: an XML parser context
1592: *
1593: * parse an XML name.
1.22 daniel 1594: *
1595: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1596: * CombiningChar | Extender
1597: *
1598: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1599: *
1600: * [6] Names ::= Name (S Name)*
1.68 daniel 1601: *
1602: * Returns the Name parsed or NULL
1.1 veillard 1603: */
1604:
1.123 daniel 1605: xmlChar *
1.55 daniel 1606: xmlParseName(xmlParserCtxtPtr ctxt) {
1.160 daniel 1607: xmlChar buf[XML_MAX_NAMELEN + 5];
1608: int len = 0, l;
1609: int c;
1.222 veillard 1610: int count = 0;
1.1 veillard 1611:
1.91 daniel 1612: GROW;
1.160 daniel 1613: c = CUR_CHAR(l);
1.190 daniel 1614: if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1615: (!IS_LETTER(c) && (c != '_') &&
1616: (c != ':'))) {
1.91 daniel 1617: return(NULL);
1618: }
1.40 daniel 1619:
1.222 veillard 1620: while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1.190 daniel 1621: ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1622: (c == '.') || (c == '-') ||
1623: (c == '_') || (c == ':') ||
1624: (IS_COMBINING(c)) ||
1625: (IS_EXTENDER(c)))) {
1.222 veillard 1626: if (count++ > 100) {
1627: count = 0;
1628: GROW;
1629: }
1.160 daniel 1630: COPY_BUF(l,buf,len,c);
1631: NEXTL(l);
1632: c = CUR_CHAR(l);
1.91 daniel 1633: if (len >= XML_MAX_NAMELEN) {
1.222 veillard 1634: /*
1635: * Okay someone managed to make a huge name, so he's ready to pay
1636: * for the processing speed.
1637: */
1638: xmlChar *buffer;
1639: int max = len * 2;
1640:
1641: buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1642: if (buffer == NULL) {
1643: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1644: ctxt->sax->error(ctxt->userData,
1645: "xmlParseName: out of memory\n");
1646: return(NULL);
1647: }
1648: memcpy(buffer, buf, len);
1649: while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1.160 daniel 1650: (c == '.') || (c == '-') ||
1651: (c == '_') || (c == ':') ||
1652: (IS_COMBINING(c)) ||
1653: (IS_EXTENDER(c))) {
1.222 veillard 1654: if (count++ > 100) {
1655: count = 0;
1656: GROW;
1657: }
1658: if (len + 10 > max) {
1659: max *= 2;
1660: buffer = (xmlChar *) xmlRealloc(buffer,
1661: max * sizeof(xmlChar));
1662: if (buffer == NULL) {
1663: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1664: ctxt->sax->error(ctxt->userData,
1665: "xmlParseName: out of memory\n");
1666: return(NULL);
1667: }
1668: }
1669: COPY_BUF(l,buffer,len,c);
1.160 daniel 1670: NEXTL(l);
1671: c = CUR_CHAR(l);
1.97 daniel 1672: }
1.222 veillard 1673: buffer[len] = 0;
1674: return(buffer);
1.91 daniel 1675: }
1676: }
1677: return(xmlStrndup(buf, len));
1.22 daniel 1678: }
1679:
1.50 daniel 1680: /**
1.135 daniel 1681: * xmlParseStringName:
1682: * @ctxt: an XML parser context
1.229 veillard 1683: * @str: a pointer to the string pointer (IN/OUT)
1.135 daniel 1684: *
1685: * parse an XML name.
1686: *
1687: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1688: * CombiningChar | Extender
1689: *
1690: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1691: *
1692: * [6] Names ::= Name (S Name)*
1693: *
1694: * Returns the Name parsed or NULL. The str pointer
1695: * is updated to the current location in the string.
1696: */
1697:
1698: xmlChar *
1699: xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1.176 daniel 1700: xmlChar buf[XML_MAX_NAMELEN + 5];
1701: const xmlChar *cur = *str;
1702: int len = 0, l;
1703: int c;
1.135 daniel 1704:
1.176 daniel 1705: c = CUR_SCHAR(cur, l);
1706: if (!IS_LETTER(c) && (c != '_') &&
1707: (c != ':')) {
1.135 daniel 1708: return(NULL);
1709: }
1710:
1.222 veillard 1711: while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1.176 daniel 1712: (c == '.') || (c == '-') ||
1713: (c == '_') || (c == ':') ||
1714: (IS_COMBINING(c)) ||
1715: (IS_EXTENDER(c))) {
1716: COPY_BUF(l,buf,len,c);
1717: cur += l;
1718: c = CUR_SCHAR(cur, l);
1.222 veillard 1719: if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1720: /*
1721: * Okay someone managed to make a huge name, so he's ready to pay
1722: * for the processing speed.
1723: */
1724: xmlChar *buffer;
1725: int max = len * 2;
1726:
1727: buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1728: if (buffer == NULL) {
1729: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1730: ctxt->sax->error(ctxt->userData,
1731: "xmlParseStringName: out of memory\n");
1732: return(NULL);
1733: }
1734: memcpy(buffer, buf, len);
1735: while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1.176 daniel 1736: (c == '.') || (c == '-') ||
1737: (c == '_') || (c == ':') ||
1738: (IS_COMBINING(c)) ||
1739: (IS_EXTENDER(c))) {
1.222 veillard 1740: if (len + 10 > max) {
1741: max *= 2;
1742: buffer = (xmlChar *) xmlRealloc(buffer,
1743: max * sizeof(xmlChar));
1744: if (buffer == NULL) {
1745: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1746: ctxt->sax->error(ctxt->userData,
1747: "xmlParseStringName: out of memory\n");
1748: return(NULL);
1749: }
1750: }
1751: COPY_BUF(l,buffer,len,c);
1.176 daniel 1752: cur += l;
1753: c = CUR_SCHAR(cur, l);
1754: }
1.222 veillard 1755: buffer[len] = 0;
1756: *str = cur;
1757: return(buffer);
1.176 daniel 1758: }
1.135 daniel 1759: }
1.176 daniel 1760: *str = cur;
1761: return(xmlStrndup(buf, len));
1.135 daniel 1762: }
1763:
1764: /**
1.50 daniel 1765: * xmlParseNmtoken:
1766: * @ctxt: an XML parser context
1767: *
1768: * parse an XML Nmtoken.
1.22 daniel 1769: *
1770: * [7] Nmtoken ::= (NameChar)+
1771: *
1772: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1.68 daniel 1773: *
1774: * Returns the Nmtoken parsed or NULL
1.22 daniel 1775: */
1776:
1.123 daniel 1777: xmlChar *
1.55 daniel 1778: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1.222 veillard 1779: xmlChar buf[XML_MAX_NAMELEN + 5];
1780: int len = 0, l;
1781: int c;
1782: int count = 0;
1.22 daniel 1783:
1.91 daniel 1784: GROW;
1.160 daniel 1785: c = CUR_CHAR(l);
1.222 veillard 1786:
1787: while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1.160 daniel 1788: (c == '.') || (c == '-') ||
1789: (c == '_') || (c == ':') ||
1790: (IS_COMBINING(c)) ||
1791: (IS_EXTENDER(c))) {
1.222 veillard 1792: if (count++ > 100) {
1793: count = 0;
1794: GROW;
1795: }
1.160 daniel 1796: COPY_BUF(l,buf,len,c);
1797: NEXTL(l);
1798: c = CUR_CHAR(l);
1.91 daniel 1799: if (len >= XML_MAX_NAMELEN) {
1.222 veillard 1800: /*
1801: * Okay someone managed to make a huge token, so he's ready to pay
1802: * for the processing speed.
1803: */
1804: xmlChar *buffer;
1805: int max = len * 2;
1806:
1807: buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1808: if (buffer == NULL) {
1809: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1810: ctxt->sax->error(ctxt->userData,
1811: "xmlParseNmtoken: out of memory\n");
1812: return(NULL);
1813: }
1814: memcpy(buffer, buf, len);
1815: while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1.160 daniel 1816: (c == '.') || (c == '-') ||
1817: (c == '_') || (c == ':') ||
1818: (IS_COMBINING(c)) ||
1819: (IS_EXTENDER(c))) {
1.222 veillard 1820: if (count++ > 100) {
1821: count = 0;
1822: GROW;
1823: }
1824: if (len + 10 > max) {
1825: max *= 2;
1826: buffer = (xmlChar *) xmlRealloc(buffer,
1827: max * sizeof(xmlChar));
1828: if (buffer == NULL) {
1829: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1830: ctxt->sax->error(ctxt->userData,
1831: "xmlParseName: out of memory\n");
1832: return(NULL);
1833: }
1834: }
1835: COPY_BUF(l,buffer,len,c);
1.160 daniel 1836: NEXTL(l);
1837: c = CUR_CHAR(l);
1838: }
1.222 veillard 1839: buffer[len] = 0;
1840: return(buffer);
1.91 daniel 1841: }
1842: }
1.168 daniel 1843: if (len == 0)
1844: return(NULL);
1.91 daniel 1845: return(xmlStrndup(buf, len));
1.1 veillard 1846: }
1847:
1.50 daniel 1848: /**
1849: * xmlParseEntityValue:
1850: * @ctxt: an XML parser context
1.78 daniel 1851: * @orig: if non-NULL store a copy of the original entity value
1.50 daniel 1852: *
1.229 veillard 1853: * parse a value for ENTITY declarations
1.24 daniel 1854: *
1855: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1856: * "'" ([^%&'] | PEReference | Reference)* "'"
1.68 daniel 1857: *
1.78 daniel 1858: * Returns the EntityValue parsed with reference substitued or NULL
1.24 daniel 1859: */
1860:
1.123 daniel 1861: xmlChar *
1862: xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1.135 daniel 1863: xmlChar *buf = NULL;
1864: int len = 0;
1.140 daniel 1865: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 1866: int c, l;
1.135 daniel 1867: xmlChar stop;
1.123 daniel 1868: xmlChar *ret = NULL;
1.176 daniel 1869: const xmlChar *cur = NULL;
1.98 daniel 1870: xmlParserInputPtr input;
1.24 daniel 1871:
1.152 daniel 1872: if (RAW == '"') stop = '"';
1873: else if (RAW == '\'') stop = '\'';
1.135 daniel 1874: else {
1875: ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
1876: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1877: ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
1878: ctxt->wellFormed = 0;
1.180 daniel 1879: ctxt->disableSAX = 1;
1.135 daniel 1880: return(NULL);
1881: }
1882: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
1883: if (buf == NULL) {
1.241 veillard 1884: xmlGenericError(xmlGenericErrorContext,
1885: "malloc of %d byte failed\n", size);
1.135 daniel 1886: return(NULL);
1887: }
1.94 daniel 1888:
1.135 daniel 1889: /*
1890: * The content of the entity definition is copied in a buffer.
1891: */
1.94 daniel 1892:
1.135 daniel 1893: ctxt->instate = XML_PARSER_ENTITY_VALUE;
1894: input = ctxt->input;
1895: GROW;
1896: NEXT;
1.152 daniel 1897: c = CUR_CHAR(l);
1.135 daniel 1898: /*
1899: * NOTE: 4.4.5 Included in Literal
1900: * When a parameter entity reference appears in a literal entity
1901: * value, ... a single or double quote character in the replacement
1902: * text is always treated as a normal data character and will not
1903: * terminate the literal.
1904: * In practice it means we stop the loop only when back at parsing
1905: * the initial entity and the quote is found
1906: */
1.222 veillard 1907: while ((IS_CHAR(c)) && ((c != stop) || /* checked */
1908: (ctxt->input != input))) {
1.152 daniel 1909: if (len + 5 >= size) {
1.135 daniel 1910: size *= 2;
1.204 veillard 1911: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 1912: if (buf == NULL) {
1.241 veillard 1913: xmlGenericError(xmlGenericErrorContext,
1914: "realloc of %d byte failed\n", size);
1.135 daniel 1915: return(NULL);
1.94 daniel 1916: }
1.79 daniel 1917: }
1.152 daniel 1918: COPY_BUF(l,buf,len,c);
1919: NEXTL(l);
1.98 daniel 1920: /*
1.135 daniel 1921: * Pop-up of finished entities.
1.98 daniel 1922: */
1.222 veillard 1923: while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
1.135 daniel 1924: xmlPopInput(ctxt);
1.152 daniel 1925:
1.221 veillard 1926: GROW;
1.152 daniel 1927: c = CUR_CHAR(l);
1.135 daniel 1928: if (c == 0) {
1.94 daniel 1929: GROW;
1.152 daniel 1930: c = CUR_CHAR(l);
1.79 daniel 1931: }
1.135 daniel 1932: }
1933: buf[len] = 0;
1934:
1935: /*
1.176 daniel 1936: * Raise problem w.r.t. '&' and '%' being used in non-entities
1937: * reference constructs. Note Charref will be handled in
1938: * xmlStringDecodeEntities()
1939: */
1940: cur = buf;
1.223 veillard 1941: while (*cur != 0) { /* non input consuming */
1.176 daniel 1942: if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
1943: xmlChar *name;
1944: xmlChar tmp = *cur;
1945:
1946: cur++;
1947: name = xmlParseStringName(ctxt, &cur);
1948: if ((name == NULL) || (*cur != ';')) {
1.230 veillard 1949: ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
1.176 daniel 1950: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1951: ctxt->sax->error(ctxt->userData,
1952: "EntityValue: '%c' forbidden except for entities references\n",
1953: tmp);
1954: ctxt->wellFormed = 0;
1.180 daniel 1955: ctxt->disableSAX = 1;
1.176 daniel 1956: }
1957: if ((ctxt->inSubset == 1) && (tmp == '%')) {
1.230 veillard 1958: ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
1.176 daniel 1959: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1960: ctxt->sax->error(ctxt->userData,
1961: "EntityValue: PEReferences forbidden in internal subset\n",
1962: tmp);
1963: ctxt->wellFormed = 0;
1.180 daniel 1964: ctxt->disableSAX = 1;
1.176 daniel 1965: }
1966: if (name != NULL)
1967: xmlFree(name);
1968: }
1969: cur++;
1970: }
1971:
1972: /*
1.135 daniel 1973: * Then PEReference entities are substituted.
1974: */
1975: if (c != stop) {
1976: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.55 daniel 1977: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 1978: ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
1.59 daniel 1979: ctxt->wellFormed = 0;
1.180 daniel 1980: ctxt->disableSAX = 1;
1.170 daniel 1981: xmlFree(buf);
1.135 daniel 1982: } else {
1983: NEXT;
1984: /*
1985: * NOTE: 4.4.7 Bypassed
1986: * When a general entity reference appears in the EntityValue in
1987: * an entity declaration, it is bypassed and left as is.
1.176 daniel 1988: * so XML_SUBSTITUTE_REF is not set here.
1.135 daniel 1989: */
1990: ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
1991: 0, 0, 0);
1992: if (orig != NULL)
1993: *orig = buf;
1994: else
1995: xmlFree(buf);
1.24 daniel 1996: }
1997:
1998: return(ret);
1999: }
2000:
1.50 daniel 2001: /**
2002: * xmlParseAttValue:
2003: * @ctxt: an XML parser context
2004: *
2005: * parse a value for an attribute
1.78 daniel 2006: * Note: the parser won't do substitution of entities here, this
1.113 daniel 2007: * will be handled later in xmlStringGetNodeList
1.29 daniel 2008: *
2009: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2010: * "'" ([^<&'] | Reference)* "'"
1.68 daniel 2011: *
1.129 daniel 2012: * 3.3.3 Attribute-Value Normalization:
2013: * Before the value of an attribute is passed to the application or
2014: * checked for validity, the XML processor must normalize it as follows:
2015: * - a character reference is processed by appending the referenced
2016: * character to the attribute value
2017: * - an entity reference is processed by recursively processing the
2018: * replacement text of the entity
2019: * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2020: * appending #x20 to the normalized value, except that only a single
2021: * #x20 is appended for a "#xD#xA" sequence that is part of an external
2022: * parsed entity or the literal entity value of an internal parsed entity
2023: * - other characters are processed by appending them to the normalized value
1.130 daniel 2024: * If the declared value is not CDATA, then the XML processor must further
2025: * process the normalized attribute value by discarding any leading and
2026: * trailing space (#x20) characters, and by replacing sequences of space
2027: * (#x20) characters by a single space (#x20) character.
2028: * All attributes for which no declaration has been read should be treated
2029: * by a non-validating parser as if declared CDATA.
1.129 daniel 2030: *
2031: * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
1.29 daniel 2032: */
2033:
1.123 daniel 2034: xmlChar *
1.55 daniel 2035: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.129 daniel 2036: xmlChar limit = 0;
1.198 daniel 2037: xmlChar *buf = NULL;
2038: int len = 0;
2039: int buf_size = 0;
2040: int c, l;
1.129 daniel 2041: xmlChar *current = NULL;
2042: xmlEntityPtr ent;
2043:
1.29 daniel 2044:
1.91 daniel 2045: SHRINK;
1.151 daniel 2046: if (NXT(0) == '"') {
1.96 daniel 2047: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.129 daniel 2048: limit = '"';
1.40 daniel 2049: NEXT;
1.151 daniel 2050: } else if (NXT(0) == '\'') {
1.129 daniel 2051: limit = '\'';
1.96 daniel 2052: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.40 daniel 2053: NEXT;
1.29 daniel 2054: } else {
1.123 daniel 2055: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
1.55 daniel 2056: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2057: ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
1.59 daniel 2058: ctxt->wellFormed = 0;
1.180 daniel 2059: ctxt->disableSAX = 1;
1.129 daniel 2060: return(NULL);
1.29 daniel 2061: }
2062:
1.129 daniel 2063: /*
2064: * allocate a translation buffer.
2065: */
1.198 daniel 2066: buf_size = XML_PARSER_BUFFER_SIZE;
2067: buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2068: if (buf == NULL) {
1.129 daniel 2069: perror("xmlParseAttValue: malloc failed");
2070: return(NULL);
2071: }
2072:
2073: /*
2074: * Ok loop until we reach one of the ending char or a size limit.
2075: */
1.198 daniel 2076: c = CUR_CHAR(l);
1.223 veillard 2077: while (((NXT(0) != limit) && /* checked */
2078: (c != '<')) || (ctxt->token != 0)) {
1.198 daniel 2079: if (c == 0) break;
1.205 veillard 2080: if (ctxt->token == '&') {
1.229 veillard 2081: /*
2082: * The reparsing will be done in xmlStringGetNodeList()
2083: * called by the attribute() function in SAX.c
2084: */
1.205 veillard 2085: static xmlChar buffer[6] = "&";
2086:
2087: if (len > buf_size - 10) {
2088: growBuffer(buf);
2089: }
2090: current = &buffer[0];
1.223 veillard 2091: while (*current != 0) { /* non input consuming */
1.205 veillard 2092: buf[len++] = *current++;
2093: }
2094: ctxt->token = 0;
2095: } else if ((c == '&') && (NXT(1) == '#')) {
1.129 daniel 2096: int val = xmlParseCharRef(ctxt);
1.229 veillard 2097: if (val == '&') {
2098: /*
2099: * The reparsing will be done in xmlStringGetNodeList()
2100: * called by the attribute() function in SAX.c
2101: */
2102: static xmlChar buffer[6] = "&";
2103:
2104: if (len > buf_size - 10) {
2105: growBuffer(buf);
2106: }
2107: current = &buffer[0];
2108: while (*current != 0) { /* non input consuming */
2109: buf[len++] = *current++;
2110: }
2111: } else {
1.239 veillard 2112: len += xmlCopyChar(0, &buf[len], val);
1.229 veillard 2113: }
1.198 daniel 2114: } else if (c == '&') {
1.129 daniel 2115: ent = xmlParseEntityRef(ctxt);
2116: if ((ent != NULL) &&
2117: (ctxt->replaceEntities != 0)) {
1.185 daniel 2118: xmlChar *rep;
2119:
1.186 daniel 2120: if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2121: rep = xmlStringDecodeEntities(ctxt, ent->content,
1.185 daniel 2122: XML_SUBSTITUTE_REF, 0, 0, 0);
1.186 daniel 2123: if (rep != NULL) {
2124: current = rep;
1.223 veillard 2125: while (*current != 0) { /* non input consuming */
1.198 daniel 2126: buf[len++] = *current++;
2127: if (len > buf_size - 10) {
2128: growBuffer(buf);
1.186 daniel 2129: }
1.185 daniel 2130: }
1.186 daniel 2131: xmlFree(rep);
1.129 daniel 2132: }
1.186 daniel 2133: } else {
2134: if (ent->content != NULL)
1.198 daniel 2135: buf[len++] = ent->content[0];
1.129 daniel 2136: }
2137: } else if (ent != NULL) {
2138: int i = xmlStrlen(ent->name);
2139: const xmlChar *cur = ent->name;
2140:
1.186 daniel 2141: /*
2142: * This may look absurd but is needed to detect
2143: * entities problems
2144: */
1.211 veillard 2145: if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2146: (ent->content != NULL)) {
1.186 daniel 2147: xmlChar *rep;
2148: rep = xmlStringDecodeEntities(ctxt, ent->content,
2149: XML_SUBSTITUTE_REF, 0, 0, 0);
2150: if (rep != NULL)
2151: xmlFree(rep);
2152: }
2153:
2154: /*
2155: * Just output the reference
2156: */
1.198 daniel 2157: buf[len++] = '&';
2158: if (len > buf_size - i - 10) {
2159: growBuffer(buf);
1.129 daniel 2160: }
2161: for (;i > 0;i--)
1.198 daniel 2162: buf[len++] = *cur++;
2163: buf[len++] = ';';
1.129 daniel 2164: }
2165: } else {
1.198 daniel 2166: if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2167: COPY_BUF(l,buf,len,0x20);
2168: if (len > buf_size - 10) {
2169: growBuffer(buf);
1.129 daniel 2170: }
2171: } else {
1.198 daniel 2172: COPY_BUF(l,buf,len,c);
2173: if (len > buf_size - 10) {
2174: growBuffer(buf);
1.129 daniel 2175: }
2176: }
1.198 daniel 2177: NEXTL(l);
1.129 daniel 2178: }
1.198 daniel 2179: GROW;
2180: c = CUR_CHAR(l);
1.129 daniel 2181: }
1.198 daniel 2182: buf[len++] = 0;
1.152 daniel 2183: if (RAW == '<') {
1.230 veillard 2184: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.129 daniel 2185: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2186: ctxt->sax->error(ctxt->userData,
2187: "Unescaped '<' not allowed in attributes values\n");
2188: ctxt->wellFormed = 0;
1.180 daniel 2189: ctxt->disableSAX = 1;
1.152 daniel 2190: } else if (RAW != limit) {
1.230 veillard 2191: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
1.129 daniel 2192: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2193: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2194: ctxt->wellFormed = 0;
1.180 daniel 2195: ctxt->disableSAX = 1;
1.129 daniel 2196: } else
2197: NEXT;
1.198 daniel 2198: return(buf);
1.29 daniel 2199: }
2200:
1.50 daniel 2201: /**
2202: * xmlParseSystemLiteral:
2203: * @ctxt: an XML parser context
2204: *
2205: * parse an XML Literal
1.21 daniel 2206: *
1.22 daniel 2207: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.68 daniel 2208: *
2209: * Returns the SystemLiteral parsed or NULL
1.21 daniel 2210: */
2211:
1.123 daniel 2212: xmlChar *
1.55 daniel 2213: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 2214: xmlChar *buf = NULL;
2215: int len = 0;
1.140 daniel 2216: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 2217: int cur, l;
1.135 daniel 2218: xmlChar stop;
1.168 daniel 2219: int state = ctxt->instate;
1.223 veillard 2220: int count = 0;
1.21 daniel 2221:
1.91 daniel 2222: SHRINK;
1.152 daniel 2223: if (RAW == '"') {
1.40 daniel 2224: NEXT;
1.135 daniel 2225: stop = '"';
1.152 daniel 2226: } else if (RAW == '\'') {
1.40 daniel 2227: NEXT;
1.135 daniel 2228: stop = '\'';
1.21 daniel 2229: } else {
1.230 veillard 2230: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.55 daniel 2231: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2232: ctxt->sax->error(ctxt->userData,
2233: "SystemLiteral \" or ' expected\n");
1.59 daniel 2234: ctxt->wellFormed = 0;
1.180 daniel 2235: ctxt->disableSAX = 1;
1.135 daniel 2236: return(NULL);
1.21 daniel 2237: }
2238:
1.135 daniel 2239: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2240: if (buf == NULL) {
1.241 veillard 2241: xmlGenericError(xmlGenericErrorContext,
2242: "malloc of %d byte failed\n", size);
1.135 daniel 2243: return(NULL);
2244: }
1.168 daniel 2245: ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
1.152 daniel 2246: cur = CUR_CHAR(l);
1.223 veillard 2247: while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
1.152 daniel 2248: if (len + 5 >= size) {
1.135 daniel 2249: size *= 2;
1.204 veillard 2250: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 2251: if (buf == NULL) {
1.241 veillard 2252: xmlGenericError(xmlGenericErrorContext,
2253: "realloc of %d byte failed\n", size);
1.204 veillard 2254: ctxt->instate = (xmlParserInputState) state;
1.135 daniel 2255: return(NULL);
2256: }
2257: }
1.223 veillard 2258: count++;
2259: if (count > 50) {
2260: GROW;
2261: count = 0;
2262: }
1.152 daniel 2263: COPY_BUF(l,buf,len,cur);
2264: NEXTL(l);
2265: cur = CUR_CHAR(l);
1.135 daniel 2266: if (cur == 0) {
2267: GROW;
2268: SHRINK;
1.152 daniel 2269: cur = CUR_CHAR(l);
1.135 daniel 2270: }
2271: }
2272: buf[len] = 0;
1.204 veillard 2273: ctxt->instate = (xmlParserInputState) state;
1.135 daniel 2274: if (!IS_CHAR(cur)) {
1.230 veillard 2275: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
1.135 daniel 2276: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2277: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2278: ctxt->wellFormed = 0;
1.180 daniel 2279: ctxt->disableSAX = 1;
1.135 daniel 2280: } else {
2281: NEXT;
2282: }
2283: return(buf);
1.21 daniel 2284: }
2285:
1.50 daniel 2286: /**
2287: * xmlParsePubidLiteral:
2288: * @ctxt: an XML parser context
1.21 daniel 2289: *
1.50 daniel 2290: * parse an XML public literal
1.68 daniel 2291: *
2292: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2293: *
2294: * Returns the PubidLiteral parsed or NULL.
1.21 daniel 2295: */
2296:
1.123 daniel 2297: xmlChar *
1.55 daniel 2298: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 2299: xmlChar *buf = NULL;
2300: int len = 0;
1.140 daniel 2301: int size = XML_PARSER_BUFFER_SIZE;
1.135 daniel 2302: xmlChar cur;
2303: xmlChar stop;
1.223 veillard 2304: int count = 0;
1.125 daniel 2305:
1.91 daniel 2306: SHRINK;
1.152 daniel 2307: if (RAW == '"') {
1.40 daniel 2308: NEXT;
1.135 daniel 2309: stop = '"';
1.152 daniel 2310: } else if (RAW == '\'') {
1.40 daniel 2311: NEXT;
1.135 daniel 2312: stop = '\'';
1.21 daniel 2313: } else {
1.230 veillard 2314: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.55 daniel 2315: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2316: ctxt->sax->error(ctxt->userData,
2317: "SystemLiteral \" or ' expected\n");
1.59 daniel 2318: ctxt->wellFormed = 0;
1.180 daniel 2319: ctxt->disableSAX = 1;
1.135 daniel 2320: return(NULL);
2321: }
2322: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2323: if (buf == NULL) {
1.241 veillard 2324: xmlGenericError(xmlGenericErrorContext,
2325: "malloc of %d byte failed\n", size);
1.135 daniel 2326: return(NULL);
2327: }
2328: cur = CUR;
1.223 veillard 2329: while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
1.135 daniel 2330: if (len + 1 >= size) {
2331: size *= 2;
1.204 veillard 2332: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 2333: if (buf == NULL) {
1.241 veillard 2334: xmlGenericError(xmlGenericErrorContext,
2335: "realloc of %d byte failed\n", size);
1.135 daniel 2336: return(NULL);
2337: }
2338: }
2339: buf[len++] = cur;
1.223 veillard 2340: count++;
2341: if (count > 50) {
2342: GROW;
2343: count = 0;
2344: }
1.135 daniel 2345: NEXT;
2346: cur = CUR;
2347: if (cur == 0) {
2348: GROW;
2349: SHRINK;
2350: cur = CUR;
2351: }
2352: }
2353: buf[len] = 0;
2354: if (cur != stop) {
1.230 veillard 2355: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
1.135 daniel 2356: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2357: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2358: ctxt->wellFormed = 0;
1.180 daniel 2359: ctxt->disableSAX = 1;
1.135 daniel 2360: } else {
2361: NEXT;
1.21 daniel 2362: }
1.135 daniel 2363: return(buf);
1.21 daniel 2364: }
2365:
1.50 daniel 2366: /**
2367: * xmlParseCharData:
2368: * @ctxt: an XML parser context
2369: * @cdata: int indicating whether we are within a CDATA section
2370: *
2371: * parse a CharData section.
2372: * if we are within a CDATA section ']]>' marks an end of section.
1.27 daniel 2373: *
1.151 daniel 2374: * The right angle bracket (>) may be represented using the string ">",
2375: * and must, for compatibility, be escaped using ">" or a character
2376: * reference when it appears in the string "]]>" in content, when that
2377: * string is not marking the end of a CDATA section.
2378: *
1.27 daniel 2379: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2380: */
2381:
1.55 daniel 2382: void
2383: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1.152 daniel 2384: xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
1.91 daniel 2385: int nbchar = 0;
1.152 daniel 2386: int cur, l;
1.223 veillard 2387: int count = 0;
1.27 daniel 2388:
1.91 daniel 2389: SHRINK;
1.223 veillard 2390: GROW;
1.152 daniel 2391: cur = CUR_CHAR(l);
1.223 veillard 2392: while (((cur != '<') || (ctxt->token == '<')) && /* checked */
1.190 daniel 2393: ((cur != '&') || (ctxt->token == '&')) &&
1.229 veillard 2394: (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
1.97 daniel 2395: if ((cur == ']') && (NXT(1) == ']') &&
1.59 daniel 2396: (NXT(2) == '>')) {
2397: if (cdata) break;
2398: else {
1.230 veillard 2399: ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
1.59 daniel 2400: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.151 daniel 2401: ctxt->sax->error(ctxt->userData,
1.59 daniel 2402: "Sequence ']]>' not allowed in content\n");
1.151 daniel 2403: /* Should this be relaxed ??? I see a "must here */
2404: ctxt->wellFormed = 0;
1.180 daniel 2405: ctxt->disableSAX = 1;
1.59 daniel 2406: }
2407: }
1.152 daniel 2408: COPY_BUF(l,buf,nbchar,cur);
2409: if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
1.91 daniel 2410: /*
2411: * Ok the segment is to be consumed as chars.
2412: */
1.171 daniel 2413: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 2414: if (areBlanks(ctxt, buf, nbchar)) {
2415: if (ctxt->sax->ignorableWhitespace != NULL)
2416: ctxt->sax->ignorableWhitespace(ctxt->userData,
2417: buf, nbchar);
2418: } else {
2419: if (ctxt->sax->characters != NULL)
2420: ctxt->sax->characters(ctxt->userData, buf, nbchar);
2421: }
2422: }
2423: nbchar = 0;
2424: }
1.223 veillard 2425: count++;
2426: if (count > 50) {
2427: GROW;
2428: count = 0;
2429: }
1.152 daniel 2430: NEXTL(l);
2431: cur = CUR_CHAR(l);
1.27 daniel 2432: }
1.91 daniel 2433: if (nbchar != 0) {
2434: /*
2435: * Ok the segment is to be consumed as chars.
2436: */
1.171 daniel 2437: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 2438: if (areBlanks(ctxt, buf, nbchar)) {
2439: if (ctxt->sax->ignorableWhitespace != NULL)
2440: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2441: } else {
2442: if (ctxt->sax->characters != NULL)
2443: ctxt->sax->characters(ctxt->userData, buf, nbchar);
2444: }
2445: }
1.45 daniel 2446: }
1.27 daniel 2447: }
2448:
1.50 daniel 2449: /**
2450: * xmlParseExternalID:
2451: * @ctxt: an XML parser context
1.123 daniel 2452: * @publicID: a xmlChar** receiving PubidLiteral
1.67 daniel 2453: * @strict: indicate whether we should restrict parsing to only
2454: * production [75], see NOTE below
1.50 daniel 2455: *
1.67 daniel 2456: * Parse an External ID or a Public ID
2457: *
2458: * NOTE: Productions [75] and [83] interract badly since [75] can generate
2459: * 'PUBLIC' S PubidLiteral S SystemLiteral
1.22 daniel 2460: *
2461: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2462: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1.67 daniel 2463: *
2464: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2465: *
1.68 daniel 2466: * Returns the function returns SystemLiteral and in the second
1.67 daniel 2467: * case publicID receives PubidLiteral, is strict is off
2468: * it is possible to return NULL and have publicID set.
1.22 daniel 2469: */
2470:
1.123 daniel 2471: xmlChar *
2472: xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2473: xmlChar *URI = NULL;
1.22 daniel 2474:
1.91 daniel 2475: SHRINK;
1.152 daniel 2476: if ((RAW == 'S') && (NXT(1) == 'Y') &&
1.40 daniel 2477: (NXT(2) == 'S') && (NXT(3) == 'T') &&
2478: (NXT(4) == 'E') && (NXT(5) == 'M')) {
2479: SKIP(6);
1.59 daniel 2480: if (!IS_BLANK(CUR)) {
1.230 veillard 2481: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 2482: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2483: ctxt->sax->error(ctxt->userData,
1.59 daniel 2484: "Space required after 'SYSTEM'\n");
2485: ctxt->wellFormed = 0;
1.180 daniel 2486: ctxt->disableSAX = 1;
1.59 daniel 2487: }
1.42 daniel 2488: SKIP_BLANKS;
1.39 daniel 2489: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 2490: if (URI == NULL) {
1.230 veillard 2491: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.55 daniel 2492: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2493: ctxt->sax->error(ctxt->userData,
1.39 daniel 2494: "xmlParseExternalID: SYSTEM, no URI\n");
1.59 daniel 2495: ctxt->wellFormed = 0;
1.180 daniel 2496: ctxt->disableSAX = 1;
1.59 daniel 2497: }
1.152 daniel 2498: } else if ((RAW == 'P') && (NXT(1) == 'U') &&
1.40 daniel 2499: (NXT(2) == 'B') && (NXT(3) == 'L') &&
2500: (NXT(4) == 'I') && (NXT(5) == 'C')) {
2501: SKIP(6);
1.59 daniel 2502: if (!IS_BLANK(CUR)) {
1.230 veillard 2503: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 2504: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2505: ctxt->sax->error(ctxt->userData,
1.59 daniel 2506: "Space required after 'PUBLIC'\n");
2507: ctxt->wellFormed = 0;
1.180 daniel 2508: ctxt->disableSAX = 1;
1.59 daniel 2509: }
1.42 daniel 2510: SKIP_BLANKS;
1.39 daniel 2511: *publicID = xmlParsePubidLiteral(ctxt);
1.59 daniel 2512: if (*publicID == NULL) {
1.230 veillard 2513: ctxt->errNo = XML_ERR_PUBID_REQUIRED;
1.55 daniel 2514: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2515: ctxt->sax->error(ctxt->userData,
1.39 daniel 2516: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.59 daniel 2517: ctxt->wellFormed = 0;
1.180 daniel 2518: ctxt->disableSAX = 1;
1.59 daniel 2519: }
1.67 daniel 2520: if (strict) {
2521: /*
2522: * We don't handle [83] so "S SystemLiteral" is required.
2523: */
2524: if (!IS_BLANK(CUR)) {
1.230 veillard 2525: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 2526: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2527: ctxt->sax->error(ctxt->userData,
1.67 daniel 2528: "Space required after the Public Identifier\n");
2529: ctxt->wellFormed = 0;
1.180 daniel 2530: ctxt->disableSAX = 1;
1.67 daniel 2531: }
2532: } else {
2533: /*
2534: * We handle [83] so we return immediately, if
2535: * "S SystemLiteral" is not detected. From a purely parsing
2536: * point of view that's a nice mess.
2537: */
1.135 daniel 2538: const xmlChar *ptr;
2539: GROW;
2540:
2541: ptr = CUR_PTR;
1.67 daniel 2542: if (!IS_BLANK(*ptr)) return(NULL);
2543:
1.223 veillard 2544: while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
1.173 daniel 2545: if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
1.59 daniel 2546: }
1.42 daniel 2547: SKIP_BLANKS;
1.39 daniel 2548: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 2549: if (URI == NULL) {
1.230 veillard 2550: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.55 daniel 2551: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2552: ctxt->sax->error(ctxt->userData,
1.39 daniel 2553: "xmlParseExternalID: PUBLIC, no URI\n");
1.59 daniel 2554: ctxt->wellFormed = 0;
1.180 daniel 2555: ctxt->disableSAX = 1;
1.59 daniel 2556: }
1.22 daniel 2557: }
1.39 daniel 2558: return(URI);
1.22 daniel 2559: }
2560:
1.50 daniel 2561: /**
2562: * xmlParseComment:
1.69 daniel 2563: * @ctxt: an XML parser context
1.50 daniel 2564: *
1.3 veillard 2565: * Skip an XML (SGML) comment <!-- .... -->
1.38 daniel 2566: * The spec says that "For compatibility, the string "--" (double-hyphen)
2567: * must not occur within comments. "
1.22 daniel 2568: *
2569: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 2570: */
1.72 daniel 2571: void
1.114 daniel 2572: xmlParseComment(xmlParserCtxtPtr ctxt) {
1.135 daniel 2573: xmlChar *buf = NULL;
1.195 daniel 2574: int len;
1.140 daniel 2575: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 2576: int q, ql;
2577: int r, rl;
2578: int cur, l;
1.140 daniel 2579: xmlParserInputState state;
1.187 daniel 2580: xmlParserInputPtr input = ctxt->input;
1.223 veillard 2581: int count = 0;
1.3 veillard 2582:
2583: /*
1.22 daniel 2584: * Check that there is a comment right here.
1.3 veillard 2585: */
1.152 daniel 2586: if ((RAW != '<') || (NXT(1) != '!') ||
1.72 daniel 2587: (NXT(2) != '-') || (NXT(3) != '-')) return;
1.3 veillard 2588:
1.140 daniel 2589: state = ctxt->instate;
1.97 daniel 2590: ctxt->instate = XML_PARSER_COMMENT;
1.91 daniel 2591: SHRINK;
1.40 daniel 2592: SKIP(4);
1.135 daniel 2593: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2594: if (buf == NULL) {
1.241 veillard 2595: xmlGenericError(xmlGenericErrorContext,
2596: "malloc of %d byte failed\n", size);
1.140 daniel 2597: ctxt->instate = state;
1.135 daniel 2598: return;
2599: }
1.152 daniel 2600: q = CUR_CHAR(ql);
2601: NEXTL(ql);
2602: r = CUR_CHAR(rl);
2603: NEXTL(rl);
2604: cur = CUR_CHAR(l);
1.195 daniel 2605: len = 0;
1.223 veillard 2606: while (IS_CHAR(cur) && /* checked */
1.135 daniel 2607: ((cur != '>') ||
2608: (r != '-') || (q != '-'))) {
1.195 daniel 2609: if ((r == '-') && (q == '-') && (len > 1)) {
1.230 veillard 2610: ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
1.55 daniel 2611: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2612: ctxt->sax->error(ctxt->userData,
1.38 daniel 2613: "Comment must not contain '--' (double-hyphen)`\n");
1.59 daniel 2614: ctxt->wellFormed = 0;
1.180 daniel 2615: ctxt->disableSAX = 1;
1.59 daniel 2616: }
1.152 daniel 2617: if (len + 5 >= size) {
1.135 daniel 2618: size *= 2;
1.204 veillard 2619: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 2620: if (buf == NULL) {
1.241 veillard 2621: xmlGenericError(xmlGenericErrorContext,
2622: "realloc of %d byte failed\n", size);
1.140 daniel 2623: ctxt->instate = state;
1.135 daniel 2624: return;
2625: }
2626: }
1.152 daniel 2627: COPY_BUF(ql,buf,len,q);
1.135 daniel 2628: q = r;
1.152 daniel 2629: ql = rl;
1.135 daniel 2630: r = cur;
1.152 daniel 2631: rl = l;
1.223 veillard 2632:
2633: count++;
2634: if (count > 50) {
2635: GROW;
2636: count = 0;
2637: }
1.152 daniel 2638: NEXTL(l);
2639: cur = CUR_CHAR(l);
1.135 daniel 2640: if (cur == 0) {
2641: SHRINK;
2642: GROW;
1.152 daniel 2643: cur = CUR_CHAR(l);
1.135 daniel 2644: }
1.3 veillard 2645: }
1.135 daniel 2646: buf[len] = 0;
2647: if (!IS_CHAR(cur)) {
1.230 veillard 2648: ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
1.55 daniel 2649: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2650: ctxt->sax->error(ctxt->userData,
1.135 daniel 2651: "Comment not terminated \n<!--%.50s\n", buf);
1.59 daniel 2652: ctxt->wellFormed = 0;
1.180 daniel 2653: ctxt->disableSAX = 1;
1.178 daniel 2654: xmlFree(buf);
1.3 veillard 2655: } else {
1.187 daniel 2656: if (input != ctxt->input) {
1.230 veillard 2657: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 2658: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2659: ctxt->sax->error(ctxt->userData,
2660: "Comment doesn't start and stop in the same entity\n");
2661: ctxt->wellFormed = 0;
2662: ctxt->disableSAX = 1;
2663: }
1.40 daniel 2664: NEXT;
1.171 daniel 2665: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2666: (!ctxt->disableSAX))
1.135 daniel 2667: ctxt->sax->comment(ctxt->userData, buf);
2668: xmlFree(buf);
1.3 veillard 2669: }
1.140 daniel 2670: ctxt->instate = state;
1.3 veillard 2671: }
2672:
1.50 daniel 2673: /**
2674: * xmlParsePITarget:
2675: * @ctxt: an XML parser context
2676: *
2677: * parse the name of a PI
1.22 daniel 2678: *
2679: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1.68 daniel 2680: *
2681: * Returns the PITarget name or NULL
1.22 daniel 2682: */
2683:
1.123 daniel 2684: xmlChar *
1.55 daniel 2685: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1.123 daniel 2686: xmlChar *name;
1.22 daniel 2687:
2688: name = xmlParseName(ctxt);
1.139 daniel 2689: if ((name != NULL) &&
1.22 daniel 2690: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 2691: ((name[1] == 'm') || (name[1] == 'M')) &&
2692: ((name[2] == 'l') || (name[2] == 'L'))) {
1.139 daniel 2693: int i;
1.177 daniel 2694: if ((name[0] == 'x') && (name[1] == 'm') &&
2695: (name[2] == 'l') && (name[3] == 0)) {
1.230 veillard 2696: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
1.151 daniel 2697: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2698: ctxt->sax->error(ctxt->userData,
2699: "XML declaration allowed only at the start of the document\n");
2700: ctxt->wellFormed = 0;
1.180 daniel 2701: ctxt->disableSAX = 1;
1.151 daniel 2702: return(name);
2703: } else if (name[3] == 0) {
1.230 veillard 2704: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
1.151 daniel 2705: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2706: ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2707: ctxt->wellFormed = 0;
1.180 daniel 2708: ctxt->disableSAX = 1;
1.151 daniel 2709: return(name);
2710: }
1.139 daniel 2711: for (i = 0;;i++) {
2712: if (xmlW3CPIs[i] == NULL) break;
1.236 veillard 2713: if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
1.139 daniel 2714: return(name);
2715: }
2716: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
1.230 veillard 2717: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
1.139 daniel 2718: ctxt->sax->warning(ctxt->userData,
1.122 daniel 2719: "xmlParsePItarget: invalid name prefix 'xml'\n");
2720: }
1.22 daniel 2721: }
2722: return(name);
2723: }
2724:
1.50 daniel 2725: /**
2726: * xmlParsePI:
2727: * @ctxt: an XML parser context
2728: *
2729: * parse an XML Processing Instruction.
1.22 daniel 2730: *
2731: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.68 daniel 2732: *
1.69 daniel 2733: * The processing is transfered to SAX once parsed.
1.3 veillard 2734: */
2735:
1.55 daniel 2736: void
2737: xmlParsePI(xmlParserCtxtPtr ctxt) {
1.135 daniel 2738: xmlChar *buf = NULL;
2739: int len = 0;
1.140 daniel 2740: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 2741: int cur, l;
1.123 daniel 2742: xmlChar *target;
1.140 daniel 2743: xmlParserInputState state;
1.223 veillard 2744: int count = 0;
1.22 daniel 2745:
1.152 daniel 2746: if ((RAW == '<') && (NXT(1) == '?')) {
1.187 daniel 2747: xmlParserInputPtr input = ctxt->input;
1.140 daniel 2748: state = ctxt->instate;
2749: ctxt->instate = XML_PARSER_PI;
1.3 veillard 2750: /*
2751: * this is a Processing Instruction.
2752: */
1.40 daniel 2753: SKIP(2);
1.91 daniel 2754: SHRINK;
1.3 veillard 2755:
2756: /*
1.22 daniel 2757: * Parse the target name and check for special support like
2758: * namespace.
1.3 veillard 2759: */
1.22 daniel 2760: target = xmlParsePITarget(ctxt);
2761: if (target != NULL) {
1.156 daniel 2762: if ((RAW == '?') && (NXT(1) == '>')) {
1.187 daniel 2763: if (input != ctxt->input) {
1.230 veillard 2764: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 2765: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2766: ctxt->sax->error(ctxt->userData,
2767: "PI declaration doesn't start and stop in the same entity\n");
2768: ctxt->wellFormed = 0;
2769: ctxt->disableSAX = 1;
2770: }
1.156 daniel 2771: SKIP(2);
2772:
2773: /*
2774: * SAX: PI detected.
2775: */
1.171 daniel 2776: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.156 daniel 2777: (ctxt->sax->processingInstruction != NULL))
2778: ctxt->sax->processingInstruction(ctxt->userData,
2779: target, NULL);
2780: ctxt->instate = state;
1.170 daniel 2781: xmlFree(target);
1.156 daniel 2782: return;
2783: }
1.135 daniel 2784: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2785: if (buf == NULL) {
1.241 veillard 2786: xmlGenericError(xmlGenericErrorContext,
2787: "malloc of %d byte failed\n", size);
1.140 daniel 2788: ctxt->instate = state;
1.135 daniel 2789: return;
2790: }
2791: cur = CUR;
2792: if (!IS_BLANK(cur)) {
1.230 veillard 2793: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.114 daniel 2794: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2795: ctxt->sax->error(ctxt->userData,
2796: "xmlParsePI: PI %s space expected\n", target);
2797: ctxt->wellFormed = 0;
1.180 daniel 2798: ctxt->disableSAX = 1;
1.114 daniel 2799: }
2800: SKIP_BLANKS;
1.152 daniel 2801: cur = CUR_CHAR(l);
1.223 veillard 2802: while (IS_CHAR(cur) && /* checked */
1.135 daniel 2803: ((cur != '?') || (NXT(1) != '>'))) {
1.152 daniel 2804: if (len + 5 >= size) {
1.135 daniel 2805: size *= 2;
1.204 veillard 2806: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 2807: if (buf == NULL) {
1.241 veillard 2808: xmlGenericError(xmlGenericErrorContext,
2809: "realloc of %d byte failed\n", size);
1.140 daniel 2810: ctxt->instate = state;
1.135 daniel 2811: return;
2812: }
1.223 veillard 2813: }
2814: count++;
2815: if (count > 50) {
2816: GROW;
2817: count = 0;
1.135 daniel 2818: }
1.152 daniel 2819: COPY_BUF(l,buf,len,cur);
2820: NEXTL(l);
2821: cur = CUR_CHAR(l);
1.135 daniel 2822: if (cur == 0) {
2823: SHRINK;
2824: GROW;
1.152 daniel 2825: cur = CUR_CHAR(l);
1.135 daniel 2826: }
2827: }
2828: buf[len] = 0;
1.152 daniel 2829: if (cur != '?') {
1.230 veillard 2830: ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
1.72 daniel 2831: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2832: ctxt->sax->error(ctxt->userData,
1.72 daniel 2833: "xmlParsePI: PI %s never end ...\n", target);
2834: ctxt->wellFormed = 0;
1.180 daniel 2835: ctxt->disableSAX = 1;
1.22 daniel 2836: } else {
1.187 daniel 2837: if (input != ctxt->input) {
1.230 veillard 2838: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 2839: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2840: ctxt->sax->error(ctxt->userData,
2841: "PI declaration doesn't start and stop in the same entity\n");
2842: ctxt->wellFormed = 0;
2843: ctxt->disableSAX = 1;
2844: }
1.72 daniel 2845: SKIP(2);
1.44 daniel 2846:
1.72 daniel 2847: /*
2848: * SAX: PI detected.
2849: */
1.171 daniel 2850: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.72 daniel 2851: (ctxt->sax->processingInstruction != NULL))
1.99 daniel 2852: ctxt->sax->processingInstruction(ctxt->userData,
1.135 daniel 2853: target, buf);
1.22 daniel 2854: }
1.135 daniel 2855: xmlFree(buf);
1.119 daniel 2856: xmlFree(target);
1.3 veillard 2857: } else {
1.230 veillard 2858: ctxt->errNo = XML_ERR_PI_NOT_STARTED;
1.55 daniel 2859: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.99 daniel 2860: ctxt->sax->error(ctxt->userData,
2861: "xmlParsePI : no target name\n");
1.59 daniel 2862: ctxt->wellFormed = 0;
1.180 daniel 2863: ctxt->disableSAX = 1;
1.22 daniel 2864: }
1.140 daniel 2865: ctxt->instate = state;
1.22 daniel 2866: }
2867: }
2868:
1.50 daniel 2869: /**
2870: * xmlParseNotationDecl:
2871: * @ctxt: an XML parser context
2872: *
2873: * parse a notation declaration
1.22 daniel 2874: *
2875: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
2876: *
2877: * Hence there is actually 3 choices:
2878: * 'PUBLIC' S PubidLiteral
2879: * 'PUBLIC' S PubidLiteral S SystemLiteral
2880: * and 'SYSTEM' S SystemLiteral
1.50 daniel 2881: *
1.67 daniel 2882: * See the NOTE on xmlParseExternalID().
1.22 daniel 2883: */
2884:
1.55 daniel 2885: void
2886: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 2887: xmlChar *name;
2888: xmlChar *Pubid;
2889: xmlChar *Systemid;
1.22 daniel 2890:
1.152 daniel 2891: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 2892: (NXT(2) == 'N') && (NXT(3) == 'O') &&
2893: (NXT(4) == 'T') && (NXT(5) == 'A') &&
2894: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1.67 daniel 2895: (NXT(8) == 'O') && (NXT(9) == 'N')) {
1.187 daniel 2896: xmlParserInputPtr input = ctxt->input;
1.91 daniel 2897: SHRINK;
1.40 daniel 2898: SKIP(10);
1.67 daniel 2899: if (!IS_BLANK(CUR)) {
1.230 veillard 2900: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 2901: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2902: ctxt->sax->error(ctxt->userData,
2903: "Space required after '<!NOTATION'\n");
1.67 daniel 2904: ctxt->wellFormed = 0;
1.180 daniel 2905: ctxt->disableSAX = 1;
1.67 daniel 2906: return;
2907: }
2908: SKIP_BLANKS;
1.22 daniel 2909:
2910: name = xmlParseName(ctxt);
2911: if (name == NULL) {
1.230 veillard 2912: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.55 daniel 2913: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2914: ctxt->sax->error(ctxt->userData,
2915: "NOTATION: Name expected here\n");
1.67 daniel 2916: ctxt->wellFormed = 0;
1.180 daniel 2917: ctxt->disableSAX = 1;
1.67 daniel 2918: return;
2919: }
2920: if (!IS_BLANK(CUR)) {
1.230 veillard 2921: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 2922: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2923: ctxt->sax->error(ctxt->userData,
1.67 daniel 2924: "Space required after the NOTATION name'\n");
1.59 daniel 2925: ctxt->wellFormed = 0;
1.180 daniel 2926: ctxt->disableSAX = 1;
1.22 daniel 2927: return;
2928: }
1.42 daniel 2929: SKIP_BLANKS;
1.67 daniel 2930:
1.22 daniel 2931: /*
1.67 daniel 2932: * Parse the IDs.
1.22 daniel 2933: */
1.160 daniel 2934: Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
1.67 daniel 2935: SKIP_BLANKS;
2936:
1.152 daniel 2937: if (RAW == '>') {
1.187 daniel 2938: if (input != ctxt->input) {
1.230 veillard 2939: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 2940: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2941: ctxt->sax->error(ctxt->userData,
2942: "Notation declaration doesn't start and stop in the same entity\n");
2943: ctxt->wellFormed = 0;
2944: ctxt->disableSAX = 1;
2945: }
1.40 daniel 2946: NEXT;
1.171 daniel 2947: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
2948: (ctxt->sax->notationDecl != NULL))
1.74 daniel 2949: ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
1.67 daniel 2950: } else {
1.230 veillard 2951: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.67 daniel 2952: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2953: ctxt->sax->error(ctxt->userData,
1.67 daniel 2954: "'>' required to close NOTATION declaration\n");
2955: ctxt->wellFormed = 0;
1.180 daniel 2956: ctxt->disableSAX = 1;
1.67 daniel 2957: }
1.119 daniel 2958: xmlFree(name);
2959: if (Systemid != NULL) xmlFree(Systemid);
2960: if (Pubid != NULL) xmlFree(Pubid);
1.22 daniel 2961: }
2962: }
2963:
1.50 daniel 2964: /**
2965: * xmlParseEntityDecl:
2966: * @ctxt: an XML parser context
2967: *
2968: * parse <!ENTITY declarations
1.22 daniel 2969: *
2970: * [70] EntityDecl ::= GEDecl | PEDecl
2971: *
2972: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
2973: *
2974: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
2975: *
2976: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
2977: *
2978: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 2979: *
2980: * [76] NDataDecl ::= S 'NDATA' S Name
1.99 daniel 2981: *
2982: * [ VC: Notation Declared ]
1.116 daniel 2983: * The Name must match the declared name of a notation.
1.22 daniel 2984: */
2985:
1.55 daniel 2986: void
2987: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 2988: xmlChar *name = NULL;
2989: xmlChar *value = NULL;
2990: xmlChar *URI = NULL, *literal = NULL;
2991: xmlChar *ndata = NULL;
1.39 daniel 2992: int isParameter = 0;
1.123 daniel 2993: xmlChar *orig = NULL;
1.22 daniel 2994:
1.94 daniel 2995: GROW;
1.152 daniel 2996: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 2997: (NXT(2) == 'E') && (NXT(3) == 'N') &&
2998: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1.59 daniel 2999: (NXT(6) == 'T') && (NXT(7) == 'Y')) {
1.187 daniel 3000: xmlParserInputPtr input = ctxt->input;
1.96 daniel 3001: ctxt->instate = XML_PARSER_ENTITY_DECL;
1.91 daniel 3002: SHRINK;
1.40 daniel 3003: SKIP(8);
1.59 daniel 3004: if (!IS_BLANK(CUR)) {
1.230 veillard 3005: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3006: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3007: ctxt->sax->error(ctxt->userData,
3008: "Space required after '<!ENTITY'\n");
1.59 daniel 3009: ctxt->wellFormed = 0;
1.180 daniel 3010: ctxt->disableSAX = 1;
1.59 daniel 3011: }
3012: SKIP_BLANKS;
1.40 daniel 3013:
1.152 daniel 3014: if (RAW == '%') {
1.40 daniel 3015: NEXT;
1.59 daniel 3016: if (!IS_BLANK(CUR)) {
1.230 veillard 3017: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3018: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3019: ctxt->sax->error(ctxt->userData,
3020: "Space required after '%'\n");
1.59 daniel 3021: ctxt->wellFormed = 0;
1.180 daniel 3022: ctxt->disableSAX = 1;
1.59 daniel 3023: }
1.42 daniel 3024: SKIP_BLANKS;
1.39 daniel 3025: isParameter = 1;
1.22 daniel 3026: }
3027:
3028: name = xmlParseName(ctxt);
1.24 daniel 3029: if (name == NULL) {
1.230 veillard 3030: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 3031: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3032: ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
1.59 daniel 3033: ctxt->wellFormed = 0;
1.180 daniel 3034: ctxt->disableSAX = 1;
1.24 daniel 3035: return;
3036: }
1.59 daniel 3037: if (!IS_BLANK(CUR)) {
1.230 veillard 3038: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3039: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3040: ctxt->sax->error(ctxt->userData,
1.59 daniel 3041: "Space required after the entity name\n");
3042: ctxt->wellFormed = 0;
1.180 daniel 3043: ctxt->disableSAX = 1;
1.59 daniel 3044: }
1.42 daniel 3045: SKIP_BLANKS;
1.24 daniel 3046:
1.22 daniel 3047: /*
1.68 daniel 3048: * handle the various case of definitions...
1.22 daniel 3049: */
1.39 daniel 3050: if (isParameter) {
1.225 veillard 3051: if ((RAW == '"') || (RAW == '\'')) {
1.78 daniel 3052: value = xmlParseEntityValue(ctxt, &orig);
1.39 daniel 3053: if (value) {
1.171 daniel 3054: if ((ctxt->sax != NULL) &&
3055: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3056: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3057: XML_INTERNAL_PARAMETER_ENTITY,
3058: NULL, NULL, value);
3059: }
1.225 veillard 3060: } else {
1.67 daniel 3061: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 3062: if ((URI == NULL) && (literal == NULL)) {
1.230 veillard 3063: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
1.169 daniel 3064: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3065: ctxt->sax->error(ctxt->userData,
3066: "Entity value required\n");
3067: ctxt->wellFormed = 0;
1.180 daniel 3068: ctxt->disableSAX = 1;
1.169 daniel 3069: }
1.39 daniel 3070: if (URI) {
1.193 daniel 3071: xmlURIPtr uri;
3072:
3073: uri = xmlParseURI((const char *) URI);
3074: if (uri == NULL) {
1.230 veillard 3075: ctxt->errNo = XML_ERR_INVALID_URI;
1.193 daniel 3076: if ((ctxt->sax != NULL) &&
3077: (!ctxt->disableSAX) &&
3078: (ctxt->sax->error != NULL))
3079: ctxt->sax->error(ctxt->userData,
3080: "Invalid URI: %s\n", URI);
3081: ctxt->wellFormed = 0;
3082: } else {
3083: if (uri->fragment != NULL) {
1.230 veillard 3084: ctxt->errNo = XML_ERR_URI_FRAGMENT;
1.193 daniel 3085: if ((ctxt->sax != NULL) &&
3086: (!ctxt->disableSAX) &&
3087: (ctxt->sax->error != NULL))
3088: ctxt->sax->error(ctxt->userData,
3089: "Fragment not allowed: %s\n", URI);
3090: ctxt->wellFormed = 0;
3091: } else {
3092: if ((ctxt->sax != NULL) &&
3093: (!ctxt->disableSAX) &&
3094: (ctxt->sax->entityDecl != NULL))
3095: ctxt->sax->entityDecl(ctxt->userData, name,
3096: XML_EXTERNAL_PARAMETER_ENTITY,
3097: literal, URI, NULL);
3098: }
3099: xmlFreeURI(uri);
3100: }
1.39 daniel 3101: }
1.24 daniel 3102: }
3103: } else {
1.152 daniel 3104: if ((RAW == '"') || (RAW == '\'')) {
1.78 daniel 3105: value = xmlParseEntityValue(ctxt, &orig);
1.171 daniel 3106: if ((ctxt->sax != NULL) &&
3107: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3108: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3109: XML_INTERNAL_GENERAL_ENTITY,
3110: NULL, NULL, value);
3111: } else {
1.67 daniel 3112: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 3113: if ((URI == NULL) && (literal == NULL)) {
1.230 veillard 3114: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
1.169 daniel 3115: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3116: ctxt->sax->error(ctxt->userData,
3117: "Entity value required\n");
3118: ctxt->wellFormed = 0;
1.180 daniel 3119: ctxt->disableSAX = 1;
1.169 daniel 3120: }
1.193 daniel 3121: if (URI) {
3122: xmlURIPtr uri;
3123:
3124: uri = xmlParseURI((const char *)URI);
3125: if (uri == NULL) {
1.230 veillard 3126: ctxt->errNo = XML_ERR_INVALID_URI;
1.193 daniel 3127: if ((ctxt->sax != NULL) &&
3128: (!ctxt->disableSAX) &&
3129: (ctxt->sax->error != NULL))
3130: ctxt->sax->error(ctxt->userData,
3131: "Invalid URI: %s\n", URI);
3132: ctxt->wellFormed = 0;
3133: } else {
3134: if (uri->fragment != NULL) {
1.230 veillard 3135: ctxt->errNo = XML_ERR_URI_FRAGMENT;
1.193 daniel 3136: if ((ctxt->sax != NULL) &&
3137: (!ctxt->disableSAX) &&
3138: (ctxt->sax->error != NULL))
3139: ctxt->sax->error(ctxt->userData,
3140: "Fragment not allowed: %s\n", URI);
3141: ctxt->wellFormed = 0;
3142: }
3143: xmlFreeURI(uri);
3144: }
3145: }
1.152 daniel 3146: if ((RAW != '>') && (!IS_BLANK(CUR))) {
1.230 veillard 3147: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3148: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3149: ctxt->sax->error(ctxt->userData,
1.59 daniel 3150: "Space required before 'NDATA'\n");
3151: ctxt->wellFormed = 0;
1.180 daniel 3152: ctxt->disableSAX = 1;
1.59 daniel 3153: }
1.42 daniel 3154: SKIP_BLANKS;
1.152 daniel 3155: if ((RAW == 'N') && (NXT(1) == 'D') &&
1.40 daniel 3156: (NXT(2) == 'A') && (NXT(3) == 'T') &&
3157: (NXT(4) == 'A')) {
3158: SKIP(5);
1.59 daniel 3159: if (!IS_BLANK(CUR)) {
1.230 veillard 3160: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3161: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3162: ctxt->sax->error(ctxt->userData,
1.59 daniel 3163: "Space required after 'NDATA'\n");
3164: ctxt->wellFormed = 0;
1.180 daniel 3165: ctxt->disableSAX = 1;
1.59 daniel 3166: }
1.42 daniel 3167: SKIP_BLANKS;
1.24 daniel 3168: ndata = xmlParseName(ctxt);
1.171 daniel 3169: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.116 daniel 3170: (ctxt->sax->unparsedEntityDecl != NULL))
3171: ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
1.39 daniel 3172: literal, URI, ndata);
3173: } else {
1.171 daniel 3174: if ((ctxt->sax != NULL) &&
3175: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3176: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3177: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3178: literal, URI, NULL);
1.24 daniel 3179: }
3180: }
3181: }
1.42 daniel 3182: SKIP_BLANKS;
1.152 daniel 3183: if (RAW != '>') {
1.230 veillard 3184: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.55 daniel 3185: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3186: ctxt->sax->error(ctxt->userData,
1.31 daniel 3187: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.59 daniel 3188: ctxt->wellFormed = 0;
1.180 daniel 3189: ctxt->disableSAX = 1;
1.187 daniel 3190: } else {
3191: if (input != ctxt->input) {
1.230 veillard 3192: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 3193: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3194: ctxt->sax->error(ctxt->userData,
3195: "Entity declaration doesn't start and stop in the same entity\n");
3196: ctxt->wellFormed = 0;
3197: ctxt->disableSAX = 1;
3198: }
1.40 daniel 3199: NEXT;
1.187 daniel 3200: }
1.78 daniel 3201: if (orig != NULL) {
3202: /*
1.98 daniel 3203: * Ugly mechanism to save the raw entity value.
1.78 daniel 3204: */
3205: xmlEntityPtr cur = NULL;
3206:
1.98 daniel 3207: if (isParameter) {
3208: if ((ctxt->sax != NULL) &&
3209: (ctxt->sax->getParameterEntity != NULL))
1.120 daniel 3210: cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.98 daniel 3211: } else {
3212: if ((ctxt->sax != NULL) &&
3213: (ctxt->sax->getEntity != NULL))
1.120 daniel 3214: cur = ctxt->sax->getEntity(ctxt->userData, name);
1.98 daniel 3215: }
3216: if (cur != NULL) {
3217: if (cur->orig != NULL)
1.119 daniel 3218: xmlFree(orig);
1.98 daniel 3219: else
3220: cur->orig = orig;
3221: } else
1.119 daniel 3222: xmlFree(orig);
1.78 daniel 3223: }
1.119 daniel 3224: if (name != NULL) xmlFree(name);
3225: if (value != NULL) xmlFree(value);
3226: if (URI != NULL) xmlFree(URI);
3227: if (literal != NULL) xmlFree(literal);
3228: if (ndata != NULL) xmlFree(ndata);
1.22 daniel 3229: }
3230: }
3231:
1.50 daniel 3232: /**
1.59 daniel 3233: * xmlParseDefaultDecl:
3234: * @ctxt: an XML parser context
3235: * @value: Receive a possible fixed default value for the attribute
3236: *
3237: * Parse an attribute default declaration
3238: *
3239: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3240: *
1.99 daniel 3241: * [ VC: Required Attribute ]
1.117 daniel 3242: * if the default declaration is the keyword #REQUIRED, then the
3243: * attribute must be specified for all elements of the type in the
3244: * attribute-list declaration.
1.99 daniel 3245: *
3246: * [ VC: Attribute Default Legal ]
1.102 daniel 3247: * The declared default value must meet the lexical constraints of
3248: * the declared attribute type c.f. xmlValidateAttributeDecl()
1.99 daniel 3249: *
3250: * [ VC: Fixed Attribute Default ]
1.117 daniel 3251: * if an attribute has a default value declared with the #FIXED
3252: * keyword, instances of that attribute must match the default value.
1.99 daniel 3253: *
3254: * [ WFC: No < in Attribute Values ]
3255: * handled in xmlParseAttValue()
3256: *
1.59 daniel 3257: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3258: * or XML_ATTRIBUTE_FIXED.
3259: */
3260:
3261: int
1.123 daniel 3262: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
1.59 daniel 3263: int val;
1.123 daniel 3264: xmlChar *ret;
1.59 daniel 3265:
3266: *value = NULL;
1.152 daniel 3267: if ((RAW == '#') && (NXT(1) == 'R') &&
1.59 daniel 3268: (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3269: (NXT(4) == 'U') && (NXT(5) == 'I') &&
3270: (NXT(6) == 'R') && (NXT(7) == 'E') &&
3271: (NXT(8) == 'D')) {
3272: SKIP(9);
3273: return(XML_ATTRIBUTE_REQUIRED);
3274: }
1.152 daniel 3275: if ((RAW == '#') && (NXT(1) == 'I') &&
1.59 daniel 3276: (NXT(2) == 'M') && (NXT(3) == 'P') &&
3277: (NXT(4) == 'L') && (NXT(5) == 'I') &&
3278: (NXT(6) == 'E') && (NXT(7) == 'D')) {
3279: SKIP(8);
3280: return(XML_ATTRIBUTE_IMPLIED);
3281: }
3282: val = XML_ATTRIBUTE_NONE;
1.152 daniel 3283: if ((RAW == '#') && (NXT(1) == 'F') &&
1.59 daniel 3284: (NXT(2) == 'I') && (NXT(3) == 'X') &&
3285: (NXT(4) == 'E') && (NXT(5) == 'D')) {
3286: SKIP(6);
3287: val = XML_ATTRIBUTE_FIXED;
3288: if (!IS_BLANK(CUR)) {
1.230 veillard 3289: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3290: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3291: ctxt->sax->error(ctxt->userData,
3292: "Space required after '#FIXED'\n");
1.59 daniel 3293: ctxt->wellFormed = 0;
1.180 daniel 3294: ctxt->disableSAX = 1;
1.59 daniel 3295: }
3296: SKIP_BLANKS;
3297: }
3298: ret = xmlParseAttValue(ctxt);
1.96 daniel 3299: ctxt->instate = XML_PARSER_DTD;
1.59 daniel 3300: if (ret == NULL) {
3301: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3302: ctxt->sax->error(ctxt->userData,
1.59 daniel 3303: "Attribute default value declaration error\n");
3304: ctxt->wellFormed = 0;
1.180 daniel 3305: ctxt->disableSAX = 1;
1.59 daniel 3306: } else
3307: *value = ret;
3308: return(val);
3309: }
3310:
3311: /**
1.66 daniel 3312: * xmlParseNotationType:
3313: * @ctxt: an XML parser context
3314: *
3315: * parse an Notation attribute type.
3316: *
1.99 daniel 3317: * Note: the leading 'NOTATION' S part has already being parsed...
3318: *
1.66 daniel 3319: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3320: *
1.99 daniel 3321: * [ VC: Notation Attributes ]
1.117 daniel 3322: * Values of this type must match one of the notation names included
1.99 daniel 3323: * in the declaration; all notation names in the declaration must be declared.
1.66 daniel 3324: *
3325: * Returns: the notation attribute tree built while parsing
3326: */
3327:
3328: xmlEnumerationPtr
3329: xmlParseNotationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 3330: xmlChar *name;
1.66 daniel 3331: xmlEnumerationPtr ret = NULL, last = NULL, cur;
3332:
1.152 daniel 3333: if (RAW != '(') {
1.230 veillard 3334: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.66 daniel 3335: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3336: ctxt->sax->error(ctxt->userData,
3337: "'(' required to start 'NOTATION'\n");
1.66 daniel 3338: ctxt->wellFormed = 0;
1.180 daniel 3339: ctxt->disableSAX = 1;
1.66 daniel 3340: return(NULL);
3341: }
1.91 daniel 3342: SHRINK;
1.66 daniel 3343: do {
3344: NEXT;
3345: SKIP_BLANKS;
3346: name = xmlParseName(ctxt);
3347: if (name == NULL) {
1.230 veillard 3348: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.66 daniel 3349: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3350: ctxt->sax->error(ctxt->userData,
1.66 daniel 3351: "Name expected in NOTATION declaration\n");
3352: ctxt->wellFormed = 0;
1.180 daniel 3353: ctxt->disableSAX = 1;
1.66 daniel 3354: return(ret);
3355: }
3356: cur = xmlCreateEnumeration(name);
1.119 daniel 3357: xmlFree(name);
1.66 daniel 3358: if (cur == NULL) return(ret);
3359: if (last == NULL) ret = last = cur;
3360: else {
3361: last->next = cur;
3362: last = cur;
3363: }
3364: SKIP_BLANKS;
1.152 daniel 3365: } while (RAW == '|');
3366: if (RAW != ')') {
1.230 veillard 3367: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.66 daniel 3368: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3369: ctxt->sax->error(ctxt->userData,
1.66 daniel 3370: "')' required to finish NOTATION declaration\n");
3371: ctxt->wellFormed = 0;
1.180 daniel 3372: ctxt->disableSAX = 1;
1.170 daniel 3373: if ((last != NULL) && (last != ret))
3374: xmlFreeEnumeration(last);
1.66 daniel 3375: return(ret);
3376: }
3377: NEXT;
3378: return(ret);
3379: }
3380:
3381: /**
3382: * xmlParseEnumerationType:
3383: * @ctxt: an XML parser context
3384: *
3385: * parse an Enumeration attribute type.
3386: *
3387: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3388: *
1.99 daniel 3389: * [ VC: Enumeration ]
1.117 daniel 3390: * Values of this type must match one of the Nmtoken tokens in
1.99 daniel 3391: * the declaration
3392: *
1.66 daniel 3393: * Returns: the enumeration attribute tree built while parsing
3394: */
3395:
3396: xmlEnumerationPtr
3397: xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 3398: xmlChar *name;
1.66 daniel 3399: xmlEnumerationPtr ret = NULL, last = NULL, cur;
3400:
1.152 daniel 3401: if (RAW != '(') {
1.230 veillard 3402: ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
1.66 daniel 3403: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3404: ctxt->sax->error(ctxt->userData,
1.66 daniel 3405: "'(' required to start ATTLIST enumeration\n");
3406: ctxt->wellFormed = 0;
1.180 daniel 3407: ctxt->disableSAX = 1;
1.66 daniel 3408: return(NULL);
3409: }
1.91 daniel 3410: SHRINK;
1.66 daniel 3411: do {
3412: NEXT;
3413: SKIP_BLANKS;
3414: name = xmlParseNmtoken(ctxt);
3415: if (name == NULL) {
1.230 veillard 3416: ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
1.66 daniel 3417: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3418: ctxt->sax->error(ctxt->userData,
1.66 daniel 3419: "NmToken expected in ATTLIST enumeration\n");
3420: ctxt->wellFormed = 0;
1.180 daniel 3421: ctxt->disableSAX = 1;
1.66 daniel 3422: return(ret);
3423: }
3424: cur = xmlCreateEnumeration(name);
1.119 daniel 3425: xmlFree(name);
1.66 daniel 3426: if (cur == NULL) return(ret);
3427: if (last == NULL) ret = last = cur;
3428: else {
3429: last->next = cur;
3430: last = cur;
3431: }
3432: SKIP_BLANKS;
1.152 daniel 3433: } while (RAW == '|');
3434: if (RAW != ')') {
1.230 veillard 3435: ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
1.66 daniel 3436: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3437: ctxt->sax->error(ctxt->userData,
1.66 daniel 3438: "')' required to finish ATTLIST enumeration\n");
3439: ctxt->wellFormed = 0;
1.180 daniel 3440: ctxt->disableSAX = 1;
1.66 daniel 3441: return(ret);
3442: }
3443: NEXT;
3444: return(ret);
3445: }
3446:
3447: /**
1.50 daniel 3448: * xmlParseEnumeratedType:
3449: * @ctxt: an XML parser context
1.66 daniel 3450: * @tree: the enumeration tree built while parsing
1.50 daniel 3451: *
1.66 daniel 3452: * parse an Enumerated attribute type.
1.22 daniel 3453: *
3454: * [57] EnumeratedType ::= NotationType | Enumeration
3455: *
3456: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3457: *
1.50 daniel 3458: *
1.66 daniel 3459: * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
1.22 daniel 3460: */
3461:
1.66 daniel 3462: int
3463: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.152 daniel 3464: if ((RAW == 'N') && (NXT(1) == 'O') &&
1.66 daniel 3465: (NXT(2) == 'T') && (NXT(3) == 'A') &&
3466: (NXT(4) == 'T') && (NXT(5) == 'I') &&
3467: (NXT(6) == 'O') && (NXT(7) == 'N')) {
3468: SKIP(8);
3469: if (!IS_BLANK(CUR)) {
1.230 veillard 3470: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.66 daniel 3471: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3472: ctxt->sax->error(ctxt->userData,
3473: "Space required after 'NOTATION'\n");
1.66 daniel 3474: ctxt->wellFormed = 0;
1.180 daniel 3475: ctxt->disableSAX = 1;
1.66 daniel 3476: return(0);
3477: }
3478: SKIP_BLANKS;
3479: *tree = xmlParseNotationType(ctxt);
3480: if (*tree == NULL) return(0);
3481: return(XML_ATTRIBUTE_NOTATION);
3482: }
3483: *tree = xmlParseEnumerationType(ctxt);
3484: if (*tree == NULL) return(0);
3485: return(XML_ATTRIBUTE_ENUMERATION);
1.22 daniel 3486: }
3487:
1.50 daniel 3488: /**
3489: * xmlParseAttributeType:
3490: * @ctxt: an XML parser context
1.66 daniel 3491: * @tree: the enumeration tree built while parsing
1.50 daniel 3492: *
1.59 daniel 3493: * parse the Attribute list def for an element
1.22 daniel 3494: *
3495: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3496: *
3497: * [55] StringType ::= 'CDATA'
3498: *
3499: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3500: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1.50 daniel 3501: *
1.102 daniel 3502: * Validity constraints for attribute values syntax are checked in
3503: * xmlValidateAttributeValue()
3504: *
1.99 daniel 3505: * [ VC: ID ]
1.117 daniel 3506: * Values of type ID must match the Name production. A name must not
1.99 daniel 3507: * appear more than once in an XML document as a value of this type;
3508: * i.e., ID values must uniquely identify the elements which bear them.
3509: *
3510: * [ VC: One ID per Element Type ]
1.117 daniel 3511: * No element type may have more than one ID attribute specified.
1.99 daniel 3512: *
3513: * [ VC: ID Attribute Default ]
1.117 daniel 3514: * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
1.99 daniel 3515: *
3516: * [ VC: IDREF ]
1.102 daniel 3517: * Values of type IDREF must match the Name production, and values
1.140 daniel 3518: * of type IDREFS must match Names; each IDREF Name must match the value
1.117 daniel 3519: * of an ID attribute on some element in the XML document; i.e. IDREF
1.99 daniel 3520: * values must match the value of some ID attribute.
3521: *
3522: * [ VC: Entity Name ]
1.102 daniel 3523: * Values of type ENTITY must match the Name production, values
1.140 daniel 3524: * of type ENTITIES must match Names; each Entity Name must match the
1.117 daniel 3525: * name of an unparsed entity declared in the DTD.
1.99 daniel 3526: *
3527: * [ VC: Name Token ]
1.102 daniel 3528: * Values of type NMTOKEN must match the Nmtoken production; values
1.99 daniel 3529: * of type NMTOKENS must match Nmtokens.
3530: *
1.69 daniel 3531: * Returns the attribute type
1.22 daniel 3532: */
1.59 daniel 3533: int
1.66 daniel 3534: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.91 daniel 3535: SHRINK;
1.152 daniel 3536: if ((RAW == 'C') && (NXT(1) == 'D') &&
1.40 daniel 3537: (NXT(2) == 'A') && (NXT(3) == 'T') &&
3538: (NXT(4) == 'A')) {
3539: SKIP(5);
1.66 daniel 3540: return(XML_ATTRIBUTE_CDATA);
1.152 daniel 3541: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.40 daniel 3542: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.97 daniel 3543: (NXT(4) == 'F') && (NXT(5) == 'S')) {
3544: SKIP(6);
3545: return(XML_ATTRIBUTE_IDREFS);
1.152 daniel 3546: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.97 daniel 3547: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.40 daniel 3548: (NXT(4) == 'F')) {
3549: SKIP(5);
1.59 daniel 3550: return(XML_ATTRIBUTE_IDREF);
1.152 daniel 3551: } else if ((RAW == 'I') && (NXT(1) == 'D')) {
1.66 daniel 3552: SKIP(2);
3553: return(XML_ATTRIBUTE_ID);
1.152 daniel 3554: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 3555: (NXT(2) == 'T') && (NXT(3) == 'I') &&
3556: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3557: SKIP(6);
1.59 daniel 3558: return(XML_ATTRIBUTE_ENTITY);
1.152 daniel 3559: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 3560: (NXT(2) == 'T') && (NXT(3) == 'I') &&
3561: (NXT(4) == 'T') && (NXT(5) == 'I') &&
3562: (NXT(6) == 'E') && (NXT(7) == 'S')) {
3563: SKIP(8);
1.59 daniel 3564: return(XML_ATTRIBUTE_ENTITIES);
1.152 daniel 3565: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.40 daniel 3566: (NXT(2) == 'T') && (NXT(3) == 'O') &&
3567: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.66 daniel 3568: (NXT(6) == 'N') && (NXT(7) == 'S')) {
3569: SKIP(8);
3570: return(XML_ATTRIBUTE_NMTOKENS);
1.152 daniel 3571: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.66 daniel 3572: (NXT(2) == 'T') && (NXT(3) == 'O') &&
3573: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.40 daniel 3574: (NXT(6) == 'N')) {
3575: SKIP(7);
1.59 daniel 3576: return(XML_ATTRIBUTE_NMTOKEN);
1.22 daniel 3577: }
1.66 daniel 3578: return(xmlParseEnumeratedType(ctxt, tree));
1.22 daniel 3579: }
3580:
1.50 daniel 3581: /**
3582: * xmlParseAttributeListDecl:
3583: * @ctxt: an XML parser context
3584: *
3585: * : parse the Attribute list def for an element
1.22 daniel 3586: *
3587: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3588: *
3589: * [53] AttDef ::= S Name S AttType S DefaultDecl
1.50 daniel 3590: *
1.22 daniel 3591: */
1.55 daniel 3592: void
3593: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 3594: xmlChar *elemName;
3595: xmlChar *attrName;
1.103 daniel 3596: xmlEnumerationPtr tree;
1.22 daniel 3597:
1.152 daniel 3598: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 3599: (NXT(2) == 'A') && (NXT(3) == 'T') &&
3600: (NXT(4) == 'T') && (NXT(5) == 'L') &&
3601: (NXT(6) == 'I') && (NXT(7) == 'S') &&
1.59 daniel 3602: (NXT(8) == 'T')) {
1.187 daniel 3603: xmlParserInputPtr input = ctxt->input;
3604:
1.40 daniel 3605: SKIP(9);
1.59 daniel 3606: if (!IS_BLANK(CUR)) {
1.230 veillard 3607: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3608: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3609: ctxt->sax->error(ctxt->userData,
3610: "Space required after '<!ATTLIST'\n");
1.59 daniel 3611: ctxt->wellFormed = 0;
1.180 daniel 3612: ctxt->disableSAX = 1;
1.59 daniel 3613: }
1.42 daniel 3614: SKIP_BLANKS;
1.59 daniel 3615: elemName = xmlParseName(ctxt);
3616: if (elemName == NULL) {
1.230 veillard 3617: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 3618: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3619: ctxt->sax->error(ctxt->userData,
3620: "ATTLIST: no name for Element\n");
1.59 daniel 3621: ctxt->wellFormed = 0;
1.180 daniel 3622: ctxt->disableSAX = 1;
1.22 daniel 3623: return;
3624: }
1.42 daniel 3625: SKIP_BLANKS;
1.220 veillard 3626: GROW;
1.152 daniel 3627: while (RAW != '>') {
1.123 daniel 3628: const xmlChar *check = CUR_PTR;
1.59 daniel 3629: int type;
3630: int def;
1.123 daniel 3631: xmlChar *defaultValue = NULL;
1.59 daniel 3632:
1.220 veillard 3633: GROW;
1.103 daniel 3634: tree = NULL;
1.59 daniel 3635: attrName = xmlParseName(ctxt);
3636: if (attrName == NULL) {
1.230 veillard 3637: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 3638: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3639: ctxt->sax->error(ctxt->userData,
3640: "ATTLIST: no name for Attribute\n");
1.59 daniel 3641: ctxt->wellFormed = 0;
1.180 daniel 3642: ctxt->disableSAX = 1;
1.59 daniel 3643: break;
3644: }
1.97 daniel 3645: GROW;
1.59 daniel 3646: if (!IS_BLANK(CUR)) {
1.230 veillard 3647: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3648: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3649: ctxt->sax->error(ctxt->userData,
1.59 daniel 3650: "Space required after the attribute name\n");
3651: ctxt->wellFormed = 0;
1.180 daniel 3652: ctxt->disableSAX = 1;
1.170 daniel 3653: if (attrName != NULL)
3654: xmlFree(attrName);
3655: if (defaultValue != NULL)
3656: xmlFree(defaultValue);
1.59 daniel 3657: break;
3658: }
3659: SKIP_BLANKS;
3660:
1.66 daniel 3661: type = xmlParseAttributeType(ctxt, &tree);
1.170 daniel 3662: if (type <= 0) {
3663: if (attrName != NULL)
3664: xmlFree(attrName);
3665: if (defaultValue != NULL)
3666: xmlFree(defaultValue);
3667: break;
3668: }
1.22 daniel 3669:
1.97 daniel 3670: GROW;
1.59 daniel 3671: if (!IS_BLANK(CUR)) {
1.230 veillard 3672: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3673: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3674: ctxt->sax->error(ctxt->userData,
1.59 daniel 3675: "Space required after the attribute type\n");
3676: ctxt->wellFormed = 0;
1.180 daniel 3677: ctxt->disableSAX = 1;
1.170 daniel 3678: if (attrName != NULL)
3679: xmlFree(attrName);
3680: if (defaultValue != NULL)
3681: xmlFree(defaultValue);
3682: if (tree != NULL)
3683: xmlFreeEnumeration(tree);
1.59 daniel 3684: break;
3685: }
1.42 daniel 3686: SKIP_BLANKS;
1.59 daniel 3687:
3688: def = xmlParseDefaultDecl(ctxt, &defaultValue);
1.170 daniel 3689: if (def <= 0) {
3690: if (attrName != NULL)
3691: xmlFree(attrName);
3692: if (defaultValue != NULL)
3693: xmlFree(defaultValue);
3694: if (tree != NULL)
3695: xmlFreeEnumeration(tree);
3696: break;
3697: }
1.59 daniel 3698:
1.97 daniel 3699: GROW;
1.152 daniel 3700: if (RAW != '>') {
1.59 daniel 3701: if (!IS_BLANK(CUR)) {
1.230 veillard 3702: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3703: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3704: ctxt->sax->error(ctxt->userData,
1.59 daniel 3705: "Space required after the attribute default value\n");
3706: ctxt->wellFormed = 0;
1.180 daniel 3707: ctxt->disableSAX = 1;
1.170 daniel 3708: if (attrName != NULL)
3709: xmlFree(attrName);
3710: if (defaultValue != NULL)
3711: xmlFree(defaultValue);
3712: if (tree != NULL)
3713: xmlFreeEnumeration(tree);
1.59 daniel 3714: break;
3715: }
3716: SKIP_BLANKS;
3717: }
1.40 daniel 3718: if (check == CUR_PTR) {
1.230 veillard 3719: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 3720: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3721: ctxt->sax->error(ctxt->userData,
1.59 daniel 3722: "xmlParseAttributeListDecl: detected internal error\n");
1.170 daniel 3723: if (attrName != NULL)
3724: xmlFree(attrName);
3725: if (defaultValue != NULL)
3726: xmlFree(defaultValue);
3727: if (tree != NULL)
3728: xmlFreeEnumeration(tree);
1.22 daniel 3729: break;
3730: }
1.171 daniel 3731: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3732: (ctxt->sax->attributeDecl != NULL))
1.74 daniel 3733: ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
1.66 daniel 3734: type, def, defaultValue, tree);
1.59 daniel 3735: if (attrName != NULL)
1.119 daniel 3736: xmlFree(attrName);
1.59 daniel 3737: if (defaultValue != NULL)
1.119 daniel 3738: xmlFree(defaultValue);
1.97 daniel 3739: GROW;
1.22 daniel 3740: }
1.187 daniel 3741: if (RAW == '>') {
3742: if (input != ctxt->input) {
1.230 veillard 3743: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 3744: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3745: ctxt->sax->error(ctxt->userData,
3746: "Attribute list declaration doesn't start and stop in the same entity\n");
3747: ctxt->wellFormed = 0;
3748: ctxt->disableSAX = 1;
3749: }
1.40 daniel 3750: NEXT;
1.187 daniel 3751: }
1.22 daniel 3752:
1.119 daniel 3753: xmlFree(elemName);
1.22 daniel 3754: }
3755: }
3756:
1.50 daniel 3757: /**
1.61 daniel 3758: * xmlParseElementMixedContentDecl:
3759: * @ctxt: an XML parser context
3760: *
3761: * parse the declaration for a Mixed Element content
3762: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3763: *
3764: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
3765: * '(' S? '#PCDATA' S? ')'
3766: *
1.99 daniel 3767: * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
3768: *
3769: * [ VC: No Duplicate Types ]
1.117 daniel 3770: * The same name must not appear more than once in a single
3771: * mixed-content declaration.
1.99 daniel 3772: *
1.61 daniel 3773: * returns: the list of the xmlElementContentPtr describing the element choices
3774: */
3775: xmlElementContentPtr
1.62 daniel 3776: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
1.64 daniel 3777: xmlElementContentPtr ret = NULL, cur = NULL, n;
1.123 daniel 3778: xmlChar *elem = NULL;
1.61 daniel 3779:
1.97 daniel 3780: GROW;
1.152 daniel 3781: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 3782: (NXT(2) == 'C') && (NXT(3) == 'D') &&
3783: (NXT(4) == 'A') && (NXT(5) == 'T') &&
3784: (NXT(6) == 'A')) {
3785: SKIP(7);
3786: SKIP_BLANKS;
1.91 daniel 3787: SHRINK;
1.152 daniel 3788: if (RAW == ')') {
1.187 daniel 3789: ctxt->entity = ctxt->input;
1.63 daniel 3790: NEXT;
3791: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
1.152 daniel 3792: if (RAW == '*') {
1.136 daniel 3793: ret->ocur = XML_ELEMENT_CONTENT_MULT;
3794: NEXT;
3795: }
1.63 daniel 3796: return(ret);
3797: }
1.152 daniel 3798: if ((RAW == '(') || (RAW == '|')) {
1.61 daniel 3799: ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3800: if (ret == NULL) return(NULL);
1.99 daniel 3801: }
1.152 daniel 3802: while (RAW == '|') {
1.64 daniel 3803: NEXT;
1.61 daniel 3804: if (elem == NULL) {
3805: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3806: if (ret == NULL) return(NULL);
3807: ret->c1 = cur;
1.64 daniel 3808: cur = ret;
1.61 daniel 3809: } else {
1.64 daniel 3810: n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3811: if (n == NULL) return(NULL);
3812: n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
3813: cur->c2 = n;
3814: cur = n;
1.119 daniel 3815: xmlFree(elem);
1.61 daniel 3816: }
3817: SKIP_BLANKS;
3818: elem = xmlParseName(ctxt);
3819: if (elem == NULL) {
1.230 veillard 3820: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.61 daniel 3821: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3822: ctxt->sax->error(ctxt->userData,
1.61 daniel 3823: "xmlParseElementMixedContentDecl : Name expected\n");
3824: ctxt->wellFormed = 0;
1.180 daniel 3825: ctxt->disableSAX = 1;
1.61 daniel 3826: xmlFreeElementContent(cur);
3827: return(NULL);
3828: }
3829: SKIP_BLANKS;
1.97 daniel 3830: GROW;
1.61 daniel 3831: }
1.152 daniel 3832: if ((RAW == ')') && (NXT(1) == '*')) {
1.66 daniel 3833: if (elem != NULL) {
1.61 daniel 3834: cur->c2 = xmlNewElementContent(elem,
3835: XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 3836: xmlFree(elem);
1.66 daniel 3837: }
1.65 daniel 3838: ret->ocur = XML_ELEMENT_CONTENT_MULT;
1.187 daniel 3839: ctxt->entity = ctxt->input;
1.64 daniel 3840: SKIP(2);
1.61 daniel 3841: } else {
1.119 daniel 3842: if (elem != NULL) xmlFree(elem);
1.230 veillard 3843: xmlFreeElementContent(ret);
3844: ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
1.61 daniel 3845: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3846: ctxt->sax->error(ctxt->userData,
1.63 daniel 3847: "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
1.61 daniel 3848: ctxt->wellFormed = 0;
1.180 daniel 3849: ctxt->disableSAX = 1;
1.61 daniel 3850: return(NULL);
3851: }
3852:
3853: } else {
1.230 veillard 3854: ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
1.61 daniel 3855: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3856: ctxt->sax->error(ctxt->userData,
1.61 daniel 3857: "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
3858: ctxt->wellFormed = 0;
1.180 daniel 3859: ctxt->disableSAX = 1;
1.61 daniel 3860: }
3861: return(ret);
3862: }
3863:
3864: /**
3865: * xmlParseElementChildrenContentDecl:
1.50 daniel 3866: * @ctxt: an XML parser context
3867: *
1.61 daniel 3868: * parse the declaration for a Mixed Element content
3869: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
1.22 daniel 3870: *
1.61 daniel 3871: *
1.22 daniel 3872: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
3873: *
3874: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
3875: *
3876: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
3877: *
3878: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
3879: *
1.99 daniel 3880: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
3881: * TODO Parameter-entity replacement text must be properly nested
3882: * with parenthetized groups. That is to say, if either of the
3883: * opening or closing parentheses in a choice, seq, or Mixed
3884: * construct is contained in the replacement text for a parameter
3885: * entity, both must be contained in the same replacement text. For
3886: * interoperability, if a parameter-entity reference appears in a
3887: * choice, seq, or Mixed construct, its replacement text should not
3888: * be empty, and neither the first nor last non-blank character of
3889: * the replacement text should be a connector (| or ,).
3890: *
1.62 daniel 3891: * returns: the tree of xmlElementContentPtr describing the element
1.61 daniel 3892: * hierarchy.
3893: */
3894: xmlElementContentPtr
1.246 ! veillard 3895: #ifdef VMS
! 3896: xmlParseElementChildrenContentD
! 3897: #else
! 3898: xmlParseElementChildrenContentDecl
! 3899: #endif
! 3900: (xmlParserCtxtPtr ctxt) {
1.63 daniel 3901: xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
1.123 daniel 3902: xmlChar *elem;
3903: xmlChar type = 0;
1.62 daniel 3904:
3905: SKIP_BLANKS;
1.94 daniel 3906: GROW;
1.152 daniel 3907: if (RAW == '(') {
1.63 daniel 3908: /* Recurse on first child */
1.62 daniel 3909: NEXT;
3910: SKIP_BLANKS;
3911: cur = ret = xmlParseElementChildrenContentDecl(ctxt);
3912: SKIP_BLANKS;
1.101 daniel 3913: GROW;
1.62 daniel 3914: } else {
3915: elem = xmlParseName(ctxt);
3916: if (elem == NULL) {
1.230 veillard 3917: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 3918: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3919: ctxt->sax->error(ctxt->userData,
1.62 daniel 3920: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
3921: ctxt->wellFormed = 0;
1.180 daniel 3922: ctxt->disableSAX = 1;
1.62 daniel 3923: return(NULL);
3924: }
3925: cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.101 daniel 3926: GROW;
1.152 daniel 3927: if (RAW == '?') {
1.104 daniel 3928: cur->ocur = XML_ELEMENT_CONTENT_OPT;
1.62 daniel 3929: NEXT;
1.152 daniel 3930: } else if (RAW == '*') {
1.104 daniel 3931: cur->ocur = XML_ELEMENT_CONTENT_MULT;
1.62 daniel 3932: NEXT;
1.152 daniel 3933: } else if (RAW == '+') {
1.104 daniel 3934: cur->ocur = XML_ELEMENT_CONTENT_PLUS;
1.62 daniel 3935: NEXT;
3936: } else {
1.104 daniel 3937: cur->ocur = XML_ELEMENT_CONTENT_ONCE;
1.62 daniel 3938: }
1.119 daniel 3939: xmlFree(elem);
1.101 daniel 3940: GROW;
1.62 daniel 3941: }
3942: SKIP_BLANKS;
1.91 daniel 3943: SHRINK;
1.152 daniel 3944: while (RAW != ')') {
1.63 daniel 3945: /*
3946: * Each loop we parse one separator and one element.
3947: */
1.152 daniel 3948: if (RAW == ',') {
1.62 daniel 3949: if (type == 0) type = CUR;
3950:
3951: /*
3952: * Detect "Name | Name , Name" error
3953: */
3954: else if (type != CUR) {
1.230 veillard 3955: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 3956: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3957: ctxt->sax->error(ctxt->userData,
1.62 daniel 3958: "xmlParseElementChildrenContentDecl : '%c' expected\n",
3959: type);
3960: ctxt->wellFormed = 0;
1.180 daniel 3961: ctxt->disableSAX = 1;
1.170 daniel 3962: if ((op != NULL) && (op != ret))
3963: xmlFreeElementContent(op);
1.211 veillard 3964: if ((last != NULL) && (last != ret) &&
3965: (last != ret->c1) && (last != ret->c2))
1.170 daniel 3966: xmlFreeElementContent(last);
3967: if (ret != NULL)
3968: xmlFreeElementContent(ret);
1.62 daniel 3969: return(NULL);
3970: }
1.64 daniel 3971: NEXT;
1.62 daniel 3972:
1.63 daniel 3973: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
3974: if (op == NULL) {
3975: xmlFreeElementContent(ret);
3976: return(NULL);
3977: }
3978: if (last == NULL) {
3979: op->c1 = ret;
1.65 daniel 3980: ret = cur = op;
1.63 daniel 3981: } else {
3982: cur->c2 = op;
3983: op->c1 = last;
3984: cur =op;
1.65 daniel 3985: last = NULL;
1.63 daniel 3986: }
1.152 daniel 3987: } else if (RAW == '|') {
1.62 daniel 3988: if (type == 0) type = CUR;
3989:
3990: /*
1.63 daniel 3991: * Detect "Name , Name | Name" error
1.62 daniel 3992: */
3993: else if (type != CUR) {
1.230 veillard 3994: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 3995: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3996: ctxt->sax->error(ctxt->userData,
1.62 daniel 3997: "xmlParseElementChildrenContentDecl : '%c' expected\n",
3998: type);
3999: ctxt->wellFormed = 0;
1.180 daniel 4000: ctxt->disableSAX = 1;
1.211 veillard 4001: if ((op != NULL) && (op != ret) && (op != last))
1.170 daniel 4002: xmlFreeElementContent(op);
1.211 veillard 4003: if ((last != NULL) && (last != ret) &&
4004: (last != ret->c1) && (last != ret->c2))
1.170 daniel 4005: xmlFreeElementContent(last);
4006: if (ret != NULL)
4007: xmlFreeElementContent(ret);
1.62 daniel 4008: return(NULL);
4009: }
1.64 daniel 4010: NEXT;
1.62 daniel 4011:
1.63 daniel 4012: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4013: if (op == NULL) {
1.170 daniel 4014: if ((op != NULL) && (op != ret))
4015: xmlFreeElementContent(op);
1.211 veillard 4016: if ((last != NULL) && (last != ret) &&
4017: (last != ret->c1) && (last != ret->c2))
1.170 daniel 4018: xmlFreeElementContent(last);
4019: if (ret != NULL)
4020: xmlFreeElementContent(ret);
1.63 daniel 4021: return(NULL);
4022: }
4023: if (last == NULL) {
4024: op->c1 = ret;
1.65 daniel 4025: ret = cur = op;
1.63 daniel 4026: } else {
4027: cur->c2 = op;
4028: op->c1 = last;
4029: cur =op;
1.65 daniel 4030: last = NULL;
1.63 daniel 4031: }
1.62 daniel 4032: } else {
1.230 veillard 4033: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
1.62 daniel 4034: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4035: ctxt->sax->error(ctxt->userData,
1.62 daniel 4036: "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4037: ctxt->wellFormed = 0;
1.180 daniel 4038: ctxt->disableSAX = 1;
1.170 daniel 4039: if ((op != NULL) && (op != ret))
4040: xmlFreeElementContent(op);
1.211 veillard 4041: if ((last != NULL) && (last != ret) &&
4042: (last != ret->c1) && (last != ret->c2))
1.170 daniel 4043: xmlFreeElementContent(last);
4044: if (ret != NULL)
4045: xmlFreeElementContent(ret);
1.62 daniel 4046: return(NULL);
4047: }
1.101 daniel 4048: GROW;
1.62 daniel 4049: SKIP_BLANKS;
1.101 daniel 4050: GROW;
1.152 daniel 4051: if (RAW == '(') {
1.63 daniel 4052: /* Recurse on second child */
1.62 daniel 4053: NEXT;
4054: SKIP_BLANKS;
1.65 daniel 4055: last = xmlParseElementChildrenContentDecl(ctxt);
1.62 daniel 4056: SKIP_BLANKS;
4057: } else {
4058: elem = xmlParseName(ctxt);
4059: if (elem == NULL) {
1.230 veillard 4060: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 4061: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4062: ctxt->sax->error(ctxt->userData,
1.122 daniel 4063: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.62 daniel 4064: ctxt->wellFormed = 0;
1.180 daniel 4065: ctxt->disableSAX = 1;
1.170 daniel 4066: if ((op != NULL) && (op != ret))
4067: xmlFreeElementContent(op);
1.211 veillard 4068: if ((last != NULL) && (last != ret) &&
4069: (last != ret->c1) && (last != ret->c2))
1.170 daniel 4070: xmlFreeElementContent(last);
4071: if (ret != NULL)
4072: xmlFreeElementContent(ret);
1.62 daniel 4073: return(NULL);
4074: }
1.65 daniel 4075: last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 4076: xmlFree(elem);
1.152 daniel 4077: if (RAW == '?') {
1.105 daniel 4078: last->ocur = XML_ELEMENT_CONTENT_OPT;
4079: NEXT;
1.152 daniel 4080: } else if (RAW == '*') {
1.105 daniel 4081: last->ocur = XML_ELEMENT_CONTENT_MULT;
4082: NEXT;
1.152 daniel 4083: } else if (RAW == '+') {
1.105 daniel 4084: last->ocur = XML_ELEMENT_CONTENT_PLUS;
4085: NEXT;
4086: } else {
4087: last->ocur = XML_ELEMENT_CONTENT_ONCE;
4088: }
1.63 daniel 4089: }
4090: SKIP_BLANKS;
1.97 daniel 4091: GROW;
1.64 daniel 4092: }
1.65 daniel 4093: if ((cur != NULL) && (last != NULL)) {
4094: cur->c2 = last;
1.62 daniel 4095: }
1.187 daniel 4096: ctxt->entity = ctxt->input;
1.62 daniel 4097: NEXT;
1.152 daniel 4098: if (RAW == '?') {
1.62 daniel 4099: ret->ocur = XML_ELEMENT_CONTENT_OPT;
4100: NEXT;
1.152 daniel 4101: } else if (RAW == '*') {
1.62 daniel 4102: ret->ocur = XML_ELEMENT_CONTENT_MULT;
4103: NEXT;
1.152 daniel 4104: } else if (RAW == '+') {
1.62 daniel 4105: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
4106: NEXT;
4107: }
4108: return(ret);
1.61 daniel 4109: }
4110:
4111: /**
4112: * xmlParseElementContentDecl:
4113: * @ctxt: an XML parser context
4114: * @name: the name of the element being defined.
4115: * @result: the Element Content pointer will be stored here if any
1.22 daniel 4116: *
1.61 daniel 4117: * parse the declaration for an Element content either Mixed or Children,
4118: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4119: *
4120: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1.50 daniel 4121: *
1.61 daniel 4122: * returns: the type of element content XML_ELEMENT_TYPE_xxx
1.22 daniel 4123: */
4124:
1.61 daniel 4125: int
1.123 daniel 4126: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
1.61 daniel 4127: xmlElementContentPtr *result) {
4128:
4129: xmlElementContentPtr tree = NULL;
1.187 daniel 4130: xmlParserInputPtr input = ctxt->input;
1.61 daniel 4131: int res;
4132:
4133: *result = NULL;
4134:
1.152 daniel 4135: if (RAW != '(') {
1.230 veillard 4136: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.61 daniel 4137: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4138: ctxt->sax->error(ctxt->userData,
1.61 daniel 4139: "xmlParseElementContentDecl : '(' expected\n");
4140: ctxt->wellFormed = 0;
1.180 daniel 4141: ctxt->disableSAX = 1;
1.61 daniel 4142: return(-1);
4143: }
4144: NEXT;
1.97 daniel 4145: GROW;
1.61 daniel 4146: SKIP_BLANKS;
1.152 daniel 4147: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 4148: (NXT(2) == 'C') && (NXT(3) == 'D') &&
4149: (NXT(4) == 'A') && (NXT(5) == 'T') &&
4150: (NXT(6) == 'A')) {
1.62 daniel 4151: tree = xmlParseElementMixedContentDecl(ctxt);
1.61 daniel 4152: res = XML_ELEMENT_TYPE_MIXED;
4153: } else {
1.62 daniel 4154: tree = xmlParseElementChildrenContentDecl(ctxt);
1.61 daniel 4155: res = XML_ELEMENT_TYPE_ELEMENT;
4156: }
1.187 daniel 4157: if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
1.230 veillard 4158: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 4159: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4160: ctxt->sax->error(ctxt->userData,
4161: "Element content declaration doesn't start and stop in the same entity\n");
4162: ctxt->wellFormed = 0;
4163: ctxt->disableSAX = 1;
4164: }
1.61 daniel 4165: SKIP_BLANKS;
1.63 daniel 4166: *result = tree;
1.61 daniel 4167: return(res);
1.22 daniel 4168: }
4169:
1.50 daniel 4170: /**
4171: * xmlParseElementDecl:
4172: * @ctxt: an XML parser context
4173: *
4174: * parse an Element declaration.
1.22 daniel 4175: *
4176: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4177: *
1.99 daniel 4178: * [ VC: Unique Element Type Declaration ]
1.117 daniel 4179: * No element type may be declared more than once
1.69 daniel 4180: *
4181: * Returns the type of the element, or -1 in case of error
1.22 daniel 4182: */
1.59 daniel 4183: int
1.55 daniel 4184: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4185: xmlChar *name;
1.59 daniel 4186: int ret = -1;
1.61 daniel 4187: xmlElementContentPtr content = NULL;
1.22 daniel 4188:
1.97 daniel 4189: GROW;
1.152 daniel 4190: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4191: (NXT(2) == 'E') && (NXT(3) == 'L') &&
4192: (NXT(4) == 'E') && (NXT(5) == 'M') &&
4193: (NXT(6) == 'E') && (NXT(7) == 'N') &&
1.59 daniel 4194: (NXT(8) == 'T')) {
1.187 daniel 4195: xmlParserInputPtr input = ctxt->input;
4196:
1.40 daniel 4197: SKIP(9);
1.59 daniel 4198: if (!IS_BLANK(CUR)) {
1.230 veillard 4199: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4200: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4201: ctxt->sax->error(ctxt->userData,
1.59 daniel 4202: "Space required after 'ELEMENT'\n");
4203: ctxt->wellFormed = 0;
1.180 daniel 4204: ctxt->disableSAX = 1;
1.59 daniel 4205: }
1.42 daniel 4206: SKIP_BLANKS;
1.22 daniel 4207: name = xmlParseName(ctxt);
4208: if (name == NULL) {
1.230 veillard 4209: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 4210: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4211: ctxt->sax->error(ctxt->userData,
1.59 daniel 4212: "xmlParseElementDecl: no name for Element\n");
4213: ctxt->wellFormed = 0;
1.180 daniel 4214: ctxt->disableSAX = 1;
1.59 daniel 4215: return(-1);
4216: }
4217: if (!IS_BLANK(CUR)) {
1.230 veillard 4218: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4219: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4220: ctxt->sax->error(ctxt->userData,
1.59 daniel 4221: "Space required after the element name\n");
4222: ctxt->wellFormed = 0;
1.180 daniel 4223: ctxt->disableSAX = 1;
1.22 daniel 4224: }
1.42 daniel 4225: SKIP_BLANKS;
1.152 daniel 4226: if ((RAW == 'E') && (NXT(1) == 'M') &&
1.40 daniel 4227: (NXT(2) == 'P') && (NXT(3) == 'T') &&
4228: (NXT(4) == 'Y')) {
4229: SKIP(5);
1.22 daniel 4230: /*
4231: * Element must always be empty.
4232: */
1.59 daniel 4233: ret = XML_ELEMENT_TYPE_EMPTY;
1.152 daniel 4234: } else if ((RAW == 'A') && (NXT(1) == 'N') &&
1.40 daniel 4235: (NXT(2) == 'Y')) {
4236: SKIP(3);
1.22 daniel 4237: /*
4238: * Element is a generic container.
4239: */
1.59 daniel 4240: ret = XML_ELEMENT_TYPE_ANY;
1.152 daniel 4241: } else if (RAW == '(') {
1.61 daniel 4242: ret = xmlParseElementContentDecl(ctxt, name, &content);
1.22 daniel 4243: } else {
1.98 daniel 4244: /*
4245: * [ WFC: PEs in Internal Subset ] error handling.
4246: */
1.152 daniel 4247: if ((RAW == '%') && (ctxt->external == 0) &&
1.98 daniel 4248: (ctxt->inputNr == 1)) {
1.230 veillard 4249: ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
1.98 daniel 4250: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4251: ctxt->sax->error(ctxt->userData,
4252: "PEReference: forbidden within markup decl in internal subset\n");
4253: } else {
1.230 veillard 4254: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.98 daniel 4255: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4256: ctxt->sax->error(ctxt->userData,
4257: "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4258: }
1.61 daniel 4259: ctxt->wellFormed = 0;
1.180 daniel 4260: ctxt->disableSAX = 1;
1.119 daniel 4261: if (name != NULL) xmlFree(name);
1.61 daniel 4262: return(-1);
1.22 daniel 4263: }
1.142 daniel 4264:
4265: SKIP_BLANKS;
4266: /*
4267: * Pop-up of finished entities.
4268: */
1.152 daniel 4269: while ((RAW == 0) && (ctxt->inputNr > 1))
1.142 daniel 4270: xmlPopInput(ctxt);
1.42 daniel 4271: SKIP_BLANKS;
1.142 daniel 4272:
1.152 daniel 4273: if (RAW != '>') {
1.230 veillard 4274: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.55 daniel 4275: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4276: ctxt->sax->error(ctxt->userData,
1.31 daniel 4277: "xmlParseElementDecl: expected '>' at the end\n");
1.59 daniel 4278: ctxt->wellFormed = 0;
1.180 daniel 4279: ctxt->disableSAX = 1;
1.61 daniel 4280: } else {
1.187 daniel 4281: if (input != ctxt->input) {
1.230 veillard 4282: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 4283: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4284: ctxt->sax->error(ctxt->userData,
4285: "Element declaration doesn't start and stop in the same entity\n");
4286: ctxt->wellFormed = 0;
4287: ctxt->disableSAX = 1;
4288: }
4289:
1.40 daniel 4290: NEXT;
1.171 daniel 4291: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4292: (ctxt->sax->elementDecl != NULL))
1.76 daniel 4293: ctxt->sax->elementDecl(ctxt->userData, name, ret,
4294: content);
1.61 daniel 4295: }
1.84 daniel 4296: if (content != NULL) {
4297: xmlFreeElementContent(content);
4298: }
1.61 daniel 4299: if (name != NULL) {
1.119 daniel 4300: xmlFree(name);
1.61 daniel 4301: }
1.22 daniel 4302: }
1.59 daniel 4303: return(ret);
1.22 daniel 4304: }
4305:
1.50 daniel 4306: /**
4307: * xmlParseMarkupDecl:
4308: * @ctxt: an XML parser context
4309: *
4310: * parse Markup declarations
1.22 daniel 4311: *
4312: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4313: * NotationDecl | PI | Comment
4314: *
1.98 daniel 4315: * [ VC: Proper Declaration/PE Nesting ]
1.229 veillard 4316: * Parameter-entity replacement text must be properly nested with
1.98 daniel 4317: * markup declarations. That is to say, if either the first character
4318: * or the last character of a markup declaration (markupdecl above) is
4319: * contained in the replacement text for a parameter-entity reference,
4320: * both must be contained in the same replacement text.
4321: *
4322: * [ WFC: PEs in Internal Subset ]
4323: * In the internal DTD subset, parameter-entity references can occur
4324: * only where markup declarations can occur, not within markup declarations.
4325: * (This does not apply to references that occur in external parameter
4326: * entities or to the external subset.)
1.22 daniel 4327: */
1.55 daniel 4328: void
4329: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1.94 daniel 4330: GROW;
1.22 daniel 4331: xmlParseElementDecl(ctxt);
4332: xmlParseAttributeListDecl(ctxt);
4333: xmlParseEntityDecl(ctxt);
4334: xmlParseNotationDecl(ctxt);
4335: xmlParsePI(ctxt);
1.114 daniel 4336: xmlParseComment(ctxt);
1.98 daniel 4337: /*
4338: * This is only for internal subset. On external entities,
4339: * the replacement is done before parsing stage
4340: */
4341: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4342: xmlParsePEReference(ctxt);
1.97 daniel 4343: ctxt->instate = XML_PARSER_DTD;
1.22 daniel 4344: }
4345:
1.50 daniel 4346: /**
1.76 daniel 4347: * xmlParseTextDecl:
4348: * @ctxt: an XML parser context
4349: *
4350: * parse an XML declaration header for external entities
4351: *
4352: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
1.176 daniel 4353: *
4354: * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
1.76 daniel 4355: */
4356:
1.172 daniel 4357: void
1.76 daniel 4358: xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4359: xmlChar *version;
1.76 daniel 4360:
4361: /*
4362: * We know that '<?xml' is here.
4363: */
1.193 daniel 4364: if ((RAW == '<') && (NXT(1) == '?') &&
4365: (NXT(2) == 'x') && (NXT(3) == 'm') &&
4366: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4367: SKIP(5);
4368: } else {
1.230 veillard 4369: ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
1.193 daniel 4370: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4371: ctxt->sax->error(ctxt->userData,
4372: "Text declaration '<?xml' required\n");
4373: ctxt->wellFormed = 0;
4374: ctxt->disableSAX = 1;
4375:
4376: return;
4377: }
1.76 daniel 4378:
4379: if (!IS_BLANK(CUR)) {
1.230 veillard 4380: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 4381: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4382: ctxt->sax->error(ctxt->userData,
4383: "Space needed after '<?xml'\n");
1.76 daniel 4384: ctxt->wellFormed = 0;
1.180 daniel 4385: ctxt->disableSAX = 1;
1.76 daniel 4386: }
4387: SKIP_BLANKS;
4388:
4389: /*
4390: * We may have the VersionInfo here.
4391: */
4392: version = xmlParseVersionInfo(ctxt);
4393: if (version == NULL)
4394: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.172 daniel 4395: ctxt->input->version = version;
1.76 daniel 4396:
4397: /*
4398: * We must have the encoding declaration
4399: */
4400: if (!IS_BLANK(CUR)) {
1.230 veillard 4401: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 4402: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4403: ctxt->sax->error(ctxt->userData, "Space needed here\n");
1.76 daniel 4404: ctxt->wellFormed = 0;
1.180 daniel 4405: ctxt->disableSAX = 1;
1.76 daniel 4406: }
1.195 daniel 4407: xmlParseEncodingDecl(ctxt);
1.193 daniel 4408: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4409: /*
4410: * The XML REC instructs us to stop parsing right here
4411: */
4412: return;
4413: }
1.76 daniel 4414:
4415: SKIP_BLANKS;
1.152 daniel 4416: if ((RAW == '?') && (NXT(1) == '>')) {
1.76 daniel 4417: SKIP(2);
1.152 daniel 4418: } else if (RAW == '>') {
1.76 daniel 4419: /* Deprecated old WD ... */
1.230 veillard 4420: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 4421: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4422: ctxt->sax->error(ctxt->userData,
4423: "XML declaration must end-up with '?>'\n");
1.76 daniel 4424: ctxt->wellFormed = 0;
1.180 daniel 4425: ctxt->disableSAX = 1;
1.76 daniel 4426: NEXT;
4427: } else {
1.230 veillard 4428: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 4429: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4430: ctxt->sax->error(ctxt->userData,
4431: "parsing XML declaration: '?>' expected\n");
1.76 daniel 4432: ctxt->wellFormed = 0;
1.180 daniel 4433: ctxt->disableSAX = 1;
1.76 daniel 4434: MOVETO_ENDTAG(CUR_PTR);
4435: NEXT;
4436: }
4437: }
4438:
4439: /*
4440: * xmlParseConditionalSections
4441: * @ctxt: an XML parser context
4442: *
4443: * [61] conditionalSect ::= includeSect | ignoreSect
4444: * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4445: * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4446: * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4447: * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4448: */
4449:
4450: void
4451: xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
1.165 daniel 4452: SKIP(3);
4453: SKIP_BLANKS;
1.168 daniel 4454: if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4455: (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4456: (NXT(6) == 'E')) {
1.165 daniel 4457: SKIP(7);
1.168 daniel 4458: SKIP_BLANKS;
4459: if (RAW != '[') {
1.230 veillard 4460: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
1.168 daniel 4461: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4462: ctxt->sax->error(ctxt->userData,
4463: "XML conditional section '[' expected\n");
4464: ctxt->wellFormed = 0;
1.180 daniel 4465: ctxt->disableSAX = 1;
1.168 daniel 4466: } else {
4467: NEXT;
4468: }
1.220 veillard 4469: if (xmlParserDebugEntities) {
4470: if ((ctxt->input != NULL) && (ctxt->input->filename))
1.241 veillard 4471: xmlGenericError(xmlGenericErrorContext,
4472: "%s(%d): ", ctxt->input->filename,
1.220 veillard 4473: ctxt->input->line);
1.241 veillard 4474: xmlGenericError(xmlGenericErrorContext,
4475: "Entering INCLUDE Conditional Section\n");
1.220 veillard 4476: }
4477:
1.165 daniel 4478: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4479: (NXT(2) != '>'))) {
4480: const xmlChar *check = CUR_PTR;
4481: int cons = ctxt->input->consumed;
4482: int tok = ctxt->token;
4483:
4484: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4485: xmlParseConditionalSections(ctxt);
4486: } else if (IS_BLANK(CUR)) {
4487: NEXT;
4488: } else if (RAW == '%') {
4489: xmlParsePEReference(ctxt);
4490: } else
4491: xmlParseMarkupDecl(ctxt);
4492:
4493: /*
4494: * Pop-up of finished entities.
4495: */
4496: while ((RAW == 0) && (ctxt->inputNr > 1))
4497: xmlPopInput(ctxt);
4498:
4499: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4500: (tok == ctxt->token)) {
1.230 veillard 4501: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.165 daniel 4502: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4503: ctxt->sax->error(ctxt->userData,
4504: "Content error in the external subset\n");
4505: ctxt->wellFormed = 0;
1.180 daniel 4506: ctxt->disableSAX = 1;
1.165 daniel 4507: break;
4508: }
4509: }
1.220 veillard 4510: if (xmlParserDebugEntities) {
4511: if ((ctxt->input != NULL) && (ctxt->input->filename))
1.241 veillard 4512: xmlGenericError(xmlGenericErrorContext,
4513: "%s(%d): ", ctxt->input->filename,
1.220 veillard 4514: ctxt->input->line);
1.241 veillard 4515: xmlGenericError(xmlGenericErrorContext,
4516: "Leaving INCLUDE Conditional Section\n");
1.220 veillard 4517: }
4518:
1.168 daniel 4519: } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4520: (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
1.171 daniel 4521: int state;
1.245 veillard 4522: int instate;
4523: int depth = 0;
1.171 daniel 4524:
1.168 daniel 4525: SKIP(6);
4526: SKIP_BLANKS;
4527: if (RAW != '[') {
1.230 veillard 4528: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
1.168 daniel 4529: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4530: ctxt->sax->error(ctxt->userData,
4531: "XML conditional section '[' expected\n");
4532: ctxt->wellFormed = 0;
1.180 daniel 4533: ctxt->disableSAX = 1;
1.168 daniel 4534: } else {
4535: NEXT;
4536: }
1.220 veillard 4537: if (xmlParserDebugEntities) {
4538: if ((ctxt->input != NULL) && (ctxt->input->filename))
1.241 veillard 4539: xmlGenericError(xmlGenericErrorContext,
4540: "%s(%d): ", ctxt->input->filename,
1.220 veillard 4541: ctxt->input->line);
1.241 veillard 4542: xmlGenericError(xmlGenericErrorContext,
4543: "Entering IGNORE Conditional Section\n");
1.220 veillard 4544: }
1.171 daniel 4545:
1.143 daniel 4546: /*
1.171 daniel 4547: * Parse up to the end of the conditionnal section
4548: * But disable SAX event generating DTD building in the meantime
1.143 daniel 4549: */
1.171 daniel 4550: state = ctxt->disableSAX;
1.245 veillard 4551: instate = ctxt->instate;
1.220 veillard 4552: ctxt->disableSAX = 1;
1.245 veillard 4553: ctxt->instate = XML_PARSER_IGNORE;
1.171 daniel 4554:
1.245 veillard 4555: while (depth >= 0) {
4556: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4557: depth++;
4558: SKIP(3);
4559: continue;
4560: }
4561: if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4562: if (--depth >= 0) SKIP(3);
4563: continue;
4564: }
4565: NEXT;
4566: continue;
4567: }
1.171 daniel 4568:
1.245 veillard 4569: ctxt->disableSAX = state;
4570: ctxt->instate = instate;
1.143 daniel 4571:
1.220 veillard 4572: if (xmlParserDebugEntities) {
4573: if ((ctxt->input != NULL) && (ctxt->input->filename))
1.241 veillard 4574: xmlGenericError(xmlGenericErrorContext,
4575: "%s(%d): ", ctxt->input->filename,
1.220 veillard 4576: ctxt->input->line);
1.241 veillard 4577: xmlGenericError(xmlGenericErrorContext,
4578: "Leaving IGNORE Conditional Section\n");
1.220 veillard 4579: }
4580:
1.168 daniel 4581: } else {
1.230 veillard 4582: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
1.168 daniel 4583: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4584: ctxt->sax->error(ctxt->userData,
4585: "XML conditional section INCLUDE or IGNORE keyword expected\n");
4586: ctxt->wellFormed = 0;
1.180 daniel 4587: ctxt->disableSAX = 1;
1.143 daniel 4588: }
4589:
1.152 daniel 4590: if (RAW == 0)
1.143 daniel 4591: SHRINK;
4592:
1.152 daniel 4593: if (RAW == 0) {
1.230 veillard 4594: ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
1.76 daniel 4595: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4596: ctxt->sax->error(ctxt->userData,
4597: "XML conditional section not closed\n");
4598: ctxt->wellFormed = 0;
1.180 daniel 4599: ctxt->disableSAX = 1;
1.143 daniel 4600: } else {
4601: SKIP(3);
1.76 daniel 4602: }
4603: }
4604:
4605: /**
1.124 daniel 4606: * xmlParseExternalSubset:
1.76 daniel 4607: * @ctxt: an XML parser context
1.124 daniel 4608: * @ExternalID: the external identifier
4609: * @SystemID: the system identifier (or URL)
1.76 daniel 4610: *
4611: * parse Markup declarations from an external subset
4612: *
4613: * [30] extSubset ::= textDecl? extSubsetDecl
4614: *
4615: * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
4616: */
4617: void
1.123 daniel 4618: xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
4619: const xmlChar *SystemID) {
1.132 daniel 4620: GROW;
1.152 daniel 4621: if ((RAW == '<') && (NXT(1) == '?') &&
1.76 daniel 4622: (NXT(2) == 'x') && (NXT(3) == 'm') &&
4623: (NXT(4) == 'l')) {
1.172 daniel 4624: xmlParseTextDecl(ctxt);
1.193 daniel 4625: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4626: /*
4627: * The XML REC instructs us to stop parsing right here
4628: */
4629: ctxt->instate = XML_PARSER_EOF;
4630: return;
4631: }
1.76 daniel 4632: }
1.79 daniel 4633: if (ctxt->myDoc == NULL) {
1.116 daniel 4634: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
1.79 daniel 4635: }
4636: if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4637: xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4638:
1.96 daniel 4639: ctxt->instate = XML_PARSER_DTD;
1.101 daniel 4640: ctxt->external = 1;
1.152 daniel 4641: while (((RAW == '<') && (NXT(1) == '?')) ||
4642: ((RAW == '<') && (NXT(1) == '!')) ||
1.164 daniel 4643: IS_BLANK(CUR)) {
1.123 daniel 4644: const xmlChar *check = CUR_PTR;
1.115 daniel 4645: int cons = ctxt->input->consumed;
1.164 daniel 4646: int tok = ctxt->token;
1.115 daniel 4647:
1.221 veillard 4648: GROW;
1.152 daniel 4649: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
1.76 daniel 4650: xmlParseConditionalSections(ctxt);
4651: } else if (IS_BLANK(CUR)) {
4652: NEXT;
1.152 daniel 4653: } else if (RAW == '%') {
1.76 daniel 4654: xmlParsePEReference(ctxt);
4655: } else
4656: xmlParseMarkupDecl(ctxt);
1.77 daniel 4657:
4658: /*
4659: * Pop-up of finished entities.
4660: */
1.166 daniel 4661: while ((RAW == 0) && (ctxt->inputNr > 1))
1.77 daniel 4662: xmlPopInput(ctxt);
4663:
1.164 daniel 4664: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4665: (tok == ctxt->token)) {
1.230 veillard 4666: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.115 daniel 4667: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4668: ctxt->sax->error(ctxt->userData,
4669: "Content error in the external subset\n");
4670: ctxt->wellFormed = 0;
1.180 daniel 4671: ctxt->disableSAX = 1;
1.115 daniel 4672: break;
4673: }
1.76 daniel 4674: }
4675:
1.152 daniel 4676: if (RAW != 0) {
1.230 veillard 4677: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.76 daniel 4678: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4679: ctxt->sax->error(ctxt->userData,
4680: "Extra content at the end of the document\n");
4681: ctxt->wellFormed = 0;
1.180 daniel 4682: ctxt->disableSAX = 1;
1.76 daniel 4683: }
4684:
4685: }
4686:
4687: /**
1.77 daniel 4688: * xmlParseReference:
4689: * @ctxt: an XML parser context
4690: *
4691: * parse and handle entity references in content, depending on the SAX
4692: * interface, this may end-up in a call to character() if this is a
1.79 daniel 4693: * CharRef, a predefined entity, if there is no reference() callback.
4694: * or if the parser was asked to switch to that mode.
1.77 daniel 4695: *
4696: * [67] Reference ::= EntityRef | CharRef
4697: */
4698: void
4699: xmlParseReference(xmlParserCtxtPtr ctxt) {
4700: xmlEntityPtr ent;
1.123 daniel 4701: xmlChar *val;
1.152 daniel 4702: if (RAW != '&') return;
1.77 daniel 4703:
4704: if (NXT(1) == '#') {
1.152 daniel 4705: int i = 0;
1.153 daniel 4706: xmlChar out[10];
4707: int hex = NXT(2);
1.77 daniel 4708: int val = xmlParseCharRef(ctxt);
1.152 daniel 4709:
1.198 daniel 4710: if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
1.153 daniel 4711: /*
4712: * So we are using non-UTF-8 buffers
4713: * Check that the char fit on 8bits, if not
4714: * generate a CharRef.
4715: */
4716: if (val <= 0xFF) {
4717: out[0] = val;
4718: out[1] = 0;
1.171 daniel 4719: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4720: (!ctxt->disableSAX))
1.153 daniel 4721: ctxt->sax->characters(ctxt->userData, out, 1);
4722: } else {
4723: if ((hex == 'x') || (hex == 'X'))
4724: sprintf((char *)out, "#x%X", val);
4725: else
4726: sprintf((char *)out, "#%d", val);
1.171 daniel 4727: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4728: (!ctxt->disableSAX))
1.153 daniel 4729: ctxt->sax->reference(ctxt->userData, out);
4730: }
4731: } else {
4732: /*
4733: * Just encode the value in UTF-8
4734: */
4735: COPY_BUF(0 ,out, i, val);
4736: out[i] = 0;
1.171 daniel 4737: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4738: (!ctxt->disableSAX))
1.153 daniel 4739: ctxt->sax->characters(ctxt->userData, out, i);
4740: }
1.77 daniel 4741: } else {
4742: ent = xmlParseEntityRef(ctxt);
4743: if (ent == NULL) return;
4744: if ((ent->name != NULL) &&
1.159 daniel 4745: (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
1.180 daniel 4746: xmlNodePtr list = NULL;
4747: int ret;
4748:
4749:
4750: /*
4751: * The first reference to the entity trigger a parsing phase
4752: * where the ent->children is filled with the result from
4753: * the parsing.
4754: */
4755: if (ent->children == NULL) {
4756: xmlChar *value;
4757: value = ent->content;
4758:
4759: /*
4760: * Check that this entity is well formed
4761: */
4762: if ((value != NULL) &&
4763: (value[1] == 0) && (value[0] == '<') &&
1.236 veillard 4764: (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
1.180 daniel 4765: /*
1.222 veillard 4766: * DONE: get definite answer on this !!!
1.180 daniel 4767: * Lots of entity decls are used to declare a single
4768: * char
4769: * <!ENTITY lt "<">
4770: * Which seems to be valid since
4771: * 2.4: The ampersand character (&) and the left angle
4772: * bracket (<) may appear in their literal form only
4773: * when used ... They are also legal within the literal
4774: * entity value of an internal entity declaration;i
4775: * see "4.3.2 Well-Formed Parsed Entities".
4776: * IMHO 2.4 and 4.3.2 are directly in contradiction.
4777: * Looking at the OASIS test suite and James Clark
4778: * tests, this is broken. However the XML REC uses
4779: * it. Is the XML REC not well-formed ????
4780: * This is a hack to avoid this problem
1.222 veillard 4781: *
4782: * ANSWER: since lt gt amp .. are already defined,
4783: * this is a redefinition and hence the fact that the
4784: * contentis not well balanced is not a Wf error, this
4785: * is lousy but acceptable.
1.180 daniel 4786: */
4787: list = xmlNewDocText(ctxt->myDoc, value);
4788: if (list != NULL) {
4789: if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4790: (ent->children == NULL)) {
4791: ent->children = list;
4792: ent->last = list;
4793: list->parent = (xmlNodePtr) ent;
4794: } else {
4795: xmlFreeNodeList(list);
4796: }
4797: } else if (list != NULL) {
4798: xmlFreeNodeList(list);
4799: }
1.181 daniel 4800: } else {
1.180 daniel 4801: /*
4802: * 4.3.2: An internal general parsed entity is well-formed
4803: * if its replacement text matches the production labeled
4804: * content.
4805: */
1.185 daniel 4806: if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
4807: ctxt->depth++;
1.180 daniel 4808: ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
1.185 daniel 4809: ctxt->sax, NULL, ctxt->depth,
4810: value, &list);
4811: ctxt->depth--;
4812: } else if (ent->etype ==
4813: XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
4814: ctxt->depth++;
1.180 daniel 4815: ret = xmlParseExternalEntity(ctxt->myDoc,
1.185 daniel 4816: ctxt->sax, NULL, ctxt->depth,
1.228 veillard 4817: ent->URI, ent->ExternalID, &list);
1.185 daniel 4818: ctxt->depth--;
4819: } else {
1.180 daniel 4820: ret = -1;
4821: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4822: ctxt->sax->error(ctxt->userData,
4823: "Internal: invalid entity type\n");
4824: }
1.185 daniel 4825: if (ret == XML_ERR_ENTITY_LOOP) {
1.230 veillard 4826: ctxt->errNo = XML_ERR_ENTITY_LOOP;
1.185 daniel 4827: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4828: ctxt->sax->error(ctxt->userData,
4829: "Detected entity reference loop\n");
4830: ctxt->wellFormed = 0;
4831: ctxt->disableSAX = 1;
4832: } else if ((ret == 0) && (list != NULL)) {
1.180 daniel 4833: if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4834: (ent->children == NULL)) {
4835: ent->children = list;
4836: while (list != NULL) {
4837: list->parent = (xmlNodePtr) ent;
4838: if (list->next == NULL)
4839: ent->last = list;
4840: list = list->next;
4841: }
4842: } else {
4843: xmlFreeNodeList(list);
4844: }
4845: } else if (ret > 0) {
1.230 veillard 4846: ctxt->errNo = ret;
1.180 daniel 4847: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4848: ctxt->sax->error(ctxt->userData,
4849: "Entity value required\n");
4850: ctxt->wellFormed = 0;
4851: ctxt->disableSAX = 1;
4852: } else if (list != NULL) {
4853: xmlFreeNodeList(list);
4854: }
4855: }
4856: }
1.113 daniel 4857: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
1.171 daniel 4858: (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
1.113 daniel 4859: /*
4860: * Create a node.
4861: */
4862: ctxt->sax->reference(ctxt->userData, ent->name);
4863: return;
4864: } else if (ctxt->replaceEntities) {
1.222 veillard 4865: if ((ctxt->node != NULL) && (ent->children != NULL)) {
4866: /*
4867: * Seems we are generating the DOM content, do
4868: * a simple tree copy
4869: */
4870: xmlNodePtr new;
4871: new = xmlCopyNodeList(ent->children);
4872:
4873: xmlAddChildList(ctxt->node, new);
4874: /*
4875: * This is to avoid a nasty side effect, see
4876: * characters() in SAX.c
4877: */
4878: ctxt->nodemem = 0;
4879: ctxt->nodelen = 0;
4880: return;
4881: } else {
4882: /*
4883: * Probably running in SAX mode
4884: */
4885: xmlParserInputPtr input;
1.79 daniel 4886:
1.222 veillard 4887: input = xmlNewEntityInputStream(ctxt, ent);
4888: xmlPushInput(ctxt, input);
4889: if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
4890: (RAW == '<') && (NXT(1) == '?') &&
4891: (NXT(2) == 'x') && (NXT(3) == 'm') &&
4892: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4893: xmlParseTextDecl(ctxt);
4894: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4895: /*
4896: * The XML REC instructs us to stop parsing right here
4897: */
4898: ctxt->instate = XML_PARSER_EOF;
4899: return;
4900: }
4901: if (input->standalone == 1) {
1.230 veillard 4902: ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
1.222 veillard 4903: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4904: ctxt->sax->error(ctxt->userData,
4905: "external parsed entities cannot be standalone\n");
4906: ctxt->wellFormed = 0;
4907: ctxt->disableSAX = 1;
4908: }
1.167 daniel 4909: }
1.222 veillard 4910: return;
1.167 daniel 4911: }
1.113 daniel 4912: }
1.222 veillard 4913: } else {
4914: val = ent->content;
4915: if (val == NULL) return;
4916: /*
4917: * inline the entity.
4918: */
4919: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4920: (!ctxt->disableSAX))
4921: ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
1.77 daniel 4922: }
4923: }
1.24 daniel 4924: }
4925:
1.50 daniel 4926: /**
4927: * xmlParseEntityRef:
4928: * @ctxt: an XML parser context
4929: *
4930: * parse ENTITY references declarations
1.24 daniel 4931: *
4932: * [68] EntityRef ::= '&' Name ';'
1.68 daniel 4933: *
1.98 daniel 4934: * [ WFC: Entity Declared ]
4935: * In a document without any DTD, a document with only an internal DTD
4936: * subset which contains no parameter entity references, or a document
4937: * with "standalone='yes'", the Name given in the entity reference
4938: * must match that in an entity declaration, except that well-formed
4939: * documents need not declare any of the following entities: amp, lt,
4940: * gt, apos, quot. The declaration of a parameter entity must precede
4941: * any reference to it. Similarly, the declaration of a general entity
4942: * must precede any reference to it which appears in a default value in an
4943: * attribute-list declaration. Note that if entities are declared in the
4944: * external subset or in external parameter entities, a non-validating
4945: * processor is not obligated to read and process their declarations;
4946: * for such documents, the rule that an entity must be declared is a
4947: * well-formedness constraint only if standalone='yes'.
4948: *
4949: * [ WFC: Parsed Entity ]
4950: * An entity reference must not contain the name of an unparsed entity
4951: *
1.77 daniel 4952: * Returns the xmlEntityPtr if found, or NULL otherwise.
1.24 daniel 4953: */
1.77 daniel 4954: xmlEntityPtr
1.55 daniel 4955: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1.123 daniel 4956: xmlChar *name;
1.72 daniel 4957: xmlEntityPtr ent = NULL;
1.24 daniel 4958:
1.91 daniel 4959: GROW;
1.111 daniel 4960:
1.152 daniel 4961: if (RAW == '&') {
1.40 daniel 4962: NEXT;
1.24 daniel 4963: name = xmlParseName(ctxt);
4964: if (name == NULL) {
1.230 veillard 4965: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 4966: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 4967: ctxt->sax->error(ctxt->userData,
4968: "xmlParseEntityRef: no name\n");
1.59 daniel 4969: ctxt->wellFormed = 0;
1.180 daniel 4970: ctxt->disableSAX = 1;
1.24 daniel 4971: } else {
1.152 daniel 4972: if (RAW == ';') {
1.40 daniel 4973: NEXT;
1.24 daniel 4974: /*
1.77 daniel 4975: * Ask first SAX for entity resolution, otherwise try the
4976: * predefined set.
4977: */
4978: if (ctxt->sax != NULL) {
4979: if (ctxt->sax->getEntity != NULL)
4980: ent = ctxt->sax->getEntity(ctxt->userData, name);
4981: if (ent == NULL)
4982: ent = xmlGetPredefinedEntity(name);
4983: }
4984: /*
1.98 daniel 4985: * [ WFC: Entity Declared ]
4986: * In a document without any DTD, a document with only an
4987: * internal DTD subset which contains no parameter entity
4988: * references, or a document with "standalone='yes'", the
4989: * Name given in the entity reference must match that in an
4990: * entity declaration, except that well-formed documents
4991: * need not declare any of the following entities: amp, lt,
4992: * gt, apos, quot.
4993: * The declaration of a parameter entity must precede any
4994: * reference to it.
4995: * Similarly, the declaration of a general entity must
4996: * precede any reference to it which appears in a default
4997: * value in an attribute-list declaration. Note that if
4998: * entities are declared in the external subset or in
4999: * external parameter entities, a non-validating processor
5000: * is not obligated to read and process their declarations;
5001: * for such documents, the rule that an entity must be
5002: * declared is a well-formedness constraint only if
5003: * standalone='yes'.
1.59 daniel 5004: */
1.77 daniel 5005: if (ent == NULL) {
1.98 daniel 5006: if ((ctxt->standalone == 1) ||
5007: ((ctxt->hasExternalSubset == 0) &&
5008: (ctxt->hasPErefs == 0))) {
1.230 veillard 5009: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.98 daniel 5010: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 5011: ctxt->sax->error(ctxt->userData,
5012: "Entity '%s' not defined\n", name);
5013: ctxt->wellFormed = 0;
1.180 daniel 5014: ctxt->disableSAX = 1;
1.77 daniel 5015: } else {
1.230 veillard 5016: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
1.98 daniel 5017: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5018: ctxt->sax->warning(ctxt->userData,
5019: "Entity '%s' not defined\n", name);
1.59 daniel 5020: }
1.77 daniel 5021: }
1.59 daniel 5022:
5023: /*
1.98 daniel 5024: * [ WFC: Parsed Entity ]
5025: * An entity reference must not contain the name of an
5026: * unparsed entity
5027: */
1.159 daniel 5028: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.230 veillard 5029: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 5030: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5031: ctxt->sax->error(ctxt->userData,
5032: "Entity reference to unparsed entity %s\n", name);
5033: ctxt->wellFormed = 0;
1.180 daniel 5034: ctxt->disableSAX = 1;
1.98 daniel 5035: }
5036:
5037: /*
5038: * [ WFC: No External Entity References ]
5039: * Attribute values cannot contain direct or indirect
5040: * entity references to external entities.
5041: */
5042: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 5043: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.230 veillard 5044: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
1.98 daniel 5045: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5046: ctxt->sax->error(ctxt->userData,
5047: "Attribute references external entity '%s'\n", name);
5048: ctxt->wellFormed = 0;
1.180 daniel 5049: ctxt->disableSAX = 1;
1.98 daniel 5050: }
5051: /*
5052: * [ WFC: No < in Attribute Values ]
5053: * The replacement text of any entity referred to directly or
5054: * indirectly in an attribute value (other than "<") must
5055: * not contain a <.
1.59 daniel 5056: */
1.98 daniel 5057: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.116 daniel 5058: (ent != NULL) &&
1.236 veillard 5059: (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
1.98 daniel 5060: (ent->content != NULL) &&
5061: (xmlStrchr(ent->content, '<'))) {
1.230 veillard 5062: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.98 daniel 5063: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5064: ctxt->sax->error(ctxt->userData,
5065: "'<' in entity '%s' is not allowed in attributes values\n", name);
5066: ctxt->wellFormed = 0;
1.180 daniel 5067: ctxt->disableSAX = 1;
1.98 daniel 5068: }
5069:
5070: /*
5071: * Internal check, no parameter entities here ...
5072: */
5073: else {
1.159 daniel 5074: switch (ent->etype) {
1.59 daniel 5075: case XML_INTERNAL_PARAMETER_ENTITY:
5076: case XML_EXTERNAL_PARAMETER_ENTITY:
1.230 veillard 5077: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
1.59 daniel 5078: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5079: ctxt->sax->error(ctxt->userData,
1.59 daniel 5080: "Attempt to reference the parameter entity '%s'\n", name);
5081: ctxt->wellFormed = 0;
1.180 daniel 5082: ctxt->disableSAX = 1;
5083: break;
5084: default:
1.59 daniel 5085: break;
5086: }
5087: }
5088:
5089: /*
1.98 daniel 5090: * [ WFC: No Recursion ]
1.229 veillard 5091: * A parsed entity must not contain a recursive reference
1.117 daniel 5092: * to itself, either directly or indirectly.
1.229 veillard 5093: * Done somewhere else
1.59 daniel 5094: */
1.77 daniel 5095:
1.24 daniel 5096: } else {
1.230 veillard 5097: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.55 daniel 5098: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5099: ctxt->sax->error(ctxt->userData,
1.59 daniel 5100: "xmlParseEntityRef: expecting ';'\n");
5101: ctxt->wellFormed = 0;
1.180 daniel 5102: ctxt->disableSAX = 1;
1.24 daniel 5103: }
1.119 daniel 5104: xmlFree(name);
1.24 daniel 5105: }
5106: }
1.77 daniel 5107: return(ent);
1.24 daniel 5108: }
1.229 veillard 5109:
1.135 daniel 5110: /**
5111: * xmlParseStringEntityRef:
5112: * @ctxt: an XML parser context
5113: * @str: a pointer to an index in the string
5114: *
5115: * parse ENTITY references declarations, but this version parses it from
5116: * a string value.
5117: *
5118: * [68] EntityRef ::= '&' Name ';'
5119: *
5120: * [ WFC: Entity Declared ]
5121: * In a document without any DTD, a document with only an internal DTD
5122: * subset which contains no parameter entity references, or a document
5123: * with "standalone='yes'", the Name given in the entity reference
5124: * must match that in an entity declaration, except that well-formed
5125: * documents need not declare any of the following entities: amp, lt,
5126: * gt, apos, quot. The declaration of a parameter entity must precede
5127: * any reference to it. Similarly, the declaration of a general entity
5128: * must precede any reference to it which appears in a default value in an
5129: * attribute-list declaration. Note that if entities are declared in the
5130: * external subset or in external parameter entities, a non-validating
5131: * processor is not obligated to read and process their declarations;
5132: * for such documents, the rule that an entity must be declared is a
5133: * well-formedness constraint only if standalone='yes'.
5134: *
5135: * [ WFC: Parsed Entity ]
5136: * An entity reference must not contain the name of an unparsed entity
5137: *
5138: * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5139: * is updated to the current location in the string.
5140: */
5141: xmlEntityPtr
5142: xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5143: xmlChar *name;
5144: const xmlChar *ptr;
5145: xmlChar cur;
5146: xmlEntityPtr ent = NULL;
5147:
1.156 daniel 5148: if ((str == NULL) || (*str == NULL))
5149: return(NULL);
1.135 daniel 5150: ptr = *str;
5151: cur = *ptr;
5152: if (cur == '&') {
5153: ptr++;
5154: cur = *ptr;
5155: name = xmlParseStringName(ctxt, &ptr);
5156: if (name == NULL) {
1.230 veillard 5157: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.135 daniel 5158: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5159: ctxt->sax->error(ctxt->userData,
5160: "xmlParseEntityRef: no name\n");
5161: ctxt->wellFormed = 0;
1.180 daniel 5162: ctxt->disableSAX = 1;
1.135 daniel 5163: } else {
1.185 daniel 5164: if (*ptr == ';') {
5165: ptr++;
1.135 daniel 5166: /*
5167: * Ask first SAX for entity resolution, otherwise try the
5168: * predefined set.
5169: */
5170: if (ctxt->sax != NULL) {
5171: if (ctxt->sax->getEntity != NULL)
5172: ent = ctxt->sax->getEntity(ctxt->userData, name);
5173: if (ent == NULL)
5174: ent = xmlGetPredefinedEntity(name);
5175: }
5176: /*
5177: * [ WFC: Entity Declared ]
5178: * In a document without any DTD, a document with only an
5179: * internal DTD subset which contains no parameter entity
5180: * references, or a document with "standalone='yes'", the
5181: * Name given in the entity reference must match that in an
5182: * entity declaration, except that well-formed documents
5183: * need not declare any of the following entities: amp, lt,
5184: * gt, apos, quot.
5185: * The declaration of a parameter entity must precede any
5186: * reference to it.
5187: * Similarly, the declaration of a general entity must
5188: * precede any reference to it which appears in a default
5189: * value in an attribute-list declaration. Note that if
5190: * entities are declared in the external subset or in
5191: * external parameter entities, a non-validating processor
5192: * is not obligated to read and process their declarations;
5193: * for such documents, the rule that an entity must be
5194: * declared is a well-formedness constraint only if
5195: * standalone='yes'.
5196: */
5197: if (ent == NULL) {
5198: if ((ctxt->standalone == 1) ||
5199: ((ctxt->hasExternalSubset == 0) &&
5200: (ctxt->hasPErefs == 0))) {
1.230 veillard 5201: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.135 daniel 5202: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5203: ctxt->sax->error(ctxt->userData,
5204: "Entity '%s' not defined\n", name);
5205: ctxt->wellFormed = 0;
1.180 daniel 5206: ctxt->disableSAX = 1;
1.135 daniel 5207: } else {
1.230 veillard 5208: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
1.135 daniel 5209: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5210: ctxt->sax->warning(ctxt->userData,
5211: "Entity '%s' not defined\n", name);
5212: }
5213: }
5214:
5215: /*
5216: * [ WFC: Parsed Entity ]
5217: * An entity reference must not contain the name of an
5218: * unparsed entity
5219: */
1.159 daniel 5220: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.230 veillard 5221: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.135 daniel 5222: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5223: ctxt->sax->error(ctxt->userData,
5224: "Entity reference to unparsed entity %s\n", name);
5225: ctxt->wellFormed = 0;
1.180 daniel 5226: ctxt->disableSAX = 1;
1.135 daniel 5227: }
5228:
5229: /*
5230: * [ WFC: No External Entity References ]
5231: * Attribute values cannot contain direct or indirect
5232: * entity references to external entities.
5233: */
5234: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 5235: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.230 veillard 5236: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
1.135 daniel 5237: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5238: ctxt->sax->error(ctxt->userData,
5239: "Attribute references external entity '%s'\n", name);
5240: ctxt->wellFormed = 0;
1.180 daniel 5241: ctxt->disableSAX = 1;
1.135 daniel 5242: }
5243: /*
5244: * [ WFC: No < in Attribute Values ]
5245: * The replacement text of any entity referred to directly or
5246: * indirectly in an attribute value (other than "<") must
5247: * not contain a <.
5248: */
5249: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5250: (ent != NULL) &&
1.236 veillard 5251: (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
1.135 daniel 5252: (ent->content != NULL) &&
5253: (xmlStrchr(ent->content, '<'))) {
1.230 veillard 5254: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.135 daniel 5255: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5256: ctxt->sax->error(ctxt->userData,
5257: "'<' in entity '%s' is not allowed in attributes values\n", name);
5258: ctxt->wellFormed = 0;
1.180 daniel 5259: ctxt->disableSAX = 1;
1.135 daniel 5260: }
5261:
5262: /*
5263: * Internal check, no parameter entities here ...
5264: */
5265: else {
1.159 daniel 5266: switch (ent->etype) {
1.135 daniel 5267: case XML_INTERNAL_PARAMETER_ENTITY:
5268: case XML_EXTERNAL_PARAMETER_ENTITY:
1.230 veillard 5269: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
1.135 daniel 5270: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5271: ctxt->sax->error(ctxt->userData,
5272: "Attempt to reference the parameter entity '%s'\n", name);
5273: ctxt->wellFormed = 0;
1.180 daniel 5274: ctxt->disableSAX = 1;
5275: break;
5276: default:
1.135 daniel 5277: break;
5278: }
5279: }
5280:
5281: /*
5282: * [ WFC: No Recursion ]
1.229 veillard 5283: * A parsed entity must not contain a recursive reference
1.135 daniel 5284: * to itself, either directly or indirectly.
1.229 veillard 5285: * Done somewhere else
1.135 daniel 5286: */
5287:
5288: } else {
1.230 veillard 5289: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.135 daniel 5290: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5291: ctxt->sax->error(ctxt->userData,
5292: "xmlParseEntityRef: expecting ';'\n");
5293: ctxt->wellFormed = 0;
1.180 daniel 5294: ctxt->disableSAX = 1;
1.135 daniel 5295: }
5296: xmlFree(name);
5297: }
5298: }
1.185 daniel 5299: *str = ptr;
1.135 daniel 5300: return(ent);
5301: }
1.24 daniel 5302:
1.50 daniel 5303: /**
5304: * xmlParsePEReference:
5305: * @ctxt: an XML parser context
5306: *
5307: * parse PEReference declarations
1.77 daniel 5308: * The entity content is handled directly by pushing it's content as
5309: * a new input stream.
1.22 daniel 5310: *
5311: * [69] PEReference ::= '%' Name ';'
1.68 daniel 5312: *
1.98 daniel 5313: * [ WFC: No Recursion ]
1.229 veillard 5314: * A parsed entity must not contain a recursive
1.98 daniel 5315: * reference to itself, either directly or indirectly.
5316: *
5317: * [ WFC: Entity Declared ]
5318: * In a document without any DTD, a document with only an internal DTD
5319: * subset which contains no parameter entity references, or a document
5320: * with "standalone='yes'", ... ... The declaration of a parameter
5321: * entity must precede any reference to it...
5322: *
5323: * [ VC: Entity Declared ]
5324: * In a document with an external subset or external parameter entities
5325: * with "standalone='no'", ... ... The declaration of a parameter entity
5326: * must precede any reference to it...
5327: *
5328: * [ WFC: In DTD ]
5329: * Parameter-entity references may only appear in the DTD.
5330: * NOTE: misleading but this is handled.
1.22 daniel 5331: */
1.77 daniel 5332: void
1.55 daniel 5333: xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 5334: xmlChar *name;
1.72 daniel 5335: xmlEntityPtr entity = NULL;
1.50 daniel 5336: xmlParserInputPtr input;
1.22 daniel 5337:
1.152 daniel 5338: if (RAW == '%') {
1.40 daniel 5339: NEXT;
1.22 daniel 5340: name = xmlParseName(ctxt);
5341: if (name == NULL) {
1.230 veillard 5342: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 5343: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5344: ctxt->sax->error(ctxt->userData,
5345: "xmlParsePEReference: no name\n");
1.59 daniel 5346: ctxt->wellFormed = 0;
1.180 daniel 5347: ctxt->disableSAX = 1;
1.22 daniel 5348: } else {
1.152 daniel 5349: if (RAW == ';') {
1.40 daniel 5350: NEXT;
1.98 daniel 5351: if ((ctxt->sax != NULL) &&
5352: (ctxt->sax->getParameterEntity != NULL))
5353: entity = ctxt->sax->getParameterEntity(ctxt->userData,
5354: name);
1.45 daniel 5355: if (entity == NULL) {
1.98 daniel 5356: /*
5357: * [ WFC: Entity Declared ]
5358: * In a document without any DTD, a document with only an
5359: * internal DTD subset which contains no parameter entity
5360: * references, or a document with "standalone='yes'", ...
5361: * ... The declaration of a parameter entity must precede
5362: * any reference to it...
5363: */
5364: if ((ctxt->standalone == 1) ||
5365: ((ctxt->hasExternalSubset == 0) &&
5366: (ctxt->hasPErefs == 0))) {
1.230 veillard 5367: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.220 veillard 5368: if ((!ctxt->disableSAX) &&
5369: (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 5370: ctxt->sax->error(ctxt->userData,
5371: "PEReference: %%%s; not found\n", name);
5372: ctxt->wellFormed = 0;
1.180 daniel 5373: ctxt->disableSAX = 1;
1.98 daniel 5374: } else {
5375: /*
5376: * [ VC: Entity Declared ]
5377: * In a document with an external subset or external
5378: * parameter entities with "standalone='no'", ...
5379: * ... The declaration of a parameter entity must precede
5380: * any reference to it...
5381: */
1.220 veillard 5382: if ((!ctxt->disableSAX) &&
5383: (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1.98 daniel 5384: ctxt->sax->warning(ctxt->userData,
5385: "PEReference: %%%s; not found\n", name);
5386: ctxt->valid = 0;
5387: }
1.50 daniel 5388: } else {
1.98 daniel 5389: /*
5390: * Internal checking in case the entity quest barfed
5391: */
1.159 daniel 5392: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5393: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.98 daniel 5394: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5395: ctxt->sax->warning(ctxt->userData,
5396: "Internal: %%%s; is not a parameter entity\n", name);
5397: } else {
1.164 daniel 5398: /*
5399: * TODO !!!
5400: * handle the extra spaces added before and after
5401: * c.f. http://www.w3.org/TR/REC-xml#as-PE
5402: */
1.98 daniel 5403: input = xmlNewEntityInputStream(ctxt, entity);
5404: xmlPushInput(ctxt, input);
1.164 daniel 5405: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5406: (RAW == '<') && (NXT(1) == '?') &&
5407: (NXT(2) == 'x') && (NXT(3) == 'm') &&
5408: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 5409: xmlParseTextDecl(ctxt);
1.193 daniel 5410: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5411: /*
5412: * The XML REC instructs us to stop parsing
5413: * right here
5414: */
5415: ctxt->instate = XML_PARSER_EOF;
5416: xmlFree(name);
5417: return;
5418: }
1.164 daniel 5419: }
5420: if (ctxt->token == 0)
5421: ctxt->token = ' ';
1.98 daniel 5422: }
1.45 daniel 5423: }
1.98 daniel 5424: ctxt->hasPErefs = 1;
1.22 daniel 5425: } else {
1.230 veillard 5426: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.55 daniel 5427: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5428: ctxt->sax->error(ctxt->userData,
1.59 daniel 5429: "xmlParsePEReference: expecting ';'\n");
5430: ctxt->wellFormed = 0;
1.180 daniel 5431: ctxt->disableSAX = 1;
1.22 daniel 5432: }
1.119 daniel 5433: xmlFree(name);
1.3 veillard 5434: }
5435: }
5436: }
5437:
1.50 daniel 5438: /**
1.135 daniel 5439: * xmlParseStringPEReference:
5440: * @ctxt: an XML parser context
5441: * @str: a pointer to an index in the string
5442: *
5443: * parse PEReference declarations
5444: *
5445: * [69] PEReference ::= '%' Name ';'
5446: *
5447: * [ WFC: No Recursion ]
1.229 veillard 5448: * A parsed entity must not contain a recursive
1.135 daniel 5449: * reference to itself, either directly or indirectly.
5450: *
5451: * [ WFC: Entity Declared ]
5452: * In a document without any DTD, a document with only an internal DTD
5453: * subset which contains no parameter entity references, or a document
5454: * with "standalone='yes'", ... ... The declaration of a parameter
5455: * entity must precede any reference to it...
5456: *
5457: * [ VC: Entity Declared ]
5458: * In a document with an external subset or external parameter entities
5459: * with "standalone='no'", ... ... The declaration of a parameter entity
5460: * must precede any reference to it...
5461: *
5462: * [ WFC: In DTD ]
5463: * Parameter-entity references may only appear in the DTD.
5464: * NOTE: misleading but this is handled.
5465: *
5466: * Returns the string of the entity content.
5467: * str is updated to the current value of the index
5468: */
5469: xmlEntityPtr
5470: xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5471: const xmlChar *ptr;
5472: xmlChar cur;
5473: xmlChar *name;
5474: xmlEntityPtr entity = NULL;
5475:
5476: if ((str == NULL) || (*str == NULL)) return(NULL);
5477: ptr = *str;
5478: cur = *ptr;
5479: if (cur == '%') {
5480: ptr++;
5481: cur = *ptr;
5482: name = xmlParseStringName(ctxt, &ptr);
5483: if (name == NULL) {
1.230 veillard 5484: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.135 daniel 5485: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5486: ctxt->sax->error(ctxt->userData,
5487: "xmlParseStringPEReference: no name\n");
5488: ctxt->wellFormed = 0;
1.180 daniel 5489: ctxt->disableSAX = 1;
1.135 daniel 5490: } else {
5491: cur = *ptr;
5492: if (cur == ';') {
5493: ptr++;
5494: cur = *ptr;
5495: if ((ctxt->sax != NULL) &&
5496: (ctxt->sax->getParameterEntity != NULL))
5497: entity = ctxt->sax->getParameterEntity(ctxt->userData,
5498: name);
5499: if (entity == NULL) {
5500: /*
5501: * [ WFC: Entity Declared ]
5502: * In a document without any DTD, a document with only an
5503: * internal DTD subset which contains no parameter entity
5504: * references, or a document with "standalone='yes'", ...
5505: * ... The declaration of a parameter entity must precede
5506: * any reference to it...
5507: */
5508: if ((ctxt->standalone == 1) ||
5509: ((ctxt->hasExternalSubset == 0) &&
5510: (ctxt->hasPErefs == 0))) {
1.230 veillard 5511: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.135 daniel 5512: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5513: ctxt->sax->error(ctxt->userData,
5514: "PEReference: %%%s; not found\n", name);
5515: ctxt->wellFormed = 0;
1.180 daniel 5516: ctxt->disableSAX = 1;
1.135 daniel 5517: } else {
5518: /*
5519: * [ VC: Entity Declared ]
5520: * In a document with an external subset or external
5521: * parameter entities with "standalone='no'", ...
5522: * ... The declaration of a parameter entity must
5523: * precede any reference to it...
5524: */
5525: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5526: ctxt->sax->warning(ctxt->userData,
5527: "PEReference: %%%s; not found\n", name);
5528: ctxt->valid = 0;
5529: }
5530: } else {
5531: /*
5532: * Internal checking in case the entity quest barfed
5533: */
1.159 daniel 5534: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5535: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.135 daniel 5536: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5537: ctxt->sax->warning(ctxt->userData,
5538: "Internal: %%%s; is not a parameter entity\n", name);
5539: }
5540: }
5541: ctxt->hasPErefs = 1;
5542: } else {
1.230 veillard 5543: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.135 daniel 5544: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5545: ctxt->sax->error(ctxt->userData,
5546: "xmlParseStringPEReference: expecting ';'\n");
5547: ctxt->wellFormed = 0;
1.180 daniel 5548: ctxt->disableSAX = 1;
1.135 daniel 5549: }
5550: xmlFree(name);
5551: }
5552: }
5553: *str = ptr;
5554: return(entity);
5555: }
5556:
5557: /**
1.181 daniel 5558: * xmlParseDocTypeDecl:
1.50 daniel 5559: * @ctxt: an XML parser context
5560: *
5561: * parse a DOCTYPE declaration
1.21 daniel 5562: *
1.22 daniel 5563: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
5564: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.98 daniel 5565: *
5566: * [ VC: Root Element Type ]
1.99 daniel 5567: * The Name in the document type declaration must match the element
1.98 daniel 5568: * type of the root element.
1.21 daniel 5569: */
5570:
1.55 daniel 5571: void
5572: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.165 daniel 5573: xmlChar *name = NULL;
1.123 daniel 5574: xmlChar *ExternalID = NULL;
5575: xmlChar *URI = NULL;
1.21 daniel 5576:
5577: /*
5578: * We know that '<!DOCTYPE' has been detected.
5579: */
1.40 daniel 5580: SKIP(9);
1.21 daniel 5581:
1.42 daniel 5582: SKIP_BLANKS;
1.21 daniel 5583:
5584: /*
5585: * Parse the DOCTYPE name.
5586: */
5587: name = xmlParseName(ctxt);
5588: if (name == NULL) {
1.230 veillard 5589: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 5590: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5591: ctxt->sax->error(ctxt->userData,
5592: "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.59 daniel 5593: ctxt->wellFormed = 0;
1.180 daniel 5594: ctxt->disableSAX = 1;
1.21 daniel 5595: }
1.165 daniel 5596: ctxt->intSubName = name;
1.21 daniel 5597:
1.42 daniel 5598: SKIP_BLANKS;
1.21 daniel 5599:
5600: /*
1.22 daniel 5601: * Check for SystemID and ExternalID
5602: */
1.67 daniel 5603: URI = xmlParseExternalID(ctxt, &ExternalID, 1);
1.98 daniel 5604:
5605: if ((URI != NULL) || (ExternalID != NULL)) {
5606: ctxt->hasExternalSubset = 1;
5607: }
1.165 daniel 5608: ctxt->extSubURI = URI;
5609: ctxt->extSubSystem = ExternalID;
1.98 daniel 5610:
1.42 daniel 5611: SKIP_BLANKS;
1.36 daniel 5612:
1.76 daniel 5613: /*
1.165 daniel 5614: * Create and update the internal subset.
1.76 daniel 5615: */
1.171 daniel 5616: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
5617: (!ctxt->disableSAX))
1.74 daniel 5618: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
1.22 daniel 5619:
5620: /*
1.140 daniel 5621: * Is there any internal subset declarations ?
5622: * they are handled separately in xmlParseInternalSubset()
5623: */
1.152 daniel 5624: if (RAW == '[')
1.140 daniel 5625: return;
5626:
5627: /*
5628: * We should be at the end of the DOCTYPE declaration.
5629: */
1.152 daniel 5630: if (RAW != '>') {
1.230 veillard 5631: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
1.140 daniel 5632: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5633: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5634: ctxt->wellFormed = 0;
1.180 daniel 5635: ctxt->disableSAX = 1;
1.140 daniel 5636: }
5637: NEXT;
5638: }
5639:
5640: /**
1.181 daniel 5641: * xmlParseInternalsubset:
1.140 daniel 5642: * @ctxt: an XML parser context
5643: *
5644: * parse the internal subset declaration
5645: *
5646: * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5647: */
5648:
5649: void
5650: xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
5651: /*
1.22 daniel 5652: * Is there any DTD definition ?
5653: */
1.152 daniel 5654: if (RAW == '[') {
1.96 daniel 5655: ctxt->instate = XML_PARSER_DTD;
1.40 daniel 5656: NEXT;
1.22 daniel 5657: /*
5658: * Parse the succession of Markup declarations and
5659: * PEReferences.
5660: * Subsequence (markupdecl | PEReference | S)*
5661: */
1.152 daniel 5662: while (RAW != ']') {
1.123 daniel 5663: const xmlChar *check = CUR_PTR;
1.115 daniel 5664: int cons = ctxt->input->consumed;
1.22 daniel 5665:
1.42 daniel 5666: SKIP_BLANKS;
1.22 daniel 5667: xmlParseMarkupDecl(ctxt);
1.50 daniel 5668: xmlParsePEReference(ctxt);
1.22 daniel 5669:
1.115 daniel 5670: /*
5671: * Pop-up of finished entities.
5672: */
1.152 daniel 5673: while ((RAW == 0) && (ctxt->inputNr > 1))
1.115 daniel 5674: xmlPopInput(ctxt);
5675:
1.118 daniel 5676: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
1.230 veillard 5677: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 5678: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5679: ctxt->sax->error(ctxt->userData,
1.140 daniel 5680: "xmlParseInternalSubset: error detected in Markup declaration\n");
1.59 daniel 5681: ctxt->wellFormed = 0;
1.180 daniel 5682: ctxt->disableSAX = 1;
1.22 daniel 5683: break;
5684: }
5685: }
1.209 veillard 5686: if (RAW == ']') {
5687: NEXT;
5688: SKIP_BLANKS;
5689: }
1.22 daniel 5690: }
5691:
5692: /*
5693: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 5694: */
1.152 daniel 5695: if (RAW != '>') {
1.230 veillard 5696: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
1.55 daniel 5697: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5698: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
1.59 daniel 5699: ctxt->wellFormed = 0;
1.180 daniel 5700: ctxt->disableSAX = 1;
1.21 daniel 5701: }
1.40 daniel 5702: NEXT;
1.21 daniel 5703: }
5704:
1.50 daniel 5705: /**
5706: * xmlParseAttribute:
5707: * @ctxt: an XML parser context
1.123 daniel 5708: * @value: a xmlChar ** used to store the value of the attribute
1.50 daniel 5709: *
5710: * parse an attribute
1.3 veillard 5711: *
1.22 daniel 5712: * [41] Attribute ::= Name Eq AttValue
5713: *
1.98 daniel 5714: * [ WFC: No External Entity References ]
5715: * Attribute values cannot contain direct or indirect entity references
5716: * to external entities.
5717: *
5718: * [ WFC: No < in Attribute Values ]
5719: * The replacement text of any entity referred to directly or indirectly in
5720: * an attribute value (other than "<") must not contain a <.
5721: *
5722: * [ VC: Attribute Value Type ]
1.117 daniel 5723: * The attribute must have been declared; the value must be of the type
1.99 daniel 5724: * declared for it.
1.98 daniel 5725: *
1.22 daniel 5726: * [25] Eq ::= S? '=' S?
5727: *
1.29 daniel 5728: * With namespace:
5729: *
5730: * [NS 11] Attribute ::= QName Eq AttValue
1.43 daniel 5731: *
5732: * Also the case QName == xmlns:??? is handled independently as a namespace
5733: * definition.
1.69 daniel 5734: *
1.72 daniel 5735: * Returns the attribute name, and the value in *value.
1.3 veillard 5736: */
5737:
1.123 daniel 5738: xmlChar *
5739: xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
5740: xmlChar *name, *val;
1.3 veillard 5741:
1.72 daniel 5742: *value = NULL;
5743: name = xmlParseName(ctxt);
1.22 daniel 5744: if (name == NULL) {
1.230 veillard 5745: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 5746: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5747: ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
1.59 daniel 5748: ctxt->wellFormed = 0;
1.180 daniel 5749: ctxt->disableSAX = 1;
1.52 daniel 5750: return(NULL);
1.3 veillard 5751: }
5752:
5753: /*
1.29 daniel 5754: * read the value
1.3 veillard 5755: */
1.42 daniel 5756: SKIP_BLANKS;
1.152 daniel 5757: if (RAW == '=') {
1.40 daniel 5758: NEXT;
1.42 daniel 5759: SKIP_BLANKS;
1.72 daniel 5760: val = xmlParseAttValue(ctxt);
1.96 daniel 5761: ctxt->instate = XML_PARSER_CONTENT;
1.29 daniel 5762: } else {
1.230 veillard 5763: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
1.55 daniel 5764: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5765: ctxt->sax->error(ctxt->userData,
1.59 daniel 5766: "Specification mandate value for attribute %s\n", name);
5767: ctxt->wellFormed = 0;
1.180 daniel 5768: ctxt->disableSAX = 1;
1.170 daniel 5769: xmlFree(name);
1.52 daniel 5770: return(NULL);
1.43 daniel 5771: }
5772:
1.172 daniel 5773: /*
5774: * Check that xml:lang conforms to the specification
1.222 veillard 5775: * No more registered as an error, just generate a warning now
5776: * since this was deprecated in XML second edition
1.172 daniel 5777: */
1.236 veillard 5778: if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
1.172 daniel 5779: if (!xmlCheckLanguageID(val)) {
1.222 veillard 5780: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5781: ctxt->sax->warning(ctxt->userData,
5782: "Malformed value for xml:lang : %s\n", val);
1.172 daniel 5783: }
5784: }
5785:
1.176 daniel 5786: /*
5787: * Check that xml:space conforms to the specification
5788: */
1.236 veillard 5789: if (xmlStrEqual(name, BAD_CAST "xml:space")) {
5790: if (xmlStrEqual(val, BAD_CAST "default"))
1.176 daniel 5791: *(ctxt->space) = 0;
1.236 veillard 5792: else if (xmlStrEqual(val, BAD_CAST "preserve"))
1.176 daniel 5793: *(ctxt->space) = 1;
5794: else {
1.230 veillard 5795: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
1.176 daniel 5796: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5797: ctxt->sax->error(ctxt->userData,
5798: "Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
5799: val);
5800: ctxt->wellFormed = 0;
1.180 daniel 5801: ctxt->disableSAX = 1;
1.176 daniel 5802: }
5803: }
5804:
1.72 daniel 5805: *value = val;
5806: return(name);
1.3 veillard 5807: }
5808:
1.50 daniel 5809: /**
5810: * xmlParseStartTag:
5811: * @ctxt: an XML parser context
5812: *
5813: * parse a start of tag either for rule element or
5814: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 5815: *
5816: * [40] STag ::= '<' Name (S Attribute)* S? '>'
5817: *
1.98 daniel 5818: * [ WFC: Unique Att Spec ]
5819: * No attribute name may appear more than once in the same start-tag or
5820: * empty-element tag.
5821: *
1.29 daniel 5822: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
5823: *
1.98 daniel 5824: * [ WFC: Unique Att Spec ]
5825: * No attribute name may appear more than once in the same start-tag or
5826: * empty-element tag.
5827: *
1.29 daniel 5828: * With namespace:
5829: *
5830: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
5831: *
5832: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.83 daniel 5833: *
1.192 daniel 5834: * Returns the element name parsed
1.2 veillard 5835: */
5836:
1.123 daniel 5837: xmlChar *
1.69 daniel 5838: xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 5839: xmlChar *name;
5840: xmlChar *attname;
5841: xmlChar *attvalue;
5842: const xmlChar **atts = NULL;
1.72 daniel 5843: int nbatts = 0;
5844: int maxatts = 0;
5845: int i;
1.2 veillard 5846:
1.152 daniel 5847: if (RAW != '<') return(NULL);
1.40 daniel 5848: NEXT;
1.3 veillard 5849:
1.72 daniel 5850: name = xmlParseName(ctxt);
1.59 daniel 5851: if (name == NULL) {
1.230 veillard 5852: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5853: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5854: ctxt->sax->error(ctxt->userData,
1.59 daniel 5855: "xmlParseStartTag: invalid element name\n");
5856: ctxt->wellFormed = 0;
1.180 daniel 5857: ctxt->disableSAX = 1;
1.83 daniel 5858: return(NULL);
1.50 daniel 5859: }
5860:
5861: /*
1.3 veillard 5862: * Now parse the attributes, it ends up with the ending
5863: *
5864: * (S Attribute)* S?
5865: */
1.42 daniel 5866: SKIP_BLANKS;
1.91 daniel 5867: GROW;
1.168 daniel 5868:
1.153 daniel 5869: while ((IS_CHAR(RAW)) &&
1.152 daniel 5870: (RAW != '>') &&
5871: ((RAW != '/') || (NXT(1) != '>'))) {
1.123 daniel 5872: const xmlChar *q = CUR_PTR;
1.91 daniel 5873: int cons = ctxt->input->consumed;
1.29 daniel 5874:
1.72 daniel 5875: attname = xmlParseAttribute(ctxt, &attvalue);
5876: if ((attname != NULL) && (attvalue != NULL)) {
5877: /*
1.98 daniel 5878: * [ WFC: Unique Att Spec ]
5879: * No attribute name may appear more than once in the same
5880: * start-tag or empty-element tag.
1.72 daniel 5881: */
5882: for (i = 0; i < nbatts;i += 2) {
1.236 veillard 5883: if (xmlStrEqual(atts[i], attname)) {
1.230 veillard 5884: ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
1.72 daniel 5885: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 5886: ctxt->sax->error(ctxt->userData,
5887: "Attribute %s redefined\n",
5888: attname);
1.72 daniel 5889: ctxt->wellFormed = 0;
1.180 daniel 5890: ctxt->disableSAX = 1;
1.119 daniel 5891: xmlFree(attname);
5892: xmlFree(attvalue);
1.98 daniel 5893: goto failed;
1.72 daniel 5894: }
5895: }
5896:
5897: /*
5898: * Add the pair to atts
5899: */
5900: if (atts == NULL) {
5901: maxatts = 10;
1.123 daniel 5902: atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
1.72 daniel 5903: if (atts == NULL) {
1.241 veillard 5904: xmlGenericError(xmlGenericErrorContext,
5905: "malloc of %ld byte failed\n",
1.123 daniel 5906: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 5907: return(NULL);
1.72 daniel 5908: }
1.127 daniel 5909: } else if (nbatts + 4 > maxatts) {
1.72 daniel 5910: maxatts *= 2;
1.233 veillard 5911: atts = (const xmlChar **) xmlRealloc((void *) atts,
5912: maxatts * sizeof(xmlChar *));
1.72 daniel 5913: if (atts == NULL) {
1.241 veillard 5914: xmlGenericError(xmlGenericErrorContext,
5915: "realloc of %ld byte failed\n",
1.123 daniel 5916: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 5917: return(NULL);
1.72 daniel 5918: }
5919: }
5920: atts[nbatts++] = attname;
5921: atts[nbatts++] = attvalue;
5922: atts[nbatts] = NULL;
5923: atts[nbatts + 1] = NULL;
1.176 daniel 5924: } else {
5925: if (attname != NULL)
5926: xmlFree(attname);
5927: if (attvalue != NULL)
5928: xmlFree(attvalue);
1.72 daniel 5929: }
5930:
1.116 daniel 5931: failed:
1.168 daniel 5932:
5933: if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
5934: break;
5935: if (!IS_BLANK(RAW)) {
1.230 veillard 5936: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.168 daniel 5937: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5938: ctxt->sax->error(ctxt->userData,
5939: "attributes construct error\n");
5940: ctxt->wellFormed = 0;
1.180 daniel 5941: ctxt->disableSAX = 1;
1.168 daniel 5942: }
1.42 daniel 5943: SKIP_BLANKS;
1.91 daniel 5944: if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
1.230 veillard 5945: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 5946: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5947: ctxt->sax->error(ctxt->userData,
1.31 daniel 5948: "xmlParseStartTag: problem parsing attributes\n");
1.59 daniel 5949: ctxt->wellFormed = 0;
1.180 daniel 5950: ctxt->disableSAX = 1;
1.29 daniel 5951: break;
1.3 veillard 5952: }
1.91 daniel 5953: GROW;
1.3 veillard 5954: }
5955:
1.43 daniel 5956: /*
1.72 daniel 5957: * SAX: Start of Element !
1.43 daniel 5958: */
1.171 daniel 5959: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
5960: (!ctxt->disableSAX))
1.74 daniel 5961: ctxt->sax->startElement(ctxt->userData, name, atts);
1.43 daniel 5962:
1.72 daniel 5963: if (atts != NULL) {
1.123 daniel 5964: for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
1.233 veillard 5965: xmlFree((void *) atts);
1.72 daniel 5966: }
1.83 daniel 5967: return(name);
1.3 veillard 5968: }
5969:
1.50 daniel 5970: /**
5971: * xmlParseEndTag:
5972: * @ctxt: an XML parser context
5973: *
5974: * parse an end of tag
1.27 daniel 5975: *
5976: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 5977: *
5978: * With namespace
5979: *
1.72 daniel 5980: * [NS 9] ETag ::= '</' QName S? '>'
1.7 veillard 5981: */
5982:
1.55 daniel 5983: void
1.140 daniel 5984: xmlParseEndTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 5985: xmlChar *name;
1.140 daniel 5986: xmlChar *oldname;
1.7 veillard 5987:
1.91 daniel 5988: GROW;
1.152 daniel 5989: if ((RAW != '<') || (NXT(1) != '/')) {
1.230 veillard 5990: ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
1.55 daniel 5991: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5992: ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
1.59 daniel 5993: ctxt->wellFormed = 0;
1.180 daniel 5994: ctxt->disableSAX = 1;
1.27 daniel 5995: return;
5996: }
1.40 daniel 5997: SKIP(2);
1.7 veillard 5998:
1.72 daniel 5999: name = xmlParseName(ctxt);
1.7 veillard 6000:
6001: /*
6002: * We should definitely be at the ending "S? '>'" part
6003: */
1.91 daniel 6004: GROW;
1.42 daniel 6005: SKIP_BLANKS;
1.153 daniel 6006: if ((!IS_CHAR(RAW)) || (RAW != '>')) {
1.230 veillard 6007: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.55 daniel 6008: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6009: ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
1.59 daniel 6010: ctxt->wellFormed = 0;
1.180 daniel 6011: ctxt->disableSAX = 1;
1.7 veillard 6012: } else
1.40 daniel 6013: NEXT;
1.7 veillard 6014:
1.72 daniel 6015: /*
1.98 daniel 6016: * [ WFC: Element Type Match ]
6017: * The Name in an element's end-tag must match the element type in the
6018: * start-tag.
6019: *
1.83 daniel 6020: */
1.147 daniel 6021: if ((name == NULL) || (ctxt->name == NULL) ||
1.236 veillard 6022: (!xmlStrEqual(name, ctxt->name))) {
1.230 veillard 6023: ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
1.147 daniel 6024: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6025: if ((name != NULL) && (ctxt->name != NULL)) {
6026: ctxt->sax->error(ctxt->userData,
6027: "Opening and ending tag mismatch: %s and %s\n",
6028: ctxt->name, name);
6029: } else if (ctxt->name != NULL) {
6030: ctxt->sax->error(ctxt->userData,
6031: "Ending tag eror for: %s\n", ctxt->name);
6032: } else {
6033: ctxt->sax->error(ctxt->userData,
6034: "Ending tag error: internal error ???\n");
6035: }
1.122 daniel 6036:
1.147 daniel 6037: }
1.83 daniel 6038: ctxt->wellFormed = 0;
1.180 daniel 6039: ctxt->disableSAX = 1;
1.83 daniel 6040: }
6041:
6042: /*
1.72 daniel 6043: * SAX: End of Tag
6044: */
1.171 daniel 6045: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6046: (!ctxt->disableSAX))
1.74 daniel 6047: ctxt->sax->endElement(ctxt->userData, name);
1.72 daniel 6048:
6049: if (name != NULL)
1.119 daniel 6050: xmlFree(name);
1.140 daniel 6051: oldname = namePop(ctxt);
1.176 daniel 6052: spacePop(ctxt);
1.140 daniel 6053: if (oldname != NULL) {
6054: #ifdef DEBUG_STACK
1.241 veillard 6055: xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
1.140 daniel 6056: #endif
6057: xmlFree(oldname);
6058: }
1.7 veillard 6059: return;
6060: }
6061:
1.50 daniel 6062: /**
6063: * xmlParseCDSect:
6064: * @ctxt: an XML parser context
6065: *
6066: * Parse escaped pure raw content.
1.29 daniel 6067: *
6068: * [18] CDSect ::= CDStart CData CDEnd
6069: *
6070: * [19] CDStart ::= '<![CDATA['
6071: *
6072: * [20] Data ::= (Char* - (Char* ']]>' Char*))
6073: *
6074: * [21] CDEnd ::= ']]>'
1.3 veillard 6075: */
1.55 daniel 6076: void
6077: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.135 daniel 6078: xmlChar *buf = NULL;
6079: int len = 0;
1.140 daniel 6080: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 6081: int r, rl;
6082: int s, sl;
6083: int cur, l;
1.234 veillard 6084: int count = 0;
1.3 veillard 6085:
1.106 daniel 6086: if ((NXT(0) == '<') && (NXT(1) == '!') &&
1.40 daniel 6087: (NXT(2) == '[') && (NXT(3) == 'C') &&
6088: (NXT(4) == 'D') && (NXT(5) == 'A') &&
6089: (NXT(6) == 'T') && (NXT(7) == 'A') &&
6090: (NXT(8) == '[')) {
6091: SKIP(9);
1.29 daniel 6092: } else
1.45 daniel 6093: return;
1.109 daniel 6094:
6095: ctxt->instate = XML_PARSER_CDATA_SECTION;
1.152 daniel 6096: r = CUR_CHAR(rl);
6097: if (!IS_CHAR(r)) {
1.230 veillard 6098: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.55 daniel 6099: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6100: ctxt->sax->error(ctxt->userData,
1.135 daniel 6101: "CData section not finished\n");
1.59 daniel 6102: ctxt->wellFormed = 0;
1.180 daniel 6103: ctxt->disableSAX = 1;
1.109 daniel 6104: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 6105: return;
1.3 veillard 6106: }
1.152 daniel 6107: NEXTL(rl);
6108: s = CUR_CHAR(sl);
6109: if (!IS_CHAR(s)) {
1.230 veillard 6110: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.55 daniel 6111: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6112: ctxt->sax->error(ctxt->userData,
1.135 daniel 6113: "CData section not finished\n");
1.59 daniel 6114: ctxt->wellFormed = 0;
1.180 daniel 6115: ctxt->disableSAX = 1;
1.109 daniel 6116: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 6117: return;
1.3 veillard 6118: }
1.152 daniel 6119: NEXTL(sl);
6120: cur = CUR_CHAR(l);
1.135 daniel 6121: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6122: if (buf == NULL) {
1.241 veillard 6123: xmlGenericError(xmlGenericErrorContext,
6124: "malloc of %d byte failed\n", size);
1.135 daniel 6125: return;
6126: }
1.108 veillard 6127: while (IS_CHAR(cur) &&
1.110 daniel 6128: ((r != ']') || (s != ']') || (cur != '>'))) {
1.152 daniel 6129: if (len + 5 >= size) {
1.135 daniel 6130: size *= 2;
1.204 veillard 6131: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 6132: if (buf == NULL) {
1.241 veillard 6133: xmlGenericError(xmlGenericErrorContext,
6134: "realloc of %d byte failed\n", size);
1.135 daniel 6135: return;
6136: }
6137: }
1.152 daniel 6138: COPY_BUF(rl,buf,len,r);
1.110 daniel 6139: r = s;
1.152 daniel 6140: rl = sl;
1.110 daniel 6141: s = cur;
1.152 daniel 6142: sl = l;
1.234 veillard 6143: count++;
6144: if (count > 50) {
6145: GROW;
6146: count = 0;
6147: }
1.152 daniel 6148: NEXTL(l);
6149: cur = CUR_CHAR(l);
1.3 veillard 6150: }
1.135 daniel 6151: buf[len] = 0;
1.109 daniel 6152: ctxt->instate = XML_PARSER_CONTENT;
1.152 daniel 6153: if (cur != '>') {
1.230 veillard 6154: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.55 daniel 6155: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6156: ctxt->sax->error(ctxt->userData,
1.135 daniel 6157: "CData section not finished\n%.50s\n", buf);
1.59 daniel 6158: ctxt->wellFormed = 0;
1.180 daniel 6159: ctxt->disableSAX = 1;
1.135 daniel 6160: xmlFree(buf);
1.45 daniel 6161: return;
1.3 veillard 6162: }
1.152 daniel 6163: NEXTL(l);
1.16 daniel 6164:
1.45 daniel 6165: /*
1.135 daniel 6166: * Ok the buffer is to be consumed as cdata.
1.45 daniel 6167: */
1.171 daniel 6168: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.107 daniel 6169: if (ctxt->sax->cdataBlock != NULL)
1.135 daniel 6170: ctxt->sax->cdataBlock(ctxt->userData, buf, len);
1.45 daniel 6171: }
1.135 daniel 6172: xmlFree(buf);
1.2 veillard 6173: }
6174:
1.50 daniel 6175: /**
6176: * xmlParseContent:
6177: * @ctxt: an XML parser context
6178: *
6179: * Parse a content:
1.2 veillard 6180: *
1.27 daniel 6181: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 6182: */
6183:
1.55 daniel 6184: void
6185: xmlParseContent(xmlParserCtxtPtr ctxt) {
1.97 daniel 6186: GROW;
1.176 daniel 6187: while (((RAW != 0) || (ctxt->token != 0)) &&
6188: ((RAW != '<') || (NXT(1) != '/'))) {
1.123 daniel 6189: const xmlChar *test = CUR_PTR;
1.91 daniel 6190: int cons = ctxt->input->consumed;
1.123 daniel 6191: xmlChar tok = ctxt->token;
1.27 daniel 6192:
6193: /*
1.152 daniel 6194: * Handle possible processed charrefs.
6195: */
6196: if (ctxt->token != 0) {
6197: xmlParseCharData(ctxt, 0);
6198: }
6199: /*
1.27 daniel 6200: * First case : a Processing Instruction.
6201: */
1.152 daniel 6202: else if ((RAW == '<') && (NXT(1) == '?')) {
1.27 daniel 6203: xmlParsePI(ctxt);
6204: }
1.72 daniel 6205:
1.27 daniel 6206: /*
6207: * Second case : a CDSection
6208: */
1.152 daniel 6209: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 6210: (NXT(2) == '[') && (NXT(3) == 'C') &&
6211: (NXT(4) == 'D') && (NXT(5) == 'A') &&
6212: (NXT(6) == 'T') && (NXT(7) == 'A') &&
6213: (NXT(8) == '[')) {
1.45 daniel 6214: xmlParseCDSect(ctxt);
1.27 daniel 6215: }
1.72 daniel 6216:
1.27 daniel 6217: /*
6218: * Third case : a comment
6219: */
1.152 daniel 6220: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 6221: (NXT(2) == '-') && (NXT(3) == '-')) {
1.114 daniel 6222: xmlParseComment(ctxt);
1.97 daniel 6223: ctxt->instate = XML_PARSER_CONTENT;
1.27 daniel 6224: }
1.72 daniel 6225:
1.27 daniel 6226: /*
6227: * Fourth case : a sub-element.
6228: */
1.152 daniel 6229: else if (RAW == '<') {
1.72 daniel 6230: xmlParseElement(ctxt);
1.45 daniel 6231: }
1.72 daniel 6232:
1.45 daniel 6233: /*
1.50 daniel 6234: * Fifth case : a reference. If if has not been resolved,
6235: * parsing returns it's Name, create the node
1.45 daniel 6236: */
1.97 daniel 6237:
1.152 daniel 6238: else if (RAW == '&') {
1.77 daniel 6239: xmlParseReference(ctxt);
1.27 daniel 6240: }
1.72 daniel 6241:
1.27 daniel 6242: /*
6243: * Last case, text. Note that References are handled directly.
6244: */
6245: else {
1.45 daniel 6246: xmlParseCharData(ctxt, 0);
1.3 veillard 6247: }
1.14 veillard 6248:
1.91 daniel 6249: GROW;
1.14 veillard 6250: /*
1.45 daniel 6251: * Pop-up of finished entities.
1.14 veillard 6252: */
1.152 daniel 6253: while ((RAW == 0) && (ctxt->inputNr > 1))
1.69 daniel 6254: xmlPopInput(ctxt);
1.135 daniel 6255: SHRINK;
1.45 daniel 6256:
1.113 daniel 6257: if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6258: (tok == ctxt->token)) {
1.230 veillard 6259: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 6260: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6261: ctxt->sax->error(ctxt->userData,
1.59 daniel 6262: "detected an error in element content\n");
6263: ctxt->wellFormed = 0;
1.180 daniel 6264: ctxt->disableSAX = 1;
1.224 veillard 6265: ctxt->instate = XML_PARSER_EOF;
1.29 daniel 6266: break;
6267: }
1.3 veillard 6268: }
1.2 veillard 6269: }
6270:
1.50 daniel 6271: /**
6272: * xmlParseElement:
6273: * @ctxt: an XML parser context
6274: *
6275: * parse an XML element, this is highly recursive
1.26 daniel 6276: *
6277: * [39] element ::= EmptyElemTag | STag content ETag
6278: *
1.98 daniel 6279: * [ WFC: Element Type Match ]
6280: * The Name in an element's end-tag must match the element type in the
6281: * start-tag.
6282: *
6283: * [ VC: Element Valid ]
1.117 daniel 6284: * An element is valid if there is a declaration matching elementdecl
1.99 daniel 6285: * where the Name matches the element type and one of the following holds:
6286: * - The declaration matches EMPTY and the element has no content.
6287: * - The declaration matches children and the sequence of child elements
6288: * belongs to the language generated by the regular expression in the
6289: * content model, with optional white space (characters matching the
6290: * nonterminal S) between each pair of child elements.
6291: * - The declaration matches Mixed and the content consists of character
6292: * data and child elements whose types match names in the content model.
6293: * - The declaration matches ANY, and the types of any child elements have
6294: * been declared.
1.2 veillard 6295: */
1.26 daniel 6296:
1.72 daniel 6297: void
1.69 daniel 6298: xmlParseElement(xmlParserCtxtPtr ctxt) {
1.123 daniel 6299: const xmlChar *openTag = CUR_PTR;
6300: xmlChar *name;
1.140 daniel 6301: xmlChar *oldname;
1.32 daniel 6302: xmlParserNodeInfo node_info;
1.118 daniel 6303: xmlNodePtr ret;
1.2 veillard 6304:
1.32 daniel 6305: /* Capture start position */
1.118 daniel 6306: if (ctxt->record_info) {
6307: node_info.begin_pos = ctxt->input->consumed +
6308: (CUR_PTR - ctxt->input->base);
6309: node_info.begin_line = ctxt->input->line;
6310: }
1.32 daniel 6311:
1.176 daniel 6312: if (ctxt->spaceNr == 0)
6313: spacePush(ctxt, -1);
6314: else
6315: spacePush(ctxt, *ctxt->space);
6316:
1.83 daniel 6317: name = xmlParseStartTag(ctxt);
6318: if (name == NULL) {
1.176 daniel 6319: spacePop(ctxt);
1.83 daniel 6320: return;
6321: }
1.140 daniel 6322: namePush(ctxt, name);
1.118 daniel 6323: ret = ctxt->node;
1.2 veillard 6324:
6325: /*
1.99 daniel 6326: * [ VC: Root Element Type ]
6327: * The Name in the document type declaration must match the element
6328: * type of the root element.
6329: */
1.105 daniel 6330: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 6331: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.102 daniel 6332: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
1.99 daniel 6333:
6334: /*
1.2 veillard 6335: * Check for an Empty Element.
6336: */
1.152 daniel 6337: if ((RAW == '/') && (NXT(1) == '>')) {
1.40 daniel 6338: SKIP(2);
1.171 daniel 6339: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6340: (!ctxt->disableSAX))
1.83 daniel 6341: ctxt->sax->endElement(ctxt->userData, name);
1.140 daniel 6342: oldname = namePop(ctxt);
1.176 daniel 6343: spacePop(ctxt);
1.140 daniel 6344: if (oldname != NULL) {
6345: #ifdef DEBUG_STACK
1.241 veillard 6346: xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
1.140 daniel 6347: #endif
6348: xmlFree(oldname);
1.211 veillard 6349: }
6350: if ( ret != NULL && ctxt->record_info ) {
6351: node_info.end_pos = ctxt->input->consumed +
6352: (CUR_PTR - ctxt->input->base);
6353: node_info.end_line = ctxt->input->line;
6354: node_info.node = ret;
6355: xmlParserAddNodeInfo(ctxt, &node_info);
1.140 daniel 6356: }
1.72 daniel 6357: return;
1.2 veillard 6358: }
1.152 daniel 6359: if (RAW == '>') {
1.91 daniel 6360: NEXT;
6361: } else {
1.230 veillard 6362: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.55 daniel 6363: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6364: ctxt->sax->error(ctxt->userData,
6365: "Couldn't find end of Start Tag\n%.30s\n",
1.57 daniel 6366: openTag);
1.59 daniel 6367: ctxt->wellFormed = 0;
1.180 daniel 6368: ctxt->disableSAX = 1;
1.45 daniel 6369:
6370: /*
6371: * end of parsing of this node.
6372: */
6373: nodePop(ctxt);
1.140 daniel 6374: oldname = namePop(ctxt);
1.176 daniel 6375: spacePop(ctxt);
1.140 daniel 6376: if (oldname != NULL) {
6377: #ifdef DEBUG_STACK
1.241 veillard 6378: xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
1.140 daniel 6379: #endif
6380: xmlFree(oldname);
6381: }
1.118 daniel 6382:
6383: /*
6384: * Capture end position and add node
6385: */
6386: if ( ret != NULL && ctxt->record_info ) {
6387: node_info.end_pos = ctxt->input->consumed +
6388: (CUR_PTR - ctxt->input->base);
6389: node_info.end_line = ctxt->input->line;
6390: node_info.node = ret;
6391: xmlParserAddNodeInfo(ctxt, &node_info);
6392: }
1.72 daniel 6393: return;
1.2 veillard 6394: }
6395:
6396: /*
6397: * Parse the content of the element:
6398: */
1.45 daniel 6399: xmlParseContent(ctxt);
1.153 daniel 6400: if (!IS_CHAR(RAW)) {
1.230 veillard 6401: ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
1.55 daniel 6402: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6403: ctxt->sax->error(ctxt->userData,
1.57 daniel 6404: "Premature end of data in tag %.30s\n", openTag);
1.59 daniel 6405: ctxt->wellFormed = 0;
1.180 daniel 6406: ctxt->disableSAX = 1;
1.45 daniel 6407:
6408: /*
6409: * end of parsing of this node.
6410: */
6411: nodePop(ctxt);
1.140 daniel 6412: oldname = namePop(ctxt);
1.176 daniel 6413: spacePop(ctxt);
1.140 daniel 6414: if (oldname != NULL) {
6415: #ifdef DEBUG_STACK
1.241 veillard 6416: xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
1.140 daniel 6417: #endif
6418: xmlFree(oldname);
6419: }
1.72 daniel 6420: return;
1.2 veillard 6421: }
6422:
6423: /*
1.27 daniel 6424: * parse the end of tag: '</' should be here.
1.2 veillard 6425: */
1.140 daniel 6426: xmlParseEndTag(ctxt);
1.118 daniel 6427:
6428: /*
6429: * Capture end position and add node
6430: */
6431: if ( ret != NULL && ctxt->record_info ) {
6432: node_info.end_pos = ctxt->input->consumed +
6433: (CUR_PTR - ctxt->input->base);
6434: node_info.end_line = ctxt->input->line;
6435: node_info.node = ret;
6436: xmlParserAddNodeInfo(ctxt, &node_info);
6437: }
1.2 veillard 6438: }
6439:
1.50 daniel 6440: /**
6441: * xmlParseVersionNum:
6442: * @ctxt: an XML parser context
6443: *
6444: * parse the XML version value.
1.29 daniel 6445: *
6446: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
1.68 daniel 6447: *
6448: * Returns the string giving the XML version number, or NULL
1.29 daniel 6449: */
1.123 daniel 6450: xmlChar *
1.55 daniel 6451: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.135 daniel 6452: xmlChar *buf = NULL;
6453: int len = 0;
6454: int size = 10;
6455: xmlChar cur;
1.29 daniel 6456:
1.135 daniel 6457: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6458: if (buf == NULL) {
1.241 veillard 6459: xmlGenericError(xmlGenericErrorContext,
6460: "malloc of %d byte failed\n", size);
1.135 daniel 6461: return(NULL);
6462: }
6463: cur = CUR;
1.152 daniel 6464: while (((cur >= 'a') && (cur <= 'z')) ||
6465: ((cur >= 'A') && (cur <= 'Z')) ||
6466: ((cur >= '0') && (cur <= '9')) ||
6467: (cur == '_') || (cur == '.') ||
6468: (cur == ':') || (cur == '-')) {
1.135 daniel 6469: if (len + 1 >= size) {
6470: size *= 2;
1.204 veillard 6471: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 6472: if (buf == NULL) {
1.241 veillard 6473: xmlGenericError(xmlGenericErrorContext,
6474: "realloc of %d byte failed\n", size);
1.135 daniel 6475: return(NULL);
6476: }
6477: }
6478: buf[len++] = cur;
6479: NEXT;
6480: cur=CUR;
6481: }
6482: buf[len] = 0;
6483: return(buf);
1.29 daniel 6484: }
6485:
1.50 daniel 6486: /**
6487: * xmlParseVersionInfo:
6488: * @ctxt: an XML parser context
6489: *
6490: * parse the XML version.
1.29 daniel 6491: *
6492: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6493: *
6494: * [25] Eq ::= S? '=' S?
1.50 daniel 6495: *
1.68 daniel 6496: * Returns the version string, e.g. "1.0"
1.29 daniel 6497: */
6498:
1.123 daniel 6499: xmlChar *
1.55 daniel 6500: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
1.123 daniel 6501: xmlChar *version = NULL;
6502: const xmlChar *q;
1.29 daniel 6503:
1.152 daniel 6504: if ((RAW == 'v') && (NXT(1) == 'e') &&
1.40 daniel 6505: (NXT(2) == 'r') && (NXT(3) == 's') &&
6506: (NXT(4) == 'i') && (NXT(5) == 'o') &&
6507: (NXT(6) == 'n')) {
6508: SKIP(7);
1.42 daniel 6509: SKIP_BLANKS;
1.152 daniel 6510: if (RAW != '=') {
1.230 veillard 6511: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.55 daniel 6512: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6513: ctxt->sax->error(ctxt->userData,
6514: "xmlParseVersionInfo : expected '='\n");
1.59 daniel 6515: ctxt->wellFormed = 0;
1.180 daniel 6516: ctxt->disableSAX = 1;
1.31 daniel 6517: return(NULL);
6518: }
1.40 daniel 6519: NEXT;
1.42 daniel 6520: SKIP_BLANKS;
1.152 daniel 6521: if (RAW == '"') {
1.40 daniel 6522: NEXT;
6523: q = CUR_PTR;
1.29 daniel 6524: version = xmlParseVersionNum(ctxt);
1.152 daniel 6525: if (RAW != '"') {
1.230 veillard 6526: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 6527: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6528: ctxt->sax->error(ctxt->userData,
6529: "String not closed\n%.50s\n", q);
1.59 daniel 6530: ctxt->wellFormed = 0;
1.180 daniel 6531: ctxt->disableSAX = 1;
1.55 daniel 6532: } else
1.40 daniel 6533: NEXT;
1.152 daniel 6534: } else if (RAW == '\''){
1.40 daniel 6535: NEXT;
6536: q = CUR_PTR;
1.29 daniel 6537: version = xmlParseVersionNum(ctxt);
1.152 daniel 6538: if (RAW != '\'') {
1.230 veillard 6539: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 6540: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6541: ctxt->sax->error(ctxt->userData,
6542: "String not closed\n%.50s\n", q);
1.59 daniel 6543: ctxt->wellFormed = 0;
1.180 daniel 6544: ctxt->disableSAX = 1;
1.55 daniel 6545: } else
1.40 daniel 6546: NEXT;
1.31 daniel 6547: } else {
1.230 veillard 6548: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.55 daniel 6549: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6550: ctxt->sax->error(ctxt->userData,
1.59 daniel 6551: "xmlParseVersionInfo : expected ' or \"\n");
1.122 daniel 6552: ctxt->wellFormed = 0;
1.180 daniel 6553: ctxt->disableSAX = 1;
1.29 daniel 6554: }
6555: }
6556: return(version);
6557: }
6558:
1.50 daniel 6559: /**
6560: * xmlParseEncName:
6561: * @ctxt: an XML parser context
6562: *
6563: * parse the XML encoding name
1.29 daniel 6564: *
6565: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1.50 daniel 6566: *
1.68 daniel 6567: * Returns the encoding name value or NULL
1.29 daniel 6568: */
1.123 daniel 6569: xmlChar *
1.55 daniel 6570: xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.135 daniel 6571: xmlChar *buf = NULL;
6572: int len = 0;
6573: int size = 10;
6574: xmlChar cur;
1.29 daniel 6575:
1.135 daniel 6576: cur = CUR;
6577: if (((cur >= 'a') && (cur <= 'z')) ||
6578: ((cur >= 'A') && (cur <= 'Z'))) {
6579: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6580: if (buf == NULL) {
1.241 veillard 6581: xmlGenericError(xmlGenericErrorContext,
6582: "malloc of %d byte failed\n", size);
1.135 daniel 6583: return(NULL);
6584: }
6585:
6586: buf[len++] = cur;
1.40 daniel 6587: NEXT;
1.135 daniel 6588: cur = CUR;
1.152 daniel 6589: while (((cur >= 'a') && (cur <= 'z')) ||
6590: ((cur >= 'A') && (cur <= 'Z')) ||
6591: ((cur >= '0') && (cur <= '9')) ||
6592: (cur == '.') || (cur == '_') ||
6593: (cur == '-')) {
1.135 daniel 6594: if (len + 1 >= size) {
6595: size *= 2;
1.204 veillard 6596: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 6597: if (buf == NULL) {
1.241 veillard 6598: xmlGenericError(xmlGenericErrorContext,
6599: "realloc of %d byte failed\n", size);
1.135 daniel 6600: return(NULL);
6601: }
6602: }
6603: buf[len++] = cur;
6604: NEXT;
6605: cur = CUR;
6606: if (cur == 0) {
6607: SHRINK;
6608: GROW;
6609: cur = CUR;
6610: }
6611: }
6612: buf[len] = 0;
1.29 daniel 6613: } else {
1.230 veillard 6614: ctxt->errNo = XML_ERR_ENCODING_NAME;
1.55 daniel 6615: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6616: ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
1.59 daniel 6617: ctxt->wellFormed = 0;
1.180 daniel 6618: ctxt->disableSAX = 1;
1.29 daniel 6619: }
1.135 daniel 6620: return(buf);
1.29 daniel 6621: }
6622:
1.50 daniel 6623: /**
6624: * xmlParseEncodingDecl:
6625: * @ctxt: an XML parser context
6626: *
6627: * parse the XML encoding declaration
1.29 daniel 6628: *
6629: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
1.50 daniel 6630: *
1.229 veillard 6631: * this setups the conversion filters.
1.50 daniel 6632: *
1.68 daniel 6633: * Returns the encoding value or NULL
1.29 daniel 6634: */
6635:
1.123 daniel 6636: xmlChar *
1.55 daniel 6637: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 6638: xmlChar *encoding = NULL;
6639: const xmlChar *q;
1.29 daniel 6640:
1.42 daniel 6641: SKIP_BLANKS;
1.152 daniel 6642: if ((RAW == 'e') && (NXT(1) == 'n') &&
1.40 daniel 6643: (NXT(2) == 'c') && (NXT(3) == 'o') &&
6644: (NXT(4) == 'd') && (NXT(5) == 'i') &&
6645: (NXT(6) == 'n') && (NXT(7) == 'g')) {
6646: SKIP(8);
1.42 daniel 6647: SKIP_BLANKS;
1.152 daniel 6648: if (RAW != '=') {
1.230 veillard 6649: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.55 daniel 6650: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6651: ctxt->sax->error(ctxt->userData,
6652: "xmlParseEncodingDecl : expected '='\n");
1.59 daniel 6653: ctxt->wellFormed = 0;
1.180 daniel 6654: ctxt->disableSAX = 1;
1.31 daniel 6655: return(NULL);
6656: }
1.40 daniel 6657: NEXT;
1.42 daniel 6658: SKIP_BLANKS;
1.152 daniel 6659: if (RAW == '"') {
1.40 daniel 6660: NEXT;
6661: q = CUR_PTR;
1.29 daniel 6662: encoding = xmlParseEncName(ctxt);
1.152 daniel 6663: if (RAW != '"') {
1.230 veillard 6664: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 6665: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6666: ctxt->sax->error(ctxt->userData,
6667: "String not closed\n%.50s\n", q);
1.59 daniel 6668: ctxt->wellFormed = 0;
1.180 daniel 6669: ctxt->disableSAX = 1;
1.55 daniel 6670: } else
1.40 daniel 6671: NEXT;
1.152 daniel 6672: } else if (RAW == '\''){
1.40 daniel 6673: NEXT;
6674: q = CUR_PTR;
1.29 daniel 6675: encoding = xmlParseEncName(ctxt);
1.152 daniel 6676: if (RAW != '\'') {
1.230 veillard 6677: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 6678: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6679: ctxt->sax->error(ctxt->userData,
6680: "String not closed\n%.50s\n", q);
1.59 daniel 6681: ctxt->wellFormed = 0;
1.180 daniel 6682: ctxt->disableSAX = 1;
1.55 daniel 6683: } else
1.40 daniel 6684: NEXT;
1.152 daniel 6685: } else if (RAW == '"'){
1.230 veillard 6686: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.55 daniel 6687: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6688: ctxt->sax->error(ctxt->userData,
1.59 daniel 6689: "xmlParseEncodingDecl : expected ' or \"\n");
6690: ctxt->wellFormed = 0;
1.180 daniel 6691: ctxt->disableSAX = 1;
1.29 daniel 6692: }
1.193 daniel 6693: if (encoding != NULL) {
6694: xmlCharEncoding enc;
6695: xmlCharEncodingHandlerPtr handler;
6696:
1.195 daniel 6697: if (ctxt->input->encoding != NULL)
6698: xmlFree((xmlChar *) ctxt->input->encoding);
6699: ctxt->input->encoding = encoding;
6700:
1.193 daniel 6701: enc = xmlParseCharEncoding((const char *) encoding);
6702: /*
6703: * registered set of known encodings
6704: */
6705: if (enc != XML_CHAR_ENCODING_ERROR) {
6706: xmlSwitchEncoding(ctxt, enc);
6707: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6708: xmlFree(encoding);
6709: return(NULL);
6710: }
6711: } else {
6712: /*
6713: * fallback for unknown encodings
6714: */
6715: handler = xmlFindCharEncodingHandler((const char *) encoding);
6716: if (handler != NULL) {
6717: xmlSwitchToEncoding(ctxt, handler);
6718: } else {
6719: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.208 veillard 6720: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6721: ctxt->sax->error(ctxt->userData,
6722: "Unsupported encoding %s\n", encoding);
1.193 daniel 6723: return(NULL);
6724: }
6725: }
6726: }
1.29 daniel 6727: }
6728: return(encoding);
6729: }
6730:
1.50 daniel 6731: /**
6732: * xmlParseSDDecl:
6733: * @ctxt: an XML parser context
6734: *
6735: * parse the XML standalone declaration
1.29 daniel 6736: *
6737: * [32] SDDecl ::= S 'standalone' Eq
6738: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
1.99 daniel 6739: *
6740: * [ VC: Standalone Document Declaration ]
6741: * TODO The standalone document declaration must have the value "no"
6742: * if any external markup declarations contain declarations of:
6743: * - attributes with default values, if elements to which these
6744: * attributes apply appear in the document without specifications
6745: * of values for these attributes, or
6746: * - entities (other than amp, lt, gt, apos, quot), if references
6747: * to those entities appear in the document, or
6748: * - attributes with values subject to normalization, where the
6749: * attribute appears in the document with a value which will change
6750: * as a result of normalization, or
6751: * - element types with element content, if white space occurs directly
6752: * within any instance of those types.
1.68 daniel 6753: *
6754: * Returns 1 if standalone, 0 otherwise
1.29 daniel 6755: */
6756:
1.55 daniel 6757: int
6758: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 6759: int standalone = -1;
6760:
1.42 daniel 6761: SKIP_BLANKS;
1.152 daniel 6762: if ((RAW == 's') && (NXT(1) == 't') &&
1.40 daniel 6763: (NXT(2) == 'a') && (NXT(3) == 'n') &&
6764: (NXT(4) == 'd') && (NXT(5) == 'a') &&
6765: (NXT(6) == 'l') && (NXT(7) == 'o') &&
6766: (NXT(8) == 'n') && (NXT(9) == 'e')) {
6767: SKIP(10);
1.81 daniel 6768: SKIP_BLANKS;
1.152 daniel 6769: if (RAW != '=') {
1.230 veillard 6770: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.55 daniel 6771: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6772: ctxt->sax->error(ctxt->userData,
1.59 daniel 6773: "XML standalone declaration : expected '='\n");
6774: ctxt->wellFormed = 0;
1.180 daniel 6775: ctxt->disableSAX = 1;
1.32 daniel 6776: return(standalone);
6777: }
1.40 daniel 6778: NEXT;
1.42 daniel 6779: SKIP_BLANKS;
1.152 daniel 6780: if (RAW == '\''){
1.40 daniel 6781: NEXT;
1.152 daniel 6782: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 6783: standalone = 0;
1.40 daniel 6784: SKIP(2);
1.152 daniel 6785: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 6786: (NXT(2) == 's')) {
1.29 daniel 6787: standalone = 1;
1.40 daniel 6788: SKIP(3);
1.29 daniel 6789: } else {
1.230 veillard 6790: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.55 daniel 6791: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6792: ctxt->sax->error(ctxt->userData,
6793: "standalone accepts only 'yes' or 'no'\n");
1.59 daniel 6794: ctxt->wellFormed = 0;
1.180 daniel 6795: ctxt->disableSAX = 1;
1.29 daniel 6796: }
1.152 daniel 6797: if (RAW != '\'') {
1.230 veillard 6798: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 6799: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6800: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 6801: ctxt->wellFormed = 0;
1.180 daniel 6802: ctxt->disableSAX = 1;
1.55 daniel 6803: } else
1.40 daniel 6804: NEXT;
1.152 daniel 6805: } else if (RAW == '"'){
1.40 daniel 6806: NEXT;
1.152 daniel 6807: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 6808: standalone = 0;
1.40 daniel 6809: SKIP(2);
1.152 daniel 6810: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 6811: (NXT(2) == 's')) {
1.29 daniel 6812: standalone = 1;
1.40 daniel 6813: SKIP(3);
1.29 daniel 6814: } else {
1.230 veillard 6815: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.55 daniel 6816: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6817: ctxt->sax->error(ctxt->userData,
1.59 daniel 6818: "standalone accepts only 'yes' or 'no'\n");
6819: ctxt->wellFormed = 0;
1.180 daniel 6820: ctxt->disableSAX = 1;
1.29 daniel 6821: }
1.152 daniel 6822: if (RAW != '"') {
1.230 veillard 6823: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 6824: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6825: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 6826: ctxt->wellFormed = 0;
1.180 daniel 6827: ctxt->disableSAX = 1;
1.55 daniel 6828: } else
1.40 daniel 6829: NEXT;
1.37 daniel 6830: } else {
1.230 veillard 6831: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.55 daniel 6832: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6833: ctxt->sax->error(ctxt->userData,
6834: "Standalone value not found\n");
1.59 daniel 6835: ctxt->wellFormed = 0;
1.180 daniel 6836: ctxt->disableSAX = 1;
1.37 daniel 6837: }
1.29 daniel 6838: }
6839: return(standalone);
6840: }
6841:
1.50 daniel 6842: /**
6843: * xmlParseXMLDecl:
6844: * @ctxt: an XML parser context
6845: *
6846: * parse an XML declaration header
1.29 daniel 6847: *
6848: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 6849: */
6850:
1.55 daniel 6851: void
6852: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 6853: xmlChar *version;
1.1 veillard 6854:
6855: /*
1.19 daniel 6856: * We know that '<?xml' is here.
1.1 veillard 6857: */
1.40 daniel 6858: SKIP(5);
1.1 veillard 6859:
1.153 daniel 6860: if (!IS_BLANK(RAW)) {
1.230 veillard 6861: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 6862: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6863: ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
1.59 daniel 6864: ctxt->wellFormed = 0;
1.180 daniel 6865: ctxt->disableSAX = 1;
1.59 daniel 6866: }
1.42 daniel 6867: SKIP_BLANKS;
1.1 veillard 6868:
6869: /*
1.29 daniel 6870: * We should have the VersionInfo here.
1.1 veillard 6871: */
1.29 daniel 6872: version = xmlParseVersionInfo(ctxt);
6873: if (version == NULL)
1.45 daniel 6874: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.72 daniel 6875: ctxt->version = xmlStrdup(version);
1.119 daniel 6876: xmlFree(version);
1.29 daniel 6877:
6878: /*
6879: * We may have the encoding declaration
6880: */
1.153 daniel 6881: if (!IS_BLANK(RAW)) {
1.152 daniel 6882: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 6883: SKIP(2);
6884: return;
6885: }
1.230 veillard 6886: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 6887: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6888: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 6889: ctxt->wellFormed = 0;
1.180 daniel 6890: ctxt->disableSAX = 1;
1.59 daniel 6891: }
1.195 daniel 6892: xmlParseEncodingDecl(ctxt);
1.193 daniel 6893: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6894: /*
6895: * The XML REC instructs us to stop parsing right here
6896: */
6897: return;
6898: }
1.1 veillard 6899:
6900: /*
1.29 daniel 6901: * We may have the standalone status.
1.1 veillard 6902: */
1.164 daniel 6903: if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
1.152 daniel 6904: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 6905: SKIP(2);
6906: return;
6907: }
1.230 veillard 6908: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 6909: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6910: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 6911: ctxt->wellFormed = 0;
1.180 daniel 6912: ctxt->disableSAX = 1;
1.59 daniel 6913: }
6914: SKIP_BLANKS;
1.167 daniel 6915: ctxt->input->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 6916:
1.42 daniel 6917: SKIP_BLANKS;
1.152 daniel 6918: if ((RAW == '?') && (NXT(1) == '>')) {
1.40 daniel 6919: SKIP(2);
1.152 daniel 6920: } else if (RAW == '>') {
1.31 daniel 6921: /* Deprecated old WD ... */
1.230 veillard 6922: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.55 daniel 6923: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6924: ctxt->sax->error(ctxt->userData,
6925: "XML declaration must end-up with '?>'\n");
1.59 daniel 6926: ctxt->wellFormed = 0;
1.180 daniel 6927: ctxt->disableSAX = 1;
1.40 daniel 6928: NEXT;
1.29 daniel 6929: } else {
1.230 veillard 6930: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.55 daniel 6931: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6932: ctxt->sax->error(ctxt->userData,
6933: "parsing XML declaration: '?>' expected\n");
1.59 daniel 6934: ctxt->wellFormed = 0;
1.180 daniel 6935: ctxt->disableSAX = 1;
1.40 daniel 6936: MOVETO_ENDTAG(CUR_PTR);
6937: NEXT;
1.29 daniel 6938: }
1.1 veillard 6939: }
6940:
1.50 daniel 6941: /**
6942: * xmlParseMisc:
6943: * @ctxt: an XML parser context
6944: *
6945: * parse an XML Misc* optionnal field.
1.21 daniel 6946: *
1.22 daniel 6947: * [27] Misc ::= Comment | PI | S
1.1 veillard 6948: */
6949:
1.55 daniel 6950: void
6951: xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.152 daniel 6952: while (((RAW == '<') && (NXT(1) == '?')) ||
6953: ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 6954: (NXT(2) == '-') && (NXT(3) == '-')) ||
6955: IS_BLANK(CUR)) {
1.152 daniel 6956: if ((RAW == '<') && (NXT(1) == '?')) {
1.16 daniel 6957: xmlParsePI(ctxt);
1.40 daniel 6958: } else if (IS_BLANK(CUR)) {
6959: NEXT;
1.1 veillard 6960: } else
1.114 daniel 6961: xmlParseComment(ctxt);
1.1 veillard 6962: }
6963: }
6964:
1.50 daniel 6965: /**
1.181 daniel 6966: * xmlParseDocument:
1.50 daniel 6967: * @ctxt: an XML parser context
6968: *
6969: * parse an XML document (and build a tree if using the standard SAX
6970: * interface).
1.21 daniel 6971: *
1.22 daniel 6972: * [1] document ::= prolog element Misc*
1.29 daniel 6973: *
6974: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.50 daniel 6975: *
1.68 daniel 6976: * Returns 0, -1 in case of error. the parser context is augmented
1.50 daniel 6977: * as a result of the parsing.
1.1 veillard 6978: */
6979:
1.55 daniel 6980: int
6981: xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.156 daniel 6982: xmlChar start[4];
6983: xmlCharEncoding enc;
6984:
1.235 veillard 6985: xmlInitParser();
1.45 daniel 6986:
1.91 daniel 6987: GROW;
6988:
1.14 veillard 6989: /*
1.44 daniel 6990: * SAX: beginning of the document processing.
6991: */
1.72 daniel 6992: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
1.74 daniel 6993: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
1.44 daniel 6994:
1.156 daniel 6995: /*
6996: * Get the 4 first bytes and decode the charset
6997: * if enc != XML_CHAR_ENCODING_NONE
6998: * plug some encoding conversion routines.
6999: */
7000: start[0] = RAW;
7001: start[1] = NXT(1);
7002: start[2] = NXT(2);
7003: start[3] = NXT(3);
7004: enc = xmlDetectCharEncoding(start, 4);
7005: if (enc != XML_CHAR_ENCODING_NONE) {
7006: xmlSwitchEncoding(ctxt, enc);
7007: }
7008:
1.1 veillard 7009:
1.59 daniel 7010: if (CUR == 0) {
1.230 veillard 7011: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 7012: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7013: ctxt->sax->error(ctxt->userData, "Document is empty\n");
1.59 daniel 7014: ctxt->wellFormed = 0;
1.180 daniel 7015: ctxt->disableSAX = 1;
1.59 daniel 7016: }
1.1 veillard 7017:
7018: /*
7019: * Check for the XMLDecl in the Prolog.
7020: */
1.91 daniel 7021: GROW;
1.152 daniel 7022: if ((RAW == '<') && (NXT(1) == '?') &&
1.40 daniel 7023: (NXT(2) == 'x') && (NXT(3) == 'm') &&
1.142 daniel 7024: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.196 daniel 7025:
7026: /*
7027: * Note that we will switch encoding on the fly.
7028: */
1.19 daniel 7029: xmlParseXMLDecl(ctxt);
1.193 daniel 7030: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7031: /*
7032: * The XML REC instructs us to stop parsing right here
7033: */
7034: return(-1);
7035: }
1.167 daniel 7036: ctxt->standalone = ctxt->input->standalone;
1.42 daniel 7037: SKIP_BLANKS;
1.1 veillard 7038: } else {
1.72 daniel 7039: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.1 veillard 7040: }
1.171 daniel 7041: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
1.74 daniel 7042: ctxt->sax->startDocument(ctxt->userData);
1.1 veillard 7043:
7044: /*
7045: * The Misc part of the Prolog
7046: */
1.91 daniel 7047: GROW;
1.16 daniel 7048: xmlParseMisc(ctxt);
1.1 veillard 7049:
7050: /*
1.29 daniel 7051: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 7052: * (doctypedecl Misc*)?
7053: */
1.91 daniel 7054: GROW;
1.152 daniel 7055: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7056: (NXT(2) == 'D') && (NXT(3) == 'O') &&
7057: (NXT(4) == 'C') && (NXT(5) == 'T') &&
7058: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7059: (NXT(8) == 'E')) {
1.165 daniel 7060:
1.166 daniel 7061: ctxt->inSubset = 1;
1.22 daniel 7062: xmlParseDocTypeDecl(ctxt);
1.152 daniel 7063: if (RAW == '[') {
1.140 daniel 7064: ctxt->instate = XML_PARSER_DTD;
7065: xmlParseInternalSubset(ctxt);
7066: }
1.165 daniel 7067:
7068: /*
7069: * Create and update the external subset.
7070: */
1.166 daniel 7071: ctxt->inSubset = 2;
1.171 daniel 7072: if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7073: (!ctxt->disableSAX))
1.165 daniel 7074: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7075: ctxt->extSubSystem, ctxt->extSubURI);
1.166 daniel 7076: ctxt->inSubset = 0;
1.165 daniel 7077:
7078:
1.96 daniel 7079: ctxt->instate = XML_PARSER_PROLOG;
1.22 daniel 7080: xmlParseMisc(ctxt);
1.21 daniel 7081: }
7082:
7083: /*
7084: * Time to start parsing the tree itself
1.1 veillard 7085: */
1.91 daniel 7086: GROW;
1.152 daniel 7087: if (RAW != '<') {
1.230 veillard 7088: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 7089: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7090: ctxt->sax->error(ctxt->userData,
1.151 daniel 7091: "Start tag expected, '<' not found\n");
1.59 daniel 7092: ctxt->wellFormed = 0;
1.180 daniel 7093: ctxt->disableSAX = 1;
1.140 daniel 7094: ctxt->instate = XML_PARSER_EOF;
7095: } else {
7096: ctxt->instate = XML_PARSER_CONTENT;
7097: xmlParseElement(ctxt);
7098: ctxt->instate = XML_PARSER_EPILOG;
7099:
7100:
7101: /*
7102: * The Misc part at the end
7103: */
7104: xmlParseMisc(ctxt);
7105:
1.152 daniel 7106: if (RAW != 0) {
1.230 veillard 7107: ctxt->errNo = XML_ERR_DOCUMENT_END;
1.140 daniel 7108: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7109: ctxt->sax->error(ctxt->userData,
7110: "Extra content at the end of the document\n");
7111: ctxt->wellFormed = 0;
1.180 daniel 7112: ctxt->disableSAX = 1;
1.140 daniel 7113: }
7114: ctxt->instate = XML_PARSER_EOF;
1.59 daniel 7115: }
7116:
1.44 daniel 7117: /*
7118: * SAX: end of the document processing.
7119: */
1.171 daniel 7120: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7121: (!ctxt->disableSAX))
1.74 daniel 7122: ctxt->sax->endDocument(ctxt->userData);
1.151 daniel 7123:
1.59 daniel 7124: if (! ctxt->wellFormed) return(-1);
1.16 daniel 7125: return(0);
7126: }
7127:
1.229 veillard 7128: /**
7129: * xmlParseExtParsedEnt:
7130: * @ctxt: an XML parser context
7131: *
7132: * parse a genreral parsed entity
7133: * An external general parsed entity is well-formed if it matches the
7134: * production labeled extParsedEnt.
7135: *
7136: * [78] extParsedEnt ::= TextDecl? content
7137: *
7138: * Returns 0, -1 in case of error. the parser context is augmented
7139: * as a result of the parsing.
7140: */
7141:
7142: int
7143: xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7144: xmlChar start[4];
7145: xmlCharEncoding enc;
7146:
7147: xmlDefaultSAXHandlerInit();
7148:
7149: GROW;
7150:
7151: /*
7152: * SAX: beginning of the document processing.
7153: */
7154: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7155: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7156:
7157: /*
7158: * Get the 4 first bytes and decode the charset
7159: * if enc != XML_CHAR_ENCODING_NONE
7160: * plug some encoding conversion routines.
7161: */
7162: start[0] = RAW;
7163: start[1] = NXT(1);
7164: start[2] = NXT(2);
7165: start[3] = NXT(3);
7166: enc = xmlDetectCharEncoding(start, 4);
7167: if (enc != XML_CHAR_ENCODING_NONE) {
7168: xmlSwitchEncoding(ctxt, enc);
7169: }
7170:
7171:
7172: if (CUR == 0) {
1.230 veillard 7173: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.229 veillard 7174: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7175: ctxt->sax->error(ctxt->userData, "Document is empty\n");
7176: ctxt->wellFormed = 0;
7177: ctxt->disableSAX = 1;
7178: }
7179:
7180: /*
7181: * Check for the XMLDecl in the Prolog.
7182: */
7183: GROW;
7184: if ((RAW == '<') && (NXT(1) == '?') &&
7185: (NXT(2) == 'x') && (NXT(3) == 'm') &&
7186: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7187:
7188: /*
7189: * Note that we will switch encoding on the fly.
7190: */
7191: xmlParseXMLDecl(ctxt);
7192: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7193: /*
7194: * The XML REC instructs us to stop parsing right here
7195: */
7196: return(-1);
7197: }
7198: SKIP_BLANKS;
7199: } else {
7200: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7201: }
7202: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7203: ctxt->sax->startDocument(ctxt->userData);
7204:
7205: /*
7206: * Doing validity checking on chunk doesn't make sense
7207: */
7208: ctxt->instate = XML_PARSER_CONTENT;
7209: ctxt->validate = 0;
7210: ctxt->depth = 0;
7211:
7212: xmlParseContent(ctxt);
7213:
7214: if ((RAW == '<') && (NXT(1) == '/')) {
1.230 veillard 7215: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.229 veillard 7216: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7217: ctxt->sax->error(ctxt->userData,
7218: "chunk is not well balanced\n");
7219: ctxt->wellFormed = 0;
7220: ctxt->disableSAX = 1;
7221: } else if (RAW != 0) {
1.230 veillard 7222: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
1.229 veillard 7223: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7224: ctxt->sax->error(ctxt->userData,
7225: "extra content at the end of well balanced chunk\n");
7226: ctxt->wellFormed = 0;
7227: ctxt->disableSAX = 1;
7228: }
7229:
7230: /*
7231: * SAX: end of the document processing.
7232: */
7233: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7234: (!ctxt->disableSAX))
7235: ctxt->sax->endDocument(ctxt->userData);
7236:
7237: if (! ctxt->wellFormed) return(-1);
7238: return(0);
7239: }
7240:
1.98 daniel 7241: /************************************************************************
7242: * *
1.128 daniel 7243: * Progressive parsing interfaces *
7244: * *
7245: ************************************************************************/
7246:
7247: /**
7248: * xmlParseLookupSequence:
7249: * @ctxt: an XML parser context
7250: * @first: the first char to lookup
1.140 daniel 7251: * @next: the next char to lookup or zero
7252: * @third: the next char to lookup or zero
1.128 daniel 7253: *
1.140 daniel 7254: * Try to find if a sequence (first, next, third) or just (first next) or
7255: * (first) is available in the input stream.
7256: * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7257: * to avoid rescanning sequences of bytes, it DOES change the state of the
7258: * parser, do not use liberally.
1.128 daniel 7259: *
1.140 daniel 7260: * Returns the index to the current parsing point if the full sequence
7261: * is available, -1 otherwise.
1.128 daniel 7262: */
7263: int
1.140 daniel 7264: xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7265: xmlChar next, xmlChar third) {
7266: int base, len;
7267: xmlParserInputPtr in;
7268: const xmlChar *buf;
7269:
7270: in = ctxt->input;
7271: if (in == NULL) return(-1);
7272: base = in->cur - in->base;
7273: if (base < 0) return(-1);
7274: if (ctxt->checkIndex > base)
7275: base = ctxt->checkIndex;
7276: if (in->buf == NULL) {
7277: buf = in->base;
7278: len = in->length;
7279: } else {
7280: buf = in->buf->buffer->content;
7281: len = in->buf->buffer->use;
7282: }
7283: /* take into account the sequence length */
7284: if (third) len -= 2;
7285: else if (next) len --;
7286: for (;base < len;base++) {
7287: if (buf[base] == first) {
7288: if (third != 0) {
7289: if ((buf[base + 1] != next) ||
7290: (buf[base + 2] != third)) continue;
7291: } else if (next != 0) {
7292: if (buf[base + 1] != next) continue;
7293: }
7294: ctxt->checkIndex = 0;
7295: #ifdef DEBUG_PUSH
7296: if (next == 0)
1.241 veillard 7297: xmlGenericError(xmlGenericErrorContext,
7298: "PP: lookup '%c' found at %d\n",
1.140 daniel 7299: first, base);
7300: else if (third == 0)
1.241 veillard 7301: xmlGenericError(xmlGenericErrorContext,
7302: "PP: lookup '%c%c' found at %d\n",
1.140 daniel 7303: first, next, base);
7304: else
1.241 veillard 7305: xmlGenericError(xmlGenericErrorContext,
7306: "PP: lookup '%c%c%c' found at %d\n",
1.140 daniel 7307: first, next, third, base);
7308: #endif
7309: return(base - (in->cur - in->base));
7310: }
7311: }
7312: ctxt->checkIndex = base;
7313: #ifdef DEBUG_PUSH
7314: if (next == 0)
1.241 veillard 7315: xmlGenericError(xmlGenericErrorContext,
7316: "PP: lookup '%c' failed\n", first);
1.140 daniel 7317: else if (third == 0)
1.241 veillard 7318: xmlGenericError(xmlGenericErrorContext,
7319: "PP: lookup '%c%c' failed\n", first, next);
1.140 daniel 7320: else
1.241 veillard 7321: xmlGenericError(xmlGenericErrorContext,
7322: "PP: lookup '%c%c%c' failed\n", first, next, third);
1.140 daniel 7323: #endif
7324: return(-1);
1.128 daniel 7325: }
7326:
7327: /**
1.143 daniel 7328: * xmlParseTryOrFinish:
1.128 daniel 7329: * @ctxt: an XML parser context
1.143 daniel 7330: * @terminate: last chunk indicator
1.128 daniel 7331: *
7332: * Try to progress on parsing
7333: *
7334: * Returns zero if no parsing was possible
7335: */
7336: int
1.143 daniel 7337: xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
1.128 daniel 7338: int ret = 0;
1.140 daniel 7339: int avail;
7340: xmlChar cur, next;
7341:
7342: #ifdef DEBUG_PUSH
7343: switch (ctxt->instate) {
7344: case XML_PARSER_EOF:
1.241 veillard 7345: xmlGenericError(xmlGenericErrorContext,
7346: "PP: try EOF\n"); break;
1.140 daniel 7347: case XML_PARSER_START:
1.241 veillard 7348: xmlGenericError(xmlGenericErrorContext,
7349: "PP: try START\n"); break;
1.140 daniel 7350: case XML_PARSER_MISC:
1.241 veillard 7351: xmlGenericError(xmlGenericErrorContext,
7352: "PP: try MISC\n");break;
1.140 daniel 7353: case XML_PARSER_COMMENT:
1.241 veillard 7354: xmlGenericError(xmlGenericErrorContext,
7355: "PP: try COMMENT\n");break;
1.140 daniel 7356: case XML_PARSER_PROLOG:
1.241 veillard 7357: xmlGenericError(xmlGenericErrorContext,
7358: "PP: try PROLOG\n");break;
1.140 daniel 7359: case XML_PARSER_START_TAG:
1.241 veillard 7360: xmlGenericError(xmlGenericErrorContext,
7361: "PP: try START_TAG\n");break;
1.140 daniel 7362: case XML_PARSER_CONTENT:
1.241 veillard 7363: xmlGenericError(xmlGenericErrorContext,
7364: "PP: try CONTENT\n");break;
1.140 daniel 7365: case XML_PARSER_CDATA_SECTION:
1.241 veillard 7366: xmlGenericError(xmlGenericErrorContext,
7367: "PP: try CDATA_SECTION\n");break;
1.140 daniel 7368: case XML_PARSER_END_TAG:
1.241 veillard 7369: xmlGenericError(xmlGenericErrorContext,
7370: "PP: try END_TAG\n");break;
1.140 daniel 7371: case XML_PARSER_ENTITY_DECL:
1.241 veillard 7372: xmlGenericError(xmlGenericErrorContext,
7373: "PP: try ENTITY_DECL\n");break;
1.140 daniel 7374: case XML_PARSER_ENTITY_VALUE:
1.241 veillard 7375: xmlGenericError(xmlGenericErrorContext,
7376: "PP: try ENTITY_VALUE\n");break;
1.140 daniel 7377: case XML_PARSER_ATTRIBUTE_VALUE:
1.241 veillard 7378: xmlGenericError(xmlGenericErrorContext,
7379: "PP: try ATTRIBUTE_VALUE\n");break;
1.140 daniel 7380: case XML_PARSER_DTD:
1.241 veillard 7381: xmlGenericError(xmlGenericErrorContext,
7382: "PP: try DTD\n");break;
1.140 daniel 7383: case XML_PARSER_EPILOG:
1.241 veillard 7384: xmlGenericError(xmlGenericErrorContext,
7385: "PP: try EPILOG\n");break;
1.140 daniel 7386: case XML_PARSER_PI:
1.241 veillard 7387: xmlGenericError(xmlGenericErrorContext,
7388: "PP: try PI\n");break;
1.245 veillard 7389: case XML_PARSER_IGNORE:
7390: xmlGenericError(xmlGenericErrorContext,
7391: "PP: try IGNORE\n");break;
1.140 daniel 7392: }
7393: #endif
1.128 daniel 7394:
7395: while (1) {
1.140 daniel 7396: /*
7397: * Pop-up of finished entities.
7398: */
1.152 daniel 7399: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 7400: xmlPopInput(ctxt);
7401:
1.184 daniel 7402: if (ctxt->input ==NULL) break;
7403: if (ctxt->input->buf == NULL)
7404: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7405: else
1.184 daniel 7406: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7407: if (avail < 1)
7408: goto done;
1.128 daniel 7409: switch (ctxt->instate) {
7410: case XML_PARSER_EOF:
1.140 daniel 7411: /*
7412: * Document parsing is done !
7413: */
7414: goto done;
7415: case XML_PARSER_START:
7416: /*
7417: * Very first chars read from the document flow.
7418: */
1.184 daniel 7419: cur = ctxt->input->cur[0];
1.140 daniel 7420: if (IS_BLANK(cur)) {
7421: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7422: ctxt->sax->setDocumentLocator(ctxt->userData,
7423: &xmlDefaultSAXLocator);
1.230 veillard 7424: ctxt->errNo = XML_ERR_DOCUMENT_START;
1.140 daniel 7425: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7426: ctxt->sax->error(ctxt->userData,
7427: "Extra spaces at the beginning of the document are not allowed\n");
7428: ctxt->wellFormed = 0;
1.180 daniel 7429: ctxt->disableSAX = 1;
1.140 daniel 7430: SKIP_BLANKS;
7431: ret++;
1.184 daniel 7432: if (ctxt->input->buf == NULL)
7433: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7434: else
1.184 daniel 7435: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7436: }
7437: if (avail < 2)
7438: goto done;
7439:
1.184 daniel 7440: cur = ctxt->input->cur[0];
7441: next = ctxt->input->cur[1];
1.140 daniel 7442: if (cur == 0) {
7443: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7444: ctxt->sax->setDocumentLocator(ctxt->userData,
7445: &xmlDefaultSAXLocator);
1.230 veillard 7446: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.140 daniel 7447: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7448: ctxt->sax->error(ctxt->userData, "Document is empty\n");
7449: ctxt->wellFormed = 0;
1.180 daniel 7450: ctxt->disableSAX = 1;
1.140 daniel 7451: ctxt->instate = XML_PARSER_EOF;
7452: #ifdef DEBUG_PUSH
1.241 veillard 7453: xmlGenericError(xmlGenericErrorContext,
7454: "PP: entering EOF\n");
1.140 daniel 7455: #endif
7456: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7457: ctxt->sax->endDocument(ctxt->userData);
7458: goto done;
7459: }
7460: if ((cur == '<') && (next == '?')) {
7461: /* PI or XML decl */
7462: if (avail < 5) return(ret);
1.143 daniel 7463: if ((!terminate) &&
7464: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 7465: return(ret);
7466: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7467: ctxt->sax->setDocumentLocator(ctxt->userData,
7468: &xmlDefaultSAXLocator);
1.184 daniel 7469: if ((ctxt->input->cur[2] == 'x') &&
7470: (ctxt->input->cur[3] == 'm') &&
7471: (ctxt->input->cur[4] == 'l') &&
7472: (IS_BLANK(ctxt->input->cur[5]))) {
1.140 daniel 7473: ret += 5;
7474: #ifdef DEBUG_PUSH
1.241 veillard 7475: xmlGenericError(xmlGenericErrorContext,
7476: "PP: Parsing XML Decl\n");
1.140 daniel 7477: #endif
7478: xmlParseXMLDecl(ctxt);
1.193 daniel 7479: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7480: /*
7481: * The XML REC instructs us to stop parsing right
7482: * here
7483: */
7484: ctxt->instate = XML_PARSER_EOF;
7485: return(0);
7486: }
1.167 daniel 7487: ctxt->standalone = ctxt->input->standalone;
1.164 daniel 7488: if ((ctxt->encoding == NULL) &&
7489: (ctxt->input->encoding != NULL))
7490: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1.171 daniel 7491: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7492: (!ctxt->disableSAX))
1.140 daniel 7493: ctxt->sax->startDocument(ctxt->userData);
7494: ctxt->instate = XML_PARSER_MISC;
7495: #ifdef DEBUG_PUSH
1.241 veillard 7496: xmlGenericError(xmlGenericErrorContext,
7497: "PP: entering MISC\n");
1.140 daniel 7498: #endif
7499: } else {
7500: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 daniel 7501: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7502: (!ctxt->disableSAX))
1.140 daniel 7503: ctxt->sax->startDocument(ctxt->userData);
7504: ctxt->instate = XML_PARSER_MISC;
7505: #ifdef DEBUG_PUSH
1.241 veillard 7506: xmlGenericError(xmlGenericErrorContext,
7507: "PP: entering MISC\n");
1.140 daniel 7508: #endif
7509: }
7510: } else {
7511: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7512: ctxt->sax->setDocumentLocator(ctxt->userData,
7513: &xmlDefaultSAXLocator);
7514: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 daniel 7515: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7516: (!ctxt->disableSAX))
1.140 daniel 7517: ctxt->sax->startDocument(ctxt->userData);
7518: ctxt->instate = XML_PARSER_MISC;
7519: #ifdef DEBUG_PUSH
1.241 veillard 7520: xmlGenericError(xmlGenericErrorContext,
7521: "PP: entering MISC\n");
1.140 daniel 7522: #endif
7523: }
7524: break;
7525: case XML_PARSER_MISC:
7526: SKIP_BLANKS;
1.184 daniel 7527: if (ctxt->input->buf == NULL)
7528: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7529: else
1.184 daniel 7530: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7531: if (avail < 2)
7532: goto done;
1.184 daniel 7533: cur = ctxt->input->cur[0];
7534: next = ctxt->input->cur[1];
1.140 daniel 7535: if ((cur == '<') && (next == '?')) {
1.143 daniel 7536: if ((!terminate) &&
7537: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 7538: goto done;
7539: #ifdef DEBUG_PUSH
1.241 veillard 7540: xmlGenericError(xmlGenericErrorContext,
7541: "PP: Parsing PI\n");
1.140 daniel 7542: #endif
7543: xmlParsePI(ctxt);
7544: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 7545: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 7546: if ((!terminate) &&
7547: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 7548: goto done;
7549: #ifdef DEBUG_PUSH
1.241 veillard 7550: xmlGenericError(xmlGenericErrorContext,
7551: "PP: Parsing Comment\n");
1.140 daniel 7552: #endif
7553: xmlParseComment(ctxt);
7554: ctxt->instate = XML_PARSER_MISC;
7555: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 7556: (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
7557: (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
7558: (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
7559: (ctxt->input->cur[8] == 'E')) {
1.143 daniel 7560: if ((!terminate) &&
7561: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 7562: goto done;
7563: #ifdef DEBUG_PUSH
1.241 veillard 7564: xmlGenericError(xmlGenericErrorContext,
7565: "PP: Parsing internal subset\n");
1.140 daniel 7566: #endif
1.166 daniel 7567: ctxt->inSubset = 1;
1.140 daniel 7568: xmlParseDocTypeDecl(ctxt);
1.152 daniel 7569: if (RAW == '[') {
1.140 daniel 7570: ctxt->instate = XML_PARSER_DTD;
7571: #ifdef DEBUG_PUSH
1.241 veillard 7572: xmlGenericError(xmlGenericErrorContext,
7573: "PP: entering DTD\n");
1.140 daniel 7574: #endif
7575: } else {
1.166 daniel 7576: /*
7577: * Create and update the external subset.
7578: */
7579: ctxt->inSubset = 2;
1.171 daniel 7580: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 7581: (ctxt->sax->externalSubset != NULL))
7582: ctxt->sax->externalSubset(ctxt->userData,
7583: ctxt->intSubName, ctxt->extSubSystem,
7584: ctxt->extSubURI);
7585: ctxt->inSubset = 0;
1.140 daniel 7586: ctxt->instate = XML_PARSER_PROLOG;
7587: #ifdef DEBUG_PUSH
1.241 veillard 7588: xmlGenericError(xmlGenericErrorContext,
7589: "PP: entering PROLOG\n");
1.140 daniel 7590: #endif
7591: }
7592: } else if ((cur == '<') && (next == '!') &&
7593: (avail < 9)) {
7594: goto done;
7595: } else {
7596: ctxt->instate = XML_PARSER_START_TAG;
7597: #ifdef DEBUG_PUSH
1.241 veillard 7598: xmlGenericError(xmlGenericErrorContext,
7599: "PP: entering START_TAG\n");
1.140 daniel 7600: #endif
7601: }
7602: break;
1.245 veillard 7603: case XML_PARSER_IGNORE:
7604: xmlGenericError(xmlGenericErrorContext,
7605: "PP: internal error, state == IGNORE");
7606: ctxt->instate = XML_PARSER_DTD;
7607: #ifdef DEBUG_PUSH
7608: xmlGenericError(xmlGenericErrorContext,
7609: "PP: entering DTD\n");
7610: #endif
7611: break;
1.128 daniel 7612: case XML_PARSER_PROLOG:
1.140 daniel 7613: SKIP_BLANKS;
1.184 daniel 7614: if (ctxt->input->buf == NULL)
7615: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7616: else
1.184 daniel 7617: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7618: if (avail < 2)
7619: goto done;
1.184 daniel 7620: cur = ctxt->input->cur[0];
7621: next = ctxt->input->cur[1];
1.140 daniel 7622: if ((cur == '<') && (next == '?')) {
1.143 daniel 7623: if ((!terminate) &&
7624: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 7625: goto done;
7626: #ifdef DEBUG_PUSH
1.241 veillard 7627: xmlGenericError(xmlGenericErrorContext,
7628: "PP: Parsing PI\n");
1.140 daniel 7629: #endif
7630: xmlParsePI(ctxt);
7631: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 7632: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 7633: if ((!terminate) &&
7634: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 7635: goto done;
7636: #ifdef DEBUG_PUSH
1.241 veillard 7637: xmlGenericError(xmlGenericErrorContext,
7638: "PP: Parsing Comment\n");
1.140 daniel 7639: #endif
7640: xmlParseComment(ctxt);
7641: ctxt->instate = XML_PARSER_PROLOG;
7642: } else if ((cur == '<') && (next == '!') &&
7643: (avail < 4)) {
7644: goto done;
7645: } else {
7646: ctxt->instate = XML_PARSER_START_TAG;
7647: #ifdef DEBUG_PUSH
1.241 veillard 7648: xmlGenericError(xmlGenericErrorContext,
7649: "PP: entering START_TAG\n");
1.140 daniel 7650: #endif
7651: }
7652: break;
7653: case XML_PARSER_EPILOG:
7654: SKIP_BLANKS;
1.184 daniel 7655: if (ctxt->input->buf == NULL)
7656: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7657: else
1.184 daniel 7658: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7659: if (avail < 2)
7660: goto done;
1.184 daniel 7661: cur = ctxt->input->cur[0];
7662: next = ctxt->input->cur[1];
1.140 daniel 7663: if ((cur == '<') && (next == '?')) {
1.143 daniel 7664: if ((!terminate) &&
7665: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 7666: goto done;
7667: #ifdef DEBUG_PUSH
1.241 veillard 7668: xmlGenericError(xmlGenericErrorContext,
7669: "PP: Parsing PI\n");
1.140 daniel 7670: #endif
7671: xmlParsePI(ctxt);
7672: ctxt->instate = XML_PARSER_EPILOG;
7673: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 7674: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 7675: if ((!terminate) &&
7676: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 7677: goto done;
7678: #ifdef DEBUG_PUSH
1.241 veillard 7679: xmlGenericError(xmlGenericErrorContext,
7680: "PP: Parsing Comment\n");
1.140 daniel 7681: #endif
7682: xmlParseComment(ctxt);
7683: ctxt->instate = XML_PARSER_EPILOG;
7684: } else if ((cur == '<') && (next == '!') &&
7685: (avail < 4)) {
7686: goto done;
7687: } else {
1.230 veillard 7688: ctxt->errNo = XML_ERR_DOCUMENT_END;
1.140 daniel 7689: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7690: ctxt->sax->error(ctxt->userData,
7691: "Extra content at the end of the document\n");
7692: ctxt->wellFormed = 0;
1.180 daniel 7693: ctxt->disableSAX = 1;
1.140 daniel 7694: ctxt->instate = XML_PARSER_EOF;
7695: #ifdef DEBUG_PUSH
1.241 veillard 7696: xmlGenericError(xmlGenericErrorContext,
7697: "PP: entering EOF\n");
1.140 daniel 7698: #endif
1.171 daniel 7699: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7700: (!ctxt->disableSAX))
1.140 daniel 7701: ctxt->sax->endDocument(ctxt->userData);
7702: goto done;
7703: }
7704: break;
7705: case XML_PARSER_START_TAG: {
7706: xmlChar *name, *oldname;
7707:
1.184 daniel 7708: if ((avail < 2) && (ctxt->inputNr == 1))
1.140 daniel 7709: goto done;
1.184 daniel 7710: cur = ctxt->input->cur[0];
1.140 daniel 7711: if (cur != '<') {
1.230 veillard 7712: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.140 daniel 7713: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7714: ctxt->sax->error(ctxt->userData,
7715: "Start tag expect, '<' not found\n");
7716: ctxt->wellFormed = 0;
1.180 daniel 7717: ctxt->disableSAX = 1;
1.140 daniel 7718: ctxt->instate = XML_PARSER_EOF;
7719: #ifdef DEBUG_PUSH
1.241 veillard 7720: xmlGenericError(xmlGenericErrorContext,
7721: "PP: entering EOF\n");
1.140 daniel 7722: #endif
1.171 daniel 7723: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7724: (!ctxt->disableSAX))
1.140 daniel 7725: ctxt->sax->endDocument(ctxt->userData);
7726: goto done;
7727: }
1.143 daniel 7728: if ((!terminate) &&
7729: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 7730: goto done;
1.176 daniel 7731: if (ctxt->spaceNr == 0)
7732: spacePush(ctxt, -1);
7733: else
7734: spacePush(ctxt, *ctxt->space);
1.140 daniel 7735: name = xmlParseStartTag(ctxt);
7736: if (name == NULL) {
1.176 daniel 7737: spacePop(ctxt);
1.140 daniel 7738: ctxt->instate = XML_PARSER_EOF;
7739: #ifdef DEBUG_PUSH
1.241 veillard 7740: xmlGenericError(xmlGenericErrorContext,
7741: "PP: entering EOF\n");
1.140 daniel 7742: #endif
1.171 daniel 7743: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7744: (!ctxt->disableSAX))
1.140 daniel 7745: ctxt->sax->endDocument(ctxt->userData);
7746: goto done;
7747: }
7748: namePush(ctxt, xmlStrdup(name));
7749:
7750: /*
7751: * [ VC: Root Element Type ]
7752: * The Name in the document type declaration must match
7753: * the element type of the root element.
7754: */
7755: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 7756: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.140 daniel 7757: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7758:
7759: /*
7760: * Check for an Empty Element.
7761: */
1.152 daniel 7762: if ((RAW == '/') && (NXT(1) == '>')) {
1.140 daniel 7763: SKIP(2);
1.171 daniel 7764: if ((ctxt->sax != NULL) &&
7765: (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
1.140 daniel 7766: ctxt->sax->endElement(ctxt->userData, name);
7767: xmlFree(name);
7768: oldname = namePop(ctxt);
1.176 daniel 7769: spacePop(ctxt);
1.140 daniel 7770: if (oldname != NULL) {
7771: #ifdef DEBUG_STACK
1.241 veillard 7772: xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
1.140 daniel 7773: #endif
7774: xmlFree(oldname);
7775: }
7776: if (ctxt->name == NULL) {
7777: ctxt->instate = XML_PARSER_EPILOG;
7778: #ifdef DEBUG_PUSH
1.241 veillard 7779: xmlGenericError(xmlGenericErrorContext,
7780: "PP: entering EPILOG\n");
1.140 daniel 7781: #endif
7782: } else {
7783: ctxt->instate = XML_PARSER_CONTENT;
7784: #ifdef DEBUG_PUSH
1.241 veillard 7785: xmlGenericError(xmlGenericErrorContext,
7786: "PP: entering CONTENT\n");
1.140 daniel 7787: #endif
7788: }
7789: break;
7790: }
1.152 daniel 7791: if (RAW == '>') {
1.140 daniel 7792: NEXT;
7793: } else {
1.230 veillard 7794: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.140 daniel 7795: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7796: ctxt->sax->error(ctxt->userData,
7797: "Couldn't find end of Start Tag %s\n",
7798: name);
7799: ctxt->wellFormed = 0;
1.180 daniel 7800: ctxt->disableSAX = 1;
1.140 daniel 7801:
7802: /*
7803: * end of parsing of this node.
7804: */
7805: nodePop(ctxt);
7806: oldname = namePop(ctxt);
1.176 daniel 7807: spacePop(ctxt);
1.140 daniel 7808: if (oldname != NULL) {
7809: #ifdef DEBUG_STACK
1.241 veillard 7810: xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
1.140 daniel 7811: #endif
7812: xmlFree(oldname);
7813: }
7814: }
7815: xmlFree(name);
7816: ctxt->instate = XML_PARSER_CONTENT;
7817: #ifdef DEBUG_PUSH
1.241 veillard 7818: xmlGenericError(xmlGenericErrorContext,
7819: "PP: entering CONTENT\n");
1.140 daniel 7820: #endif
7821: break;
7822: }
1.224 veillard 7823: case XML_PARSER_CONTENT: {
7824: const xmlChar *test;
7825: int cons;
7826: xmlChar tok;
7827:
1.140 daniel 7828: /*
7829: * Handle preparsed entities and charRef
7830: */
7831: if (ctxt->token != 0) {
7832: xmlChar cur[2] = { 0 , 0 } ;
7833:
7834: cur[0] = (xmlChar) ctxt->token;
1.171 daniel 7835: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
7836: (ctxt->sax->characters != NULL))
1.140 daniel 7837: ctxt->sax->characters(ctxt->userData, cur, 1);
7838: ctxt->token = 0;
7839: }
1.184 daniel 7840: if ((avail < 2) && (ctxt->inputNr == 1))
1.140 daniel 7841: goto done;
1.184 daniel 7842: cur = ctxt->input->cur[0];
7843: next = ctxt->input->cur[1];
1.224 veillard 7844:
7845: test = CUR_PTR;
7846: cons = ctxt->input->consumed;
7847: tok = ctxt->token;
1.140 daniel 7848: if ((cur == '<') && (next == '?')) {
1.143 daniel 7849: if ((!terminate) &&
7850: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 7851: goto done;
7852: #ifdef DEBUG_PUSH
1.241 veillard 7853: xmlGenericError(xmlGenericErrorContext,
7854: "PP: Parsing PI\n");
1.140 daniel 7855: #endif
7856: xmlParsePI(ctxt);
7857: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 7858: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 7859: if ((!terminate) &&
7860: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 7861: goto done;
7862: #ifdef DEBUG_PUSH
1.241 veillard 7863: xmlGenericError(xmlGenericErrorContext,
7864: "PP: Parsing Comment\n");
1.140 daniel 7865: #endif
7866: xmlParseComment(ctxt);
7867: ctxt->instate = XML_PARSER_CONTENT;
1.184 daniel 7868: } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
7869: (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
7870: (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
7871: (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
7872: (ctxt->input->cur[8] == '[')) {
1.140 daniel 7873: SKIP(9);
7874: ctxt->instate = XML_PARSER_CDATA_SECTION;
7875: #ifdef DEBUG_PUSH
1.241 veillard 7876: xmlGenericError(xmlGenericErrorContext,
7877: "PP: entering CDATA_SECTION\n");
1.140 daniel 7878: #endif
7879: break;
7880: } else if ((cur == '<') && (next == '!') &&
7881: (avail < 9)) {
7882: goto done;
7883: } else if ((cur == '<') && (next == '/')) {
7884: ctxt->instate = XML_PARSER_END_TAG;
7885: #ifdef DEBUG_PUSH
1.241 veillard 7886: xmlGenericError(xmlGenericErrorContext,
7887: "PP: entering END_TAG\n");
1.140 daniel 7888: #endif
7889: break;
7890: } else if (cur == '<') {
7891: ctxt->instate = XML_PARSER_START_TAG;
7892: #ifdef DEBUG_PUSH
1.241 veillard 7893: xmlGenericError(xmlGenericErrorContext,
7894: "PP: entering START_TAG\n");
1.140 daniel 7895: #endif
7896: break;
7897: } else if (cur == '&') {
1.143 daniel 7898: if ((!terminate) &&
7899: (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
1.140 daniel 7900: goto done;
7901: #ifdef DEBUG_PUSH
1.241 veillard 7902: xmlGenericError(xmlGenericErrorContext,
7903: "PP: Parsing Reference\n");
1.140 daniel 7904: #endif
7905: xmlParseReference(ctxt);
7906: } else {
1.156 daniel 7907: /* TODO Avoid the extra copy, handle directly !!! */
1.140 daniel 7908: /*
1.181 daniel 7909: * Goal of the following test is:
1.140 daniel 7910: * - minimize calls to the SAX 'character' callback
7911: * when they are mergeable
7912: * - handle a problem for isBlank when we only parse
7913: * a sequence of blank chars and the next one is
7914: * not available to check against '<' presence.
7915: * - tries to homogenize the differences in SAX
7916: * callbacks between the push and pull versions
7917: * of the parser.
7918: */
7919: if ((ctxt->inputNr == 1) &&
7920: (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
1.143 daniel 7921: if ((!terminate) &&
7922: (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
1.140 daniel 7923: goto done;
7924: }
7925: ctxt->checkIndex = 0;
7926: #ifdef DEBUG_PUSH
1.241 veillard 7927: xmlGenericError(xmlGenericErrorContext,
7928: "PP: Parsing char data\n");
1.140 daniel 7929: #endif
7930: xmlParseCharData(ctxt, 0);
7931: }
7932: /*
7933: * Pop-up of finished entities.
7934: */
1.152 daniel 7935: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 7936: xmlPopInput(ctxt);
1.224 veillard 7937: if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
7938: (tok == ctxt->token)) {
1.230 veillard 7939: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.224 veillard 7940: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7941: ctxt->sax->error(ctxt->userData,
7942: "detected an error in element content\n");
7943: ctxt->wellFormed = 0;
7944: ctxt->disableSAX = 1;
7945: ctxt->instate = XML_PARSER_EOF;
7946: break;
7947: }
1.140 daniel 7948: break;
1.224 veillard 7949: }
1.140 daniel 7950: case XML_PARSER_CDATA_SECTION: {
7951: /*
7952: * The Push mode need to have the SAX callback for
7953: * cdataBlock merge back contiguous callbacks.
7954: */
7955: int base;
7956:
7957: base = xmlParseLookupSequence(ctxt, ']', ']', '>');
7958: if (base < 0) {
7959: if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
1.171 daniel 7960: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.140 daniel 7961: if (ctxt->sax->cdataBlock != NULL)
1.184 daniel 7962: ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
1.140 daniel 7963: XML_PARSER_BIG_BUFFER_SIZE);
7964: }
7965: SKIP(XML_PARSER_BIG_BUFFER_SIZE);
7966: ctxt->checkIndex = 0;
7967: }
7968: goto done;
7969: } else {
1.171 daniel 7970: if ((ctxt->sax != NULL) && (base > 0) &&
7971: (!ctxt->disableSAX)) {
1.140 daniel 7972: if (ctxt->sax->cdataBlock != NULL)
7973: ctxt->sax->cdataBlock(ctxt->userData,
1.184 daniel 7974: ctxt->input->cur, base);
1.140 daniel 7975: }
7976: SKIP(base + 3);
7977: ctxt->checkIndex = 0;
7978: ctxt->instate = XML_PARSER_CONTENT;
7979: #ifdef DEBUG_PUSH
1.241 veillard 7980: xmlGenericError(xmlGenericErrorContext,
7981: "PP: entering CONTENT\n");
1.140 daniel 7982: #endif
7983: }
7984: break;
7985: }
1.141 daniel 7986: case XML_PARSER_END_TAG:
1.140 daniel 7987: if (avail < 2)
7988: goto done;
1.143 daniel 7989: if ((!terminate) &&
7990: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 7991: goto done;
7992: xmlParseEndTag(ctxt);
7993: if (ctxt->name == NULL) {
7994: ctxt->instate = XML_PARSER_EPILOG;
7995: #ifdef DEBUG_PUSH
1.241 veillard 7996: xmlGenericError(xmlGenericErrorContext,
7997: "PP: entering EPILOG\n");
1.140 daniel 7998: #endif
7999: } else {
8000: ctxt->instate = XML_PARSER_CONTENT;
8001: #ifdef DEBUG_PUSH
1.241 veillard 8002: xmlGenericError(xmlGenericErrorContext,
8003: "PP: entering CONTENT\n");
1.140 daniel 8004: #endif
8005: }
8006: break;
8007: case XML_PARSER_DTD: {
8008: /*
8009: * Sorry but progressive parsing of the internal subset
8010: * is not expected to be supported. We first check that
8011: * the full content of the internal subset is available and
8012: * the parsing is launched only at that point.
8013: * Internal subset ends up with "']' S? '>'" in an unescaped
8014: * section and not in a ']]>' sequence which are conditional
8015: * sections (whoever argued to keep that crap in XML deserve
8016: * a place in hell !).
8017: */
8018: int base, i;
8019: xmlChar *buf;
8020: xmlChar quote = 0;
8021:
1.184 daniel 8022: base = ctxt->input->cur - ctxt->input->base;
1.140 daniel 8023: if (base < 0) return(0);
8024: if (ctxt->checkIndex > base)
8025: base = ctxt->checkIndex;
1.184 daniel 8026: buf = ctxt->input->buf->buffer->content;
1.202 daniel 8027: for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8028: base++) {
1.140 daniel 8029: if (quote != 0) {
8030: if (buf[base] == quote)
8031: quote = 0;
8032: continue;
8033: }
8034: if (buf[base] == '"') {
8035: quote = '"';
8036: continue;
8037: }
8038: if (buf[base] == '\'') {
8039: quote = '\'';
8040: continue;
8041: }
8042: if (buf[base] == ']') {
1.202 daniel 8043: if ((unsigned int) base +1 >=
8044: ctxt->input->buf->buffer->use)
1.140 daniel 8045: break;
8046: if (buf[base + 1] == ']') {
8047: /* conditional crap, skip both ']' ! */
8048: base++;
8049: continue;
8050: }
1.202 daniel 8051: for (i = 0;
8052: (unsigned int) base + i < ctxt->input->buf->buffer->use;
8053: i++) {
1.140 daniel 8054: if (buf[base + i] == '>')
8055: goto found_end_int_subset;
8056: }
8057: break;
8058: }
8059: }
8060: /*
8061: * We didn't found the end of the Internal subset
8062: */
8063: if (quote == 0)
8064: ctxt->checkIndex = base;
8065: #ifdef DEBUG_PUSH
8066: if (next == 0)
1.241 veillard 8067: xmlGenericError(xmlGenericErrorContext,
8068: "PP: lookup of int subset end filed\n");
1.140 daniel 8069: #endif
8070: goto done;
8071:
8072: found_end_int_subset:
8073: xmlParseInternalSubset(ctxt);
1.166 daniel 8074: ctxt->inSubset = 2;
1.171 daniel 8075: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 8076: (ctxt->sax->externalSubset != NULL))
8077: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8078: ctxt->extSubSystem, ctxt->extSubURI);
8079: ctxt->inSubset = 0;
1.140 daniel 8080: ctxt->instate = XML_PARSER_PROLOG;
8081: ctxt->checkIndex = 0;
8082: #ifdef DEBUG_PUSH
1.241 veillard 8083: xmlGenericError(xmlGenericErrorContext,
8084: "PP: entering PROLOG\n");
1.140 daniel 8085: #endif
8086: break;
8087: }
8088: case XML_PARSER_COMMENT:
1.241 veillard 8089: xmlGenericError(xmlGenericErrorContext,
8090: "PP: internal error, state == COMMENT\n");
1.140 daniel 8091: ctxt->instate = XML_PARSER_CONTENT;
8092: #ifdef DEBUG_PUSH
1.241 veillard 8093: xmlGenericError(xmlGenericErrorContext,
8094: "PP: entering CONTENT\n");
1.140 daniel 8095: #endif
8096: break;
8097: case XML_PARSER_PI:
1.241 veillard 8098: xmlGenericError(xmlGenericErrorContext,
8099: "PP: internal error, state == PI\n");
1.140 daniel 8100: ctxt->instate = XML_PARSER_CONTENT;
8101: #ifdef DEBUG_PUSH
1.241 veillard 8102: xmlGenericError(xmlGenericErrorContext,
8103: "PP: entering CONTENT\n");
1.140 daniel 8104: #endif
8105: break;
1.128 daniel 8106: case XML_PARSER_ENTITY_DECL:
1.241 veillard 8107: xmlGenericError(xmlGenericErrorContext,
8108: "PP: internal error, state == ENTITY_DECL\n");
1.140 daniel 8109: ctxt->instate = XML_PARSER_DTD;
8110: #ifdef DEBUG_PUSH
1.241 veillard 8111: xmlGenericError(xmlGenericErrorContext,
8112: "PP: entering DTD\n");
1.140 daniel 8113: #endif
8114: break;
1.128 daniel 8115: case XML_PARSER_ENTITY_VALUE:
1.241 veillard 8116: xmlGenericError(xmlGenericErrorContext,
8117: "PP: internal error, state == ENTITY_VALUE\n");
1.140 daniel 8118: ctxt->instate = XML_PARSER_CONTENT;
8119: #ifdef DEBUG_PUSH
1.241 veillard 8120: xmlGenericError(xmlGenericErrorContext,
8121: "PP: entering DTD\n");
1.140 daniel 8122: #endif
8123: break;
1.128 daniel 8124: case XML_PARSER_ATTRIBUTE_VALUE:
1.241 veillard 8125: xmlGenericError(xmlGenericErrorContext,
8126: "PP: internal error, state == ATTRIBUTE_VALUE\n");
1.168 daniel 8127: ctxt->instate = XML_PARSER_START_TAG;
8128: #ifdef DEBUG_PUSH
1.241 veillard 8129: xmlGenericError(xmlGenericErrorContext,
8130: "PP: entering START_TAG\n");
1.168 daniel 8131: #endif
8132: break;
8133: case XML_PARSER_SYSTEM_LITERAL:
1.241 veillard 8134: xmlGenericError(xmlGenericErrorContext,
8135: "PP: internal error, state == SYSTEM_LITERAL\n");
1.140 daniel 8136: ctxt->instate = XML_PARSER_START_TAG;
8137: #ifdef DEBUG_PUSH
1.241 veillard 8138: xmlGenericError(xmlGenericErrorContext,
8139: "PP: entering START_TAG\n");
1.140 daniel 8140: #endif
8141: break;
1.128 daniel 8142: }
8143: }
1.140 daniel 8144: done:
8145: #ifdef DEBUG_PUSH
1.241 veillard 8146: xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
1.140 daniel 8147: #endif
1.128 daniel 8148: return(ret);
8149: }
8150:
8151: /**
1.143 daniel 8152: * xmlParseTry:
8153: * @ctxt: an XML parser context
8154: *
8155: * Try to progress on parsing
8156: *
8157: * Returns zero if no parsing was possible
8158: */
8159: int
8160: xmlParseTry(xmlParserCtxtPtr ctxt) {
8161: return(xmlParseTryOrFinish(ctxt, 0));
8162: }
8163:
8164: /**
1.128 daniel 8165: * xmlParseChunk:
8166: * @ctxt: an XML parser context
8167: * @chunk: an char array
8168: * @size: the size in byte of the chunk
8169: * @terminate: last chunk indicator
8170: *
8171: * Parse a Chunk of memory
8172: *
8173: * Returns zero if no error, the xmlParserErrors otherwise.
8174: */
1.140 daniel 8175: int
1.128 daniel 8176: xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8177: int terminate) {
1.132 daniel 8178: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
1.140 daniel 8179: (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8180: int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8181: int cur = ctxt->input->cur - ctxt->input->base;
8182:
1.132 daniel 8183: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
1.140 daniel 8184: ctxt->input->base = ctxt->input->buf->buffer->content + base;
8185: ctxt->input->cur = ctxt->input->base + cur;
8186: #ifdef DEBUG_PUSH
1.241 veillard 8187: xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
1.140 daniel 8188: #endif
8189:
1.150 daniel 8190: if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8191: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 8192: } else if (ctxt->instate != XML_PARSER_EOF)
1.244 veillard 8193: if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8194: xmlParserInputBufferPtr in = ctxt->input->buf;
8195: int nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8196: if (nbchars < 0) {
8197: xmlGenericError(xmlGenericErrorContext,
8198: "xmlParseChunk: encoder error\n");
8199: return(XML_ERR_INVALID_ENCODING);
8200: }
8201: }
1.143 daniel 8202: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 8203: if (terminate) {
1.151 daniel 8204: /*
8205: * Check for termination
8206: */
1.140 daniel 8207: if ((ctxt->instate != XML_PARSER_EOF) &&
8208: (ctxt->instate != XML_PARSER_EPILOG)) {
1.230 veillard 8209: ctxt->errNo = XML_ERR_DOCUMENT_END;
1.140 daniel 8210: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8211: ctxt->sax->error(ctxt->userData,
8212: "Extra content at the end of the document\n");
8213: ctxt->wellFormed = 0;
1.180 daniel 8214: ctxt->disableSAX = 1;
1.140 daniel 8215: }
8216: if (ctxt->instate != XML_PARSER_EOF) {
1.171 daniel 8217: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8218: (!ctxt->disableSAX))
1.140 daniel 8219: ctxt->sax->endDocument(ctxt->userData);
8220: }
8221: ctxt->instate = XML_PARSER_EOF;
1.128 daniel 8222: }
8223: return((xmlParserErrors) ctxt->errNo);
8224: }
8225:
8226: /************************************************************************
8227: * *
1.98 daniel 8228: * I/O front end functions to the parser *
8229: * *
8230: ************************************************************************/
1.201 daniel 8231:
8232: /**
1.229 veillard 8233: * xmlStopParser:
1.201 daniel 8234: * @ctxt: an XML parser context
8235: *
8236: * Blocks further parser processing
8237: */
8238: void
8239: xmlStopParser(xmlParserCtxtPtr ctxt) {
8240: ctxt->instate = XML_PARSER_EOF;
8241: if (ctxt->input != NULL)
8242: ctxt->input->cur = BAD_CAST"";
8243: }
1.98 daniel 8244:
1.50 daniel 8245: /**
1.181 daniel 8246: * xmlCreatePushParserCtxt:
1.140 daniel 8247: * @sax: a SAX handler
8248: * @user_data: The user data returned on SAX callbacks
8249: * @chunk: a pointer to an array of chars
8250: * @size: number of chars in the array
8251: * @filename: an optional file name or URI
8252: *
8253: * Create a parser context for using the XML parser in push mode
8254: * To allow content encoding detection, @size should be >= 4
8255: * The value of @filename is used for fetching external entities
8256: * and error/warning reports.
8257: *
8258: * Returns the new parser context or NULL
8259: */
8260: xmlParserCtxtPtr
8261: xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8262: const char *chunk, int size, const char *filename) {
8263: xmlParserCtxtPtr ctxt;
8264: xmlParserInputPtr inputStream;
8265: xmlParserInputBufferPtr buf;
8266: xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8267:
8268: /*
1.156 daniel 8269: * plug some encoding conversion routines
1.140 daniel 8270: */
8271: if ((chunk != NULL) && (size >= 4))
1.156 daniel 8272: enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
1.140 daniel 8273:
8274: buf = xmlAllocParserInputBuffer(enc);
8275: if (buf == NULL) return(NULL);
8276:
8277: ctxt = xmlNewParserCtxt();
8278: if (ctxt == NULL) {
8279: xmlFree(buf);
8280: return(NULL);
8281: }
8282: if (sax != NULL) {
8283: if (ctxt->sax != &xmlDefaultSAXHandler)
8284: xmlFree(ctxt->sax);
8285: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8286: if (ctxt->sax == NULL) {
8287: xmlFree(buf);
8288: xmlFree(ctxt);
8289: return(NULL);
8290: }
8291: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8292: if (user_data != NULL)
8293: ctxt->userData = user_data;
8294: }
8295: if (filename == NULL) {
8296: ctxt->directory = NULL;
8297: } else {
8298: ctxt->directory = xmlParserGetDirectory(filename);
8299: }
8300:
8301: inputStream = xmlNewInputStream(ctxt);
8302: if (inputStream == NULL) {
8303: xmlFreeParserCtxt(ctxt);
8304: return(NULL);
8305: }
8306:
8307: if (filename == NULL)
8308: inputStream->filename = NULL;
8309: else
8310: inputStream->filename = xmlMemStrdup(filename);
8311: inputStream->buf = buf;
8312: inputStream->base = inputStream->buf->buffer->content;
8313: inputStream->cur = inputStream->buf->buffer->content;
1.156 daniel 8314: if (enc != XML_CHAR_ENCODING_NONE) {
8315: xmlSwitchEncoding(ctxt, enc);
8316: }
1.140 daniel 8317:
8318: inputPush(ctxt, inputStream);
8319:
8320: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8321: (ctxt->input->buf != NULL)) {
8322: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8323: #ifdef DEBUG_PUSH
1.241 veillard 8324: xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
1.140 daniel 8325: #endif
8326: }
1.190 daniel 8327:
8328: return(ctxt);
8329: }
8330:
8331: /**
8332: * xmlCreateIOParserCtxt:
8333: * @sax: a SAX handler
8334: * @user_data: The user data returned on SAX callbacks
8335: * @ioread: an I/O read function
8336: * @ioclose: an I/O close function
8337: * @ioctx: an I/O handler
8338: * @enc: the charset encoding if known
8339: *
8340: * Create a parser context for using the XML parser with an existing
8341: * I/O stream
8342: *
8343: * Returns the new parser context or NULL
8344: */
8345: xmlParserCtxtPtr
8346: xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8347: xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8348: void *ioctx, xmlCharEncoding enc) {
8349: xmlParserCtxtPtr ctxt;
8350: xmlParserInputPtr inputStream;
8351: xmlParserInputBufferPtr buf;
8352:
8353: buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8354: if (buf == NULL) return(NULL);
8355:
8356: ctxt = xmlNewParserCtxt();
8357: if (ctxt == NULL) {
8358: xmlFree(buf);
8359: return(NULL);
8360: }
8361: if (sax != NULL) {
8362: if (ctxt->sax != &xmlDefaultSAXHandler)
8363: xmlFree(ctxt->sax);
8364: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8365: if (ctxt->sax == NULL) {
8366: xmlFree(buf);
8367: xmlFree(ctxt);
8368: return(NULL);
8369: }
8370: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8371: if (user_data != NULL)
8372: ctxt->userData = user_data;
8373: }
8374:
1.229 veillard 8375: inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8376: if (inputStream == NULL) {
8377: xmlFreeParserCtxt(ctxt);
8378: return(NULL);
1.74 daniel 8379: }
1.229 veillard 8380: inputPush(ctxt, inputStream);
1.69 daniel 8381:
1.229 veillard 8382: return(ctxt);
1.1 veillard 8383: }
8384:
1.229 veillard 8385: /************************************************************************
8386: * *
8387: * Front ends when parsing a Dtd *
8388: * *
8389: ************************************************************************/
1.76 daniel 8390:
8391: /**
1.242 veillard 8392: * xmlIOParseDTD:
8393: * @sax: the SAX handler block or NULL
8394: * @input: an Input Buffer
8395: * @enc: the charset encoding if known
8396: *
8397: * Load and parse a DTD
8398: *
8399: * Returns the resulting xmlDtdPtr or NULL in case of error.
1.243 veillard 8400: * @input will be freed at parsing end.
1.242 veillard 8401: */
8402:
8403: xmlDtdPtr
8404: xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
8405: xmlCharEncoding enc) {
8406: xmlDtdPtr ret = NULL;
8407: xmlParserCtxtPtr ctxt;
8408: xmlParserInputPtr pinput = NULL;
8409:
8410: if (input == NULL)
8411: return(NULL);
8412:
8413: ctxt = xmlNewParserCtxt();
8414: if (ctxt == NULL) {
8415: return(NULL);
8416: }
8417:
8418: /*
8419: * Set-up the SAX context
8420: */
8421: if (sax != NULL) {
8422: if (ctxt->sax != NULL)
8423: xmlFree(ctxt->sax);
8424: ctxt->sax = sax;
8425: ctxt->userData = NULL;
8426: }
8427:
8428: /*
8429: * generate a parser input from the I/O handler
8430: */
8431:
8432: pinput = xmlNewIOInputStream(ctxt, input, enc);
8433: if (pinput == NULL) {
8434: if (sax != NULL) ctxt->sax = NULL;
8435: xmlFreeParserCtxt(ctxt);
8436: return(NULL);
8437: }
8438:
8439: /*
8440: * plug some encoding conversion routines here.
8441: */
8442: xmlPushInput(ctxt, pinput);
8443:
8444: pinput->filename = NULL;
8445: pinput->line = 1;
8446: pinput->col = 1;
8447: pinput->base = ctxt->input->cur;
8448: pinput->cur = ctxt->input->cur;
8449: pinput->free = NULL;
8450:
8451: /*
8452: * let's parse that entity knowing it's an external subset.
8453: */
8454: ctxt->inSubset = 2;
8455: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8456: ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8457: BAD_CAST "none", BAD_CAST "none");
8458: xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
8459:
8460: if (ctxt->myDoc != NULL) {
8461: if (ctxt->wellFormed) {
8462: ret = ctxt->myDoc->extSubset;
8463: ctxt->myDoc->extSubset = NULL;
8464: } else {
8465: ret = NULL;
8466: }
8467: xmlFreeDoc(ctxt->myDoc);
8468: ctxt->myDoc = NULL;
8469: }
8470: if (sax != NULL) ctxt->sax = NULL;
8471: xmlFreeParserCtxt(ctxt);
8472:
8473: return(ret);
8474: }
8475:
8476: /**
1.181 daniel 8477: * xmlSAXParseDTD:
1.76 daniel 8478: * @sax: the SAX handler block
8479: * @ExternalID: a NAME* containing the External ID of the DTD
8480: * @SystemID: a NAME* containing the URL to the DTD
8481: *
8482: * Load and parse an external subset.
8483: *
8484: * Returns the resulting xmlDtdPtr or NULL in case of error.
8485: */
8486:
8487: xmlDtdPtr
1.123 daniel 8488: xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8489: const xmlChar *SystemID) {
1.76 daniel 8490: xmlDtdPtr ret = NULL;
8491: xmlParserCtxtPtr ctxt;
1.83 daniel 8492: xmlParserInputPtr input = NULL;
1.76 daniel 8493: xmlCharEncoding enc;
8494:
8495: if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
8496:
1.97 daniel 8497: ctxt = xmlNewParserCtxt();
1.76 daniel 8498: if (ctxt == NULL) {
8499: return(NULL);
8500: }
8501:
8502: /*
8503: * Set-up the SAX context
8504: */
8505: if (sax != NULL) {
1.93 veillard 8506: if (ctxt->sax != NULL)
1.119 daniel 8507: xmlFree(ctxt->sax);
1.76 daniel 8508: ctxt->sax = sax;
8509: ctxt->userData = NULL;
8510: }
8511:
8512: /*
8513: * Ask the Entity resolver to load the damn thing
8514: */
8515:
8516: if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
8517: input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
8518: if (input == NULL) {
1.86 daniel 8519: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 8520: xmlFreeParserCtxt(ctxt);
8521: return(NULL);
8522: }
8523:
8524: /*
1.156 daniel 8525: * plug some encoding conversion routines here.
1.76 daniel 8526: */
8527: xmlPushInput(ctxt, input);
1.156 daniel 8528: enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
1.76 daniel 8529: xmlSwitchEncoding(ctxt, enc);
8530:
1.95 veillard 8531: if (input->filename == NULL)
1.156 daniel 8532: input->filename = (char *) xmlStrdup(SystemID);
1.76 daniel 8533: input->line = 1;
8534: input->col = 1;
8535: input->base = ctxt->input->cur;
8536: input->cur = ctxt->input->cur;
8537: input->free = NULL;
8538:
8539: /*
8540: * let's parse that entity knowing it's an external subset.
8541: */
1.191 daniel 8542: ctxt->inSubset = 2;
8543: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8544: ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8545: ExternalID, SystemID);
1.79 daniel 8546: xmlParseExternalSubset(ctxt, ExternalID, SystemID);
1.76 daniel 8547:
8548: if (ctxt->myDoc != NULL) {
8549: if (ctxt->wellFormed) {
1.191 daniel 8550: ret = ctxt->myDoc->extSubset;
8551: ctxt->myDoc->extSubset = NULL;
1.76 daniel 8552: } else {
8553: ret = NULL;
8554: }
8555: xmlFreeDoc(ctxt->myDoc);
8556: ctxt->myDoc = NULL;
8557: }
1.86 daniel 8558: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 8559: xmlFreeParserCtxt(ctxt);
8560:
8561: return(ret);
8562: }
8563:
8564: /**
1.181 daniel 8565: * xmlParseDTD:
1.76 daniel 8566: * @ExternalID: a NAME* containing the External ID of the DTD
8567: * @SystemID: a NAME* containing the URL to the DTD
8568: *
8569: * Load and parse an external subset.
8570: *
8571: * Returns the resulting xmlDtdPtr or NULL in case of error.
8572: */
8573:
8574: xmlDtdPtr
1.123 daniel 8575: xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
1.76 daniel 8576: return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
1.59 daniel 8577: }
8578:
1.229 veillard 8579: /************************************************************************
8580: * *
8581: * Front ends when parsing an Entity *
8582: * *
8583: ************************************************************************/
8584:
1.59 daniel 8585: /**
1.181 daniel 8586: * xmlSAXParseBalancedChunk:
1.144 daniel 8587: * @ctx: an XML parser context (possibly NULL)
8588: * @sax: the SAX handler bloc (possibly NULL)
8589: * @user_data: The user data returned on SAX callbacks (possibly NULL)
8590: * @input: a parser input stream
8591: * @enc: the encoding
8592: *
8593: * Parse a well-balanced chunk of an XML document
8594: * The user has to provide SAX callback block whose routines will be
8595: * called by the parser
8596: * The allowed sequence for the Well Balanced Chunk is the one defined by
8597: * the content production in the XML grammar:
8598: *
8599: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8600: *
1.176 daniel 8601: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
1.144 daniel 8602: * the error code otherwise
8603: */
8604:
8605: int
8606: xmlSAXParseBalancedChunk(xmlParserCtxtPtr ctx, xmlSAXHandlerPtr sax,
8607: void *user_data, xmlParserInputPtr input,
8608: xmlCharEncoding enc) {
8609: xmlParserCtxtPtr ctxt;
8610: int ret;
8611:
8612: if (input == NULL) return(-1);
8613:
8614: if (ctx != NULL)
8615: ctxt = ctx;
8616: else {
8617: ctxt = xmlNewParserCtxt();
8618: if (ctxt == NULL)
8619: return(-1);
8620: if (sax == NULL)
8621: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8622: }
8623:
8624: /*
8625: * Set-up the SAX context
8626: */
8627: if (sax != NULL) {
8628: if (ctxt->sax != NULL)
8629: xmlFree(ctxt->sax);
8630: ctxt->sax = sax;
8631: ctxt->userData = user_data;
8632: }
8633:
8634: /*
8635: * plug some encoding conversion routines here.
8636: */
8637: xmlPushInput(ctxt, input);
8638: if (enc != XML_CHAR_ENCODING_NONE)
8639: xmlSwitchEncoding(ctxt, enc);
8640:
8641: /*
8642: * let's parse that entity knowing it's an external subset.
8643: */
8644: xmlParseContent(ctxt);
8645: ret = ctxt->errNo;
8646:
8647: if (ctx == NULL) {
8648: if (sax != NULL)
8649: ctxt->sax = NULL;
8650: else
8651: xmlFreeDoc(ctxt->myDoc);
8652: xmlFreeParserCtxt(ctxt);
8653: }
8654: return(ret);
8655: }
8656:
8657: /**
1.213 veillard 8658: * xmlParseCtxtExternalEntity:
8659: * @ctx: the existing parsing context
8660: * @URL: the URL for the entity to load
8661: * @ID: the System ID for the entity to load
8662: * @list: the return value for the set of parsed nodes
8663: *
8664: * Parse an external general entity within an existing parsing context
8665: * An external general parsed entity is well-formed if it matches the
8666: * production labeled extParsedEnt.
8667: *
8668: * [78] extParsedEnt ::= TextDecl? content
8669: *
8670: * Returns 0 if the entity is well formed, -1 in case of args problem and
8671: * the parser error code otherwise
8672: */
8673:
8674: int
8675: xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
8676: const xmlChar *ID, xmlNodePtr *list) {
8677: xmlParserCtxtPtr ctxt;
8678: xmlDocPtr newDoc;
8679: xmlSAXHandlerPtr oldsax = NULL;
8680: int ret = 0;
8681:
8682: if (ctx->depth > 40) {
8683: return(XML_ERR_ENTITY_LOOP);
8684: }
8685:
8686: if (list != NULL)
8687: *list = NULL;
8688: if ((URL == NULL) && (ID == NULL))
8689: return(-1);
8690: if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
8691: return(-1);
8692:
8693:
1.228 veillard 8694: ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
1.213 veillard 8695: if (ctxt == NULL) return(-1);
8696: ctxt->userData = ctxt;
8697: oldsax = ctxt->sax;
8698: ctxt->sax = ctx->sax;
8699: newDoc = xmlNewDoc(BAD_CAST "1.0");
8700: if (newDoc == NULL) {
8701: xmlFreeParserCtxt(ctxt);
8702: return(-1);
8703: }
8704: if (ctx->myDoc != NULL) {
8705: newDoc->intSubset = ctx->myDoc->intSubset;
8706: newDoc->extSubset = ctx->myDoc->extSubset;
8707: }
8708: if (ctx->myDoc->URL != NULL) {
8709: newDoc->URL = xmlStrdup(ctx->myDoc->URL);
8710: }
8711: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8712: if (newDoc->children == NULL) {
8713: ctxt->sax = oldsax;
8714: xmlFreeParserCtxt(ctxt);
8715: newDoc->intSubset = NULL;
8716: newDoc->extSubset = NULL;
8717: xmlFreeDoc(newDoc);
8718: return(-1);
8719: }
8720: nodePush(ctxt, newDoc->children);
8721: if (ctx->myDoc == NULL) {
8722: ctxt->myDoc = newDoc;
8723: } else {
8724: ctxt->myDoc = ctx->myDoc;
8725: newDoc->children->doc = ctx->myDoc;
8726: }
8727:
8728: /*
8729: * Parse a possible text declaration first
8730: */
8731: GROW;
8732: if ((RAW == '<') && (NXT(1) == '?') &&
8733: (NXT(2) == 'x') && (NXT(3) == 'm') &&
8734: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8735: xmlParseTextDecl(ctxt);
8736: }
8737:
8738: /*
8739: * Doing validity checking on chunk doesn't make sense
8740: */
8741: ctxt->instate = XML_PARSER_CONTENT;
8742: ctxt->validate = ctx->validate;
8743: ctxt->depth = ctx->depth + 1;
8744: ctxt->replaceEntities = ctx->replaceEntities;
8745: if (ctxt->validate) {
8746: ctxt->vctxt.error = ctx->vctxt.error;
8747: ctxt->vctxt.warning = ctx->vctxt.warning;
8748: /* Allocate the Node stack */
8749: ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
1.228 veillard 8750: if (ctxt->vctxt.nodeTab == NULL) {
1.241 veillard 8751: xmlGenericError(xmlGenericErrorContext,
8752: "xmlParseCtxtExternalEntity: out of memory\n");
1.228 veillard 8753: ctxt->validate = 0;
8754: ctxt->vctxt.error = NULL;
8755: ctxt->vctxt.warning = NULL;
8756: } else {
8757: ctxt->vctxt.nodeNr = 0;
8758: ctxt->vctxt.nodeMax = 4;
8759: ctxt->vctxt.node = NULL;
8760: }
1.213 veillard 8761: } else {
8762: ctxt->vctxt.error = NULL;
8763: ctxt->vctxt.warning = NULL;
8764: }
8765:
8766: xmlParseContent(ctxt);
8767:
8768: if ((RAW == '<') && (NXT(1) == '/')) {
1.230 veillard 8769: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.213 veillard 8770: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8771: ctxt->sax->error(ctxt->userData,
8772: "chunk is not well balanced\n");
8773: ctxt->wellFormed = 0;
8774: ctxt->disableSAX = 1;
8775: } else if (RAW != 0) {
1.230 veillard 8776: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
1.213 veillard 8777: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8778: ctxt->sax->error(ctxt->userData,
8779: "extra content at the end of well balanced chunk\n");
8780: ctxt->wellFormed = 0;
8781: ctxt->disableSAX = 1;
8782: }
8783: if (ctxt->node != newDoc->children) {
1.230 veillard 8784: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.213 veillard 8785: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8786: ctxt->sax->error(ctxt->userData,
8787: "chunk is not well balanced\n");
8788: ctxt->wellFormed = 0;
8789: ctxt->disableSAX = 1;
8790: }
8791:
8792: if (!ctxt->wellFormed) {
8793: if (ctxt->errNo == 0)
8794: ret = 1;
8795: else
8796: ret = ctxt->errNo;
8797: } else {
8798: if (list != NULL) {
8799: xmlNodePtr cur;
8800:
8801: /*
8802: * Return the newly created nodeset after unlinking it from
8803: * they pseudo parent.
8804: */
8805: cur = newDoc->children->children;
8806: *list = cur;
8807: while (cur != NULL) {
8808: cur->parent = NULL;
8809: cur = cur->next;
8810: }
8811: newDoc->children->children = NULL;
8812: }
8813: ret = 0;
8814: }
8815: ctxt->sax = oldsax;
8816: xmlFreeParserCtxt(ctxt);
8817: newDoc->intSubset = NULL;
8818: newDoc->extSubset = NULL;
8819: xmlFreeDoc(newDoc);
8820:
8821: return(ret);
8822: }
8823:
8824: /**
1.181 daniel 8825: * xmlParseExternalEntity:
8826: * @doc: the document the chunk pertains to
8827: * @sax: the SAX handler bloc (possibly NULL)
8828: * @user_data: The user data returned on SAX callbacks (possibly NULL)
1.185 daniel 8829: * @depth: Used for loop detection, use 0
1.181 daniel 8830: * @URL: the URL for the entity to load
8831: * @ID: the System ID for the entity to load
8832: * @list: the return value for the set of parsed nodes
8833: *
8834: * Parse an external general entity
8835: * An external general parsed entity is well-formed if it matches the
8836: * production labeled extParsedEnt.
8837: *
8838: * [78] extParsedEnt ::= TextDecl? content
8839: *
8840: * Returns 0 if the entity is well formed, -1 in case of args problem and
8841: * the parser error code otherwise
8842: */
8843:
8844: int
8845: xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
1.185 daniel 8846: int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
1.181 daniel 8847: xmlParserCtxtPtr ctxt;
8848: xmlDocPtr newDoc;
8849: xmlSAXHandlerPtr oldsax = NULL;
8850: int ret = 0;
8851:
1.185 daniel 8852: if (depth > 40) {
8853: return(XML_ERR_ENTITY_LOOP);
8854: }
8855:
8856:
1.181 daniel 8857:
8858: if (list != NULL)
8859: *list = NULL;
8860: if ((URL == NULL) && (ID == NULL))
1.213 veillard 8861: return(-1);
8862: if (doc == NULL) /* @@ relax but check for dereferences */
1.181 daniel 8863: return(-1);
8864:
8865:
1.228 veillard 8866: ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
1.181 daniel 8867: if (ctxt == NULL) return(-1);
8868: ctxt->userData = ctxt;
8869: if (sax != NULL) {
8870: oldsax = ctxt->sax;
8871: ctxt->sax = sax;
8872: if (user_data != NULL)
8873: ctxt->userData = user_data;
8874: }
8875: newDoc = xmlNewDoc(BAD_CAST "1.0");
8876: if (newDoc == NULL) {
8877: xmlFreeParserCtxt(ctxt);
8878: return(-1);
8879: }
8880: if (doc != NULL) {
8881: newDoc->intSubset = doc->intSubset;
8882: newDoc->extSubset = doc->extSubset;
8883: }
8884: if (doc->URL != NULL) {
8885: newDoc->URL = xmlStrdup(doc->URL);
8886: }
8887: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8888: if (newDoc->children == NULL) {
8889: if (sax != NULL)
8890: ctxt->sax = oldsax;
8891: xmlFreeParserCtxt(ctxt);
8892: newDoc->intSubset = NULL;
8893: newDoc->extSubset = NULL;
8894: xmlFreeDoc(newDoc);
8895: return(-1);
8896: }
8897: nodePush(ctxt, newDoc->children);
8898: if (doc == NULL) {
8899: ctxt->myDoc = newDoc;
8900: } else {
8901: ctxt->myDoc = doc;
8902: newDoc->children->doc = doc;
8903: }
8904:
8905: /*
8906: * Parse a possible text declaration first
8907: */
8908: GROW;
8909: if ((RAW == '<') && (NXT(1) == '?') &&
8910: (NXT(2) == 'x') && (NXT(3) == 'm') &&
8911: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8912: xmlParseTextDecl(ctxt);
8913: }
8914:
8915: /*
8916: * Doing validity checking on chunk doesn't make sense
8917: */
8918: ctxt->instate = XML_PARSER_CONTENT;
8919: ctxt->validate = 0;
1.185 daniel 8920: ctxt->depth = depth;
1.181 daniel 8921:
8922: xmlParseContent(ctxt);
8923:
8924: if ((RAW == '<') && (NXT(1) == '/')) {
1.230 veillard 8925: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.181 daniel 8926: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8927: ctxt->sax->error(ctxt->userData,
8928: "chunk is not well balanced\n");
8929: ctxt->wellFormed = 0;
8930: ctxt->disableSAX = 1;
8931: } else if (RAW != 0) {
1.230 veillard 8932: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
1.181 daniel 8933: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8934: ctxt->sax->error(ctxt->userData,
8935: "extra content at the end of well balanced chunk\n");
8936: ctxt->wellFormed = 0;
8937: ctxt->disableSAX = 1;
8938: }
8939: if (ctxt->node != newDoc->children) {
1.230 veillard 8940: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.181 daniel 8941: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8942: ctxt->sax->error(ctxt->userData,
8943: "chunk is not well balanced\n");
8944: ctxt->wellFormed = 0;
8945: ctxt->disableSAX = 1;
8946: }
8947:
8948: if (!ctxt->wellFormed) {
8949: if (ctxt->errNo == 0)
8950: ret = 1;
8951: else
8952: ret = ctxt->errNo;
8953: } else {
8954: if (list != NULL) {
8955: xmlNodePtr cur;
8956:
8957: /*
8958: * Return the newly created nodeset after unlinking it from
8959: * they pseudo parent.
8960: */
8961: cur = newDoc->children->children;
8962: *list = cur;
8963: while (cur != NULL) {
8964: cur->parent = NULL;
8965: cur = cur->next;
8966: }
8967: newDoc->children->children = NULL;
8968: }
8969: ret = 0;
8970: }
8971: if (sax != NULL)
8972: ctxt->sax = oldsax;
8973: xmlFreeParserCtxt(ctxt);
8974: newDoc->intSubset = NULL;
8975: newDoc->extSubset = NULL;
8976: xmlFreeDoc(newDoc);
8977:
8978: return(ret);
8979: }
8980:
8981: /**
8982: * xmlParseBalancedChunk:
1.176 daniel 8983: * @doc: the document the chunk pertains to
8984: * @sax: the SAX handler bloc (possibly NULL)
8985: * @user_data: The user data returned on SAX callbacks (possibly NULL)
1.185 daniel 8986: * @depth: Used for loop detection, use 0
1.176 daniel 8987: * @string: the input string in UTF8 or ISO-Latin (zero terminated)
8988: * @list: the return value for the set of parsed nodes
8989: *
8990: * Parse a well-balanced chunk of an XML document
8991: * called by the parser
8992: * The allowed sequence for the Well Balanced Chunk is the one defined by
8993: * the content production in the XML grammar:
1.144 daniel 8994: *
1.175 daniel 8995: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8996: *
1.176 daniel 8997: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
8998: * the parser error code otherwise
1.144 daniel 8999: */
9000:
1.175 daniel 9001: int
9002: xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
1.185 daniel 9003: void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
1.176 daniel 9004: xmlParserCtxtPtr ctxt;
1.175 daniel 9005: xmlDocPtr newDoc;
1.181 daniel 9006: xmlSAXHandlerPtr oldsax = NULL;
1.175 daniel 9007: int size;
1.176 daniel 9008: int ret = 0;
1.175 daniel 9009:
1.185 daniel 9010: if (depth > 40) {
9011: return(XML_ERR_ENTITY_LOOP);
9012: }
9013:
1.175 daniel 9014:
1.176 daniel 9015: if (list != NULL)
9016: *list = NULL;
9017: if (string == NULL)
9018: return(-1);
9019:
9020: size = xmlStrlen(string);
9021:
1.183 daniel 9022: ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
1.176 daniel 9023: if (ctxt == NULL) return(-1);
9024: ctxt->userData = ctxt;
1.175 daniel 9025: if (sax != NULL) {
1.176 daniel 9026: oldsax = ctxt->sax;
9027: ctxt->sax = sax;
9028: if (user_data != NULL)
9029: ctxt->userData = user_data;
1.175 daniel 9030: }
9031: newDoc = xmlNewDoc(BAD_CAST "1.0");
1.176 daniel 9032: if (newDoc == NULL) {
9033: xmlFreeParserCtxt(ctxt);
9034: return(-1);
9035: }
1.175 daniel 9036: if (doc != NULL) {
9037: newDoc->intSubset = doc->intSubset;
9038: newDoc->extSubset = doc->extSubset;
9039: }
1.176 daniel 9040: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9041: if (newDoc->children == NULL) {
9042: if (sax != NULL)
9043: ctxt->sax = oldsax;
9044: xmlFreeParserCtxt(ctxt);
9045: newDoc->intSubset = NULL;
9046: newDoc->extSubset = NULL;
9047: xmlFreeDoc(newDoc);
9048: return(-1);
9049: }
9050: nodePush(ctxt, newDoc->children);
9051: if (doc == NULL) {
9052: ctxt->myDoc = newDoc;
9053: } else {
9054: ctxt->myDoc = doc;
9055: newDoc->children->doc = doc;
9056: }
9057: ctxt->instate = XML_PARSER_CONTENT;
1.185 daniel 9058: ctxt->depth = depth;
1.176 daniel 9059:
9060: /*
9061: * Doing validity checking on chunk doesn't make sense
9062: */
9063: ctxt->validate = 0;
9064:
1.175 daniel 9065: xmlParseContent(ctxt);
1.176 daniel 9066:
9067: if ((RAW == '<') && (NXT(1) == '/')) {
1.230 veillard 9068: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.176 daniel 9069: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9070: ctxt->sax->error(ctxt->userData,
9071: "chunk is not well balanced\n");
9072: ctxt->wellFormed = 0;
1.180 daniel 9073: ctxt->disableSAX = 1;
1.176 daniel 9074: } else if (RAW != 0) {
1.230 veillard 9075: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
1.176 daniel 9076: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9077: ctxt->sax->error(ctxt->userData,
9078: "extra content at the end of well balanced chunk\n");
9079: ctxt->wellFormed = 0;
1.180 daniel 9080: ctxt->disableSAX = 1;
1.176 daniel 9081: }
9082: if (ctxt->node != newDoc->children) {
1.230 veillard 9083: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.176 daniel 9084: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9085: ctxt->sax->error(ctxt->userData,
9086: "chunk is not well balanced\n");
9087: ctxt->wellFormed = 0;
1.180 daniel 9088: ctxt->disableSAX = 1;
1.176 daniel 9089: }
1.175 daniel 9090:
1.176 daniel 9091: if (!ctxt->wellFormed) {
9092: if (ctxt->errNo == 0)
9093: ret = 1;
9094: else
9095: ret = ctxt->errNo;
9096: } else {
9097: if (list != NULL) {
9098: xmlNodePtr cur;
1.175 daniel 9099:
1.176 daniel 9100: /*
9101: * Return the newly created nodeset after unlinking it from
9102: * they pseudo parent.
9103: */
9104: cur = newDoc->children->children;
9105: *list = cur;
9106: while (cur != NULL) {
9107: cur->parent = NULL;
9108: cur = cur->next;
9109: }
9110: newDoc->children->children = NULL;
9111: }
9112: ret = 0;
1.175 daniel 9113: }
1.176 daniel 9114: if (sax != NULL)
9115: ctxt->sax = oldsax;
1.175 daniel 9116: xmlFreeParserCtxt(ctxt);
9117: newDoc->intSubset = NULL;
9118: newDoc->extSubset = NULL;
1.176 daniel 9119: xmlFreeDoc(newDoc);
1.175 daniel 9120:
1.176 daniel 9121: return(ret);
1.144 daniel 9122: }
9123:
9124: /**
1.229 veillard 9125: * xmlSAXParseEntity:
9126: * @sax: the SAX handler block
9127: * @filename: the filename
9128: *
9129: * parse an XML external entity out of context and build a tree.
9130: * It use the given SAX function block to handle the parsing callback.
9131: * If sax is NULL, fallback to the default DOM tree building routines.
9132: *
9133: * [78] extParsedEnt ::= TextDecl? content
9134: *
9135: * This correspond to a "Well Balanced" chunk
1.144 daniel 9136: *
1.229 veillard 9137: * Returns the resulting document tree
1.144 daniel 9138: */
9139:
1.229 veillard 9140: xmlDocPtr
9141: xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9142: xmlDocPtr ret;
9143: xmlParserCtxtPtr ctxt;
9144: char *directory = NULL;
9145:
9146: ctxt = xmlCreateFileParserCtxt(filename);
9147: if (ctxt == NULL) {
9148: return(NULL);
9149: }
9150: if (sax != NULL) {
9151: if (ctxt->sax != NULL)
9152: xmlFree(ctxt->sax);
9153: ctxt->sax = sax;
9154: ctxt->userData = NULL;
9155: }
9156:
9157: if ((ctxt->directory == NULL) && (directory == NULL))
9158: directory = xmlParserGetDirectory(filename);
9159:
9160: xmlParseExtParsedEnt(ctxt);
9161:
9162: if (ctxt->wellFormed)
9163: ret = ctxt->myDoc;
9164: else {
9165: ret = NULL;
9166: xmlFreeDoc(ctxt->myDoc);
9167: ctxt->myDoc = NULL;
9168: }
9169: if (sax != NULL)
9170: ctxt->sax = NULL;
9171: xmlFreeParserCtxt(ctxt);
9172:
9173: return(ret);
1.144 daniel 9174: }
9175:
9176: /**
1.229 veillard 9177: * xmlParseEntity:
9178: * @filename: the filename
9179: *
9180: * parse an XML external entity out of context and build a tree.
9181: *
9182: * [78] extParsedEnt ::= TextDecl? content
9183: *
9184: * This correspond to a "Well Balanced" chunk
1.59 daniel 9185: *
1.68 daniel 9186: * Returns the resulting document tree
1.59 daniel 9187: */
9188:
1.69 daniel 9189: xmlDocPtr
1.229 veillard 9190: xmlParseEntity(const char *filename) {
9191: return(xmlSAXParseEntity(NULL, filename));
1.55 daniel 9192: }
9193:
9194: /**
1.181 daniel 9195: * xmlCreateEntityParserCtxt:
9196: * @URL: the entity URL
9197: * @ID: the entity PUBLIC ID
9198: * @base: a posible base for the target URI
9199: *
9200: * Create a parser context for an external entity
9201: * Automatic support for ZLIB/Compress compressed document is provided
9202: * by default if found at compile-time.
9203: *
9204: * Returns the new parser context or NULL
9205: */
9206: xmlParserCtxtPtr
9207: xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9208: const xmlChar *base) {
9209: xmlParserCtxtPtr ctxt;
9210: xmlParserInputPtr inputStream;
9211: char *directory = NULL;
1.210 veillard 9212: xmlChar *uri;
9213:
1.181 daniel 9214: ctxt = xmlNewParserCtxt();
9215: if (ctxt == NULL) {
9216: return(NULL);
9217: }
9218:
1.210 veillard 9219: uri = xmlBuildURI(URL, base);
9220:
9221: if (uri == NULL) {
9222: inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9223: if (inputStream == NULL) {
9224: xmlFreeParserCtxt(ctxt);
9225: return(NULL);
9226: }
9227:
9228: inputPush(ctxt, inputStream);
9229:
9230: if ((ctxt->directory == NULL) && (directory == NULL))
9231: directory = xmlParserGetDirectory((char *)URL);
9232: if ((ctxt->directory == NULL) && (directory != NULL))
9233: ctxt->directory = directory;
9234: } else {
9235: inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9236: if (inputStream == NULL) {
9237: xmlFreeParserCtxt(ctxt);
9238: return(NULL);
9239: }
1.181 daniel 9240:
1.210 veillard 9241: inputPush(ctxt, inputStream);
1.181 daniel 9242:
1.210 veillard 9243: if ((ctxt->directory == NULL) && (directory == NULL))
9244: directory = xmlParserGetDirectory((char *)uri);
9245: if ((ctxt->directory == NULL) && (directory != NULL))
9246: ctxt->directory = directory;
9247: xmlFree(uri);
9248: }
1.181 daniel 9249:
9250: return(ctxt);
9251: }
9252:
1.229 veillard 9253: /************************************************************************
9254: * *
9255: * Front ends when parsing from a file *
9256: * *
9257: ************************************************************************/
9258:
1.181 daniel 9259: /**
9260: * xmlCreateFileParserCtxt:
1.50 daniel 9261: * @filename: the filename
9262: *
1.69 daniel 9263: * Create a parser context for a file content.
9264: * Automatic support for ZLIB/Compress compressed document is provided
9265: * by default if found at compile-time.
1.50 daniel 9266: *
1.69 daniel 9267: * Returns the new parser context or NULL
1.9 httpng 9268: */
1.69 daniel 9269: xmlParserCtxtPtr
9270: xmlCreateFileParserCtxt(const char *filename)
9271: {
9272: xmlParserCtxtPtr ctxt;
1.40 daniel 9273: xmlParserInputPtr inputStream;
1.91 daniel 9274: xmlParserInputBufferPtr buf;
1.111 daniel 9275: char *directory = NULL;
1.9 httpng 9276:
1.91 daniel 9277: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1.228 veillard 9278: if (buf == NULL) {
9279: return(NULL);
9280: }
1.9 httpng 9281:
1.97 daniel 9282: ctxt = xmlNewParserCtxt();
1.16 daniel 9283: if (ctxt == NULL) {
1.228 veillard 9284: if (xmlDefaultSAXHandler.error != NULL) {
9285: xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9286: }
1.16 daniel 9287: return(NULL);
9288: }
1.97 daniel 9289:
1.96 daniel 9290: inputStream = xmlNewInputStream(ctxt);
1.40 daniel 9291: if (inputStream == NULL) {
1.97 daniel 9292: xmlFreeParserCtxt(ctxt);
1.40 daniel 9293: return(NULL);
9294: }
9295:
1.119 daniel 9296: inputStream->filename = xmlMemStrdup(filename);
1.91 daniel 9297: inputStream->buf = buf;
9298: inputStream->base = inputStream->buf->buffer->content;
9299: inputStream->cur = inputStream->buf->buffer->content;
1.16 daniel 9300:
1.40 daniel 9301: inputPush(ctxt, inputStream);
1.110 daniel 9302: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 9303: directory = xmlParserGetDirectory(filename);
9304: if ((ctxt->directory == NULL) && (directory != NULL))
1.110 daniel 9305: ctxt->directory = directory;
1.106 daniel 9306:
1.69 daniel 9307: return(ctxt);
9308: }
9309:
9310: /**
1.181 daniel 9311: * xmlSAXParseFile:
1.69 daniel 9312: * @sax: the SAX handler block
9313: * @filename: the filename
9314: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9315: * documents
9316: *
9317: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9318: * compressed document is provided by default if found at compile-time.
9319: * It use the given SAX function block to handle the parsing callback.
9320: * If sax is NULL, fallback to the default DOM tree building routines.
9321: *
9322: * Returns the resulting document tree
9323: */
9324:
1.79 daniel 9325: xmlDocPtr
9326: xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
1.69 daniel 9327: int recovery) {
9328: xmlDocPtr ret;
1.229 veillard 9329: xmlParserCtxtPtr ctxt;
9330: char *directory = NULL;
9331:
9332: ctxt = xmlCreateFileParserCtxt(filename);
9333: if (ctxt == NULL) {
9334: return(NULL);
9335: }
9336: if (sax != NULL) {
9337: if (ctxt->sax != NULL)
9338: xmlFree(ctxt->sax);
9339: ctxt->sax = sax;
9340: ctxt->userData = NULL;
9341: }
9342:
9343: if ((ctxt->directory == NULL) && (directory == NULL))
9344: directory = xmlParserGetDirectory(filename);
9345: if ((ctxt->directory == NULL) && (directory != NULL))
9346: ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9347:
9348: xmlParseDocument(ctxt);
9349:
9350: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9351: else {
9352: ret = NULL;
9353: xmlFreeDoc(ctxt->myDoc);
9354: ctxt->myDoc = NULL;
9355: }
9356: if (sax != NULL)
9357: ctxt->sax = NULL;
9358: xmlFreeParserCtxt(ctxt);
9359:
9360: return(ret);
9361: }
9362:
9363: /**
9364: * xmlRecoverDoc:
9365: * @cur: a pointer to an array of xmlChar
9366: *
9367: * parse an XML in-memory document and build a tree.
9368: * In the case the document is not Well Formed, a tree is built anyway
9369: *
9370: * Returns the resulting document tree
9371: */
9372:
9373: xmlDocPtr
9374: xmlRecoverDoc(xmlChar *cur) {
9375: return(xmlSAXParseDoc(NULL, cur, 1));
9376: }
9377:
9378: /**
9379: * xmlParseFile:
9380: * @filename: the filename
9381: *
9382: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9383: * compressed document is provided by default if found at compile-time.
9384: *
9385: * Returns the resulting document tree
9386: */
9387:
9388: xmlDocPtr
9389: xmlParseFile(const char *filename) {
9390: return(xmlSAXParseFile(NULL, filename, 0));
9391: }
9392:
9393: /**
9394: * xmlRecoverFile:
9395: * @filename: the filename
9396: *
9397: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9398: * compressed document is provided by default if found at compile-time.
9399: * In the case the document is not Well Formed, a tree is built anyway
9400: *
9401: * Returns the resulting document tree
9402: */
9403:
9404: xmlDocPtr
9405: xmlRecoverFile(const char *filename) {
9406: return(xmlSAXParseFile(NULL, filename, 1));
9407: }
9408:
9409:
9410: /**
9411: * xmlSetupParserForBuffer:
9412: * @ctxt: an XML parser context
9413: * @buffer: a xmlChar * buffer
9414: * @filename: a file name
9415: *
9416: * Setup the parser context to parse a new buffer; Clears any prior
9417: * contents from the parser context. The buffer parameter must not be
9418: * NULL, but the filename parameter can be
9419: */
9420: void
9421: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9422: const char* filename)
9423: {
9424: xmlParserInputPtr input;
9425:
9426: input = xmlNewInputStream(ctxt);
9427: if (input == NULL) {
9428: perror("malloc");
9429: xmlFree(ctxt);
9430: return;
9431: }
9432:
9433: xmlClearParserCtxt(ctxt);
9434: if (filename != NULL)
9435: input->filename = xmlMemStrdup(filename);
9436: input->base = buffer;
9437: input->cur = buffer;
9438: inputPush(ctxt, input);
9439: }
9440:
9441: /**
9442: * xmlSAXUserParseFile:
9443: * @sax: a SAX handler
9444: * @user_data: The user data returned on SAX callbacks
9445: * @filename: a file name
9446: *
9447: * parse an XML file and call the given SAX handler routines.
9448: * Automatic support for ZLIB/Compress compressed document is provided
9449: *
9450: * Returns 0 in case of success or a error number otherwise
9451: */
9452: int
9453: xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9454: const char *filename) {
9455: int ret = 0;
9456: xmlParserCtxtPtr ctxt;
9457:
9458: ctxt = xmlCreateFileParserCtxt(filename);
9459: if (ctxt == NULL) return -1;
9460: if (ctxt->sax != &xmlDefaultSAXHandler)
9461: xmlFree(ctxt->sax);
9462: ctxt->sax = sax;
9463: if (user_data != NULL)
9464: ctxt->userData = user_data;
9465:
1.16 daniel 9466: xmlParseDocument(ctxt);
1.229 veillard 9467:
9468: if (ctxt->wellFormed)
9469: ret = 0;
1.59 daniel 9470: else {
1.229 veillard 9471: if (ctxt->errNo != 0)
9472: ret = ctxt->errNo;
9473: else
9474: ret = -1;
1.59 daniel 9475: }
1.86 daniel 9476: if (sax != NULL)
1.229 veillard 9477: ctxt->sax = NULL;
1.69 daniel 9478: xmlFreeParserCtxt(ctxt);
1.20 daniel 9479:
1.229 veillard 9480: return ret;
1.20 daniel 9481: }
9482:
1.229 veillard 9483: /************************************************************************
9484: * *
9485: * Front ends when parsing from memory *
9486: * *
9487: ************************************************************************/
1.32 daniel 9488:
1.50 daniel 9489: /**
1.181 daniel 9490: * xmlCreateMemoryParserCtxt:
1.229 veillard 9491: * @buffer: a pointer to a char array
9492: * @size: the size of the array
1.50 daniel 9493: *
1.69 daniel 9494: * Create a parser context for an XML in-memory document.
1.50 daniel 9495: *
1.69 daniel 9496: * Returns the new parser context or NULL
1.20 daniel 9497: */
1.69 daniel 9498: xmlParserCtxtPtr
9499: xmlCreateMemoryParserCtxt(char *buffer, int size) {
1.20 daniel 9500: xmlParserCtxtPtr ctxt;
1.40 daniel 9501: xmlParserInputPtr input;
1.209 veillard 9502: xmlParserInputBufferPtr buf;
1.40 daniel 9503:
1.229 veillard 9504: if (buffer == NULL)
9505: return(NULL);
9506: if (size <= 0)
1.181 daniel 9507: return(NULL);
1.40 daniel 9508:
1.97 daniel 9509: ctxt = xmlNewParserCtxt();
1.181 daniel 9510: if (ctxt == NULL)
1.20 daniel 9511: return(NULL);
1.97 daniel 9512:
1.209 veillard 9513: buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
9514: if (buf == NULL) return(NULL);
9515:
1.96 daniel 9516: input = xmlNewInputStream(ctxt);
1.40 daniel 9517: if (input == NULL) {
1.97 daniel 9518: xmlFreeParserCtxt(ctxt);
1.40 daniel 9519: return(NULL);
9520: }
1.20 daniel 9521:
1.40 daniel 9522: input->filename = NULL;
1.209 veillard 9523: input->buf = buf;
9524: input->base = input->buf->buffer->content;
9525: input->cur = input->buf->buffer->content;
1.20 daniel 9526:
1.40 daniel 9527: inputPush(ctxt, input);
1.69 daniel 9528: return(ctxt);
9529: }
9530:
9531: /**
1.181 daniel 9532: * xmlSAXParseMemory:
1.69 daniel 9533: * @sax: the SAX handler block
9534: * @buffer: an pointer to a char array
1.127 daniel 9535: * @size: the size of the array
9536: * @recovery: work in recovery mode, i.e. tries to read not Well Formed
1.69 daniel 9537: * documents
9538: *
9539: * parse an XML in-memory block and use the given SAX function block
9540: * to handle the parsing callback. If sax is NULL, fallback to the default
9541: * DOM tree building routines.
9542: *
9543: * Returns the resulting document tree
9544: */
9545: xmlDocPtr
9546: xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
9547: xmlDocPtr ret;
9548: xmlParserCtxtPtr ctxt;
9549:
9550: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9551: if (ctxt == NULL) return(NULL);
1.74 daniel 9552: if (sax != NULL) {
9553: ctxt->sax = sax;
9554: ctxt->userData = NULL;
9555: }
1.20 daniel 9556:
9557: xmlParseDocument(ctxt);
1.40 daniel 9558:
1.72 daniel 9559: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 9560: else {
9561: ret = NULL;
1.72 daniel 9562: xmlFreeDoc(ctxt->myDoc);
9563: ctxt->myDoc = NULL;
1.59 daniel 9564: }
1.86 daniel 9565: if (sax != NULL)
9566: ctxt->sax = NULL;
1.69 daniel 9567: xmlFreeParserCtxt(ctxt);
1.16 daniel 9568:
1.9 httpng 9569: return(ret);
1.17 daniel 9570: }
9571:
1.55 daniel 9572: /**
1.181 daniel 9573: * xmlParseMemory:
1.68 daniel 9574: * @buffer: an pointer to a char array
1.55 daniel 9575: * @size: the size of the array
9576: *
9577: * parse an XML in-memory block and build a tree.
9578: *
1.68 daniel 9579: * Returns the resulting document tree
1.55 daniel 9580: */
9581:
9582: xmlDocPtr xmlParseMemory(char *buffer, int size) {
1.59 daniel 9583: return(xmlSAXParseMemory(NULL, buffer, size, 0));
9584: }
9585:
9586: /**
1.181 daniel 9587: * xmlRecoverMemory:
1.68 daniel 9588: * @buffer: an pointer to a char array
1.59 daniel 9589: * @size: the size of the array
9590: *
9591: * parse an XML in-memory block and build a tree.
9592: * In the case the document is not Well Formed, a tree is built anyway
9593: *
1.68 daniel 9594: * Returns the resulting document tree
1.59 daniel 9595: */
9596:
9597: xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
9598: return(xmlSAXParseMemory(NULL, buffer, size, 1));
1.17 daniel 9599: }
9600:
1.123 daniel 9601: /**
9602: * xmlSAXUserParseMemory:
9603: * @sax: a SAX handler
9604: * @user_data: The user data returned on SAX callbacks
9605: * @buffer: an in-memory XML document input
1.127 daniel 9606: * @size: the length of the XML document in bytes
1.123 daniel 9607: *
9608: * A better SAX parsing routine.
9609: * parse an XML in-memory buffer and call the given SAX handler routines.
9610: *
9611: * Returns 0 in case of success or a error number otherwise
9612: */
9613: int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
9614: char *buffer, int size) {
9615: int ret = 0;
9616: xmlParserCtxtPtr ctxt;
1.218 veillard 9617: xmlSAXHandlerPtr oldsax = NULL;
1.123 daniel 9618:
9619: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9620: if (ctxt == NULL) return -1;
1.216 veillard 9621: if (sax != NULL) {
9622: oldsax = ctxt->sax;
9623: ctxt->sax = sax;
9624: }
1.123 daniel 9625: ctxt->userData = user_data;
9626:
9627: xmlParseDocument(ctxt);
9628:
9629: if (ctxt->wellFormed)
9630: ret = 0;
9631: else {
9632: if (ctxt->errNo != 0)
9633: ret = ctxt->errNo;
9634: else
9635: ret = -1;
9636: }
1.216 veillard 9637: if (sax != NULL) {
9638: ctxt->sax = oldsax;
9639: }
1.123 daniel 9640: xmlFreeParserCtxt(ctxt);
9641:
9642: return ret;
9643: }
9644:
1.132 daniel 9645: /**
1.229 veillard 9646: * xmlCreateDocParserCtxt:
9647: * @cur: a pointer to an array of xmlChar
9648: *
9649: * Creates a parser context for an XML in-memory document.
1.132 daniel 9650: *
1.229 veillard 9651: * Returns the new parser context or NULL
1.132 daniel 9652: */
1.229 veillard 9653: xmlParserCtxtPtr
9654: xmlCreateDocParserCtxt(xmlChar *cur) {
9655: int len;
1.132 daniel 9656:
1.229 veillard 9657: if (cur == NULL)
9658: return(NULL);
9659: len = xmlStrlen(cur);
9660: return(xmlCreateMemoryParserCtxt((char *)cur, len));
1.132 daniel 9661: }
1.98 daniel 9662:
1.50 daniel 9663: /**
1.229 veillard 9664: * xmlSAXParseDoc:
9665: * @sax: the SAX handler block
9666: * @cur: a pointer to an array of xmlChar
9667: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9668: * documents
1.50 daniel 9669: *
1.229 veillard 9670: * parse an XML in-memory document and build a tree.
9671: * It use the given SAX function block to handle the parsing callback.
9672: * If sax is NULL, fallback to the default DOM tree building routines.
1.50 daniel 9673: *
1.229 veillard 9674: * Returns the resulting document tree
1.32 daniel 9675: */
9676:
1.229 veillard 9677: xmlDocPtr
9678: xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
9679: xmlDocPtr ret;
9680: xmlParserCtxtPtr ctxt;
9681:
9682: if (cur == NULL) return(NULL);
1.32 daniel 9683:
9684:
1.229 veillard 9685: ctxt = xmlCreateDocParserCtxt(cur);
9686: if (ctxt == NULL) return(NULL);
9687: if (sax != NULL) {
9688: ctxt->sax = sax;
9689: ctxt->userData = NULL;
9690: }
1.32 daniel 9691:
1.229 veillard 9692: xmlParseDocument(ctxt);
9693: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9694: else {
9695: ret = NULL;
9696: xmlFreeDoc(ctxt->myDoc);
9697: ctxt->myDoc = NULL;
9698: }
9699: if (sax != NULL)
9700: ctxt->sax = NULL;
9701: xmlFreeParserCtxt(ctxt);
9702:
9703: return(ret);
1.32 daniel 9704: }
9705:
1.50 daniel 9706: /**
1.229 veillard 9707: * xmlParseDoc:
9708: * @cur: a pointer to an array of xmlChar
1.50 daniel 9709: *
1.229 veillard 9710: * parse an XML in-memory document and build a tree.
1.50 daniel 9711: *
1.229 veillard 9712: * Returns the resulting document tree
1.32 daniel 9713: */
9714:
1.229 veillard 9715: xmlDocPtr
9716: xmlParseDoc(xmlChar *cur) {
9717: return(xmlSAXParseDoc(NULL, cur, 0));
9718: }
1.32 daniel 9719:
9720:
1.229 veillard 9721: /************************************************************************
9722: * *
9723: * Miscellaneous *
9724: * *
9725: ************************************************************************/
1.32 daniel 9726:
1.237 veillard 9727: #ifdef LIBXML_XPATH_ENABLED
9728: #include <libxml/xpath.h>
9729: #endif
9730:
/* Non-zero once xmlInitParser() has run; reset by xmlCleanupParser(). */
static int xmlParserInitialized = 0;

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 *
 * Calling it again while the parser is already initialized is a no-op.
 */

void
xmlInitParser(void) {
    if (!xmlParserInitialized) {
        xmlInitCharEncodingHandlers();
        xmlInitializePredefinedEntities();
        xmlDefaultSAXHandlerInit();
        xmlRegisterDefaultInputCallbacks();
        xmlRegisterDefaultOutputCallbacks();
#ifdef LIBXML_HTML_ENABLED
        htmlInitAutoClose();
        htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
        xmlXPathInit();
#endif
        xmlParserInitialized = 1;
    }
}
9759:
1.50 daniel 9760: /**
1.229 veillard 9761: * xmlCleanupParser:
1.50 daniel 9762: *
1.229 veillard 9763: * Cleanup function for the XML parser. It tries to reclaim all
9764: * parsing related global memory allocated for the parser processing.
9765: * It doesn't deallocate any document related memory. Calling this
9766: * function should not prevent reusing the parser.
1.32 daniel 9767: */
1.229 veillard 9768:
1.55 daniel 9769: void
1.229 veillard 9770: xmlCleanupParser(void) {
1.235 veillard 9771: xmlParserInitialized = 0;
1.229 veillard 9772: xmlCleanupCharEncodingHandlers();
9773: xmlCleanupPredefinedEntities();
1.32 daniel 9774: }
1.220 veillard 9775:
9776: /**
9777: * xmlPedanticParserDefault:
9778: * @val: int 0 or 1
9779: *
9780: * Set and return the previous value for enabling pedantic warnings.
9781: *
9782: * Returns the last value for 0 for no substitution, 1 for substitution.
9783: */
9784:
9785: int
9786: xmlPedanticParserDefault(int val) {
9787: int old = xmlPedanticParserDefaultValue;
9788:
9789: xmlPedanticParserDefaultValue = val;
9790: return(old);
9791: }
1.98 daniel 9792:
9793: /**
1.181 daniel 9794: * xmlSubstituteEntitiesDefault:
1.98 daniel 9795: * @val: int 0 or 1
9796: *
9797: * Set and return the previous value for default entity support.
9798: * Initially the parser always keep entity references instead of substituting
9799: * entity values in the output. This function has to be used to change the
9800: * default parser behaviour
9801: * SAX::subtituteEntities() has to be used for changing that on a file by
9802: * file basis.
9803: *
9804: * Returns the last value for 0 for no substitution, 1 for substitution.
9805: */
9806:
9807: int
9808: xmlSubstituteEntitiesDefault(int val) {
9809: int old = xmlSubstituteEntitiesDefaultValue;
9810:
9811: xmlSubstituteEntitiesDefaultValue = val;
1.180 daniel 9812: return(old);
9813: }
9814:
9815: /**
9816: * xmlKeepBlanksDefault:
9817: * @val: int 0 or 1
9818: *
9819: * Set and return the previous value for default blanks text nodes support.
9820: * The 1.x version of the parser used an heuristic to try to detect
9821: * ignorable white spaces. As a result the SAX callback was generating
9822: * ignorableWhitespace() callbacks instead of characters() one, and when
9823: * using the DOM output text nodes containing those blanks were not generated.
9824: * The 2.x and later version will switch to the XML standard way and
9825: * ignorableWhitespace() are only generated when running the parser in
9826: * validating mode and when the current element doesn't allow CDATA or
9827: * mixed content.
9828: * This function is provided as a way to force the standard behaviour
9829: * on 1.X libs and to switch back to the old mode for compatibility when
9830: * running 1.X client code on 2.X . Upgrade of 1.X code should be done
9831: * by using xmlIsBlankNode() commodity function to detect the "empty"
9832: * nodes generated.
9833: * This value also affect autogeneration of indentation when saving code
9834: * if blanks sections are kept, indentation is not generated.
9835: *
9836: * Returns the last value for 0 for no substitution, 1 for substitution.
9837: */
9838:
9839: int
9840: xmlKeepBlanksDefault(int val) {
9841: int old = xmlKeepBlanksDefaultValue;
9842:
9843: xmlKeepBlanksDefaultValue = val;
9844: xmlIndentTreeOutput = !val;
1.98 daniel 9845: return(old);
9846: }
1.77 daniel 9847:
Webmaster