Annotation of XML/parser.c, revision 1.230
1.1 veillard 1: /*
1.229 veillard 2: * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3: * implemented on top of the SAX interfaces
1.15 veillard 4: *
1.222 veillard 5: * References:
6: * The XML specification:
7: * http://www.w3.org/TR/REC-xml
8: * Original 1.0 version:
9: * http://www.w3.org/TR/1998/REC-xml-19980210
10: * XML second edition working draft
11: * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12: *
1.229 veillard 13: * Okay this is a big file, the parser core is around 7000 lines, then it
14: * is followed by the progressive parser top routines, then the various
15: * high level APIs to call the parser and a few miscellaneous functions.
16: * A number of helper functions and deprecated ones have been moved to
17: * parserInternals.c to reduce this file size.
18: * As much as possible the functions are associated with their relative
19: * production in the XML specification. A few productions defining the
20: * different ranges of character are actually implanted either in
21: * parserInternals.h or parserInternals.c
22: * The DOM tree build is realized from the default SAX callbacks in
23: * the module SAX.c.
24: * The routines doing the validation checks are in valid.c and called either
25: * from the SAX callbacks or as standalone functions using a preparsed
26: * document.
27: *
1.15 veillard 28: * See Copyright for the status of this software.
29: *
1.60 daniel 30: * Daniel.Veillard@w3.org
1.1 veillard 31: */
32:
1.26 daniel 33: #ifdef WIN32
1.138 daniel 34: #include "win32config.h"
1.226 veillard 35: #define XML_DIR_SEP '\\'
1.26 daniel 36: #else
1.121 daniel 37: #include "config.h"
1.226 veillard 38: #define XML_DIR_SEP '/'
1.26 daniel 39: #endif
1.121 daniel 40:
1.1 veillard 41: #include <stdio.h>
1.204 veillard 42: #include <string.h>
1.121 daniel 43: #ifdef HAVE_CTYPE_H
1.1 veillard 44: #include <ctype.h>
1.121 daniel 45: #endif
46: #ifdef HAVE_STDLIB_H
1.50 daniel 47: #include <stdlib.h>
1.121 daniel 48: #endif
49: #ifdef HAVE_SYS_STAT_H
1.9 httpng 50: #include <sys/stat.h>
1.121 daniel 51: #endif
1.9 httpng 52: #ifdef HAVE_FCNTL_H
53: #include <fcntl.h>
54: #endif
1.10 httpng 55: #ifdef HAVE_UNISTD_H
56: #include <unistd.h>
57: #endif
1.20 daniel 58: #ifdef HAVE_ZLIB_H
59: #include <zlib.h>
60: #endif
1.1 veillard 61:
1.188 daniel 62: #include <libxml/xmlmemory.h>
63: #include <libxml/tree.h>
64: #include <libxml/parser.h>
65: #include <libxml/entities.h>
66: #include <libxml/encoding.h>
67: #include <libxml/valid.h>
68: #include <libxml/parserInternals.h>
69: #include <libxml/xmlIO.h>
1.193 daniel 70: #include <libxml/uri.h>
1.122 daniel 71: #include "xml-error.h"
1.1 veillard 72:
1.140 daniel 73: #define XML_PARSER_BIG_BUFFER_SIZE 1000
74: #define XML_PARSER_BUFFER_SIZE 100
75:
1.229 veillard 76: /*
77: * Various global defaults for parsing
78: */
1.160 daniel 79: int xmlGetWarningsDefaultValue = 1;
1.220 veillard 80: int xmlParserDebugEntities = 0;
1.229 veillard 81: int xmlSubstituteEntitiesDefaultValue = 0;
82: int xmlDoValidityCheckingDefaultValue = 0;
83: int xmlPedanticParserDefaultValue = 0;
84: int xmlKeepBlanksDefaultValue = 1;
1.86 daniel 85:
1.139 daniel 86: /*
87: * List of XML prefixed PI allowed by W3C specs
88: */
89:
90: const char *xmlW3CPIs[] = {
91: "xml-stylesheet",
92: NULL
93: };
1.91 daniel 94:
1.229 veillard 95: /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
1.151 daniel 96: void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
97: xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
98: const xmlChar **str);
1.91 daniel 99:
100:
1.45 daniel 101: /************************************************************************
102: * *
103: * Parser stacks related functions and macros *
104: * *
105: ************************************************************************/
1.79 daniel 106:
1.135 daniel 107: xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
108: const xmlChar ** str);
1.79 daniel 109:
1.1 veillard 110: /*
1.40 daniel 111: * Generic function for accessing stacks in the Parser Context
1.1 veillard 112: */
113:
1.140 daniel 114: #define PUSH_AND_POP(scope, type, name) \
115: scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
1.31 daniel 116: if (ctxt->name##Nr >= ctxt->name##Max) { \
117: ctxt->name##Max *= 2; \
1.204 veillard 118: ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
1.40 daniel 119: ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
120: if (ctxt->name##Tab == NULL) { \
1.31 daniel 121: fprintf(stderr, "realloc failed !\n"); \
1.145 daniel 122: return(0); \
1.31 daniel 123: } \
124: } \
1.40 daniel 125: ctxt->name##Tab[ctxt->name##Nr] = value; \
126: ctxt->name = value; \
127: return(ctxt->name##Nr++); \
1.31 daniel 128: } \
1.140 daniel 129: scope type name##Pop(xmlParserCtxtPtr ctxt) { \
1.69 daniel 130: type ret; \
1.40 daniel 131: if (ctxt->name##Nr <= 0) return(0); \
132: ctxt->name##Nr--; \
1.50 daniel 133: if (ctxt->name##Nr > 0) \
134: ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
135: else \
136: ctxt->name = NULL; \
1.69 daniel 137: ret = ctxt->name##Tab[ctxt->name##Nr]; \
138: ctxt->name##Tab[ctxt->name##Nr] = 0; \
139: return(ret); \
1.31 daniel 140: } \
141:
1.229 veillard 142: /*
143: * Those macros actually generate the functions
144: */
1.140 daniel 145: PUSH_AND_POP(extern, xmlParserInputPtr, input)
146: PUSH_AND_POP(extern, xmlNodePtr, node)
147: PUSH_AND_POP(extern, xmlChar*, name)
1.40 daniel 148:
1.176 daniel 149: int spacePush(xmlParserCtxtPtr ctxt, int val) {
150: if (ctxt->spaceNr >= ctxt->spaceMax) {
151: ctxt->spaceMax *= 2;
1.204 veillard 152: ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1.176 daniel 153: ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
154: if (ctxt->spaceTab == NULL) {
155: fprintf(stderr, "realloc failed !\n");
156: return(0);
157: }
158: }
159: ctxt->spaceTab[ctxt->spaceNr] = val;
160: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
161: return(ctxt->spaceNr++);
162: }
163:
164: int spacePop(xmlParserCtxtPtr ctxt) {
165: int ret;
166: if (ctxt->spaceNr <= 0) return(0);
167: ctxt->spaceNr--;
168: if (ctxt->spaceNr > 0)
169: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
170: else
171: ctxt->space = NULL;
172: ret = ctxt->spaceTab[ctxt->spaceNr];
173: ctxt->spaceTab[ctxt->spaceNr] = -1;
174: return(ret);
175: }
176:
1.55 daniel 177: /*
178: * Macros for accessing the content. Those should be used only by the parser,
179: * and not exported.
180: *
1.229 veillard 181: * Dirty macros, i.e. one often need to make assumption on the context to
182: * use them
1.55 daniel 183: *
1.123 daniel 184: * CUR_PTR return the current pointer to the xmlChar to be parsed.
1.151 daniel 185: * To be used with extreme caution since operations consuming
186: * characters may move the input buffer to a different location !
1.123 daniel 187: * CUR returns the current xmlChar value, i.e. an 8 bit value.
1.151 daniel 188: * This should be used internally by the parser
1.55 daniel 189: * only to compare to ASCII values, otherwise it would break when
190: * running with UTF-8 encoding.
1.229 veillard 191: * RAW same as CUR but in the input buffer, bypass any token
192: * extraction that may have been done
1.123 daniel 193: * NXT(n) returns the n'th next xmlChar. Same as CUR, it should be used only
1.55 daniel 194: * to compare on ASCII based substring.
1.123 daniel 195: * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1.55 daniel 196: * strings within the parser.
197: *
1.77 daniel 198: * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1.55 daniel 199: *
200: * NEXT Skip to the next character, this does the proper decoding
201: * in UTF-8 mode. It also pop-up unfinished entities on the fly.
1.229 veillard 202: * NEXTL(l) Skip l xmlChars in the input buffer
203: * CUR_CHAR(l) returns the current unicode character (int), set l
204: * to the number of xmlChars used for the encoding [0-5].
205: * CUR_SCHAR same but operate on a string instead of the context
206: * COPY_BUF copy the current unicode char to the target buffer, increment
207: * the index
208: * GROW, SHRINK handling of input buffers
1.55 daniel 209: */
1.45 daniel 210:
1.152 daniel 211: #define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
1.97 daniel 212: #define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
1.55 daniel 213: #define NXT(val) ctxt->input->cur[(val)]
214: #define CUR_PTR ctxt->input->cur
1.154 daniel 215:
1.164 daniel 216: #define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val); \
217: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
1.229 veillard 218: /* DEPR if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); */\
1.168 daniel 219: if ((*ctxt->input->cur == 0) && \
220: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
221: xmlPopInput(ctxt)
1.164 daniel 222:
1.97 daniel 223: #define SHRINK xmlParserInputShrink(ctxt->input); \
224: if ((*ctxt->input->cur == 0) && \
225: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
226: xmlPopInput(ctxt)
227:
228: #define GROW xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
229: if ((*ctxt->input->cur == 0) && \
230: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
231: xmlPopInput(ctxt)
1.55 daniel 232:
1.155 daniel 233: #define SKIP_BLANKS xmlSkipBlankChars(ctxt);
1.154 daniel 234:
1.151 daniel 235: #define NEXT xmlNextChar(ctxt);
1.154 daniel 236:
1.153 daniel 237: #define NEXTL(l) \
238: if (*(ctxt->input->cur) == '\n') { \
239: ctxt->input->line++; ctxt->input->col = 1; \
240: } else ctxt->input->col++; \
1.154 daniel 241: ctxt->token = 0; ctxt->input->cur += l; \
242: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
1.229 veillard 243: /* DEPR if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); */
1.154 daniel 244:
1.152 daniel 245: #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l);
1.162 daniel 246: #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l);
1.154 daniel 247:
1.152 daniel 248: #define COPY_BUF(l,b,i,v) \
249: if (l == 1) b[i++] = (xmlChar) v; \
250: else i += xmlCopyChar(l,&b[i],v);
1.151 daniel 251:
252: /**
1.229 veillard 253: * xmlSkipBlankChars:
1.151 daniel 254: * @ctxt: the XML parser context
255: *
1.229 veillard 256: * skip all blanks character found at that point in the input streams.
257: * It pops up finished entities in the process if allowable at that point.
258: *
259: * Returns the number of space chars skipped
1.151 daniel 260: */
1.55 daniel 261:
1.229 veillard 262: int
263: xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
264: int cur, res = 0;
1.201 daniel 265:
1.176 daniel 266: /*
1.229 veillard 267: * It's Okay to use CUR/NEXT here since all the blanks are on
268: * the ASCII range.
269: */
270: do {
271: cur = CUR;
272: while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
273: NEXT;
274: cur = CUR;
275: res++;
1.151 daniel 276: }
1.229 veillard 277: while ((cur == 0) && (ctxt->inputNr > 1) &&
278: (ctxt->instate != XML_PARSER_COMMENT)) {
1.168 daniel 279: xmlPopInput(ctxt);
1.229 veillard 280: cur = CUR;
281: }
1.222 veillard 282: /*
283: * Need to handle support of entities branching here
284: */
1.155 daniel 285: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1.229 veillard 286: /* DEPR if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); */
1.222 veillard 287: } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1.155 daniel 288: return(res);
1.152 daniel 289: }
290:
1.97 daniel 291: /************************************************************************
292: * *
1.229 veillard 293: * Commodity functions to handle entities *
1.97 daniel 294: * *
295: ************************************************************************/
1.40 daniel 296:
1.50 daniel 297: /**
298: * xmlPopInput:
299: * @ctxt: an XML parser context
300: *
1.40 daniel 301: * xmlPopInput: the current input pointed by ctxt->input came to an end
302: * pop it and return the next char.
1.45 daniel 303: *
1.123 daniel 304: * Returns the current xmlChar in the parser context
1.40 daniel 305: */
1.123 daniel 306: xmlChar
1.55 daniel 307: xmlPopInput(xmlParserCtxtPtr ctxt) {
1.40 daniel 308: if (ctxt->inputNr == 1) return(0); /* End of main Input */
1.220 veillard 309: if (xmlParserDebugEntities)
310: fprintf(stderr, "Popping input %d\n", ctxt->inputNr);
1.69 daniel 311: xmlFreeInputStream(inputPop(ctxt));
1.97 daniel 312: if ((*ctxt->input->cur == 0) &&
313: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
314: return(xmlPopInput(ctxt));
1.40 daniel 315: return(CUR);
316: }
317:
1.50 daniel 318: /**
1.229 veillard 319: * xmlPushInput:
1.174 daniel 320: * @ctxt: an XML parser context
1.229 veillard 321: * @input: an XML parser input fragment (entity, XML fragment ...).
1.174 daniel 322: *
1.229 veillard 323: * xmlPushInput: switch to a new input stream which is stacked on top
324: * of the previous one(s).
1.174 daniel 325: */
1.229 veillard 326: void
327: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
328: if (input == NULL) return;
1.174 daniel 329:
1.229 veillard 330: if (xmlParserDebugEntities) {
331: if ((ctxt->input != NULL) && (ctxt->input->filename))
332: fprintf(stderr, "%s(%d): ", ctxt->input->filename,
333: ctxt->input->line);
334: fprintf(stderr, "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
335: }
336: inputPush(ctxt, input);
337: GROW;
1.174 daniel 338: }
1.97 daniel 339:
340: /**
341: * xmlParseCharRef:
342: * @ctxt: an XML parser context
343: *
344: * parse Reference declarations
345: *
346: * [66] CharRef ::= '&#' [0-9]+ ';' |
347: * '&#x' [0-9a-fA-F]+ ';'
348: *
1.98 daniel 349: * [ WFC: Legal Character ]
350: * Characters referred to using character references must match the
351: * production for Char.
352: *
1.135 daniel 353: * Returns the value parsed (as an int), 0 in case of error
1.77 daniel 354: */
1.97 daniel 355: int
356: xmlParseCharRef(xmlParserCtxtPtr ctxt) {
357: int val = 0;
1.222 veillard 358: int count = 0;
1.97 daniel 359:
1.111 daniel 360: if (ctxt->token != 0) {
361: val = ctxt->token;
362: ctxt->token = 0;
363: return(val);
364: }
1.222 veillard 365: /*
366: * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
367: */
1.152 daniel 368: if ((RAW == '&') && (NXT(1) == '#') &&
1.97 daniel 369: (NXT(2) == 'x')) {
370: SKIP(3);
1.222 veillard 371: GROW;
372: while (RAW != ';') { /* loop blocked by count */
373: if ((RAW >= '0') && (RAW <= '9') && (count < 20))
1.97 daniel 374: val = val * 16 + (CUR - '0');
1.222 veillard 375: else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1.97 daniel 376: val = val * 16 + (CUR - 'a') + 10;
1.222 veillard 377: else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1.97 daniel 378: val = val * 16 + (CUR - 'A') + 10;
379: else {
1.123 daniel 380: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1.97 daniel 381: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
382: ctxt->sax->error(ctxt->userData,
383: "xmlParseCharRef: invalid hexadecimal value\n");
384: ctxt->wellFormed = 0;
1.180 daniel 385: ctxt->disableSAX = 1;
1.97 daniel 386: val = 0;
387: break;
388: }
389: NEXT;
1.222 veillard 390: count++;
1.97 daniel 391: }
1.164 daniel 392: if (RAW == ';') {
393: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
394: ctxt->nbChars ++;
395: ctxt->input->cur++;
396: }
1.152 daniel 397: } else if ((RAW == '&') && (NXT(1) == '#')) {
1.97 daniel 398: SKIP(2);
1.222 veillard 399: GROW;
400: while (RAW != ';') { /* loop blocked by count */
401: if ((RAW >= '0') && (RAW <= '9') && (count < 20))
1.97 daniel 402: val = val * 10 + (CUR - '0');
403: else {
1.123 daniel 404: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1.97 daniel 405: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
406: ctxt->sax->error(ctxt->userData,
407: "xmlParseCharRef: invalid decimal value\n");
408: ctxt->wellFormed = 0;
1.180 daniel 409: ctxt->disableSAX = 1;
1.97 daniel 410: val = 0;
411: break;
412: }
413: NEXT;
1.222 veillard 414: count++;
1.97 daniel 415: }
1.164 daniel 416: if (RAW == ';') {
417: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
418: ctxt->nbChars ++;
419: ctxt->input->cur++;
420: }
1.97 daniel 421: } else {
1.123 daniel 422: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1.97 daniel 423: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.229 veillard 424: ctxt->sax->error(ctxt->userData,
425: "xmlParseCharRef: invalid value\n");
1.97 daniel 426: ctxt->wellFormed = 0;
1.180 daniel 427: ctxt->disableSAX = 1;
1.97 daniel 428: }
1.229 veillard 429:
430: /*
431: * [ WFC: Legal Character ]
432: * Characters referred to using character references must match the
433: * production for Char.
434: */
435: if (IS_CHAR(val)) {
436: return(val);
437: } else {
438: ctxt->errNo = XML_ERR_INVALID_CHAR;
1.97 daniel 439: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.229 veillard 440: ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
441: val);
1.97 daniel 442: ctxt->wellFormed = 0;
1.180 daniel 443: ctxt->disableSAX = 1;
1.97 daniel 444: }
1.229 veillard 445: return(0);
446: }
447:
448: /**
449: * xmlParseStringCharRef:
450: * @ctxt: an XML parser context
451: * @str: a pointer to an index in the string
452: *
453: * parse Reference declarations, variant parsing from a string rather
454: * than an an input flow.
455: *
456: * [66] CharRef ::= '&#' [0-9]+ ';' |
457: * '&#x' [0-9a-fA-F]+ ';'
458: *
459: * [ WFC: Legal Character ]
460: * Characters referred to using character references must match the
461: * production for Char.
462: *
463: * Returns the value parsed (as an int), 0 in case of error, str will be
464: * updated to the current value of the index
465: */
466: int
467: xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
468: const xmlChar *ptr;
469: xmlChar cur;
470: int val = 0;
1.98 daniel 471:
1.229 veillard 472: if ((str == NULL) || (*str == NULL)) return(0);
473: ptr = *str;
474: cur = *ptr;
475: if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
476: ptr += 3;
477: cur = *ptr;
478: while (cur != ';') { /* Non input consuming loop */
479: if ((cur >= '0') && (cur <= '9'))
480: val = val * 16 + (cur - '0');
481: else if ((cur >= 'a') && (cur <= 'f'))
482: val = val * 16 + (cur - 'a') + 10;
483: else if ((cur >= 'A') && (cur <= 'F'))
484: val = val * 16 + (cur - 'A') + 10;
485: else {
486: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
487: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
488: ctxt->sax->error(ctxt->userData,
489: "xmlParseStringCharRef: invalid hexadecimal value\n");
490: ctxt->wellFormed = 0;
491: ctxt->disableSAX = 1;
492: val = 0;
493: break;
494: }
495: ptr++;
496: cur = *ptr;
497: }
498: if (cur == ';')
499: ptr++;
500: } else if ((cur == '&') && (ptr[1] == '#')){
501: ptr += 2;
502: cur = *ptr;
503: while (cur != ';') { /* Non input consuming loops */
504: if ((cur >= '0') && (cur <= '9'))
505: val = val * 10 + (cur - '0');
506: else {
507: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
508: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
509: ctxt->sax->error(ctxt->userData,
510: "xmlParseStringCharRef: invalid decimal value\n");
511: ctxt->wellFormed = 0;
512: ctxt->disableSAX = 1;
513: val = 0;
514: break;
515: }
516: ptr++;
517: cur = *ptr;
518: }
519: if (cur == ';')
520: ptr++;
521: } else {
522: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1.97 daniel 523: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.229 veillard 524: ctxt->sax->error(ctxt->userData,
525: "xmlParseCharRef: invalid value\n");
1.97 daniel 526: ctxt->wellFormed = 0;
1.180 daniel 527: ctxt->disableSAX = 1;
1.229 veillard 528: return(0);
1.97 daniel 529: }
1.229 veillard 530: *str = ptr;
1.98 daniel 531:
532: /*
1.229 veillard 533: * [ WFC: Legal Character ]
534: * Characters referred to using character references must match the
535: * production for Char.
1.98 daniel 536: */
1.229 veillard 537: if (IS_CHAR(val)) {
538: return(val);
539: } else {
540: ctxt->errNo = XML_ERR_INVALID_CHAR;
1.98 daniel 541: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.229 veillard 542: ctxt->sax->error(ctxt->userData,
543: "CharRef: invalid xmlChar value %d\n", val);
1.98 daniel 544: ctxt->wellFormed = 0;
1.180 daniel 545: ctxt->disableSAX = 1;
1.98 daniel 546: }
1.229 veillard 547: return(0);
1.96 daniel 548: }
549:
550: /**
551: * xmlParserHandlePEReference:
552: * @ctxt: the parser context
553: *
554: * [69] PEReference ::= '%' Name ';'
555: *
1.98 daniel 556: * [ WFC: No Recursion ]
1.229 veillard 557: * A parsed entity must not contain a recursive
1.98 daniel 558: * reference to itself, either directly or indirectly.
559: *
560: * [ WFC: Entity Declared ]
561: * In a document without any DTD, a document with only an internal DTD
562: * subset which contains no parameter entity references, or a document
563: * with "standalone='yes'", ... ... The declaration of a parameter
564: * entity must precede any reference to it...
565: *
566: * [ VC: Entity Declared ]
567: * In a document with an external subset or external parameter entities
568: * with "standalone='no'", ... ... The declaration of a parameter entity
569: * must precede any reference to it...
570: *
571: * [ WFC: In DTD ]
572: * Parameter-entity references may only appear in the DTD.
573: * NOTE: misleading but this is handled.
574: *
575: * A PEReference may have been detected in the current input stream
1.96 daniel 576: * the handling is done accordingly to
577: * http://www.w3.org/TR/REC-xml#entproc
578: * i.e.
579: * - Included in literal in entity values
580: * - Included as Parameter Entity reference within DTDs
581: */
582: void
583: xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 584: xmlChar *name;
1.96 daniel 585: xmlEntityPtr entity = NULL;
586: xmlParserInputPtr input;
587:
1.126 daniel 588: if (ctxt->token != 0) {
589: return;
590: }
1.152 daniel 591: if (RAW != '%') return;
1.96 daniel 592: switch(ctxt->instate) {
1.109 daniel 593: case XML_PARSER_CDATA_SECTION:
594: return;
1.97 daniel 595: case XML_PARSER_COMMENT:
596: return;
1.140 daniel 597: case XML_PARSER_START_TAG:
598: return;
599: case XML_PARSER_END_TAG:
600: return;
1.96 daniel 601: case XML_PARSER_EOF:
1.123 daniel 602: ctxt->errNo = XML_ERR_PEREF_AT_EOF;
1.96 daniel 603: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
604: ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
605: ctxt->wellFormed = 0;
1.180 daniel 606: ctxt->disableSAX = 1;
1.96 daniel 607: return;
608: case XML_PARSER_PROLOG:
1.140 daniel 609: case XML_PARSER_START:
610: case XML_PARSER_MISC:
1.123 daniel 611: ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
1.96 daniel 612: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
613: ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
614: ctxt->wellFormed = 0;
1.180 daniel 615: ctxt->disableSAX = 1;
1.96 daniel 616: return;
1.97 daniel 617: case XML_PARSER_ENTITY_DECL:
1.96 daniel 618: case XML_PARSER_CONTENT:
619: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 620: case XML_PARSER_PI:
1.168 daniel 621: case XML_PARSER_SYSTEM_LITERAL:
1.96 daniel 622: /* we just ignore it there */
623: return;
624: case XML_PARSER_EPILOG:
1.123 daniel 625: ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
1.96 daniel 626: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.97 daniel 627: ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
1.96 daniel 628: ctxt->wellFormed = 0;
1.180 daniel 629: ctxt->disableSAX = 1;
1.96 daniel 630: return;
1.97 daniel 631: case XML_PARSER_ENTITY_VALUE:
632: /*
633: * NOTE: in the case of entity values, we don't do the
1.127 daniel 634: * substitution here since we need the literal
1.97 daniel 635: * entity value to be able to save the internal
636: * subset of the document.
1.222 veillard 637: * This will be handled by xmlStringDecodeEntities
1.97 daniel 638: */
639: return;
1.96 daniel 640: case XML_PARSER_DTD:
1.98 daniel 641: /*
642: * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
643: * In the internal DTD subset, parameter-entity references
644: * can occur only where markup declarations can occur, not
645: * within markup declarations.
646: * In that case this is handled in xmlParseMarkupDecl
647: */
648: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
649: return;
1.96 daniel 650: }
651:
652: NEXT;
653: name = xmlParseName(ctxt);
1.220 veillard 654: if (xmlParserDebugEntities)
655: fprintf(stderr, "PE Reference: %s\n", name);
1.96 daniel 656: if (name == NULL) {
1.123 daniel 657: ctxt->errNo = XML_ERR_PEREF_NO_NAME;
1.96 daniel 658: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
659: ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
660: ctxt->wellFormed = 0;
1.180 daniel 661: ctxt->disableSAX = 1;
1.96 daniel 662: } else {
1.152 daniel 663: if (RAW == ';') {
1.96 daniel 664: NEXT;
1.98 daniel 665: if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
666: entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.96 daniel 667: if (entity == NULL) {
1.98 daniel 668:
669: /*
670: * [ WFC: Entity Declared ]
671: * In a document without any DTD, a document with only an
672: * internal DTD subset which contains no parameter entity
673: * references, or a document with "standalone='yes'", ...
674: * ... The declaration of a parameter entity must precede
675: * any reference to it...
676: */
677: if ((ctxt->standalone == 1) ||
678: ((ctxt->hasExternalSubset == 0) &&
679: (ctxt->hasPErefs == 0))) {
680: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
681: ctxt->sax->error(ctxt->userData,
682: "PEReference: %%%s; not found\n", name);
683: ctxt->wellFormed = 0;
1.180 daniel 684: ctxt->disableSAX = 1;
1.98 daniel 685: } else {
686: /*
687: * [ VC: Entity Declared ]
688: * In a document with an external subset or external
689: * parameter entities with "standalone='no'", ...
690: * ... The declaration of a parameter entity must precede
691: * any reference to it...
692: */
1.220 veillard 693: if ((!ctxt->disableSAX) &&
694: (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
1.212 veillard 695: ctxt->vctxt.error(ctxt->vctxt.userData,
696: "PEReference: %%%s; not found\n", name);
1.220 veillard 697: } else if ((!ctxt->disableSAX) &&
698: (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1.98 daniel 699: ctxt->sax->warning(ctxt->userData,
700: "PEReference: %%%s; not found\n", name);
701: ctxt->valid = 0;
702: }
1.96 daniel 703: } else {
1.159 daniel 704: if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
705: (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
1.96 daniel 706: /*
1.229 veillard 707: * handle the extra spaces added before and after
1.96 daniel 708: * c.f. http://www.w3.org/TR/REC-xml#as-PE
1.229 veillard 709: * this is done independantly.
1.96 daniel 710: */
711: input = xmlNewEntityInputStream(ctxt, entity);
712: xmlPushInput(ctxt, input);
1.164 daniel 713: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
714: (RAW == '<') && (NXT(1) == '?') &&
715: (NXT(2) == 'x') && (NXT(3) == 'm') &&
716: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 717: xmlParseTextDecl(ctxt);
1.164 daniel 718: }
719: if (ctxt->token == 0)
720: ctxt->token = ' ';
1.96 daniel 721: } else {
722: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
723: ctxt->sax->error(ctxt->userData,
724: "xmlHandlePEReference: %s is not a parameter entity\n",
725: name);
726: ctxt->wellFormed = 0;
1.180 daniel 727: ctxt->disableSAX = 1;
1.96 daniel 728: }
729: }
730: } else {
1.123 daniel 731: ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
1.96 daniel 732: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
733: ctxt->sax->error(ctxt->userData,
734: "xmlHandlePEReference: expecting ';'\n");
735: ctxt->wellFormed = 0;
1.180 daniel 736: ctxt->disableSAX = 1;
1.96 daniel 737: }
1.119 daniel 738: xmlFree(name);
1.97 daniel 739: }
740: }
741:
742: /*
743: * Macro used to grow the current buffer.
744: */
745: #define growBuffer(buffer) { \
746: buffer##_size *= 2; \
1.145 daniel 747: buffer = (xmlChar *) \
748: xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \
1.97 daniel 749: if (buffer == NULL) { \
750: perror("realloc failed"); \
1.145 daniel 751: return(NULL); \
1.97 daniel 752: } \
1.96 daniel 753: }
1.77 daniel 754:
755: /**
1.135 daniel 756: * xmlStringDecodeEntities:
757: * @ctxt: the parser context
758: * @str: the input string
759: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
760: * @end: an end marker xmlChar, 0 if none
761: * @end2: an end marker xmlChar, 0 if none
762: * @end3: an end marker xmlChar, 0 if none
763: *
1.222 veillard 764: * Takes an entity string content and processes it to do the adequate substitutions.
765: *
1.135 daniel 766: * [67] Reference ::= EntityRef | CharRef
767: *
768: * [69] PEReference ::= '%' Name ';'
769: *
770: * Returns A newly allocated string with the substitution done. The caller
771: * must deallocate it !
772: */
773: xmlChar *
774: xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
775: xmlChar end, xmlChar end2, xmlChar end3) {
776: xmlChar *buffer = NULL;
777: int buffer_size = 0;
778:
779: xmlChar *current = NULL;
780: xmlEntityPtr ent;
1.176 daniel 781: int c,l;
782: int nbchars = 0;
1.135 daniel 783:
1.211 veillard 784: if (str == NULL)
785: return(NULL);
786:
1.185 daniel 787: if (ctxt->depth > 40) {
1.230 ! veillard 788: ctxt->errNo = XML_ERR_ENTITY_LOOP;
1.185 daniel 789: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
790: ctxt->sax->error(ctxt->userData,
791: "Detected entity reference loop\n");
792: ctxt->wellFormed = 0;
793: ctxt->disableSAX = 1;
794: return(NULL);
795: }
796:
1.135 daniel 797: /*
798: * allocate a translation buffer.
799: */
1.140 daniel 800: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.135 daniel 801: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
802: if (buffer == NULL) {
803: perror("xmlDecodeEntities: malloc failed");
804: return(NULL);
805: }
806:
807: /*
808: * Ok loop until we reach one of the ending char or a size limit.
1.222 veillard 809: * we are operating on already parsed values.
1.135 daniel 810: */
1.176 daniel 811: c = CUR_SCHAR(str, l);
1.222 veillard 812: while ((c != 0) && (c != end) && /* non input consuming loop */
813: (c != end2) && (c != end3)) {
1.135 daniel 814:
1.176 daniel 815: if (c == 0) break;
816: if ((c == '&') && (str[1] == '#')) {
1.135 daniel 817: int val = xmlParseStringCharRef(ctxt, &str);
1.176 daniel 818: if (val != 0) {
819: COPY_BUF(0,buffer,nbchars,val);
820: }
821: } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1.220 veillard 822: if (xmlParserDebugEntities)
823: fprintf(stderr, "String decoding Entity Reference: %.30s\n",
824: str);
1.135 daniel 825: ent = xmlParseStringEntityRef(ctxt, &str);
1.222 veillard 826: if ((ent != NULL) &&
827: (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1.219 veillard 828: if (ent->content != NULL) {
829: COPY_BUF(0,buffer,nbchars,ent->content[0]);
830: } else {
831: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
832: ctxt->sax->error(ctxt->userData,
833: "internal error entity has no content\n");
834: }
835: } else if ((ent != NULL) && (ent->content != NULL)) {
1.185 daniel 836: xmlChar *rep;
837:
838: ctxt->depth++;
839: rep = xmlStringDecodeEntities(ctxt, ent->content, what,
840: 0, 0, 0);
841: ctxt->depth--;
842: if (rep != NULL) {
843: current = rep;
1.222 veillard 844: while (*current != 0) { /* non input consuming loop */
1.185 daniel 845: buffer[nbchars++] = *current++;
846: if (nbchars >
847: buffer_size - XML_PARSER_BUFFER_SIZE) {
848: growBuffer(buffer);
849: }
1.135 daniel 850: }
1.185 daniel 851: xmlFree(rep);
1.135 daniel 852: }
853: } else if (ent != NULL) {
854: int i = xmlStrlen(ent->name);
855: const xmlChar *cur = ent->name;
856:
1.176 daniel 857: buffer[nbchars++] = '&';
858: if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 859: growBuffer(buffer);
860: }
861: for (;i > 0;i--)
1.176 daniel 862: buffer[nbchars++] = *cur++;
863: buffer[nbchars++] = ';';
1.135 daniel 864: }
1.176 daniel 865: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1.220 veillard 866: if (xmlParserDebugEntities)
867: fprintf(stderr, "String decoding PE Reference: %.30s\n", str);
1.135 daniel 868: ent = xmlParseStringPEReference(ctxt, &str);
869: if (ent != NULL) {
1.185 daniel 870: xmlChar *rep;
871:
872: ctxt->depth++;
873: rep = xmlStringDecodeEntities(ctxt, ent->content, what,
874: 0, 0, 0);
875: ctxt->depth--;
876: if (rep != NULL) {
877: current = rep;
1.222 veillard 878: while (*current != 0) { /* non input consuming loop */
1.185 daniel 879: buffer[nbchars++] = *current++;
880: if (nbchars >
881: buffer_size - XML_PARSER_BUFFER_SIZE) {
882: growBuffer(buffer);
883: }
1.135 daniel 884: }
1.185 daniel 885: xmlFree(rep);
1.135 daniel 886: }
887: }
888: } else {
1.176 daniel 889: COPY_BUF(l,buffer,nbchars,c);
890: str += l;
891: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 892: growBuffer(buffer);
893: }
894: }
1.176 daniel 895: c = CUR_SCHAR(str, l);
1.135 daniel 896: }
1.229 veillard 897: buffer[nbchars++] = 0;
898: return(buffer);
1.172 daniel 899: }
900:
1.229 veillard 901:
902: /************************************************************************
903: * *
904: * Commodity functions to handle encodings *
905: * *
906: ************************************************************************/
907:
1.75 daniel 908: /**
909: * xmlSwitchEncoding:
910: * @ctxt: the parser context
1.124 daniel 911: * @enc: the encoding value (number)
1.75 daniel 912: *
913: * change the input functions when discovering the character encoding
914: * of a given entity.
1.193 daniel 915: *
916: * Returns 0 in case of success, -1 otherwise
1.75 daniel 917: */
1.193 daniel 918: int
1.75 daniel 919: xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
920: {
1.156 daniel 921: xmlCharEncodingHandlerPtr handler;
922:
1.193 daniel 923: switch (enc) {
924: case XML_CHAR_ENCODING_ERROR:
925: ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
926: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
927: ctxt->sax->error(ctxt->userData, "encoding unknown\n");
928: ctxt->wellFormed = 0;
929: ctxt->disableSAX = 1;
930: break;
931: case XML_CHAR_ENCODING_NONE:
932: /* let's assume it's UTF-8 without the XML decl */
1.198 daniel 933: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 934: return(0);
935: case XML_CHAR_ENCODING_UTF8:
936: /* default encoding, no conversion should be needed */
1.198 daniel 937: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 938: return(0);
939: default:
940: break;
941: }
1.156 daniel 942: handler = xmlGetCharEncodingHandler(enc);
1.193 daniel 943: if (handler == NULL) {
944: /*
945: * Default handlers.
946: */
947: switch (enc) {
948: case XML_CHAR_ENCODING_ERROR:
949: ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
950: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
951: ctxt->sax->error(ctxt->userData, "encoding unknown\n");
952: ctxt->wellFormed = 0;
953: ctxt->disableSAX = 1;
1.198 daniel 954: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 955: break;
956: case XML_CHAR_ENCODING_NONE:
957: /* let's assume it's UTF-8 without the XML decl */
1.198 daniel 958: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 959: return(0);
960: case XML_CHAR_ENCODING_UTF8:
1.211 veillard 961: case XML_CHAR_ENCODING_ASCII:
1.193 daniel 962: /* default encoding, no conversion should be needed */
1.198 daniel 963: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 964: return(0);
965: case XML_CHAR_ENCODING_UTF16LE:
966: break;
967: case XML_CHAR_ENCODING_UTF16BE:
968: break;
969: case XML_CHAR_ENCODING_UCS4LE:
970: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
971: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
972: ctxt->sax->error(ctxt->userData,
973: "char encoding USC4 little endian not supported\n");
974: break;
975: case XML_CHAR_ENCODING_UCS4BE:
976: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
977: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
978: ctxt->sax->error(ctxt->userData,
979: "char encoding USC4 big endian not supported\n");
980: break;
981: case XML_CHAR_ENCODING_EBCDIC:
982: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
983: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
984: ctxt->sax->error(ctxt->userData,
985: "char encoding EBCDIC not supported\n");
986: break;
987: case XML_CHAR_ENCODING_UCS4_2143:
988: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
989: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
990: ctxt->sax->error(ctxt->userData,
991: "char encoding UCS4 2143 not supported\n");
992: break;
993: case XML_CHAR_ENCODING_UCS4_3412:
994: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
995: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
996: ctxt->sax->error(ctxt->userData,
997: "char encoding UCS4 3412 not supported\n");
998: break;
999: case XML_CHAR_ENCODING_UCS2:
1000: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1001: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1002: ctxt->sax->error(ctxt->userData,
1003: "char encoding UCS2 not supported\n");
1004: break;
1005: case XML_CHAR_ENCODING_8859_1:
1006: case XML_CHAR_ENCODING_8859_2:
1007: case XML_CHAR_ENCODING_8859_3:
1008: case XML_CHAR_ENCODING_8859_4:
1009: case XML_CHAR_ENCODING_8859_5:
1010: case XML_CHAR_ENCODING_8859_6:
1011: case XML_CHAR_ENCODING_8859_7:
1012: case XML_CHAR_ENCODING_8859_8:
1013: case XML_CHAR_ENCODING_8859_9:
1.195 daniel 1014: /*
1.203 veillard 1015: * We used to keep the internal content in the
1016: * document encoding however this turns being unmaintainable
1017: * So xmlGetCharEncodingHandler() will return non-null
1018: * values for this now.
1.195 daniel 1019: */
1020: if ((ctxt->inputNr == 1) &&
1021: (ctxt->encoding == NULL) &&
1022: (ctxt->input->encoding != NULL)) {
1023: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1024: }
1.198 daniel 1025: ctxt->charset = enc;
1.195 daniel 1026: return(0);
1.193 daniel 1027: case XML_CHAR_ENCODING_2022_JP:
1028: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1029: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1030: ctxt->sax->error(ctxt->userData,
1031: "char encoding ISO-2022-JPnot supported\n");
1032: break;
1033: case XML_CHAR_ENCODING_SHIFT_JIS:
1034: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1035: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1036: ctxt->sax->error(ctxt->userData,
1037: "char encoding Shift_JIS not supported\n");
1038: break;
1039: case XML_CHAR_ENCODING_EUC_JP:
1040: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1041: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1042: ctxt->sax->error(ctxt->userData,
1043: "char encoding EUC-JPnot supported\n");
1044: break;
1045: }
1046: }
1047: if (handler == NULL)
1048: return(-1);
1.198 daniel 1049: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 1050: return(xmlSwitchToEncoding(ctxt, handler));
1051: }
1052:
1053: /**
1054: * xmlSwitchToEncoding:
1055: * @ctxt: the parser context
1056: * @handler: the encoding handler
1057: *
1058: * change the input functions when discovering the character encoding
1059: * of a given entity.
1060: *
1061: * Returns 0 in case of success, -1 otherwise
1062: */
1063: int
1064: xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1065: {
1.194 daniel 1066: int nbchars;
1067:
1.156 daniel 1068: if (handler != NULL) {
1069: if (ctxt->input != NULL) {
1070: if (ctxt->input->buf != NULL) {
1071: if (ctxt->input->buf->encoder != NULL) {
1.193 daniel 1072: if (ctxt->input->buf->encoder == handler)
1073: return(0);
1.197 daniel 1074: /*
1075: * Note: this is a bit dangerous, but that's what it
1076: * takes to use nearly compatible signature for different
1077: * encodings.
1078: */
1079: xmlCharEncCloseFunc(ctxt->input->buf->encoder);
1080: ctxt->input->buf->encoder = handler;
1081: return(0);
1.156 daniel 1082: }
1083: ctxt->input->buf->encoder = handler;
1084:
1085: /*
1.194 daniel 1086: * Is there already some content down the pipe to convert ?
1.156 daniel 1087: */
1088: if ((ctxt->input->buf->buffer != NULL) &&
1089: (ctxt->input->buf->buffer->use > 0)) {
1090: int processed;
1091:
1092: /*
1093: * Specific handling of the Byte Order Mark for
1094: * UTF-16
1095: */
1.195 daniel 1096: if ((handler->name != NULL) &&
1097: (!strcmp(handler->name, "UTF-16LE")) &&
1.156 daniel 1098: (ctxt->input->cur[0] == 0xFF) &&
1099: (ctxt->input->cur[1] == 0xFE)) {
1.194 daniel 1100: ctxt->input->cur += 2;
1.156 daniel 1101: }
1.195 daniel 1102: if ((handler->name != NULL) &&
1103: (!strcmp(handler->name, "UTF-16BE")) &&
1.156 daniel 1104: (ctxt->input->cur[0] == 0xFE) &&
1105: (ctxt->input->cur[1] == 0xFF)) {
1.194 daniel 1106: ctxt->input->cur += 2;
1.156 daniel 1107: }
1108:
1109: /*
1.194 daniel 1110: * Shring the current input buffer.
1111: * Move it as the raw buffer and create a new input buffer
1.156 daniel 1112: */
1113: processed = ctxt->input->cur - ctxt->input->base;
1.194 daniel 1114: xmlBufferShrink(ctxt->input->buf->buffer, processed);
1115: ctxt->input->buf->raw = ctxt->input->buf->buffer;
1116: ctxt->input->buf->buffer = xmlBufferCreate();
1117:
1.219 veillard 1118: if (ctxt->html) {
1119: /*
1120: * converst as much as possbile of the buffer
1121: */
1122: nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1123: ctxt->input->buf->buffer,
1124: ctxt->input->buf->raw);
1125: } else {
1126: /*
1127: * convert just enough to get
1128: * '<?xml version="1.0" encoding="xxx"?>'
1129: * parsed with the autodetected encoding
1130: * into the parser reading buffer.
1131: */
1132: nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
1133: ctxt->input->buf->buffer,
1134: ctxt->input->buf->raw);
1135: }
1.194 daniel 1136: if (nbchars < 0) {
1137: fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
1138: return(-1);
1.156 daniel 1139: }
1.194 daniel 1140: ctxt->input->base =
1141: ctxt->input->cur = ctxt->input->buf->buffer->content;
1.217 veillard 1142:
1.156 daniel 1143: }
1.193 daniel 1144: return(0);
1.156 daniel 1145: } else {
1.209 veillard 1146: if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
1.156 daniel 1147: /*
1148: * When parsing a static memory array one must know the
1149: * size to be able to convert the buffer.
1150: */
1151: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1152: ctxt->sax->error(ctxt->userData,
1153: "xmlSwitchEncoding : no input\n");
1.193 daniel 1154: return(-1);
1.156 daniel 1155: } else {
1.194 daniel 1156: int processed;
1157:
1158: /*
1159: * Shring the current input buffer.
1160: * Move it as the raw buffer and create a new input buffer
1161: */
1162: processed = ctxt->input->cur - ctxt->input->base;
1.209 veillard 1163:
1.194 daniel 1164: ctxt->input->buf->raw = xmlBufferCreate();
1165: xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
1.209 veillard 1166: ctxt->input->length - processed);
1.194 daniel 1167: ctxt->input->buf->buffer = xmlBufferCreate();
1.156 daniel 1168:
1169: /*
1.194 daniel 1170: * convert as much as possible of the raw input
1171: * to the parser reading buffer.
1172: */
1173: nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
1174: ctxt->input->buf->buffer,
1175: ctxt->input->buf->raw);
1176: if (nbchars < 0) {
1177: fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
1178: return(-1);
1.156 daniel 1179: }
1.194 daniel 1180:
1.156 daniel 1181: /*
1182: * Conversion succeeded, get rid of the old buffer
1183: */
1184: if ((ctxt->input->free != NULL) &&
1185: (ctxt->input->base != NULL))
1186: ctxt->input->free((xmlChar *) ctxt->input->base);
1.194 daniel 1187: ctxt->input->base =
1188: ctxt->input->cur = ctxt->input->buf->buffer->content;
1.156 daniel 1189: }
1190: }
1191: } else {
1192: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1193: ctxt->sax->error(ctxt->userData,
1194: "xmlSwitchEncoding : no input\n");
1.193 daniel 1195: return(-1);
1.156 daniel 1196: }
1.195 daniel 1197: /*
1198: * The parsing is now done in UTF8 natively
1199: */
1.198 daniel 1200: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1.193 daniel 1201: } else
1202: return(-1);
1203: return(0);
1.156 daniel 1204:
1.75 daniel 1205: }
1206:
1207: /************************************************************************
1208: * *
1.123 daniel 1209: * Commodity functions to handle xmlChars *
1.28 daniel 1210: * *
1211: ************************************************************************/
1212:
1.50 daniel 1213: /**
1214: * xmlStrndup:
1.123 daniel 1215: * @cur: the input xmlChar *
1.50 daniel 1216: * @len: the len of @cur
1217: *
1.123 daniel 1218: * a strndup for array of xmlChar's
1.68 daniel 1219: *
1.123 daniel 1220: * Returns a new xmlChar * or NULL
1.1 veillard 1221: */
1.123 daniel 1222: xmlChar *
1223: xmlStrndup(const xmlChar *cur, int len) {
1.135 daniel 1224: xmlChar *ret;
1225:
1226: if ((cur == NULL) || (len < 0)) return(NULL);
1.204 veillard 1227: ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1.1 veillard 1228: if (ret == NULL) {
1.86 daniel 1229: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 1230: (len + 1) * (long)sizeof(xmlChar));
1.1 veillard 1231: return(NULL);
1232: }
1.123 daniel 1233: memcpy(ret, cur, len * sizeof(xmlChar));
1.1 veillard 1234: ret[len] = 0;
1235: return(ret);
1236: }
1237:
1.50 daniel 1238: /**
1239: * xmlStrdup:
1.123 daniel 1240: * @cur: the input xmlChar *
1.50 daniel 1241: *
1.152 daniel 1242: * a strdup for array of xmlChar's. Since they are supposed to be
1243: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1244: * a termination mark of '0'.
1.68 daniel 1245: *
1.123 daniel 1246: * Returns a new xmlChar * or NULL
1.1 veillard 1247: */
1.123 daniel 1248: xmlChar *
1249: xmlStrdup(const xmlChar *cur) {
1250: const xmlChar *p = cur;
1.1 veillard 1251:
1.135 daniel 1252: if (cur == NULL) return(NULL);
1.222 veillard 1253: while (*p != 0) p++; /* non input consuming */
1.1 veillard 1254: return(xmlStrndup(cur, p - cur));
1255: }
1256:
1.50 daniel 1257: /**
1258: * xmlCharStrndup:
1259: * @cur: the input char *
1260: * @len: the len of @cur
1261: *
1.123 daniel 1262: * a strndup for char's to xmlChar's
1.68 daniel 1263: *
1.123 daniel 1264: * Returns a new xmlChar * or NULL
1.45 daniel 1265: */
1266:
1.123 daniel 1267: xmlChar *
1.55 daniel 1268: xmlCharStrndup(const char *cur, int len) {
1.45 daniel 1269: int i;
1.135 daniel 1270: xmlChar *ret;
1271:
1272: if ((cur == NULL) || (len < 0)) return(NULL);
1.204 veillard 1273: ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1.45 daniel 1274: if (ret == NULL) {
1.86 daniel 1275: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 1276: (len + 1) * (long)sizeof(xmlChar));
1.45 daniel 1277: return(NULL);
1278: }
1279: for (i = 0;i < len;i++)
1.123 daniel 1280: ret[i] = (xmlChar) cur[i];
1.45 daniel 1281: ret[len] = 0;
1282: return(ret);
1283: }
1284:
1.50 daniel 1285: /**
1286: * xmlCharStrdup:
1287: * @cur: the input char *
1288: * @len: the len of @cur
1289: *
1.123 daniel 1290: * a strdup for char's to xmlChar's
1.68 daniel 1291: *
1.123 daniel 1292: * Returns a new xmlChar * or NULL
1.45 daniel 1293: */
1294:
1.123 daniel 1295: xmlChar *
1.55 daniel 1296: xmlCharStrdup(const char *cur) {
1.45 daniel 1297: const char *p = cur;
1298:
1.135 daniel 1299: if (cur == NULL) return(NULL);
1.222 veillard 1300: while (*p != '\0') p++; /* non input consuming */
1.45 daniel 1301: return(xmlCharStrndup(cur, p - cur));
1302: }
1303:
1.50 daniel 1304: /**
1305: * xmlStrcmp:
1.123 daniel 1306: * @str1: the first xmlChar *
1307: * @str2: the second xmlChar *
1.50 daniel 1308: *
1.123 daniel 1309: * a strcmp for xmlChar's
1.68 daniel 1310: *
1311: * Returns the integer result of the comparison
1.14 veillard 1312: */
1313:
1.55 daniel 1314: int
1.123 daniel 1315: xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1.14 veillard 1316: register int tmp;
1317:
1.229 veillard 1318: if (str1 == str2) return(0);
1.135 daniel 1319: if (str1 == NULL) return(-1);
1320: if (str2 == NULL) return(1);
1.14 veillard 1321: do {
1322: tmp = *str1++ - *str2++;
1323: if (tmp != 0) return(tmp);
1.222 veillard 1324: } while ((*str1 != 0) && (*str2 != 0)); /* non input consuming */
1.14 veillard 1325: return (*str1 - *str2);
1326: }
1327:
1.50 daniel 1328: /**
1329: * xmlStrncmp:
1.123 daniel 1330: * @str1: the first xmlChar *
1331: * @str2: the second xmlChar *
1.50 daniel 1332: * @len: the max comparison length
1333: *
1.123 daniel 1334: * a strncmp for xmlChar's
1.68 daniel 1335: *
1336: * Returns the integer result of the comparison
1.14 veillard 1337: */
1338:
1.55 daniel 1339: int
1.123 daniel 1340: xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1.14 veillard 1341: register int tmp;
1342:
1343: if (len <= 0) return(0);
1.135 daniel 1344: if ((str1 == NULL) && (str2 == NULL)) return(0);
1345: if (str1 == NULL) return(-1);
1346: if (str2 == NULL) return(1);
1.14 veillard 1347: do {
1348: tmp = *str1++ - *str2++;
1349: if (tmp != 0) return(tmp);
1350: len--;
1351: if (len <= 0) return(0);
1.222 veillard 1352: } while ((*str1 != 0) && (*str2 != 0)); /* non input consuming */
1.14 veillard 1353: return (*str1 - *str2);
1354: }
1355:
1.50 daniel 1356: /**
1357: * xmlStrchr:
1.123 daniel 1358: * @str: the xmlChar * array
1359: * @val: the xmlChar to search
1.50 daniel 1360: *
1.123 daniel 1361: * a strchr for xmlChar's
1.68 daniel 1362: *
1.123 daniel 1363: * Returns the xmlChar * for the first occurence or NULL.
1.14 veillard 1364: */
1365:
1.123 daniel 1366: const xmlChar *
1367: xmlStrchr(const xmlChar *str, xmlChar val) {
1.135 daniel 1368: if (str == NULL) return(NULL);
1.222 veillard 1369: while (*str != 0) { /* non input consuming */
1.123 daniel 1370: if (*str == val) return((xmlChar *) str);
1.14 veillard 1371: str++;
1372: }
1373: return(NULL);
1.89 daniel 1374: }
1375:
1376: /**
1377: * xmlStrstr:
1.123 daniel 1378: * @str: the xmlChar * array (haystack)
1379: * @val: the xmlChar to search (needle)
1.89 daniel 1380: *
1.123 daniel 1381: * a strstr for xmlChar's
1.89 daniel 1382: *
1.123 daniel 1383: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 1384: */
1385:
1.123 daniel 1386: const xmlChar *
1387: xmlStrstr(const xmlChar *str, xmlChar *val) {
1.89 daniel 1388: int n;
1389:
1390: if (str == NULL) return(NULL);
1391: if (val == NULL) return(NULL);
1392: n = xmlStrlen(val);
1393:
1394: if (n == 0) return(str);
1.222 veillard 1395: while (*str != 0) { /* non input consuming */
1.89 daniel 1396: if (*str == *val) {
1.123 daniel 1397: if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1.89 daniel 1398: }
1399: str++;
1400: }
1401: return(NULL);
1402: }
1403:
1404: /**
1405: * xmlStrsub:
1.123 daniel 1406: * @str: the xmlChar * array (haystack)
1.89 daniel 1407: * @start: the index of the first char (zero based)
1408: * @len: the length of the substring
1409: *
1410: * Extract a substring of a given string
1411: *
1.123 daniel 1412: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 1413: */
1414:
1.123 daniel 1415: xmlChar *
1416: xmlStrsub(const xmlChar *str, int start, int len) {
1.89 daniel 1417: int i;
1418:
1419: if (str == NULL) return(NULL);
1420: if (start < 0) return(NULL);
1.90 daniel 1421: if (len < 0) return(NULL);
1.89 daniel 1422:
1423: for (i = 0;i < start;i++) {
1424: if (*str == 0) return(NULL);
1425: str++;
1426: }
1427: if (*str == 0) return(NULL);
1428: return(xmlStrndup(str, len));
1.14 veillard 1429: }
1.28 daniel 1430:
1.50 daniel 1431: /**
1432: * xmlStrlen:
1.123 daniel 1433: * @str: the xmlChar * array
1.50 daniel 1434: *
1.127 daniel 1435: * length of a xmlChar's string
1.68 daniel 1436: *
1.123 daniel 1437: * Returns the number of xmlChar contained in the ARRAY.
1.45 daniel 1438: */
1439:
1.55 daniel 1440: int
1.123 daniel 1441: xmlStrlen(const xmlChar *str) {
1.45 daniel 1442: int len = 0;
1443:
1444: if (str == NULL) return(0);
1.222 veillard 1445: while (*str != 0) { /* non input consuming */
1.45 daniel 1446: str++;
1447: len++;
1448: }
1449: return(len);
1450: }
1451:
1.50 daniel 1452: /**
1453: * xmlStrncat:
1.123 daniel 1454: * @cur: the original xmlChar * array
1455: * @add: the xmlChar * array added
1.50 daniel 1456: * @len: the length of @add
1457: *
1.123 daniel 1458: * a strncat for array of xmlChar's
1.68 daniel 1459: *
1.123 daniel 1460: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 1461: */
1462:
1.123 daniel 1463: xmlChar *
1464: xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1.45 daniel 1465: int size;
1.123 daniel 1466: xmlChar *ret;
1.45 daniel 1467:
1468: if ((add == NULL) || (len == 0))
1469: return(cur);
1470: if (cur == NULL)
1471: return(xmlStrndup(add, len));
1472:
1473: size = xmlStrlen(cur);
1.204 veillard 1474: ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1.45 daniel 1475: if (ret == NULL) {
1.86 daniel 1476: fprintf(stderr, "xmlStrncat: realloc of %ld byte failed\n",
1.123 daniel 1477: (size + len + 1) * (long)sizeof(xmlChar));
1.45 daniel 1478: return(cur);
1479: }
1.123 daniel 1480: memcpy(&ret[size], add, len * sizeof(xmlChar));
1.45 daniel 1481: ret[size + len] = 0;
1482: return(ret);
1483: }
1484:
1.50 daniel 1485: /**
1486: * xmlStrcat:
1.123 daniel 1487: * @cur: the original xmlChar * array
1488: * @add: the xmlChar * array added
1.50 daniel 1489: *
1.152 daniel 1490: * a strcat for array of xmlChar's. Since they are supposed to be
1491: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1492: * a termination mark of '0'.
1.68 daniel 1493: *
1.123 daniel 1494: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 1495: */
1.123 daniel 1496: xmlChar *
1497: xmlStrcat(xmlChar *cur, const xmlChar *add) {
1498: const xmlChar *p = add;
1.45 daniel 1499:
1500: if (add == NULL) return(cur);
1501: if (cur == NULL)
1502: return(xmlStrdup(add));
1503:
1.222 veillard 1504: while (*p != 0) p++; /* non input consuming */
1.45 daniel 1505: return(xmlStrncat(cur, add, p - add));
1506: }
1507:
1508: /************************************************************************
1509: * *
1510: * Commodity functions, cleanup needed ? *
1511: * *
1512: ************************************************************************/
1513:
1.50 daniel 1514: /**
1515: * areBlanks:
1516: * @ctxt: an XML parser context
1.123 daniel 1517: * @str: a xmlChar *
1.50 daniel 1518: * @len: the size of @str
1519: *
1.45 daniel 1520: * Is this a sequence of blank chars that one can ignore ?
1.50 daniel 1521: *
1.68 daniel 1522: * Returns 1 if ignorable 0 otherwise.
1.45 daniel 1523: */
1524:
1.123 daniel 1525: static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1.104 daniel 1526: int i, ret;
1.45 daniel 1527: xmlNodePtr lastChild;
1528:
1.176 daniel 1529: /*
1530: * Check for xml:space value.
1531: */
1532: if (*(ctxt->space) == 1)
1533: return(0);
1534:
1535: /*
1536: * Check that the string is made of blanks
1537: */
1.45 daniel 1538: for (i = 0;i < len;i++)
1539: if (!(IS_BLANK(str[i]))) return(0);
1540:
1.176 daniel 1541: /*
1542: * Look if the element is mixed content in the Dtd if available
1543: */
1.104 daniel 1544: if (ctxt->myDoc != NULL) {
1545: ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1546: if (ret == 0) return(1);
1547: if (ret == 1) return(0);
1548: }
1.176 daniel 1549:
1.104 daniel 1550: /*
1.176 daniel 1551: * Otherwise, heuristic :-\
1.104 daniel 1552: */
1.179 daniel 1553: if (ctxt->keepBlanks)
1554: return(0);
1555: if (RAW != '<') return(0);
1556: if (ctxt->node == NULL) return(0);
1557: if ((ctxt->node->children == NULL) &&
1558: (RAW == '<') && (NXT(1) == '/')) return(0);
1559:
1.45 daniel 1560: lastChild = xmlGetLastChild(ctxt->node);
1561: if (lastChild == NULL) {
1562: if (ctxt->node->content != NULL) return(0);
1563: } else if (xmlNodeIsText(lastChild))
1564: return(0);
1.157 daniel 1565: else if ((ctxt->node->children != NULL) &&
1566: (xmlNodeIsText(ctxt->node->children)))
1.104 daniel 1567: return(0);
1.45 daniel 1568: return(1);
1569: }
1570:
1571: /*
1572: * Forward declarations for recursive behaviour.
1573: */
1.77 daniel 1574: void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1575: void xmlParseReference(xmlParserCtxtPtr ctxt);
1.45 daniel 1576:
1.28 daniel 1577: /************************************************************************
1578: * *
1579: * Extra stuff for namespace support *
1580: * Relates to http://www.w3.org/TR/WD-xml-names *
1581: * *
1582: ************************************************************************/
1583:
1.50 daniel 1584: /**
1.72 daniel 1585: * xmlSplitQName:
1.162 daniel 1586: * @ctxt: an XML parser context
1.72 daniel 1587: * @name: an XML parser context
1.123 daniel 1588: * @prefix: a xmlChar **
1.72 daniel 1589: *
1.206 veillard 1590: * parse an UTF8 encoded XML qualified name string
1.72 daniel 1591: *
1592: * [NS 5] QName ::= (Prefix ':')? LocalPart
1593: *
1594: * [NS 6] Prefix ::= NCName
1595: *
1596: * [NS 7] LocalPart ::= NCName
1597: *
1.127 daniel 1598: * Returns the local part, and prefix is updated
1.72 daniel 1599: * to get the Prefix if any.
1600: */
1601:
1.123 daniel 1602: xmlChar *
1.162 daniel 1603: xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1604: xmlChar buf[XML_MAX_NAMELEN + 5];
1.222 veillard 1605: xmlChar *buffer = NULL;
1.162 daniel 1606: int len = 0;
1.222 veillard 1607: int max = XML_MAX_NAMELEN;
1.123 daniel 1608: xmlChar *ret = NULL;
1609: const xmlChar *cur = name;
1.206 veillard 1610: int c;
1.72 daniel 1611:
1612: *prefix = NULL;
1.113 daniel 1613:
1614: /* xml: prefix is not really a namespace */
1615: if ((cur[0] == 'x') && (cur[1] == 'm') &&
1616: (cur[2] == 'l') && (cur[3] == ':'))
1617: return(xmlStrdup(name));
1618:
1.162 daniel 1619: /* nasty but valid */
1620: if (cur[0] == ':')
1621: return(xmlStrdup(name));
1622:
1.206 veillard 1623: c = *cur++;
1.222 veillard 1624: while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1.206 veillard 1625: buf[len++] = c;
1626: c = *cur++;
1.162 daniel 1627: }
1.222 veillard 1628: if (len >= max) {
1629: /*
1630: * Okay someone managed to make a huge name, so he's ready to pay
1631: * for the processing speed.
1632: */
1633: max = len * 2;
1634:
1635: buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1636: if (buffer == NULL) {
1637: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1638: ctxt->sax->error(ctxt->userData,
1639: "xmlSplitQName: out of memory\n");
1640: return(NULL);
1641: }
1642: memcpy(buffer, buf, len);
1643: while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1644: if (len + 10 > max) {
1645: max *= 2;
1646: buffer = (xmlChar *) xmlRealloc(buffer,
1647: max * sizeof(xmlChar));
1648: if (buffer == NULL) {
1649: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1650: ctxt->sax->error(ctxt->userData,
1651: "xmlSplitQName: out of memory\n");
1652: return(NULL);
1653: }
1654: }
1655: buffer[len++] = c;
1656: c = *cur++;
1657: }
1658: buffer[len] = 0;
1659: }
1.72 daniel 1660:
1.222 veillard 1661: if (buffer == NULL)
1662: ret = xmlStrndup(buf, len);
1663: else {
1664: ret = buffer;
1665: buffer = NULL;
1666: max = XML_MAX_NAMELEN;
1667: }
1668:
1.72 daniel 1669:
1.162 daniel 1670: if (c == ':') {
1.206 veillard 1671: c = *cur++;
1672: if (c == 0) return(ret);
1.72 daniel 1673: *prefix = ret;
1.162 daniel 1674: len = 0;
1.72 daniel 1675:
1.222 veillard 1676: while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1.206 veillard 1677: buf[len++] = c;
1678: c = *cur++;
1.162 daniel 1679: }
1.222 veillard 1680: if (len >= max) {
1681: /*
1682: * Okay someone managed to make a huge name, so he's ready to pay
1683: * for the processing speed.
1684: */
1.229 veillard 1685: max = len * 2;
1686:
1687: buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1688: if (buffer == NULL) {
1.55 daniel 1689: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 1690: ctxt->sax->error(ctxt->userData,
1.229 veillard 1691: "xmlSplitQName: out of memory\n");
1692: return(NULL);
1693: }
1694: memcpy(buffer, buf, len);
1695: while (c != 0) { /* tested bigname2.xml */
1696: if (len + 10 > max) {
1697: max *= 2;
1698: buffer = (xmlChar *) xmlRealloc(buffer,
1699: max * sizeof(xmlChar));
1700: if (buffer == NULL) {
1701: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1702: ctxt->sax->error(ctxt->userData,
1703: "xmlSplitQName: out of memory\n");
1704: return(NULL);
1705: }
1706: }
1707: buffer[len++] = c;
1708: c = *cur++;
1.122 daniel 1709: }
1.229 veillard 1710: buffer[len] = 0;
1711: }
1712:
1713: if (buffer == NULL)
1714: ret = xmlStrndup(buf, len);
1715: else {
1716: ret = buffer;
1717: }
1.45 daniel 1718: }
1719:
1.229 veillard 1720: return(ret);
1.45 daniel 1721: }
1722:
1.28 daniel 1723: /************************************************************************
1724: * *
1725: * The parser itself *
1726: * Relates to http://www.w3.org/TR/REC-xml *
1727: * *
1728: ************************************************************************/
1.14 veillard 1729:
1.50 daniel 1730: /**
1731: * xmlParseName:
1732: * @ctxt: an XML parser context
1733: *
1734: * parse an XML name.
1.22 daniel 1735: *
1736: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1737: * CombiningChar | Extender
1738: *
1739: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1740: *
1741: * [6] Names ::= Name (S Name)*
1.68 daniel 1742: *
1743: * Returns the Name parsed or NULL
1.1 veillard 1744: */
1745:
1.123 daniel 1746: xmlChar *
1.55 daniel 1747: xmlParseName(xmlParserCtxtPtr ctxt) {
1.160 daniel 1748: xmlChar buf[XML_MAX_NAMELEN + 5];
1749: int len = 0, l;
1750: int c;
1.222 veillard 1751: int count = 0;
1.1 veillard 1752:
1.91 daniel 1753: GROW;
1.160 daniel 1754: c = CUR_CHAR(l);
1.190 daniel 1755: if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1756: (!IS_LETTER(c) && (c != '_') &&
1757: (c != ':'))) {
1.91 daniel 1758: return(NULL);
1759: }
1.40 daniel 1760:
1.222 veillard 1761: while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1.190 daniel 1762: ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1763: (c == '.') || (c == '-') ||
1764: (c == '_') || (c == ':') ||
1765: (IS_COMBINING(c)) ||
1766: (IS_EXTENDER(c)))) {
1.222 veillard 1767: if (count++ > 100) {
1768: count = 0;
1769: GROW;
1770: }
1.160 daniel 1771: COPY_BUF(l,buf,len,c);
1772: NEXTL(l);
1773: c = CUR_CHAR(l);
1.91 daniel 1774: if (len >= XML_MAX_NAMELEN) {
1.222 veillard 1775: /*
1776: * Okay someone managed to make a huge name, so he's ready to pay
1777: * for the processing speed.
1778: */
1779: xmlChar *buffer;
1780: int max = len * 2;
1781:
1782: buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1783: if (buffer == NULL) {
1784: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1785: ctxt->sax->error(ctxt->userData,
1786: "xmlParseName: out of memory\n");
1787: return(NULL);
1788: }
1789: memcpy(buffer, buf, len);
1790: while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1.160 daniel 1791: (c == '.') || (c == '-') ||
1792: (c == '_') || (c == ':') ||
1793: (IS_COMBINING(c)) ||
1794: (IS_EXTENDER(c))) {
1.222 veillard 1795: if (count++ > 100) {
1796: count = 0;
1797: GROW;
1798: }
1799: if (len + 10 > max) {
1800: max *= 2;
1801: buffer = (xmlChar *) xmlRealloc(buffer,
1802: max * sizeof(xmlChar));
1803: if (buffer == NULL) {
1804: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1805: ctxt->sax->error(ctxt->userData,
1806: "xmlParseName: out of memory\n");
1807: return(NULL);
1808: }
1809: }
1810: COPY_BUF(l,buffer,len,c);
1.160 daniel 1811: NEXTL(l);
1812: c = CUR_CHAR(l);
1.97 daniel 1813: }
1.222 veillard 1814: buffer[len] = 0;
1815: return(buffer);
1.91 daniel 1816: }
1817: }
1818: return(xmlStrndup(buf, len));
1.22 daniel 1819: }
1820:
1.50 daniel 1821: /**
1.135 daniel 1822: * xmlParseStringName:
1823: * @ctxt: an XML parser context
1.229 veillard 1824: * @str: a pointer to the string pointer (IN/OUT)
1.135 daniel 1825: *
1826: * parse an XML name.
1827: *
1828: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1829: * CombiningChar | Extender
1830: *
1831: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1832: *
1833: * [6] Names ::= Name (S Name)*
1834: *
1835: * Returns the Name parsed or NULL. The str pointer
1836: * is updated to the current location in the string.
1837: */
1838:
1839: xmlChar *
1840: xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1.176 daniel 1841: xmlChar buf[XML_MAX_NAMELEN + 5];
1842: const xmlChar *cur = *str;
1843: int len = 0, l;
1844: int c;
1.135 daniel 1845:
1.176 daniel 1846: c = CUR_SCHAR(cur, l);
1847: if (!IS_LETTER(c) && (c != '_') &&
1848: (c != ':')) {
1.135 daniel 1849: return(NULL);
1850: }
1851:
1.222 veillard 1852: while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1.176 daniel 1853: (c == '.') || (c == '-') ||
1854: (c == '_') || (c == ':') ||
1855: (IS_COMBINING(c)) ||
1856: (IS_EXTENDER(c))) {
1857: COPY_BUF(l,buf,len,c);
1858: cur += l;
1859: c = CUR_SCHAR(cur, l);
1.222 veillard 1860: if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1861: /*
1862: * Okay someone managed to make a huge name, so he's ready to pay
1863: * for the processing speed.
1864: */
1865: xmlChar *buffer;
1866: int max = len * 2;
1867:
1868: buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1869: if (buffer == NULL) {
1870: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1871: ctxt->sax->error(ctxt->userData,
1872: "xmlParseStringName: out of memory\n");
1873: return(NULL);
1874: }
1875: memcpy(buffer, buf, len);
1876: while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1.176 daniel 1877: (c == '.') || (c == '-') ||
1878: (c == '_') || (c == ':') ||
1879: (IS_COMBINING(c)) ||
1880: (IS_EXTENDER(c))) {
1.222 veillard 1881: if (len + 10 > max) {
1882: max *= 2;
1883: buffer = (xmlChar *) xmlRealloc(buffer,
1884: max * sizeof(xmlChar));
1885: if (buffer == NULL) {
1886: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1887: ctxt->sax->error(ctxt->userData,
1888: "xmlParseStringName: out of memory\n");
1889: return(NULL);
1890: }
1891: }
1892: COPY_BUF(l,buffer,len,c);
1.176 daniel 1893: cur += l;
1894: c = CUR_SCHAR(cur, l);
1895: }
1.222 veillard 1896: buffer[len] = 0;
1897: *str = cur;
1898: return(buffer);
1.176 daniel 1899: }
1.135 daniel 1900: }
1.176 daniel 1901: *str = cur;
1902: return(xmlStrndup(buf, len));
1.135 daniel 1903: }
1904:
1905: /**
1.50 daniel 1906: * xmlParseNmtoken:
1907: * @ctxt: an XML parser context
1908: *
1909: * parse an XML Nmtoken.
1.22 daniel 1910: *
1911: * [7] Nmtoken ::= (NameChar)+
1912: *
1913: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1.68 daniel 1914: *
1915: * Returns the Nmtoken parsed or NULL
1.22 daniel 1916: */
1917:
1.123 daniel 1918: xmlChar *
1.55 daniel 1919: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1.222 veillard 1920: xmlChar buf[XML_MAX_NAMELEN + 5];
1921: int len = 0, l;
1922: int c;
1923: int count = 0;
1.22 daniel 1924:
1.91 daniel 1925: GROW;
1.160 daniel 1926: c = CUR_CHAR(l);
1.222 veillard 1927:
1928: while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1.160 daniel 1929: (c == '.') || (c == '-') ||
1930: (c == '_') || (c == ':') ||
1931: (IS_COMBINING(c)) ||
1932: (IS_EXTENDER(c))) {
1.222 veillard 1933: if (count++ > 100) {
1934: count = 0;
1935: GROW;
1936: }
1.160 daniel 1937: COPY_BUF(l,buf,len,c);
1938: NEXTL(l);
1939: c = CUR_CHAR(l);
1.91 daniel 1940: if (len >= XML_MAX_NAMELEN) {
1.222 veillard 1941: /*
1942: * Okay someone managed to make a huge token, so he's ready to pay
1943: * for the processing speed.
1944: */
1945: xmlChar *buffer;
1946: int max = len * 2;
1947:
1948: buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1949: if (buffer == NULL) {
1950: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1951: ctxt->sax->error(ctxt->userData,
1952: "xmlParseNmtoken: out of memory\n");
1953: return(NULL);
1954: }
1955: memcpy(buffer, buf, len);
1956: while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1.160 daniel 1957: (c == '.') || (c == '-') ||
1958: (c == '_') || (c == ':') ||
1959: (IS_COMBINING(c)) ||
1960: (IS_EXTENDER(c))) {
1.222 veillard 1961: if (count++ > 100) {
1962: count = 0;
1963: GROW;
1964: }
1965: if (len + 10 > max) {
1966: max *= 2;
1967: buffer = (xmlChar *) xmlRealloc(buffer,
1968: max * sizeof(xmlChar));
1969: if (buffer == NULL) {
1970: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1971: ctxt->sax->error(ctxt->userData,
1972: "xmlParseName: out of memory\n");
1973: return(NULL);
1974: }
1975: }
1976: COPY_BUF(l,buffer,len,c);
1.160 daniel 1977: NEXTL(l);
1978: c = CUR_CHAR(l);
1979: }
1.222 veillard 1980: buffer[len] = 0;
1981: return(buffer);
1.91 daniel 1982: }
1983: }
1.168 daniel 1984: if (len == 0)
1985: return(NULL);
1.91 daniel 1986: return(xmlStrndup(buf, len));
1.1 veillard 1987: }
1988:
1.50 daniel 1989: /**
1990: * xmlParseEntityValue:
1991: * @ctxt: an XML parser context
1.78 daniel 1992: * @orig: if non-NULL store a copy of the original entity value
1.50 daniel 1993: *
1.229 veillard 1994: * parse a value for ENTITY declarations
1.24 daniel 1995: *
1996: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1997: * "'" ([^%&'] | PEReference | Reference)* "'"
1.68 daniel 1998: *
1.78 daniel 1999: * Returns the EntityValue parsed with reference substitued or NULL
1.24 daniel 2000: */
2001:
1.123 daniel 2002: xmlChar *
2003: xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1.135 daniel 2004: xmlChar *buf = NULL;
2005: int len = 0;
1.140 daniel 2006: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 2007: int c, l;
1.135 daniel 2008: xmlChar stop;
1.123 daniel 2009: xmlChar *ret = NULL;
1.176 daniel 2010: const xmlChar *cur = NULL;
1.98 daniel 2011: xmlParserInputPtr input;
1.24 daniel 2012:
1.152 daniel 2013: if (RAW == '"') stop = '"';
2014: else if (RAW == '\'') stop = '\'';
1.135 daniel 2015: else {
2016: ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2017: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2018: ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2019: ctxt->wellFormed = 0;
1.180 daniel 2020: ctxt->disableSAX = 1;
1.135 daniel 2021: return(NULL);
2022: }
2023: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2024: if (buf == NULL) {
2025: fprintf(stderr, "malloc of %d byte failed\n", size);
2026: return(NULL);
2027: }
1.94 daniel 2028:
1.135 daniel 2029: /*
2030: * The content of the entity definition is copied in a buffer.
2031: */
1.94 daniel 2032:
1.135 daniel 2033: ctxt->instate = XML_PARSER_ENTITY_VALUE;
2034: input = ctxt->input;
2035: GROW;
2036: NEXT;
1.152 daniel 2037: c = CUR_CHAR(l);
1.135 daniel 2038: /*
2039: * NOTE: 4.4.5 Included in Literal
2040: * When a parameter entity reference appears in a literal entity
2041: * value, ... a single or double quote character in the replacement
2042: * text is always treated as a normal data character and will not
2043: * terminate the literal.
2044: * In practice it means we stop the loop only when back at parsing
2045: * the initial entity and the quote is found
2046: */
1.222 veillard 2047: while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2048: (ctxt->input != input))) {
1.152 daniel 2049: if (len + 5 >= size) {
1.135 daniel 2050: size *= 2;
1.204 veillard 2051: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 2052: if (buf == NULL) {
2053: fprintf(stderr, "realloc of %d byte failed\n", size);
2054: return(NULL);
1.94 daniel 2055: }
1.79 daniel 2056: }
1.152 daniel 2057: COPY_BUF(l,buf,len,c);
2058: NEXTL(l);
1.98 daniel 2059: /*
1.135 daniel 2060: * Pop-up of finished entities.
1.98 daniel 2061: */
1.222 veillard 2062: while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
1.135 daniel 2063: xmlPopInput(ctxt);
1.152 daniel 2064:
1.221 veillard 2065: GROW;
1.152 daniel 2066: c = CUR_CHAR(l);
1.135 daniel 2067: if (c == 0) {
1.94 daniel 2068: GROW;
1.152 daniel 2069: c = CUR_CHAR(l);
1.79 daniel 2070: }
1.135 daniel 2071: }
2072: buf[len] = 0;
2073:
2074: /*
1.176 daniel 2075: * Raise problem w.r.t. '&' and '%' being used in non-entities
2076: * reference constructs. Note Charref will be handled in
2077: * xmlStringDecodeEntities()
2078: */
2079: cur = buf;
1.223 veillard 2080: while (*cur != 0) { /* non input consuming */
1.176 daniel 2081: if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2082: xmlChar *name;
2083: xmlChar tmp = *cur;
2084:
2085: cur++;
2086: name = xmlParseStringName(ctxt, &cur);
2087: if ((name == NULL) || (*cur != ';')) {
1.230 ! veillard 2088: ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
1.176 daniel 2089: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2090: ctxt->sax->error(ctxt->userData,
2091: "EntityValue: '%c' forbidden except for entities references\n",
2092: tmp);
2093: ctxt->wellFormed = 0;
1.180 daniel 2094: ctxt->disableSAX = 1;
1.176 daniel 2095: }
2096: if ((ctxt->inSubset == 1) && (tmp == '%')) {
1.230 ! veillard 2097: ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
1.176 daniel 2098: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2099: ctxt->sax->error(ctxt->userData,
2100: "EntityValue: PEReferences forbidden in internal subset\n",
2101: tmp);
2102: ctxt->wellFormed = 0;
1.180 daniel 2103: ctxt->disableSAX = 1;
1.176 daniel 2104: }
2105: if (name != NULL)
2106: xmlFree(name);
2107: }
2108: cur++;
2109: }
2110:
2111: /*
1.135 daniel 2112: * Then PEReference entities are substituted.
2113: */
2114: if (c != stop) {
2115: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.55 daniel 2116: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 2117: ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
1.59 daniel 2118: ctxt->wellFormed = 0;
1.180 daniel 2119: ctxt->disableSAX = 1;
1.170 daniel 2120: xmlFree(buf);
1.135 daniel 2121: } else {
2122: NEXT;
2123: /*
2124: * NOTE: 4.4.7 Bypassed
2125: * When a general entity reference appears in the EntityValue in
2126: * an entity declaration, it is bypassed and left as is.
1.176 daniel 2127: * so XML_SUBSTITUTE_REF is not set here.
1.135 daniel 2128: */
2129: ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2130: 0, 0, 0);
2131: if (orig != NULL)
2132: *orig = buf;
2133: else
2134: xmlFree(buf);
1.24 daniel 2135: }
2136:
2137: return(ret);
2138: }
2139:
1.50 daniel 2140: /**
2141: * xmlParseAttValue:
2142: * @ctxt: an XML parser context
2143: *
2144: * parse a value for an attribute
1.78 daniel 2145: * Note: the parser won't do substitution of entities here, this
1.113 daniel 2146: * will be handled later in xmlStringGetNodeList
1.29 daniel 2147: *
2148: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2149: * "'" ([^<&'] | Reference)* "'"
1.68 daniel 2150: *
1.129 daniel 2151: * 3.3.3 Attribute-Value Normalization:
2152: * Before the value of an attribute is passed to the application or
2153: * checked for validity, the XML processor must normalize it as follows:
2154: * - a character reference is processed by appending the referenced
2155: * character to the attribute value
2156: * - an entity reference is processed by recursively processing the
2157: * replacement text of the entity
2158: * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2159: * appending #x20 to the normalized value, except that only a single
2160: * #x20 is appended for a "#xD#xA" sequence that is part of an external
2161: * parsed entity or the literal entity value of an internal parsed entity
2162: * - other characters are processed by appending them to the normalized value
1.130 daniel 2163: * If the declared value is not CDATA, then the XML processor must further
2164: * process the normalized attribute value by discarding any leading and
2165: * trailing space (#x20) characters, and by replacing sequences of space
2166: * (#x20) characters by a single space (#x20) character.
2167: * All attributes for which no declaration has been read should be treated
2168: * by a non-validating parser as if declared CDATA.
1.129 daniel 2169: *
2170: * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
1.29 daniel 2171: */
2172:
1.123 daniel 2173: xmlChar *
1.55 daniel 2174: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.129 daniel 2175: xmlChar limit = 0;
1.198 daniel 2176: xmlChar *buf = NULL;
2177: int len = 0;
2178: int buf_size = 0;
2179: int c, l;
1.129 daniel 2180: xmlChar *current = NULL;
2181: xmlEntityPtr ent;
2182:
1.29 daniel 2183:
1.91 daniel 2184: SHRINK;
1.151 daniel 2185: if (NXT(0) == '"') {
1.96 daniel 2186: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.129 daniel 2187: limit = '"';
1.40 daniel 2188: NEXT;
1.151 daniel 2189: } else if (NXT(0) == '\'') {
1.129 daniel 2190: limit = '\'';
1.96 daniel 2191: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.40 daniel 2192: NEXT;
1.29 daniel 2193: } else {
1.123 daniel 2194: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
1.55 daniel 2195: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2196: ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
1.59 daniel 2197: ctxt->wellFormed = 0;
1.180 daniel 2198: ctxt->disableSAX = 1;
1.129 daniel 2199: return(NULL);
1.29 daniel 2200: }
2201:
1.129 daniel 2202: /*
2203: * allocate a translation buffer.
2204: */
1.198 daniel 2205: buf_size = XML_PARSER_BUFFER_SIZE;
2206: buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2207: if (buf == NULL) {
1.129 daniel 2208: perror("xmlParseAttValue: malloc failed");
2209: return(NULL);
2210: }
2211:
2212: /*
2213: * Ok loop until we reach one of the ending char or a size limit.
2214: */
1.198 daniel 2215: c = CUR_CHAR(l);
1.223 veillard 2216: while (((NXT(0) != limit) && /* checked */
2217: (c != '<')) || (ctxt->token != 0)) {
1.198 daniel 2218: if (c == 0) break;
1.205 veillard 2219: if (ctxt->token == '&') {
1.229 veillard 2220: /*
2221: * The reparsing will be done in xmlStringGetNodeList()
2222: * called by the attribute() function in SAX.c
2223: */
1.205 veillard 2224: static xmlChar buffer[6] = "&";
2225:
2226: if (len > buf_size - 10) {
2227: growBuffer(buf);
2228: }
2229: current = &buffer[0];
1.223 veillard 2230: while (*current != 0) { /* non input consuming */
1.205 veillard 2231: buf[len++] = *current++;
2232: }
2233: ctxt->token = 0;
2234: } else if ((c == '&') && (NXT(1) == '#')) {
1.129 daniel 2235: int val = xmlParseCharRef(ctxt);
1.229 veillard 2236: if (val == '&') {
2237: /*
2238: * The reparsing will be done in xmlStringGetNodeList()
2239: * called by the attribute() function in SAX.c
2240: */
2241: static xmlChar buffer[6] = "&";
2242:
2243: if (len > buf_size - 10) {
2244: growBuffer(buf);
2245: }
2246: current = &buffer[0];
2247: while (*current != 0) { /* non input consuming */
2248: buf[len++] = *current++;
2249: }
2250: } else {
2251: COPY_BUF(l,buf,len,val);
2252: NEXTL(l);
2253: }
1.198 daniel 2254: } else if (c == '&') {
1.129 daniel 2255: ent = xmlParseEntityRef(ctxt);
2256: if ((ent != NULL) &&
2257: (ctxt->replaceEntities != 0)) {
1.185 daniel 2258: xmlChar *rep;
2259:
1.186 daniel 2260: if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2261: rep = xmlStringDecodeEntities(ctxt, ent->content,
1.185 daniel 2262: XML_SUBSTITUTE_REF, 0, 0, 0);
1.186 daniel 2263: if (rep != NULL) {
2264: current = rep;
1.223 veillard 2265: while (*current != 0) { /* non input consuming */
1.198 daniel 2266: buf[len++] = *current++;
2267: if (len > buf_size - 10) {
2268: growBuffer(buf);
1.186 daniel 2269: }
1.185 daniel 2270: }
1.186 daniel 2271: xmlFree(rep);
1.129 daniel 2272: }
1.186 daniel 2273: } else {
2274: if (ent->content != NULL)
1.198 daniel 2275: buf[len++] = ent->content[0];
1.129 daniel 2276: }
2277: } else if (ent != NULL) {
2278: int i = xmlStrlen(ent->name);
2279: const xmlChar *cur = ent->name;
2280:
1.186 daniel 2281: /*
2282: * This may look absurd but is needed to detect
2283: * entities problems
2284: */
1.211 veillard 2285: if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2286: (ent->content != NULL)) {
1.186 daniel 2287: xmlChar *rep;
2288: rep = xmlStringDecodeEntities(ctxt, ent->content,
2289: XML_SUBSTITUTE_REF, 0, 0, 0);
2290: if (rep != NULL)
2291: xmlFree(rep);
2292: }
2293:
2294: /*
2295: * Just output the reference
2296: */
1.198 daniel 2297: buf[len++] = '&';
2298: if (len > buf_size - i - 10) {
2299: growBuffer(buf);
1.129 daniel 2300: }
2301: for (;i > 0;i--)
1.198 daniel 2302: buf[len++] = *cur++;
2303: buf[len++] = ';';
1.129 daniel 2304: }
2305: } else {
1.198 daniel 2306: if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2307: COPY_BUF(l,buf,len,0x20);
2308: if (len > buf_size - 10) {
2309: growBuffer(buf);
1.129 daniel 2310: }
2311: } else {
1.198 daniel 2312: COPY_BUF(l,buf,len,c);
2313: if (len > buf_size - 10) {
2314: growBuffer(buf);
1.129 daniel 2315: }
2316: }
1.198 daniel 2317: NEXTL(l);
1.129 daniel 2318: }
1.198 daniel 2319: GROW;
2320: c = CUR_CHAR(l);
1.129 daniel 2321: }
1.198 daniel 2322: buf[len++] = 0;
1.152 daniel 2323: if (RAW == '<') {
1.230 ! veillard 2324: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.129 daniel 2325: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2326: ctxt->sax->error(ctxt->userData,
2327: "Unescaped '<' not allowed in attributes values\n");
2328: ctxt->wellFormed = 0;
1.180 daniel 2329: ctxt->disableSAX = 1;
1.152 daniel 2330: } else if (RAW != limit) {
1.230 ! veillard 2331: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
1.129 daniel 2332: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2333: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2334: ctxt->wellFormed = 0;
1.180 daniel 2335: ctxt->disableSAX = 1;
1.129 daniel 2336: } else
2337: NEXT;
1.198 daniel 2338: return(buf);
1.29 daniel 2339: }
2340:
1.50 daniel 2341: /**
2342: * xmlParseSystemLiteral:
2343: * @ctxt: an XML parser context
2344: *
2345: * parse an XML Literal
1.21 daniel 2346: *
1.22 daniel 2347: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.68 daniel 2348: *
2349: * Returns the SystemLiteral parsed or NULL
1.21 daniel 2350: */
2351:
1.123 daniel 2352: xmlChar *
1.55 daniel 2353: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 2354: xmlChar *buf = NULL;
2355: int len = 0;
1.140 daniel 2356: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 2357: int cur, l;
1.135 daniel 2358: xmlChar stop;
1.168 daniel 2359: int state = ctxt->instate;
1.223 veillard 2360: int count = 0;
1.21 daniel 2361:
1.91 daniel 2362: SHRINK;
1.152 daniel 2363: if (RAW == '"') {
1.40 daniel 2364: NEXT;
1.135 daniel 2365: stop = '"';
1.152 daniel 2366: } else if (RAW == '\'') {
1.40 daniel 2367: NEXT;
1.135 daniel 2368: stop = '\'';
1.21 daniel 2369: } else {
1.230 ! veillard 2370: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.55 daniel 2371: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2372: ctxt->sax->error(ctxt->userData,
2373: "SystemLiteral \" or ' expected\n");
1.59 daniel 2374: ctxt->wellFormed = 0;
1.180 daniel 2375: ctxt->disableSAX = 1;
1.135 daniel 2376: return(NULL);
1.21 daniel 2377: }
2378:
1.135 daniel 2379: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2380: if (buf == NULL) {
2381: fprintf(stderr, "malloc of %d byte failed\n", size);
2382: return(NULL);
2383: }
1.168 daniel 2384: ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
1.152 daniel 2385: cur = CUR_CHAR(l);
1.223 veillard 2386: while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
1.152 daniel 2387: if (len + 5 >= size) {
1.135 daniel 2388: size *= 2;
1.204 veillard 2389: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 2390: if (buf == NULL) {
2391: fprintf(stderr, "realloc of %d byte failed\n", size);
1.204 veillard 2392: ctxt->instate = (xmlParserInputState) state;
1.135 daniel 2393: return(NULL);
2394: }
2395: }
1.223 veillard 2396: count++;
2397: if (count > 50) {
2398: GROW;
2399: count = 0;
2400: }
1.152 daniel 2401: COPY_BUF(l,buf,len,cur);
2402: NEXTL(l);
2403: cur = CUR_CHAR(l);
1.135 daniel 2404: if (cur == 0) {
2405: GROW;
2406: SHRINK;
1.152 daniel 2407: cur = CUR_CHAR(l);
1.135 daniel 2408: }
2409: }
2410: buf[len] = 0;
1.204 veillard 2411: ctxt->instate = (xmlParserInputState) state;
1.135 daniel 2412: if (!IS_CHAR(cur)) {
1.230 ! veillard 2413: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
1.135 daniel 2414: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2415: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2416: ctxt->wellFormed = 0;
1.180 daniel 2417: ctxt->disableSAX = 1;
1.135 daniel 2418: } else {
2419: NEXT;
2420: }
2421: return(buf);
1.21 daniel 2422: }
2423:
1.50 daniel 2424: /**
2425: * xmlParsePubidLiteral:
2426: * @ctxt: an XML parser context
1.21 daniel 2427: *
1.50 daniel 2428: * parse an XML public literal
1.68 daniel 2429: *
2430: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2431: *
2432: * Returns the PubidLiteral parsed or NULL.
1.21 daniel 2433: */
2434:
1.123 daniel 2435: xmlChar *
1.55 daniel 2436: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 2437: xmlChar *buf = NULL;
2438: int len = 0;
1.140 daniel 2439: int size = XML_PARSER_BUFFER_SIZE;
1.135 daniel 2440: xmlChar cur;
2441: xmlChar stop;
1.223 veillard 2442: int count = 0;
1.125 daniel 2443:
1.91 daniel 2444: SHRINK;
1.152 daniel 2445: if (RAW == '"') {
1.40 daniel 2446: NEXT;
1.135 daniel 2447: stop = '"';
1.152 daniel 2448: } else if (RAW == '\'') {
1.40 daniel 2449: NEXT;
1.135 daniel 2450: stop = '\'';
1.21 daniel 2451: } else {
1.230 ! veillard 2452: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.55 daniel 2453: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2454: ctxt->sax->error(ctxt->userData,
2455: "SystemLiteral \" or ' expected\n");
1.59 daniel 2456: ctxt->wellFormed = 0;
1.180 daniel 2457: ctxt->disableSAX = 1;
1.135 daniel 2458: return(NULL);
2459: }
2460: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2461: if (buf == NULL) {
2462: fprintf(stderr, "malloc of %d byte failed\n", size);
2463: return(NULL);
2464: }
2465: cur = CUR;
1.223 veillard 2466: while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
1.135 daniel 2467: if (len + 1 >= size) {
2468: size *= 2;
1.204 veillard 2469: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 2470: if (buf == NULL) {
2471: fprintf(stderr, "realloc of %d byte failed\n", size);
2472: return(NULL);
2473: }
2474: }
2475: buf[len++] = cur;
1.223 veillard 2476: count++;
2477: if (count > 50) {
2478: GROW;
2479: count = 0;
2480: }
1.135 daniel 2481: NEXT;
2482: cur = CUR;
2483: if (cur == 0) {
2484: GROW;
2485: SHRINK;
2486: cur = CUR;
2487: }
2488: }
2489: buf[len] = 0;
2490: if (cur != stop) {
1.230 ! veillard 2491: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
1.135 daniel 2492: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2493: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2494: ctxt->wellFormed = 0;
1.180 daniel 2495: ctxt->disableSAX = 1;
1.135 daniel 2496: } else {
2497: NEXT;
1.21 daniel 2498: }
1.135 daniel 2499: return(buf);
1.21 daniel 2500: }
2501:
1.50 daniel 2502: /**
2503: * xmlParseCharData:
2504: * @ctxt: an XML parser context
2505: * @cdata: int indicating whether we are within a CDATA section
2506: *
2507: * parse a CharData section.
2508: * if we are within a CDATA section ']]>' marks an end of section.
1.27 daniel 2509: *
1.151 daniel 2510: * The right angle bracket (>) may be represented using the string ">",
2511: * and must, for compatibility, be escaped using ">" or a character
2512: * reference when it appears in the string "]]>" in content, when that
2513: * string is not marking the end of a CDATA section.
2514: *
1.27 daniel 2515: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2516: */
2517:
1.55 daniel 2518: void
2519: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1.152 daniel 2520: xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
1.91 daniel 2521: int nbchar = 0;
1.152 daniel 2522: int cur, l;
1.223 veillard 2523: int count = 0;
1.27 daniel 2524:
1.91 daniel 2525: SHRINK;
1.223 veillard 2526: GROW;
1.152 daniel 2527: cur = CUR_CHAR(l);
1.223 veillard 2528: while (((cur != '<') || (ctxt->token == '<')) && /* checked */
1.190 daniel 2529: ((cur != '&') || (ctxt->token == '&')) &&
1.229 veillard 2530: (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
1.97 daniel 2531: if ((cur == ']') && (NXT(1) == ']') &&
1.59 daniel 2532: (NXT(2) == '>')) {
2533: if (cdata) break;
2534: else {
1.230 ! veillard 2535: ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
1.59 daniel 2536: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.151 daniel 2537: ctxt->sax->error(ctxt->userData,
1.59 daniel 2538: "Sequence ']]>' not allowed in content\n");
1.151 daniel 2539: /* Should this be relaxed ??? I see a "must here */
2540: ctxt->wellFormed = 0;
1.180 daniel 2541: ctxt->disableSAX = 1;
1.59 daniel 2542: }
2543: }
1.152 daniel 2544: COPY_BUF(l,buf,nbchar,cur);
2545: if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
1.91 daniel 2546: /*
2547: * Ok the segment is to be consumed as chars.
2548: */
1.171 daniel 2549: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 2550: if (areBlanks(ctxt, buf, nbchar)) {
2551: if (ctxt->sax->ignorableWhitespace != NULL)
2552: ctxt->sax->ignorableWhitespace(ctxt->userData,
2553: buf, nbchar);
2554: } else {
2555: if (ctxt->sax->characters != NULL)
2556: ctxt->sax->characters(ctxt->userData, buf, nbchar);
2557: }
2558: }
2559: nbchar = 0;
2560: }
1.223 veillard 2561: count++;
2562: if (count > 50) {
2563: GROW;
2564: count = 0;
2565: }
1.152 daniel 2566: NEXTL(l);
2567: cur = CUR_CHAR(l);
1.27 daniel 2568: }
1.91 daniel 2569: if (nbchar != 0) {
2570: /*
2571: * Ok the segment is to be consumed as chars.
2572: */
1.171 daniel 2573: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 2574: if (areBlanks(ctxt, buf, nbchar)) {
2575: if (ctxt->sax->ignorableWhitespace != NULL)
2576: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2577: } else {
2578: if (ctxt->sax->characters != NULL)
2579: ctxt->sax->characters(ctxt->userData, buf, nbchar);
2580: }
2581: }
1.45 daniel 2582: }
1.27 daniel 2583: }
2584:
1.50 daniel 2585: /**
2586: * xmlParseExternalID:
2587: * @ctxt: an XML parser context
1.123 daniel 2588: * @publicID: a xmlChar** receiving PubidLiteral
1.67 daniel 2589: * @strict: indicate whether we should restrict parsing to only
2590: * production [75], see NOTE below
1.50 daniel 2591: *
1.67 daniel 2592: * Parse an External ID or a Public ID
2593: *
2594: * NOTE: Productions [75] and [83] interract badly since [75] can generate
2595: * 'PUBLIC' S PubidLiteral S SystemLiteral
1.22 daniel 2596: *
2597: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2598: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1.67 daniel 2599: *
2600: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2601: *
1.68 daniel 2602: * Returns the function returns SystemLiteral and in the second
1.67 daniel 2603: * case publicID receives PubidLiteral, is strict is off
2604: * it is possible to return NULL and have publicID set.
1.22 daniel 2605: */
2606:
1.123 daniel 2607: xmlChar *
2608: xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2609: xmlChar *URI = NULL;
1.22 daniel 2610:
1.91 daniel 2611: SHRINK;
1.152 daniel 2612: if ((RAW == 'S') && (NXT(1) == 'Y') &&
1.40 daniel 2613: (NXT(2) == 'S') && (NXT(3) == 'T') &&
2614: (NXT(4) == 'E') && (NXT(5) == 'M')) {
2615: SKIP(6);
1.59 daniel 2616: if (!IS_BLANK(CUR)) {
1.230 ! veillard 2617: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 2618: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2619: ctxt->sax->error(ctxt->userData,
1.59 daniel 2620: "Space required after 'SYSTEM'\n");
2621: ctxt->wellFormed = 0;
1.180 daniel 2622: ctxt->disableSAX = 1;
1.59 daniel 2623: }
1.42 daniel 2624: SKIP_BLANKS;
1.39 daniel 2625: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 2626: if (URI == NULL) {
1.230 ! veillard 2627: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.55 daniel 2628: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2629: ctxt->sax->error(ctxt->userData,
1.39 daniel 2630: "xmlParseExternalID: SYSTEM, no URI\n");
1.59 daniel 2631: ctxt->wellFormed = 0;
1.180 daniel 2632: ctxt->disableSAX = 1;
1.59 daniel 2633: }
1.152 daniel 2634: } else if ((RAW == 'P') && (NXT(1) == 'U') &&
1.40 daniel 2635: (NXT(2) == 'B') && (NXT(3) == 'L') &&
2636: (NXT(4) == 'I') && (NXT(5) == 'C')) {
2637: SKIP(6);
1.59 daniel 2638: if (!IS_BLANK(CUR)) {
1.230 ! veillard 2639: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 2640: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2641: ctxt->sax->error(ctxt->userData,
1.59 daniel 2642: "Space required after 'PUBLIC'\n");
2643: ctxt->wellFormed = 0;
1.180 daniel 2644: ctxt->disableSAX = 1;
1.59 daniel 2645: }
1.42 daniel 2646: SKIP_BLANKS;
1.39 daniel 2647: *publicID = xmlParsePubidLiteral(ctxt);
1.59 daniel 2648: if (*publicID == NULL) {
1.230 ! veillard 2649: ctxt->errNo = XML_ERR_PUBID_REQUIRED;
1.55 daniel 2650: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2651: ctxt->sax->error(ctxt->userData,
1.39 daniel 2652: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.59 daniel 2653: ctxt->wellFormed = 0;
1.180 daniel 2654: ctxt->disableSAX = 1;
1.59 daniel 2655: }
1.67 daniel 2656: if (strict) {
2657: /*
2658: * We don't handle [83] so "S SystemLiteral" is required.
2659: */
2660: if (!IS_BLANK(CUR)) {
1.230 ! veillard 2661: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 2662: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2663: ctxt->sax->error(ctxt->userData,
1.67 daniel 2664: "Space required after the Public Identifier\n");
2665: ctxt->wellFormed = 0;
1.180 daniel 2666: ctxt->disableSAX = 1;
1.67 daniel 2667: }
2668: } else {
2669: /*
2670: * We handle [83] so we return immediately, if
2671: * "S SystemLiteral" is not detected. From a purely parsing
2672: * point of view that's a nice mess.
2673: */
1.135 daniel 2674: const xmlChar *ptr;
2675: GROW;
2676:
2677: ptr = CUR_PTR;
1.67 daniel 2678: if (!IS_BLANK(*ptr)) return(NULL);
2679:
1.223 veillard 2680: while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
1.173 daniel 2681: if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
1.59 daniel 2682: }
1.42 daniel 2683: SKIP_BLANKS;
1.39 daniel 2684: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 2685: if (URI == NULL) {
1.230 ! veillard 2686: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.55 daniel 2687: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2688: ctxt->sax->error(ctxt->userData,
1.39 daniel 2689: "xmlParseExternalID: PUBLIC, no URI\n");
1.59 daniel 2690: ctxt->wellFormed = 0;
1.180 daniel 2691: ctxt->disableSAX = 1;
1.59 daniel 2692: }
1.22 daniel 2693: }
1.39 daniel 2694: return(URI);
1.22 daniel 2695: }
2696:
1.50 daniel 2697: /**
2698: * xmlParseComment:
1.69 daniel 2699: * @ctxt: an XML parser context
1.50 daniel 2700: *
1.3 veillard 2701: * Skip an XML (SGML) comment <!-- .... -->
1.38 daniel 2702: * The spec says that "For compatibility, the string "--" (double-hyphen)
2703: * must not occur within comments. "
1.22 daniel 2704: *
2705: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 2706: */
1.72 daniel 2707: void
1.114 daniel 2708: xmlParseComment(xmlParserCtxtPtr ctxt) {
1.135 daniel 2709: xmlChar *buf = NULL;
1.195 daniel 2710: int len;
1.140 daniel 2711: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 2712: int q, ql;
2713: int r, rl;
2714: int cur, l;
1.140 daniel 2715: xmlParserInputState state;
1.187 daniel 2716: xmlParserInputPtr input = ctxt->input;
1.223 veillard 2717: int count = 0;
1.3 veillard 2718:
2719: /*
1.22 daniel 2720: * Check that there is a comment right here.
1.3 veillard 2721: */
1.152 daniel 2722: if ((RAW != '<') || (NXT(1) != '!') ||
1.72 daniel 2723: (NXT(2) != '-') || (NXT(3) != '-')) return;
1.3 veillard 2724:
1.140 daniel 2725: state = ctxt->instate;
1.97 daniel 2726: ctxt->instate = XML_PARSER_COMMENT;
1.91 daniel 2727: SHRINK;
1.40 daniel 2728: SKIP(4);
1.135 daniel 2729: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2730: if (buf == NULL) {
2731: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 2732: ctxt->instate = state;
1.135 daniel 2733: return;
2734: }
1.152 daniel 2735: q = CUR_CHAR(ql);
2736: NEXTL(ql);
2737: r = CUR_CHAR(rl);
2738: NEXTL(rl);
2739: cur = CUR_CHAR(l);
1.195 daniel 2740: len = 0;
1.223 veillard 2741: while (IS_CHAR(cur) && /* checked */
1.135 daniel 2742: ((cur != '>') ||
2743: (r != '-') || (q != '-'))) {
1.195 daniel 2744: if ((r == '-') && (q == '-') && (len > 1)) {
1.230 ! veillard 2745: ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
1.55 daniel 2746: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2747: ctxt->sax->error(ctxt->userData,
1.38 daniel 2748: "Comment must not contain '--' (double-hyphen)`\n");
1.59 daniel 2749: ctxt->wellFormed = 0;
1.180 daniel 2750: ctxt->disableSAX = 1;
1.59 daniel 2751: }
1.152 daniel 2752: if (len + 5 >= size) {
1.135 daniel 2753: size *= 2;
1.204 veillard 2754: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 2755: if (buf == NULL) {
2756: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 2757: ctxt->instate = state;
1.135 daniel 2758: return;
2759: }
2760: }
1.152 daniel 2761: COPY_BUF(ql,buf,len,q);
1.135 daniel 2762: q = r;
1.152 daniel 2763: ql = rl;
1.135 daniel 2764: r = cur;
1.152 daniel 2765: rl = l;
1.223 veillard 2766:
2767: count++;
2768: if (count > 50) {
2769: GROW;
2770: count = 0;
2771: }
1.152 daniel 2772: NEXTL(l);
2773: cur = CUR_CHAR(l);
1.135 daniel 2774: if (cur == 0) {
2775: SHRINK;
2776: GROW;
1.152 daniel 2777: cur = CUR_CHAR(l);
1.135 daniel 2778: }
1.3 veillard 2779: }
1.135 daniel 2780: buf[len] = 0;
2781: if (!IS_CHAR(cur)) {
1.230 ! veillard 2782: ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
1.55 daniel 2783: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2784: ctxt->sax->error(ctxt->userData,
1.135 daniel 2785: "Comment not terminated \n<!--%.50s\n", buf);
1.59 daniel 2786: ctxt->wellFormed = 0;
1.180 daniel 2787: ctxt->disableSAX = 1;
1.178 daniel 2788: xmlFree(buf);
1.3 veillard 2789: } else {
1.187 daniel 2790: if (input != ctxt->input) {
1.230 ! veillard 2791: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 2792: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2793: ctxt->sax->error(ctxt->userData,
2794: "Comment doesn't start and stop in the same entity\n");
2795: ctxt->wellFormed = 0;
2796: ctxt->disableSAX = 1;
2797: }
1.40 daniel 2798: NEXT;
1.171 daniel 2799: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2800: (!ctxt->disableSAX))
1.135 daniel 2801: ctxt->sax->comment(ctxt->userData, buf);
2802: xmlFree(buf);
1.3 veillard 2803: }
1.140 daniel 2804: ctxt->instate = state;
1.3 veillard 2805: }
2806:
1.50 daniel 2807: /**
2808: * xmlParsePITarget:
2809: * @ctxt: an XML parser context
2810: *
2811: * parse the name of a PI
1.22 daniel 2812: *
2813: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1.68 daniel 2814: *
2815: * Returns the PITarget name or NULL
1.22 daniel 2816: */
2817:
1.123 daniel 2818: xmlChar *
1.55 daniel 2819: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1.123 daniel 2820: xmlChar *name;
1.22 daniel 2821:
2822: name = xmlParseName(ctxt);
1.139 daniel 2823: if ((name != NULL) &&
1.22 daniel 2824: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 2825: ((name[1] == 'm') || (name[1] == 'M')) &&
2826: ((name[2] == 'l') || (name[2] == 'L'))) {
1.139 daniel 2827: int i;
1.177 daniel 2828: if ((name[0] == 'x') && (name[1] == 'm') &&
2829: (name[2] == 'l') && (name[3] == 0)) {
1.230 ! veillard 2830: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
1.151 daniel 2831: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2832: ctxt->sax->error(ctxt->userData,
2833: "XML declaration allowed only at the start of the document\n");
2834: ctxt->wellFormed = 0;
1.180 daniel 2835: ctxt->disableSAX = 1;
1.151 daniel 2836: return(name);
2837: } else if (name[3] == 0) {
1.230 ! veillard 2838: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
1.151 daniel 2839: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2840: ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2841: ctxt->wellFormed = 0;
1.180 daniel 2842: ctxt->disableSAX = 1;
1.151 daniel 2843: return(name);
2844: }
1.139 daniel 2845: for (i = 0;;i++) {
2846: if (xmlW3CPIs[i] == NULL) break;
2847: if (!xmlStrcmp(name, (const xmlChar *)xmlW3CPIs[i]))
2848: return(name);
2849: }
2850: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
1.230 ! veillard 2851: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
1.139 daniel 2852: ctxt->sax->warning(ctxt->userData,
1.122 daniel 2853: "xmlParsePItarget: invalid name prefix 'xml'\n");
2854: }
1.22 daniel 2855: }
2856: return(name);
2857: }
2858:
1.50 daniel 2859: /**
2860: * xmlParsePI:
2861: * @ctxt: an XML parser context
2862: *
2863: * parse an XML Processing Instruction.
1.22 daniel 2864: *
2865: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.68 daniel 2866: *
1.69 daniel 2867: * The processing is transfered to SAX once parsed.
1.3 veillard 2868: */
2869:
1.55 daniel 2870: void
2871: xmlParsePI(xmlParserCtxtPtr ctxt) {
1.135 daniel 2872: xmlChar *buf = NULL;
2873: int len = 0;
1.140 daniel 2874: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 2875: int cur, l;
1.123 daniel 2876: xmlChar *target;
1.140 daniel 2877: xmlParserInputState state;
1.223 veillard 2878: int count = 0;
1.22 daniel 2879:
1.152 daniel 2880: if ((RAW == '<') && (NXT(1) == '?')) {
1.187 daniel 2881: xmlParserInputPtr input = ctxt->input;
1.140 daniel 2882: state = ctxt->instate;
2883: ctxt->instate = XML_PARSER_PI;
1.3 veillard 2884: /*
2885: * this is a Processing Instruction.
2886: */
1.40 daniel 2887: SKIP(2);
1.91 daniel 2888: SHRINK;
1.3 veillard 2889:
2890: /*
1.22 daniel 2891: * Parse the target name and check for special support like
2892: * namespace.
1.3 veillard 2893: */
1.22 daniel 2894: target = xmlParsePITarget(ctxt);
2895: if (target != NULL) {
1.156 daniel 2896: if ((RAW == '?') && (NXT(1) == '>')) {
1.187 daniel 2897: if (input != ctxt->input) {
1.230 ! veillard 2898: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 2899: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2900: ctxt->sax->error(ctxt->userData,
2901: "PI declaration doesn't start and stop in the same entity\n");
2902: ctxt->wellFormed = 0;
2903: ctxt->disableSAX = 1;
2904: }
1.156 daniel 2905: SKIP(2);
2906:
2907: /*
2908: * SAX: PI detected.
2909: */
1.171 daniel 2910: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.156 daniel 2911: (ctxt->sax->processingInstruction != NULL))
2912: ctxt->sax->processingInstruction(ctxt->userData,
2913: target, NULL);
2914: ctxt->instate = state;
1.170 daniel 2915: xmlFree(target);
1.156 daniel 2916: return;
2917: }
1.135 daniel 2918: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2919: if (buf == NULL) {
2920: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 2921: ctxt->instate = state;
1.135 daniel 2922: return;
2923: }
2924: cur = CUR;
2925: if (!IS_BLANK(cur)) {
1.230 ! veillard 2926: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.114 daniel 2927: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2928: ctxt->sax->error(ctxt->userData,
2929: "xmlParsePI: PI %s space expected\n", target);
2930: ctxt->wellFormed = 0;
1.180 daniel 2931: ctxt->disableSAX = 1;
1.114 daniel 2932: }
2933: SKIP_BLANKS;
1.152 daniel 2934: cur = CUR_CHAR(l);
1.223 veillard 2935: while (IS_CHAR(cur) && /* checked */
1.135 daniel 2936: ((cur != '?') || (NXT(1) != '>'))) {
1.152 daniel 2937: if (len + 5 >= size) {
1.135 daniel 2938: size *= 2;
1.204 veillard 2939: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 2940: if (buf == NULL) {
2941: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 2942: ctxt->instate = state;
1.135 daniel 2943: return;
2944: }
1.223 veillard 2945: }
2946: count++;
2947: if (count > 50) {
2948: GROW;
2949: count = 0;
1.135 daniel 2950: }
1.152 daniel 2951: COPY_BUF(l,buf,len,cur);
2952: NEXTL(l);
2953: cur = CUR_CHAR(l);
1.135 daniel 2954: if (cur == 0) {
2955: SHRINK;
2956: GROW;
1.152 daniel 2957: cur = CUR_CHAR(l);
1.135 daniel 2958: }
2959: }
2960: buf[len] = 0;
1.152 daniel 2961: if (cur != '?') {
1.230 ! veillard 2962: ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
1.72 daniel 2963: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2964: ctxt->sax->error(ctxt->userData,
1.72 daniel 2965: "xmlParsePI: PI %s never end ...\n", target);
2966: ctxt->wellFormed = 0;
1.180 daniel 2967: ctxt->disableSAX = 1;
1.22 daniel 2968: } else {
1.187 daniel 2969: if (input != ctxt->input) {
1.230 ! veillard 2970: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 2971: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2972: ctxt->sax->error(ctxt->userData,
2973: "PI declaration doesn't start and stop in the same entity\n");
2974: ctxt->wellFormed = 0;
2975: ctxt->disableSAX = 1;
2976: }
1.72 daniel 2977: SKIP(2);
1.44 daniel 2978:
1.72 daniel 2979: /*
2980: * SAX: PI detected.
2981: */
1.171 daniel 2982: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.72 daniel 2983: (ctxt->sax->processingInstruction != NULL))
1.99 daniel 2984: ctxt->sax->processingInstruction(ctxt->userData,
1.135 daniel 2985: target, buf);
1.22 daniel 2986: }
1.135 daniel 2987: xmlFree(buf);
1.119 daniel 2988: xmlFree(target);
1.3 veillard 2989: } else {
1.230 ! veillard 2990: ctxt->errNo = XML_ERR_PI_NOT_STARTED;
1.55 daniel 2991: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.99 daniel 2992: ctxt->sax->error(ctxt->userData,
2993: "xmlParsePI : no target name\n");
1.59 daniel 2994: ctxt->wellFormed = 0;
1.180 daniel 2995: ctxt->disableSAX = 1;
1.22 daniel 2996: }
1.140 daniel 2997: ctxt->instate = state;
1.22 daniel 2998: }
2999: }
3000:
1.50 daniel 3001: /**
3002: * xmlParseNotationDecl:
3003: * @ctxt: an XML parser context
3004: *
3005: * parse a notation declaration
1.22 daniel 3006: *
3007: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3008: *
3009: * Hence there is actually 3 choices:
3010: * 'PUBLIC' S PubidLiteral
3011: * 'PUBLIC' S PubidLiteral S SystemLiteral
3012: * and 'SYSTEM' S SystemLiteral
1.50 daniel 3013: *
1.67 daniel 3014: * See the NOTE on xmlParseExternalID().
1.22 daniel 3015: */
3016:
1.55 daniel 3017: void
3018: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 3019: xmlChar *name;
3020: xmlChar *Pubid;
3021: xmlChar *Systemid;
1.22 daniel 3022:
1.152 daniel 3023: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 3024: (NXT(2) == 'N') && (NXT(3) == 'O') &&
3025: (NXT(4) == 'T') && (NXT(5) == 'A') &&
3026: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1.67 daniel 3027: (NXT(8) == 'O') && (NXT(9) == 'N')) {
1.187 daniel 3028: xmlParserInputPtr input = ctxt->input;
1.91 daniel 3029: SHRINK;
1.40 daniel 3030: SKIP(10);
1.67 daniel 3031: if (!IS_BLANK(CUR)) {
1.230 ! veillard 3032: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 3033: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3034: ctxt->sax->error(ctxt->userData,
3035: "Space required after '<!NOTATION'\n");
1.67 daniel 3036: ctxt->wellFormed = 0;
1.180 daniel 3037: ctxt->disableSAX = 1;
1.67 daniel 3038: return;
3039: }
3040: SKIP_BLANKS;
1.22 daniel 3041:
3042: name = xmlParseName(ctxt);
3043: if (name == NULL) {
1.230 ! veillard 3044: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.55 daniel 3045: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3046: ctxt->sax->error(ctxt->userData,
3047: "NOTATION: Name expected here\n");
1.67 daniel 3048: ctxt->wellFormed = 0;
1.180 daniel 3049: ctxt->disableSAX = 1;
1.67 daniel 3050: return;
3051: }
3052: if (!IS_BLANK(CUR)) {
1.230 ! veillard 3053: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 3054: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3055: ctxt->sax->error(ctxt->userData,
1.67 daniel 3056: "Space required after the NOTATION name'\n");
1.59 daniel 3057: ctxt->wellFormed = 0;
1.180 daniel 3058: ctxt->disableSAX = 1;
1.22 daniel 3059: return;
3060: }
1.42 daniel 3061: SKIP_BLANKS;
1.67 daniel 3062:
1.22 daniel 3063: /*
1.67 daniel 3064: * Parse the IDs.
1.22 daniel 3065: */
1.160 daniel 3066: Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
1.67 daniel 3067: SKIP_BLANKS;
3068:
1.152 daniel 3069: if (RAW == '>') {
1.187 daniel 3070: if (input != ctxt->input) {
1.230 ! veillard 3071: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 3072: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3073: ctxt->sax->error(ctxt->userData,
3074: "Notation declaration doesn't start and stop in the same entity\n");
3075: ctxt->wellFormed = 0;
3076: ctxt->disableSAX = 1;
3077: }
1.40 daniel 3078: NEXT;
1.171 daniel 3079: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3080: (ctxt->sax->notationDecl != NULL))
1.74 daniel 3081: ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
1.67 daniel 3082: } else {
1.230 ! veillard 3083: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.67 daniel 3084: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3085: ctxt->sax->error(ctxt->userData,
1.67 daniel 3086: "'>' required to close NOTATION declaration\n");
3087: ctxt->wellFormed = 0;
1.180 daniel 3088: ctxt->disableSAX = 1;
1.67 daniel 3089: }
1.119 daniel 3090: xmlFree(name);
3091: if (Systemid != NULL) xmlFree(Systemid);
3092: if (Pubid != NULL) xmlFree(Pubid);
1.22 daniel 3093: }
3094: }
3095:
1.50 daniel 3096: /**
3097: * xmlParseEntityDecl:
3098: * @ctxt: an XML parser context
3099: *
3100: * parse <!ENTITY declarations
1.22 daniel 3101: *
3102: * [70] EntityDecl ::= GEDecl | PEDecl
3103: *
3104: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3105: *
3106: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3107: *
3108: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3109: *
3110: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 3111: *
3112: * [76] NDataDecl ::= S 'NDATA' S Name
1.99 daniel 3113: *
3114: * [ VC: Notation Declared ]
1.116 daniel 3115: * The Name must match the declared name of a notation.
1.22 daniel 3116: */
3117:
1.55 daniel 3118: void
3119: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 3120: xmlChar *name = NULL;
3121: xmlChar *value = NULL;
3122: xmlChar *URI = NULL, *literal = NULL;
3123: xmlChar *ndata = NULL;
1.39 daniel 3124: int isParameter = 0;
1.123 daniel 3125: xmlChar *orig = NULL;
1.22 daniel 3126:
1.94 daniel 3127: GROW;
1.152 daniel 3128: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 3129: (NXT(2) == 'E') && (NXT(3) == 'N') &&
3130: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1.59 daniel 3131: (NXT(6) == 'T') && (NXT(7) == 'Y')) {
1.187 daniel 3132: xmlParserInputPtr input = ctxt->input;
1.96 daniel 3133: ctxt->instate = XML_PARSER_ENTITY_DECL;
1.91 daniel 3134: SHRINK;
1.40 daniel 3135: SKIP(8);
1.59 daniel 3136: if (!IS_BLANK(CUR)) {
1.230 ! veillard 3137: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3138: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3139: ctxt->sax->error(ctxt->userData,
3140: "Space required after '<!ENTITY'\n");
1.59 daniel 3141: ctxt->wellFormed = 0;
1.180 daniel 3142: ctxt->disableSAX = 1;
1.59 daniel 3143: }
3144: SKIP_BLANKS;
1.40 daniel 3145:
1.152 daniel 3146: if (RAW == '%') {
1.40 daniel 3147: NEXT;
1.59 daniel 3148: if (!IS_BLANK(CUR)) {
1.230 ! veillard 3149: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3150: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3151: ctxt->sax->error(ctxt->userData,
3152: "Space required after '%'\n");
1.59 daniel 3153: ctxt->wellFormed = 0;
1.180 daniel 3154: ctxt->disableSAX = 1;
1.59 daniel 3155: }
1.42 daniel 3156: SKIP_BLANKS;
1.39 daniel 3157: isParameter = 1;
1.22 daniel 3158: }
3159:
3160: name = xmlParseName(ctxt);
1.24 daniel 3161: if (name == NULL) {
1.230 ! veillard 3162: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 3163: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3164: ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
1.59 daniel 3165: ctxt->wellFormed = 0;
1.180 daniel 3166: ctxt->disableSAX = 1;
1.24 daniel 3167: return;
3168: }
1.59 daniel 3169: if (!IS_BLANK(CUR)) {
1.230 ! veillard 3170: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3171: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3172: ctxt->sax->error(ctxt->userData,
1.59 daniel 3173: "Space required after the entity name\n");
3174: ctxt->wellFormed = 0;
1.180 daniel 3175: ctxt->disableSAX = 1;
1.59 daniel 3176: }
1.42 daniel 3177: SKIP_BLANKS;
1.24 daniel 3178:
1.22 daniel 3179: /*
1.68 daniel 3180: * handle the various case of definitions...
1.22 daniel 3181: */
1.39 daniel 3182: if (isParameter) {
1.225 veillard 3183: if ((RAW == '"') || (RAW == '\'')) {
1.78 daniel 3184: value = xmlParseEntityValue(ctxt, &orig);
1.39 daniel 3185: if (value) {
1.171 daniel 3186: if ((ctxt->sax != NULL) &&
3187: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3188: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3189: XML_INTERNAL_PARAMETER_ENTITY,
3190: NULL, NULL, value);
3191: }
1.225 veillard 3192: } else {
1.67 daniel 3193: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 3194: if ((URI == NULL) && (literal == NULL)) {
1.230 ! veillard 3195: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
1.169 daniel 3196: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3197: ctxt->sax->error(ctxt->userData,
3198: "Entity value required\n");
3199: ctxt->wellFormed = 0;
1.180 daniel 3200: ctxt->disableSAX = 1;
1.169 daniel 3201: }
1.39 daniel 3202: if (URI) {
1.193 daniel 3203: xmlURIPtr uri;
3204:
3205: uri = xmlParseURI((const char *) URI);
3206: if (uri == NULL) {
1.230 ! veillard 3207: ctxt->errNo = XML_ERR_INVALID_URI;
1.193 daniel 3208: if ((ctxt->sax != NULL) &&
3209: (!ctxt->disableSAX) &&
3210: (ctxt->sax->error != NULL))
3211: ctxt->sax->error(ctxt->userData,
3212: "Invalid URI: %s\n", URI);
3213: ctxt->wellFormed = 0;
3214: } else {
3215: if (uri->fragment != NULL) {
1.230 ! veillard 3216: ctxt->errNo = XML_ERR_URI_FRAGMENT;
1.193 daniel 3217: if ((ctxt->sax != NULL) &&
3218: (!ctxt->disableSAX) &&
3219: (ctxt->sax->error != NULL))
3220: ctxt->sax->error(ctxt->userData,
3221: "Fragment not allowed: %s\n", URI);
3222: ctxt->wellFormed = 0;
3223: } else {
3224: if ((ctxt->sax != NULL) &&
3225: (!ctxt->disableSAX) &&
3226: (ctxt->sax->entityDecl != NULL))
3227: ctxt->sax->entityDecl(ctxt->userData, name,
3228: XML_EXTERNAL_PARAMETER_ENTITY,
3229: literal, URI, NULL);
3230: }
3231: xmlFreeURI(uri);
3232: }
1.39 daniel 3233: }
1.24 daniel 3234: }
3235: } else {
1.152 daniel 3236: if ((RAW == '"') || (RAW == '\'')) {
1.78 daniel 3237: value = xmlParseEntityValue(ctxt, &orig);
1.171 daniel 3238: if ((ctxt->sax != NULL) &&
3239: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3240: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3241: XML_INTERNAL_GENERAL_ENTITY,
3242: NULL, NULL, value);
3243: } else {
1.67 daniel 3244: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 3245: if ((URI == NULL) && (literal == NULL)) {
1.230 ! veillard 3246: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
1.169 daniel 3247: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3248: ctxt->sax->error(ctxt->userData,
3249: "Entity value required\n");
3250: ctxt->wellFormed = 0;
1.180 daniel 3251: ctxt->disableSAX = 1;
1.169 daniel 3252: }
1.193 daniel 3253: if (URI) {
3254: xmlURIPtr uri;
3255:
3256: uri = xmlParseURI((const char *)URI);
3257: if (uri == NULL) {
1.230 ! veillard 3258: ctxt->errNo = XML_ERR_INVALID_URI;
1.193 daniel 3259: if ((ctxt->sax != NULL) &&
3260: (!ctxt->disableSAX) &&
3261: (ctxt->sax->error != NULL))
3262: ctxt->sax->error(ctxt->userData,
3263: "Invalid URI: %s\n", URI);
3264: ctxt->wellFormed = 0;
3265: } else {
3266: if (uri->fragment != NULL) {
1.230 ! veillard 3267: ctxt->errNo = XML_ERR_URI_FRAGMENT;
1.193 daniel 3268: if ((ctxt->sax != NULL) &&
3269: (!ctxt->disableSAX) &&
3270: (ctxt->sax->error != NULL))
3271: ctxt->sax->error(ctxt->userData,
3272: "Fragment not allowed: %s\n", URI);
3273: ctxt->wellFormed = 0;
3274: }
3275: xmlFreeURI(uri);
3276: }
3277: }
1.152 daniel 3278: if ((RAW != '>') && (!IS_BLANK(CUR))) {
1.230 ! veillard 3279: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3280: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3281: ctxt->sax->error(ctxt->userData,
1.59 daniel 3282: "Space required before 'NDATA'\n");
3283: ctxt->wellFormed = 0;
1.180 daniel 3284: ctxt->disableSAX = 1;
1.59 daniel 3285: }
1.42 daniel 3286: SKIP_BLANKS;
1.152 daniel 3287: if ((RAW == 'N') && (NXT(1) == 'D') &&
1.40 daniel 3288: (NXT(2) == 'A') && (NXT(3) == 'T') &&
3289: (NXT(4) == 'A')) {
3290: SKIP(5);
1.59 daniel 3291: if (!IS_BLANK(CUR)) {
1.230 ! veillard 3292: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3293: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3294: ctxt->sax->error(ctxt->userData,
1.59 daniel 3295: "Space required after 'NDATA'\n");
3296: ctxt->wellFormed = 0;
1.180 daniel 3297: ctxt->disableSAX = 1;
1.59 daniel 3298: }
1.42 daniel 3299: SKIP_BLANKS;
1.24 daniel 3300: ndata = xmlParseName(ctxt);
1.171 daniel 3301: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.116 daniel 3302: (ctxt->sax->unparsedEntityDecl != NULL))
3303: ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
1.39 daniel 3304: literal, URI, ndata);
3305: } else {
1.171 daniel 3306: if ((ctxt->sax != NULL) &&
3307: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3308: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3309: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3310: literal, URI, NULL);
1.24 daniel 3311: }
3312: }
3313: }
1.42 daniel 3314: SKIP_BLANKS;
1.152 daniel 3315: if (RAW != '>') {
1.230 ! veillard 3316: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.55 daniel 3317: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3318: ctxt->sax->error(ctxt->userData,
1.31 daniel 3319: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.59 daniel 3320: ctxt->wellFormed = 0;
1.180 daniel 3321: ctxt->disableSAX = 1;
1.187 daniel 3322: } else {
3323: if (input != ctxt->input) {
1.230 ! veillard 3324: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 3325: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3326: ctxt->sax->error(ctxt->userData,
3327: "Entity declaration doesn't start and stop in the same entity\n");
3328: ctxt->wellFormed = 0;
3329: ctxt->disableSAX = 1;
3330: }
1.40 daniel 3331: NEXT;
1.187 daniel 3332: }
1.78 daniel 3333: if (orig != NULL) {
3334: /*
1.98 daniel 3335: * Ugly mechanism to save the raw entity value.
1.78 daniel 3336: */
3337: xmlEntityPtr cur = NULL;
3338:
1.98 daniel 3339: if (isParameter) {
3340: if ((ctxt->sax != NULL) &&
3341: (ctxt->sax->getParameterEntity != NULL))
1.120 daniel 3342: cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.98 daniel 3343: } else {
3344: if ((ctxt->sax != NULL) &&
3345: (ctxt->sax->getEntity != NULL))
1.120 daniel 3346: cur = ctxt->sax->getEntity(ctxt->userData, name);
1.98 daniel 3347: }
3348: if (cur != NULL) {
3349: if (cur->orig != NULL)
1.119 daniel 3350: xmlFree(orig);
1.98 daniel 3351: else
3352: cur->orig = orig;
3353: } else
1.119 daniel 3354: xmlFree(orig);
1.78 daniel 3355: }
1.119 daniel 3356: if (name != NULL) xmlFree(name);
3357: if (value != NULL) xmlFree(value);
3358: if (URI != NULL) xmlFree(URI);
3359: if (literal != NULL) xmlFree(literal);
3360: if (ndata != NULL) xmlFree(ndata);
1.22 daniel 3361: }
3362: }
3363:
1.50 daniel 3364: /**
1.59 daniel 3365: * xmlParseDefaultDecl:
3366: * @ctxt: an XML parser context
3367: * @value: Receive a possible fixed default value for the attribute
3368: *
3369: * Parse an attribute default declaration
3370: *
3371: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3372: *
1.99 daniel 3373: * [ VC: Required Attribute ]
1.117 daniel 3374: * if the default declaration is the keyword #REQUIRED, then the
3375: * attribute must be specified for all elements of the type in the
3376: * attribute-list declaration.
1.99 daniel 3377: *
3378: * [ VC: Attribute Default Legal ]
1.102 daniel 3379: * The declared default value must meet the lexical constraints of
3380: * the declared attribute type c.f. xmlValidateAttributeDecl()
1.99 daniel 3381: *
3382: * [ VC: Fixed Attribute Default ]
1.117 daniel 3383: * if an attribute has a default value declared with the #FIXED
3384: * keyword, instances of that attribute must match the default value.
1.99 daniel 3385: *
3386: * [ WFC: No < in Attribute Values ]
3387: * handled in xmlParseAttValue()
3388: *
1.59 daniel 3389: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3390: * or XML_ATTRIBUTE_FIXED.
3391: */
3392:
3393: int
1.123 daniel 3394: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
1.59 daniel 3395: int val;
1.123 daniel 3396: xmlChar *ret;
1.59 daniel 3397:
3398: *value = NULL;
1.152 daniel 3399: if ((RAW == '#') && (NXT(1) == 'R') &&
1.59 daniel 3400: (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3401: (NXT(4) == 'U') && (NXT(5) == 'I') &&
3402: (NXT(6) == 'R') && (NXT(7) == 'E') &&
3403: (NXT(8) == 'D')) {
3404: SKIP(9);
3405: return(XML_ATTRIBUTE_REQUIRED);
3406: }
1.152 daniel 3407: if ((RAW == '#') && (NXT(1) == 'I') &&
1.59 daniel 3408: (NXT(2) == 'M') && (NXT(3) == 'P') &&
3409: (NXT(4) == 'L') && (NXT(5) == 'I') &&
3410: (NXT(6) == 'E') && (NXT(7) == 'D')) {
3411: SKIP(8);
3412: return(XML_ATTRIBUTE_IMPLIED);
3413: }
3414: val = XML_ATTRIBUTE_NONE;
1.152 daniel 3415: if ((RAW == '#') && (NXT(1) == 'F') &&
1.59 daniel 3416: (NXT(2) == 'I') && (NXT(3) == 'X') &&
3417: (NXT(4) == 'E') && (NXT(5) == 'D')) {
3418: SKIP(6);
3419: val = XML_ATTRIBUTE_FIXED;
3420: if (!IS_BLANK(CUR)) {
1.230 ! veillard 3421: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3422: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3423: ctxt->sax->error(ctxt->userData,
3424: "Space required after '#FIXED'\n");
1.59 daniel 3425: ctxt->wellFormed = 0;
1.180 daniel 3426: ctxt->disableSAX = 1;
1.59 daniel 3427: }
3428: SKIP_BLANKS;
3429: }
3430: ret = xmlParseAttValue(ctxt);
1.96 daniel 3431: ctxt->instate = XML_PARSER_DTD;
1.59 daniel 3432: if (ret == NULL) {
3433: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3434: ctxt->sax->error(ctxt->userData,
1.59 daniel 3435: "Attribute default value declaration error\n");
3436: ctxt->wellFormed = 0;
1.180 daniel 3437: ctxt->disableSAX = 1;
1.59 daniel 3438: } else
3439: *value = ret;
3440: return(val);
3441: }
3442:
3443: /**
1.66 daniel 3444: * xmlParseNotationType:
3445: * @ctxt: an XML parser context
3446: *
3447: * parse an Notation attribute type.
3448: *
1.99 daniel 3449: * Note: the leading 'NOTATION' S part has already being parsed...
3450: *
1.66 daniel 3451: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3452: *
1.99 daniel 3453: * [ VC: Notation Attributes ]
1.117 daniel 3454: * Values of this type must match one of the notation names included
1.99 daniel 3455: * in the declaration; all notation names in the declaration must be declared.
1.66 daniel 3456: *
3457: * Returns: the notation attribute tree built while parsing
3458: */
3459:
3460: xmlEnumerationPtr
3461: xmlParseNotationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 3462: xmlChar *name;
1.66 daniel 3463: xmlEnumerationPtr ret = NULL, last = NULL, cur;
3464:
1.152 daniel 3465: if (RAW != '(') {
1.230 ! veillard 3466: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.66 daniel 3467: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3468: ctxt->sax->error(ctxt->userData,
3469: "'(' required to start 'NOTATION'\n");
1.66 daniel 3470: ctxt->wellFormed = 0;
1.180 daniel 3471: ctxt->disableSAX = 1;
1.66 daniel 3472: return(NULL);
3473: }
1.91 daniel 3474: SHRINK;
1.66 daniel 3475: do {
3476: NEXT;
3477: SKIP_BLANKS;
3478: name = xmlParseName(ctxt);
3479: if (name == NULL) {
1.230 ! veillard 3480: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.66 daniel 3481: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3482: ctxt->sax->error(ctxt->userData,
1.66 daniel 3483: "Name expected in NOTATION declaration\n");
3484: ctxt->wellFormed = 0;
1.180 daniel 3485: ctxt->disableSAX = 1;
1.66 daniel 3486: return(ret);
3487: }
3488: cur = xmlCreateEnumeration(name);
1.119 daniel 3489: xmlFree(name);
1.66 daniel 3490: if (cur == NULL) return(ret);
3491: if (last == NULL) ret = last = cur;
3492: else {
3493: last->next = cur;
3494: last = cur;
3495: }
3496: SKIP_BLANKS;
1.152 daniel 3497: } while (RAW == '|');
3498: if (RAW != ')') {
1.230 ! veillard 3499: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.66 daniel 3500: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3501: ctxt->sax->error(ctxt->userData,
1.66 daniel 3502: "')' required to finish NOTATION declaration\n");
3503: ctxt->wellFormed = 0;
1.180 daniel 3504: ctxt->disableSAX = 1;
1.170 daniel 3505: if ((last != NULL) && (last != ret))
3506: xmlFreeEnumeration(last);
1.66 daniel 3507: return(ret);
3508: }
3509: NEXT;
3510: return(ret);
3511: }
3512:
3513: /**
3514: * xmlParseEnumerationType:
3515: * @ctxt: an XML parser context
3516: *
3517: * parse an Enumeration attribute type.
3518: *
3519: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3520: *
1.99 daniel 3521: * [ VC: Enumeration ]
1.117 daniel 3522: * Values of this type must match one of the Nmtoken tokens in
1.99 daniel 3523: * the declaration
3524: *
1.66 daniel 3525: * Returns: the enumeration attribute tree built while parsing
3526: */
3527:
3528: xmlEnumerationPtr
3529: xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 3530: xmlChar *name;
1.66 daniel 3531: xmlEnumerationPtr ret = NULL, last = NULL, cur;
3532:
1.152 daniel 3533: if (RAW != '(') {
1.230 ! veillard 3534: ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
1.66 daniel 3535: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3536: ctxt->sax->error(ctxt->userData,
1.66 daniel 3537: "'(' required to start ATTLIST enumeration\n");
3538: ctxt->wellFormed = 0;
1.180 daniel 3539: ctxt->disableSAX = 1;
1.66 daniel 3540: return(NULL);
3541: }
1.91 daniel 3542: SHRINK;
1.66 daniel 3543: do {
3544: NEXT;
3545: SKIP_BLANKS;
3546: name = xmlParseNmtoken(ctxt);
3547: if (name == NULL) {
1.230 ! veillard 3548: ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
1.66 daniel 3549: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3550: ctxt->sax->error(ctxt->userData,
1.66 daniel 3551: "NmToken expected in ATTLIST enumeration\n");
3552: ctxt->wellFormed = 0;
1.180 daniel 3553: ctxt->disableSAX = 1;
1.66 daniel 3554: return(ret);
3555: }
3556: cur = xmlCreateEnumeration(name);
1.119 daniel 3557: xmlFree(name);
1.66 daniel 3558: if (cur == NULL) return(ret);
3559: if (last == NULL) ret = last = cur;
3560: else {
3561: last->next = cur;
3562: last = cur;
3563: }
3564: SKIP_BLANKS;
1.152 daniel 3565: } while (RAW == '|');
3566: if (RAW != ')') {
1.230 ! veillard 3567: ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
1.66 daniel 3568: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3569: ctxt->sax->error(ctxt->userData,
1.66 daniel 3570: "')' required to finish ATTLIST enumeration\n");
3571: ctxt->wellFormed = 0;
1.180 daniel 3572: ctxt->disableSAX = 1;
1.66 daniel 3573: return(ret);
3574: }
3575: NEXT;
3576: return(ret);
3577: }
3578:
3579: /**
1.50 daniel 3580: * xmlParseEnumeratedType:
3581: * @ctxt: an XML parser context
1.66 daniel 3582: * @tree: the enumeration tree built while parsing
1.50 daniel 3583: *
1.66 daniel 3584: * parse an Enumerated attribute type.
1.22 daniel 3585: *
3586: * [57] EnumeratedType ::= NotationType | Enumeration
3587: *
3588: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3589: *
1.50 daniel 3590: *
1.66 daniel 3591: * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
1.22 daniel 3592: */
3593:
1.66 daniel 3594: int
3595: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.152 daniel 3596: if ((RAW == 'N') && (NXT(1) == 'O') &&
1.66 daniel 3597: (NXT(2) == 'T') && (NXT(3) == 'A') &&
3598: (NXT(4) == 'T') && (NXT(5) == 'I') &&
3599: (NXT(6) == 'O') && (NXT(7) == 'N')) {
3600: SKIP(8);
3601: if (!IS_BLANK(CUR)) {
1.230 ! veillard 3602: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.66 daniel 3603: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3604: ctxt->sax->error(ctxt->userData,
3605: "Space required after 'NOTATION'\n");
1.66 daniel 3606: ctxt->wellFormed = 0;
1.180 daniel 3607: ctxt->disableSAX = 1;
1.66 daniel 3608: return(0);
3609: }
3610: SKIP_BLANKS;
3611: *tree = xmlParseNotationType(ctxt);
3612: if (*tree == NULL) return(0);
3613: return(XML_ATTRIBUTE_NOTATION);
3614: }
3615: *tree = xmlParseEnumerationType(ctxt);
3616: if (*tree == NULL) return(0);
3617: return(XML_ATTRIBUTE_ENUMERATION);
1.22 daniel 3618: }
3619:
1.50 daniel 3620: /**
3621: * xmlParseAttributeType:
3622: * @ctxt: an XML parser context
1.66 daniel 3623: * @tree: the enumeration tree built while parsing
1.50 daniel 3624: *
1.59 daniel 3625: * parse the Attribute list def for an element
1.22 daniel 3626: *
3627: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3628: *
3629: * [55] StringType ::= 'CDATA'
3630: *
3631: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3632: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1.50 daniel 3633: *
1.102 daniel 3634: * Validity constraints for attribute values syntax are checked in
3635: * xmlValidateAttributeValue()
3636: *
1.99 daniel 3637: * [ VC: ID ]
1.117 daniel 3638: * Values of type ID must match the Name production. A name must not
1.99 daniel 3639: * appear more than once in an XML document as a value of this type;
3640: * i.e., ID values must uniquely identify the elements which bear them.
3641: *
3642: * [ VC: One ID per Element Type ]
1.117 daniel 3643: * No element type may have more than one ID attribute specified.
1.99 daniel 3644: *
3645: * [ VC: ID Attribute Default ]
1.117 daniel 3646: * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
1.99 daniel 3647: *
3648: * [ VC: IDREF ]
1.102 daniel 3649: * Values of type IDREF must match the Name production, and values
1.140 daniel 3650: * of type IDREFS must match Names; each IDREF Name must match the value
1.117 daniel 3651: * of an ID attribute on some element in the XML document; i.e. IDREF
1.99 daniel 3652: * values must match the value of some ID attribute.
3653: *
3654: * [ VC: Entity Name ]
1.102 daniel 3655: * Values of type ENTITY must match the Name production, values
1.140 daniel 3656: * of type ENTITIES must match Names; each Entity Name must match the
1.117 daniel 3657: * name of an unparsed entity declared in the DTD.
1.99 daniel 3658: *
3659: * [ VC: Name Token ]
1.102 daniel 3660: * Values of type NMTOKEN must match the Nmtoken production; values
1.99 daniel 3661: * of type NMTOKENS must match Nmtokens.
3662: *
1.69 daniel 3663: * Returns the attribute type
1.22 daniel 3664: */
1.59 daniel 3665: int
1.66 daniel 3666: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.91 daniel 3667: SHRINK;
1.152 daniel 3668: if ((RAW == 'C') && (NXT(1) == 'D') &&
1.40 daniel 3669: (NXT(2) == 'A') && (NXT(3) == 'T') &&
3670: (NXT(4) == 'A')) {
3671: SKIP(5);
1.66 daniel 3672: return(XML_ATTRIBUTE_CDATA);
1.152 daniel 3673: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.40 daniel 3674: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.97 daniel 3675: (NXT(4) == 'F') && (NXT(5) == 'S')) {
3676: SKIP(6);
3677: return(XML_ATTRIBUTE_IDREFS);
1.152 daniel 3678: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.97 daniel 3679: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.40 daniel 3680: (NXT(4) == 'F')) {
3681: SKIP(5);
1.59 daniel 3682: return(XML_ATTRIBUTE_IDREF);
1.152 daniel 3683: } else if ((RAW == 'I') && (NXT(1) == 'D')) {
1.66 daniel 3684: SKIP(2);
3685: return(XML_ATTRIBUTE_ID);
1.152 daniel 3686: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 3687: (NXT(2) == 'T') && (NXT(3) == 'I') &&
3688: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3689: SKIP(6);
1.59 daniel 3690: return(XML_ATTRIBUTE_ENTITY);
1.152 daniel 3691: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 3692: (NXT(2) == 'T') && (NXT(3) == 'I') &&
3693: (NXT(4) == 'T') && (NXT(5) == 'I') &&
3694: (NXT(6) == 'E') && (NXT(7) == 'S')) {
3695: SKIP(8);
1.59 daniel 3696: return(XML_ATTRIBUTE_ENTITIES);
1.152 daniel 3697: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.40 daniel 3698: (NXT(2) == 'T') && (NXT(3) == 'O') &&
3699: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.66 daniel 3700: (NXT(6) == 'N') && (NXT(7) == 'S')) {
3701: SKIP(8);
3702: return(XML_ATTRIBUTE_NMTOKENS);
1.152 daniel 3703: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.66 daniel 3704: (NXT(2) == 'T') && (NXT(3) == 'O') &&
3705: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.40 daniel 3706: (NXT(6) == 'N')) {
3707: SKIP(7);
1.59 daniel 3708: return(XML_ATTRIBUTE_NMTOKEN);
1.22 daniel 3709: }
1.66 daniel 3710: return(xmlParseEnumeratedType(ctxt, tree));
1.22 daniel 3711: }
3712:
1.50 daniel 3713: /**
3714: * xmlParseAttributeListDecl:
3715: * @ctxt: an XML parser context
3716: *
3717: * : parse the Attribute list def for an element
1.22 daniel 3718: *
3719: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3720: *
3721: * [53] AttDef ::= S Name S AttType S DefaultDecl
1.50 daniel 3722: *
1.22 daniel 3723: */
1.55 daniel 3724: void
3725: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 3726: xmlChar *elemName;
3727: xmlChar *attrName;
1.103 daniel 3728: xmlEnumerationPtr tree;
1.22 daniel 3729:
1.152 daniel 3730: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 3731: (NXT(2) == 'A') && (NXT(3) == 'T') &&
3732: (NXT(4) == 'T') && (NXT(5) == 'L') &&
3733: (NXT(6) == 'I') && (NXT(7) == 'S') &&
1.59 daniel 3734: (NXT(8) == 'T')) {
1.187 daniel 3735: xmlParserInputPtr input = ctxt->input;
3736:
1.40 daniel 3737: SKIP(9);
1.59 daniel 3738: if (!IS_BLANK(CUR)) {
1.230 ! veillard 3739: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3740: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3741: ctxt->sax->error(ctxt->userData,
3742: "Space required after '<!ATTLIST'\n");
1.59 daniel 3743: ctxt->wellFormed = 0;
1.180 daniel 3744: ctxt->disableSAX = 1;
1.59 daniel 3745: }
1.42 daniel 3746: SKIP_BLANKS;
1.59 daniel 3747: elemName = xmlParseName(ctxt);
3748: if (elemName == NULL) {
1.230 ! veillard 3749: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 3750: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3751: ctxt->sax->error(ctxt->userData,
3752: "ATTLIST: no name for Element\n");
1.59 daniel 3753: ctxt->wellFormed = 0;
1.180 daniel 3754: ctxt->disableSAX = 1;
1.22 daniel 3755: return;
3756: }
1.42 daniel 3757: SKIP_BLANKS;
1.220 veillard 3758: GROW;
1.152 daniel 3759: while (RAW != '>') {
1.123 daniel 3760: const xmlChar *check = CUR_PTR;
1.59 daniel 3761: int type;
3762: int def;
1.123 daniel 3763: xmlChar *defaultValue = NULL;
1.59 daniel 3764:
1.220 veillard 3765: GROW;
1.103 daniel 3766: tree = NULL;
1.59 daniel 3767: attrName = xmlParseName(ctxt);
3768: if (attrName == NULL) {
1.230 ! veillard 3769: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 3770: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3771: ctxt->sax->error(ctxt->userData,
3772: "ATTLIST: no name for Attribute\n");
1.59 daniel 3773: ctxt->wellFormed = 0;
1.180 daniel 3774: ctxt->disableSAX = 1;
1.59 daniel 3775: break;
3776: }
1.97 daniel 3777: GROW;
1.59 daniel 3778: if (!IS_BLANK(CUR)) {
1.230 ! veillard 3779: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3780: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3781: ctxt->sax->error(ctxt->userData,
1.59 daniel 3782: "Space required after the attribute name\n");
3783: ctxt->wellFormed = 0;
1.180 daniel 3784: ctxt->disableSAX = 1;
1.170 daniel 3785: if (attrName != NULL)
3786: xmlFree(attrName);
3787: if (defaultValue != NULL)
3788: xmlFree(defaultValue);
1.59 daniel 3789: break;
3790: }
3791: SKIP_BLANKS;
3792:
1.66 daniel 3793: type = xmlParseAttributeType(ctxt, &tree);
1.170 daniel 3794: if (type <= 0) {
3795: if (attrName != NULL)
3796: xmlFree(attrName);
3797: if (defaultValue != NULL)
3798: xmlFree(defaultValue);
3799: break;
3800: }
1.22 daniel 3801:
1.97 daniel 3802: GROW;
1.59 daniel 3803: if (!IS_BLANK(CUR)) {
1.230 ! veillard 3804: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3805: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3806: ctxt->sax->error(ctxt->userData,
1.59 daniel 3807: "Space required after the attribute type\n");
3808: ctxt->wellFormed = 0;
1.180 daniel 3809: ctxt->disableSAX = 1;
1.170 daniel 3810: if (attrName != NULL)
3811: xmlFree(attrName);
3812: if (defaultValue != NULL)
3813: xmlFree(defaultValue);
3814: if (tree != NULL)
3815: xmlFreeEnumeration(tree);
1.59 daniel 3816: break;
3817: }
1.42 daniel 3818: SKIP_BLANKS;
1.59 daniel 3819:
3820: def = xmlParseDefaultDecl(ctxt, &defaultValue);
1.170 daniel 3821: if (def <= 0) {
3822: if (attrName != NULL)
3823: xmlFree(attrName);
3824: if (defaultValue != NULL)
3825: xmlFree(defaultValue);
3826: if (tree != NULL)
3827: xmlFreeEnumeration(tree);
3828: break;
3829: }
1.59 daniel 3830:
1.97 daniel 3831: GROW;
1.152 daniel 3832: if (RAW != '>') {
1.59 daniel 3833: if (!IS_BLANK(CUR)) {
1.230 ! veillard 3834: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3835: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3836: ctxt->sax->error(ctxt->userData,
1.59 daniel 3837: "Space required after the attribute default value\n");
3838: ctxt->wellFormed = 0;
1.180 daniel 3839: ctxt->disableSAX = 1;
1.170 daniel 3840: if (attrName != NULL)
3841: xmlFree(attrName);
3842: if (defaultValue != NULL)
3843: xmlFree(defaultValue);
3844: if (tree != NULL)
3845: xmlFreeEnumeration(tree);
1.59 daniel 3846: break;
3847: }
3848: SKIP_BLANKS;
3849: }
1.40 daniel 3850: if (check == CUR_PTR) {
1.230 ! veillard 3851: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 3852: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3853: ctxt->sax->error(ctxt->userData,
1.59 daniel 3854: "xmlParseAttributeListDecl: detected internal error\n");
1.170 daniel 3855: if (attrName != NULL)
3856: xmlFree(attrName);
3857: if (defaultValue != NULL)
3858: xmlFree(defaultValue);
3859: if (tree != NULL)
3860: xmlFreeEnumeration(tree);
1.22 daniel 3861: break;
3862: }
1.171 daniel 3863: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3864: (ctxt->sax->attributeDecl != NULL))
1.74 daniel 3865: ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
1.66 daniel 3866: type, def, defaultValue, tree);
1.59 daniel 3867: if (attrName != NULL)
1.119 daniel 3868: xmlFree(attrName);
1.59 daniel 3869: if (defaultValue != NULL)
1.119 daniel 3870: xmlFree(defaultValue);
1.97 daniel 3871: GROW;
1.22 daniel 3872: }
1.187 daniel 3873: if (RAW == '>') {
3874: if (input != ctxt->input) {
1.230 ! veillard 3875: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 3876: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3877: ctxt->sax->error(ctxt->userData,
3878: "Attribute list declaration doesn't start and stop in the same entity\n");
3879: ctxt->wellFormed = 0;
3880: ctxt->disableSAX = 1;
3881: }
1.40 daniel 3882: NEXT;
1.187 daniel 3883: }
1.22 daniel 3884:
1.119 daniel 3885: xmlFree(elemName);
1.22 daniel 3886: }
3887: }
3888:
1.50 daniel 3889: /**
1.61 daniel 3890: * xmlParseElementMixedContentDecl:
3891: * @ctxt: an XML parser context
3892: *
3893: * parse the declaration for a Mixed Element content
3894: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3895: *
3896: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
3897: * '(' S? '#PCDATA' S? ')'
3898: *
1.99 daniel 3899: * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
3900: *
3901: * [ VC: No Duplicate Types ]
1.117 daniel 3902: * The same name must not appear more than once in a single
3903: * mixed-content declaration.
1.99 daniel 3904: *
1.61 daniel 3905: * returns: the list of the xmlElementContentPtr describing the element choices
3906: */
3907: xmlElementContentPtr
1.62 daniel 3908: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
1.64 daniel 3909: xmlElementContentPtr ret = NULL, cur = NULL, n;
1.123 daniel 3910: xmlChar *elem = NULL;
1.61 daniel 3911:
1.97 daniel 3912: GROW;
1.152 daniel 3913: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 3914: (NXT(2) == 'C') && (NXT(3) == 'D') &&
3915: (NXT(4) == 'A') && (NXT(5) == 'T') &&
3916: (NXT(6) == 'A')) {
3917: SKIP(7);
3918: SKIP_BLANKS;
1.91 daniel 3919: SHRINK;
1.152 daniel 3920: if (RAW == ')') {
1.187 daniel 3921: ctxt->entity = ctxt->input;
1.63 daniel 3922: NEXT;
3923: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
1.152 daniel 3924: if (RAW == '*') {
1.136 daniel 3925: ret->ocur = XML_ELEMENT_CONTENT_MULT;
3926: NEXT;
3927: }
1.63 daniel 3928: return(ret);
3929: }
1.152 daniel 3930: if ((RAW == '(') || (RAW == '|')) {
1.61 daniel 3931: ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3932: if (ret == NULL) return(NULL);
1.99 daniel 3933: }
1.152 daniel 3934: while (RAW == '|') {
1.64 daniel 3935: NEXT;
1.61 daniel 3936: if (elem == NULL) {
3937: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3938: if (ret == NULL) return(NULL);
3939: ret->c1 = cur;
1.64 daniel 3940: cur = ret;
1.61 daniel 3941: } else {
1.64 daniel 3942: n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3943: if (n == NULL) return(NULL);
3944: n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
3945: cur->c2 = n;
3946: cur = n;
1.119 daniel 3947: xmlFree(elem);
1.61 daniel 3948: }
3949: SKIP_BLANKS;
3950: elem = xmlParseName(ctxt);
3951: if (elem == NULL) {
1.230 ! veillard 3952: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.61 daniel 3953: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3954: ctxt->sax->error(ctxt->userData,
1.61 daniel 3955: "xmlParseElementMixedContentDecl : Name expected\n");
3956: ctxt->wellFormed = 0;
1.180 daniel 3957: ctxt->disableSAX = 1;
1.61 daniel 3958: xmlFreeElementContent(cur);
3959: return(NULL);
3960: }
3961: SKIP_BLANKS;
1.97 daniel 3962: GROW;
1.61 daniel 3963: }
1.152 daniel 3964: if ((RAW == ')') && (NXT(1) == '*')) {
1.66 daniel 3965: if (elem != NULL) {
1.61 daniel 3966: cur->c2 = xmlNewElementContent(elem,
3967: XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 3968: xmlFree(elem);
1.66 daniel 3969: }
1.65 daniel 3970: ret->ocur = XML_ELEMENT_CONTENT_MULT;
1.187 daniel 3971: ctxt->entity = ctxt->input;
1.64 daniel 3972: SKIP(2);
1.61 daniel 3973: } else {
1.119 daniel 3974: if (elem != NULL) xmlFree(elem);
1.230 ! veillard 3975: xmlFreeElementContent(ret);
! 3976: ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
1.61 daniel 3977: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3978: ctxt->sax->error(ctxt->userData,
1.63 daniel 3979: "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
1.61 daniel 3980: ctxt->wellFormed = 0;
1.180 daniel 3981: ctxt->disableSAX = 1;
1.61 daniel 3982: return(NULL);
3983: }
3984:
3985: } else {
1.230 ! veillard 3986: ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
1.61 daniel 3987: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3988: ctxt->sax->error(ctxt->userData,
1.61 daniel 3989: "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
3990: ctxt->wellFormed = 0;
1.180 daniel 3991: ctxt->disableSAX = 1;
1.61 daniel 3992: }
3993: return(ret);
3994: }
3995:
3996: /**
3997: * xmlParseElementChildrenContentDecl:
1.50 daniel 3998: * @ctxt: an XML parser context
3999: *
1.61 daniel 4000: * parse the declaration for a Mixed Element content
4001: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
1.22 daniel 4002: *
1.61 daniel 4003: *
1.22 daniel 4004: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4005: *
4006: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4007: *
4008: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4009: *
4010: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4011: *
1.99 daniel 4012: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4013: * TODO Parameter-entity replacement text must be properly nested
4014: * with parenthetized groups. That is to say, if either of the
4015: * opening or closing parentheses in a choice, seq, or Mixed
4016: * construct is contained in the replacement text for a parameter
4017: * entity, both must be contained in the same replacement text. For
4018: * interoperability, if a parameter-entity reference appears in a
4019: * choice, seq, or Mixed construct, its replacement text should not
4020: * be empty, and neither the first nor last non-blank character of
4021: * the replacement text should be a connector (| or ,).
4022: *
1.62 daniel 4023: * returns: the tree of xmlElementContentPtr describing the element
1.61 daniel 4024: * hierarchy.
4025: */
4026: xmlElementContentPtr
1.62 daniel 4027: xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
1.63 daniel 4028: xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
1.123 daniel 4029: xmlChar *elem;
4030: xmlChar type = 0;
1.62 daniel 4031:
4032: SKIP_BLANKS;
1.94 daniel 4033: GROW;
1.152 daniel 4034: if (RAW == '(') {
1.63 daniel 4035: /* Recurse on first child */
1.62 daniel 4036: NEXT;
4037: SKIP_BLANKS;
4038: cur = ret = xmlParseElementChildrenContentDecl(ctxt);
4039: SKIP_BLANKS;
1.101 daniel 4040: GROW;
1.62 daniel 4041: } else {
4042: elem = xmlParseName(ctxt);
4043: if (elem == NULL) {
1.230 ! veillard 4044: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 4045: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4046: ctxt->sax->error(ctxt->userData,
1.62 daniel 4047: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4048: ctxt->wellFormed = 0;
1.180 daniel 4049: ctxt->disableSAX = 1;
1.62 daniel 4050: return(NULL);
4051: }
4052: cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.101 daniel 4053: GROW;
1.152 daniel 4054: if (RAW == '?') {
1.104 daniel 4055: cur->ocur = XML_ELEMENT_CONTENT_OPT;
1.62 daniel 4056: NEXT;
1.152 daniel 4057: } else if (RAW == '*') {
1.104 daniel 4058: cur->ocur = XML_ELEMENT_CONTENT_MULT;
1.62 daniel 4059: NEXT;
1.152 daniel 4060: } else if (RAW == '+') {
1.104 daniel 4061: cur->ocur = XML_ELEMENT_CONTENT_PLUS;
1.62 daniel 4062: NEXT;
4063: } else {
1.104 daniel 4064: cur->ocur = XML_ELEMENT_CONTENT_ONCE;
1.62 daniel 4065: }
1.119 daniel 4066: xmlFree(elem);
1.101 daniel 4067: GROW;
1.62 daniel 4068: }
4069: SKIP_BLANKS;
1.91 daniel 4070: SHRINK;
1.152 daniel 4071: while (RAW != ')') {
1.63 daniel 4072: /*
4073: * Each loop we parse one separator and one element.
4074: */
1.152 daniel 4075: if (RAW == ',') {
1.62 daniel 4076: if (type == 0) type = CUR;
4077:
4078: /*
4079: * Detect "Name | Name , Name" error
4080: */
4081: else if (type != CUR) {
1.230 ! veillard 4082: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 4083: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4084: ctxt->sax->error(ctxt->userData,
1.62 daniel 4085: "xmlParseElementChildrenContentDecl : '%c' expected\n",
4086: type);
4087: ctxt->wellFormed = 0;
1.180 daniel 4088: ctxt->disableSAX = 1;
1.170 daniel 4089: if ((op != NULL) && (op != ret))
4090: xmlFreeElementContent(op);
1.211 veillard 4091: if ((last != NULL) && (last != ret) &&
4092: (last != ret->c1) && (last != ret->c2))
1.170 daniel 4093: xmlFreeElementContent(last);
4094: if (ret != NULL)
4095: xmlFreeElementContent(ret);
1.62 daniel 4096: return(NULL);
4097: }
1.64 daniel 4098: NEXT;
1.62 daniel 4099:
1.63 daniel 4100: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4101: if (op == NULL) {
4102: xmlFreeElementContent(ret);
4103: return(NULL);
4104: }
4105: if (last == NULL) {
4106: op->c1 = ret;
1.65 daniel 4107: ret = cur = op;
1.63 daniel 4108: } else {
4109: cur->c2 = op;
4110: op->c1 = last;
4111: cur =op;
1.65 daniel 4112: last = NULL;
1.63 daniel 4113: }
1.152 daniel 4114: } else if (RAW == '|') {
1.62 daniel 4115: if (type == 0) type = CUR;
4116:
4117: /*
1.63 daniel 4118: * Detect "Name , Name | Name" error
1.62 daniel 4119: */
4120: else if (type != CUR) {
1.230 ! veillard 4121: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 4122: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4123: ctxt->sax->error(ctxt->userData,
1.62 daniel 4124: "xmlParseElementChildrenContentDecl : '%c' expected\n",
4125: type);
4126: ctxt->wellFormed = 0;
1.180 daniel 4127: ctxt->disableSAX = 1;
1.211 veillard 4128: if ((op != NULL) && (op != ret) && (op != last))
1.170 daniel 4129: xmlFreeElementContent(op);
1.211 veillard 4130: if ((last != NULL) && (last != ret) &&
4131: (last != ret->c1) && (last != ret->c2))
1.170 daniel 4132: xmlFreeElementContent(last);
4133: if (ret != NULL)
4134: xmlFreeElementContent(ret);
1.62 daniel 4135: return(NULL);
4136: }
1.64 daniel 4137: NEXT;
1.62 daniel 4138:
1.63 daniel 4139: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4140: if (op == NULL) {
1.170 daniel 4141: if ((op != NULL) && (op != ret))
4142: xmlFreeElementContent(op);
1.211 veillard 4143: if ((last != NULL) && (last != ret) &&
4144: (last != ret->c1) && (last != ret->c2))
1.170 daniel 4145: xmlFreeElementContent(last);
4146: if (ret != NULL)
4147: xmlFreeElementContent(ret);
1.63 daniel 4148: return(NULL);
4149: }
4150: if (last == NULL) {
4151: op->c1 = ret;
1.65 daniel 4152: ret = cur = op;
1.63 daniel 4153: } else {
4154: cur->c2 = op;
4155: op->c1 = last;
4156: cur =op;
1.65 daniel 4157: last = NULL;
1.63 daniel 4158: }
1.62 daniel 4159: } else {
1.230 ! veillard 4160: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
1.62 daniel 4161: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4162: ctxt->sax->error(ctxt->userData,
1.62 daniel 4163: "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4164: ctxt->wellFormed = 0;
1.180 daniel 4165: ctxt->disableSAX = 1;
1.170 daniel 4166: if ((op != NULL) && (op != ret))
4167: xmlFreeElementContent(op);
1.211 veillard 4168: if ((last != NULL) && (last != ret) &&
4169: (last != ret->c1) && (last != ret->c2))
1.170 daniel 4170: xmlFreeElementContent(last);
4171: if (ret != NULL)
4172: xmlFreeElementContent(ret);
1.62 daniel 4173: return(NULL);
4174: }
1.101 daniel 4175: GROW;
1.62 daniel 4176: SKIP_BLANKS;
1.101 daniel 4177: GROW;
1.152 daniel 4178: if (RAW == '(') {
1.63 daniel 4179: /* Recurse on second child */
1.62 daniel 4180: NEXT;
4181: SKIP_BLANKS;
1.65 daniel 4182: last = xmlParseElementChildrenContentDecl(ctxt);
1.62 daniel 4183: SKIP_BLANKS;
4184: } else {
4185: elem = xmlParseName(ctxt);
4186: if (elem == NULL) {
1.230 ! veillard 4187: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 4188: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4189: ctxt->sax->error(ctxt->userData,
1.122 daniel 4190: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.62 daniel 4191: ctxt->wellFormed = 0;
1.180 daniel 4192: ctxt->disableSAX = 1;
1.170 daniel 4193: if ((op != NULL) && (op != ret))
4194: xmlFreeElementContent(op);
1.211 veillard 4195: if ((last != NULL) && (last != ret) &&
4196: (last != ret->c1) && (last != ret->c2))
1.170 daniel 4197: xmlFreeElementContent(last);
4198: if (ret != NULL)
4199: xmlFreeElementContent(ret);
1.62 daniel 4200: return(NULL);
4201: }
1.65 daniel 4202: last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 4203: xmlFree(elem);
1.152 daniel 4204: if (RAW == '?') {
1.105 daniel 4205: last->ocur = XML_ELEMENT_CONTENT_OPT;
4206: NEXT;
1.152 daniel 4207: } else if (RAW == '*') {
1.105 daniel 4208: last->ocur = XML_ELEMENT_CONTENT_MULT;
4209: NEXT;
1.152 daniel 4210: } else if (RAW == '+') {
1.105 daniel 4211: last->ocur = XML_ELEMENT_CONTENT_PLUS;
4212: NEXT;
4213: } else {
4214: last->ocur = XML_ELEMENT_CONTENT_ONCE;
4215: }
1.63 daniel 4216: }
4217: SKIP_BLANKS;
1.97 daniel 4218: GROW;
1.64 daniel 4219: }
1.65 daniel 4220: if ((cur != NULL) && (last != NULL)) {
4221: cur->c2 = last;
1.62 daniel 4222: }
1.187 daniel 4223: ctxt->entity = ctxt->input;
1.62 daniel 4224: NEXT;
1.152 daniel 4225: if (RAW == '?') {
1.62 daniel 4226: ret->ocur = XML_ELEMENT_CONTENT_OPT;
4227: NEXT;
1.152 daniel 4228: } else if (RAW == '*') {
1.62 daniel 4229: ret->ocur = XML_ELEMENT_CONTENT_MULT;
4230: NEXT;
1.152 daniel 4231: } else if (RAW == '+') {
1.62 daniel 4232: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
4233: NEXT;
4234: }
4235: return(ret);
1.61 daniel 4236: }
4237:
4238: /**
4239: * xmlParseElementContentDecl:
4240: * @ctxt: an XML parser context
4241: * @name: the name of the element being defined.
4242: * @result: the Element Content pointer will be stored here if any
1.22 daniel 4243: *
1.61 daniel 4244: * parse the declaration for an Element content either Mixed or Children,
4245: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4246: *
4247: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1.50 daniel 4248: *
1.61 daniel 4249: * returns: the type of element content XML_ELEMENT_TYPE_xxx
1.22 daniel 4250: */
4251:
1.61 daniel 4252: int
1.123 daniel 4253: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
1.61 daniel 4254: xmlElementContentPtr *result) {
4255:
4256: xmlElementContentPtr tree = NULL;
1.187 daniel 4257: xmlParserInputPtr input = ctxt->input;
1.61 daniel 4258: int res;
4259:
4260: *result = NULL;
4261:
1.152 daniel 4262: if (RAW != '(') {
1.230 ! veillard 4263: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.61 daniel 4264: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4265: ctxt->sax->error(ctxt->userData,
1.61 daniel 4266: "xmlParseElementContentDecl : '(' expected\n");
4267: ctxt->wellFormed = 0;
1.180 daniel 4268: ctxt->disableSAX = 1;
1.61 daniel 4269: return(-1);
4270: }
4271: NEXT;
1.97 daniel 4272: GROW;
1.61 daniel 4273: SKIP_BLANKS;
1.152 daniel 4274: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 4275: (NXT(2) == 'C') && (NXT(3) == 'D') &&
4276: (NXT(4) == 'A') && (NXT(5) == 'T') &&
4277: (NXT(6) == 'A')) {
1.62 daniel 4278: tree = xmlParseElementMixedContentDecl(ctxt);
1.61 daniel 4279: res = XML_ELEMENT_TYPE_MIXED;
4280: } else {
1.62 daniel 4281: tree = xmlParseElementChildrenContentDecl(ctxt);
1.61 daniel 4282: res = XML_ELEMENT_TYPE_ELEMENT;
4283: }
1.187 daniel 4284: if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
1.230 ! veillard 4285: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 4286: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4287: ctxt->sax->error(ctxt->userData,
4288: "Element content declaration doesn't start and stop in the same entity\n");
4289: ctxt->wellFormed = 0;
4290: ctxt->disableSAX = 1;
4291: }
1.61 daniel 4292: SKIP_BLANKS;
1.63 daniel 4293: *result = tree;
1.61 daniel 4294: return(res);
1.22 daniel 4295: }
4296:
1.50 daniel 4297: /**
4298: * xmlParseElementDecl:
4299: * @ctxt: an XML parser context
4300: *
4301: * parse an Element declaration.
1.22 daniel 4302: *
4303: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4304: *
1.99 daniel 4305: * [ VC: Unique Element Type Declaration ]
1.117 daniel 4306: * No element type may be declared more than once
1.69 daniel 4307: *
4308: * Returns the type of the element, or -1 in case of error
1.22 daniel 4309: */
1.59 daniel 4310: int
1.55 daniel 4311: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4312: xmlChar *name;
1.59 daniel 4313: int ret = -1;
1.61 daniel 4314: xmlElementContentPtr content = NULL;
1.22 daniel 4315:
1.97 daniel 4316: GROW;
1.152 daniel 4317: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4318: (NXT(2) == 'E') && (NXT(3) == 'L') &&
4319: (NXT(4) == 'E') && (NXT(5) == 'M') &&
4320: (NXT(6) == 'E') && (NXT(7) == 'N') &&
1.59 daniel 4321: (NXT(8) == 'T')) {
1.187 daniel 4322: xmlParserInputPtr input = ctxt->input;
4323:
1.40 daniel 4324: SKIP(9);
1.59 daniel 4325: if (!IS_BLANK(CUR)) {
1.230 ! veillard 4326: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4327: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4328: ctxt->sax->error(ctxt->userData,
1.59 daniel 4329: "Space required after 'ELEMENT'\n");
4330: ctxt->wellFormed = 0;
1.180 daniel 4331: ctxt->disableSAX = 1;
1.59 daniel 4332: }
1.42 daniel 4333: SKIP_BLANKS;
1.22 daniel 4334: name = xmlParseName(ctxt);
4335: if (name == NULL) {
1.230 ! veillard 4336: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 4337: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4338: ctxt->sax->error(ctxt->userData,
1.59 daniel 4339: "xmlParseElementDecl: no name for Element\n");
4340: ctxt->wellFormed = 0;
1.180 daniel 4341: ctxt->disableSAX = 1;
1.59 daniel 4342: return(-1);
4343: }
4344: if (!IS_BLANK(CUR)) {
1.230 ! veillard 4345: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4346: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4347: ctxt->sax->error(ctxt->userData,
1.59 daniel 4348: "Space required after the element name\n");
4349: ctxt->wellFormed = 0;
1.180 daniel 4350: ctxt->disableSAX = 1;
1.22 daniel 4351: }
1.42 daniel 4352: SKIP_BLANKS;
1.152 daniel 4353: if ((RAW == 'E') && (NXT(1) == 'M') &&
1.40 daniel 4354: (NXT(2) == 'P') && (NXT(3) == 'T') &&
4355: (NXT(4) == 'Y')) {
4356: SKIP(5);
1.22 daniel 4357: /*
4358: * Element must always be empty.
4359: */
1.59 daniel 4360: ret = XML_ELEMENT_TYPE_EMPTY;
1.152 daniel 4361: } else if ((RAW == 'A') && (NXT(1) == 'N') &&
1.40 daniel 4362: (NXT(2) == 'Y')) {
4363: SKIP(3);
1.22 daniel 4364: /*
4365: * Element is a generic container.
4366: */
1.59 daniel 4367: ret = XML_ELEMENT_TYPE_ANY;
1.152 daniel 4368: } else if (RAW == '(') {
1.61 daniel 4369: ret = xmlParseElementContentDecl(ctxt, name, &content);
1.22 daniel 4370: } else {
1.98 daniel 4371: /*
4372: * [ WFC: PEs in Internal Subset ] error handling.
4373: */
1.152 daniel 4374: if ((RAW == '%') && (ctxt->external == 0) &&
1.98 daniel 4375: (ctxt->inputNr == 1)) {
1.230 ! veillard 4376: ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
1.98 daniel 4377: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4378: ctxt->sax->error(ctxt->userData,
4379: "PEReference: forbidden within markup decl in internal subset\n");
4380: } else {
1.230 ! veillard 4381: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.98 daniel 4382: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4383: ctxt->sax->error(ctxt->userData,
4384: "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4385: }
1.61 daniel 4386: ctxt->wellFormed = 0;
1.180 daniel 4387: ctxt->disableSAX = 1;
1.119 daniel 4388: if (name != NULL) xmlFree(name);
1.61 daniel 4389: return(-1);
1.22 daniel 4390: }
1.142 daniel 4391:
4392: SKIP_BLANKS;
4393: /*
4394: * Pop-up of finished entities.
4395: */
1.152 daniel 4396: while ((RAW == 0) && (ctxt->inputNr > 1))
1.142 daniel 4397: xmlPopInput(ctxt);
1.42 daniel 4398: SKIP_BLANKS;
1.142 daniel 4399:
1.152 daniel 4400: if (RAW != '>') {
1.230 ! veillard 4401: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.55 daniel 4402: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4403: ctxt->sax->error(ctxt->userData,
1.31 daniel 4404: "xmlParseElementDecl: expected '>' at the end\n");
1.59 daniel 4405: ctxt->wellFormed = 0;
1.180 daniel 4406: ctxt->disableSAX = 1;
1.61 daniel 4407: } else {
1.187 daniel 4408: if (input != ctxt->input) {
1.230 ! veillard 4409: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 4410: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4411: ctxt->sax->error(ctxt->userData,
4412: "Element declaration doesn't start and stop in the same entity\n");
4413: ctxt->wellFormed = 0;
4414: ctxt->disableSAX = 1;
4415: }
4416:
1.40 daniel 4417: NEXT;
1.171 daniel 4418: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4419: (ctxt->sax->elementDecl != NULL))
1.76 daniel 4420: ctxt->sax->elementDecl(ctxt->userData, name, ret,
4421: content);
1.61 daniel 4422: }
1.84 daniel 4423: if (content != NULL) {
4424: xmlFreeElementContent(content);
4425: }
1.61 daniel 4426: if (name != NULL) {
1.119 daniel 4427: xmlFree(name);
1.61 daniel 4428: }
1.22 daniel 4429: }
1.59 daniel 4430: return(ret);
1.22 daniel 4431: }
4432:
1.50 daniel 4433: /**
4434: * xmlParseMarkupDecl:
4435: * @ctxt: an XML parser context
4436: *
4437: * parse Markup declarations
1.22 daniel 4438: *
4439: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4440: * NotationDecl | PI | Comment
4441: *
1.98 daniel 4442: * [ VC: Proper Declaration/PE Nesting ]
1.229 veillard 4443: * Parameter-entity replacement text must be properly nested with
1.98 daniel 4444: * markup declarations. That is to say, if either the first character
4445: * or the last character of a markup declaration (markupdecl above) is
4446: * contained in the replacement text for a parameter-entity reference,
4447: * both must be contained in the same replacement text.
4448: *
4449: * [ WFC: PEs in Internal Subset ]
4450: * In the internal DTD subset, parameter-entity references can occur
4451: * only where markup declarations can occur, not within markup declarations.
4452: * (This does not apply to references that occur in external parameter
4453: * entities or to the external subset.)
1.22 daniel 4454: */
1.55 daniel 4455: void
4456: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1.94 daniel 4457: GROW;
1.22 daniel 4458: xmlParseElementDecl(ctxt);
4459: xmlParseAttributeListDecl(ctxt);
4460: xmlParseEntityDecl(ctxt);
4461: xmlParseNotationDecl(ctxt);
4462: xmlParsePI(ctxt);
1.114 daniel 4463: xmlParseComment(ctxt);
1.98 daniel 4464: /*
4465: * This is only for internal subset. On external entities,
4466: * the replacement is done before parsing stage
4467: */
4468: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4469: xmlParsePEReference(ctxt);
1.97 daniel 4470: ctxt->instate = XML_PARSER_DTD;
1.22 daniel 4471: }
4472:
1.50 daniel 4473: /**
1.76 daniel 4474: * xmlParseTextDecl:
4475: * @ctxt: an XML parser context
4476: *
4477: * parse an XML declaration header for external entities
4478: *
4479: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
1.176 daniel 4480: *
4481: * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
1.76 daniel 4482: */
4483:
1.172 daniel 4484: void
1.76 daniel 4485: xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4486: xmlChar *version;
1.76 daniel 4487:
4488: /*
4489: * We know that '<?xml' is here.
4490: */
1.193 daniel 4491: if ((RAW == '<') && (NXT(1) == '?') &&
4492: (NXT(2) == 'x') && (NXT(3) == 'm') &&
4493: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4494: SKIP(5);
4495: } else {
1.230 ! veillard 4496: ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
1.193 daniel 4497: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4498: ctxt->sax->error(ctxt->userData,
4499: "Text declaration '<?xml' required\n");
4500: ctxt->wellFormed = 0;
4501: ctxt->disableSAX = 1;
4502:
4503: return;
4504: }
1.76 daniel 4505:
4506: if (!IS_BLANK(CUR)) {
1.230 ! veillard 4507: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 4508: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4509: ctxt->sax->error(ctxt->userData,
4510: "Space needed after '<?xml'\n");
1.76 daniel 4511: ctxt->wellFormed = 0;
1.180 daniel 4512: ctxt->disableSAX = 1;
1.76 daniel 4513: }
4514: SKIP_BLANKS;
4515:
4516: /*
4517: * We may have the VersionInfo here.
4518: */
4519: version = xmlParseVersionInfo(ctxt);
4520: if (version == NULL)
4521: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.172 daniel 4522: ctxt->input->version = version;
1.76 daniel 4523:
4524: /*
4525: * We must have the encoding declaration
4526: */
4527: if (!IS_BLANK(CUR)) {
1.230 ! veillard 4528: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 4529: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4530: ctxt->sax->error(ctxt->userData, "Space needed here\n");
1.76 daniel 4531: ctxt->wellFormed = 0;
1.180 daniel 4532: ctxt->disableSAX = 1;
1.76 daniel 4533: }
1.195 daniel 4534: xmlParseEncodingDecl(ctxt);
1.193 daniel 4535: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4536: /*
4537: * The XML REC instructs us to stop parsing right here
4538: */
4539: return;
4540: }
1.76 daniel 4541:
4542: SKIP_BLANKS;
1.152 daniel 4543: if ((RAW == '?') && (NXT(1) == '>')) {
1.76 daniel 4544: SKIP(2);
1.152 daniel 4545: } else if (RAW == '>') {
1.76 daniel 4546: /* Deprecated old WD ... */
1.230 ! veillard 4547: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 4548: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4549: ctxt->sax->error(ctxt->userData,
4550: "XML declaration must end-up with '?>'\n");
1.76 daniel 4551: ctxt->wellFormed = 0;
1.180 daniel 4552: ctxt->disableSAX = 1;
1.76 daniel 4553: NEXT;
4554: } else {
1.230 ! veillard 4555: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 4556: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4557: ctxt->sax->error(ctxt->userData,
4558: "parsing XML declaration: '?>' expected\n");
1.76 daniel 4559: ctxt->wellFormed = 0;
1.180 daniel 4560: ctxt->disableSAX = 1;
1.76 daniel 4561: MOVETO_ENDTAG(CUR_PTR);
4562: NEXT;
4563: }
4564: }
4565:
4566: /*
4567: * xmlParseConditionalSections
4568: * @ctxt: an XML parser context
4569: *
4570: * [61] conditionalSect ::= includeSect | ignoreSect
4571: * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4572: * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4573: * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4574: * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4575: */
4576:
4577: void
4578: xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
1.165 daniel 4579: SKIP(3);
4580: SKIP_BLANKS;
1.168 daniel 4581: if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4582: (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4583: (NXT(6) == 'E')) {
1.165 daniel 4584: SKIP(7);
1.168 daniel 4585: SKIP_BLANKS;
4586: if (RAW != '[') {
1.230 ! veillard 4587: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
1.168 daniel 4588: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4589: ctxt->sax->error(ctxt->userData,
4590: "XML conditional section '[' expected\n");
4591: ctxt->wellFormed = 0;
1.180 daniel 4592: ctxt->disableSAX = 1;
1.168 daniel 4593: } else {
4594: NEXT;
4595: }
1.220 veillard 4596: if (xmlParserDebugEntities) {
4597: if ((ctxt->input != NULL) && (ctxt->input->filename))
4598: fprintf(stderr, "%s(%d): ", ctxt->input->filename,
4599: ctxt->input->line);
4600: fprintf(stderr, "Entering INCLUDE Conditional Section\n");
4601: }
4602:
1.165 daniel 4603: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4604: (NXT(2) != '>'))) {
4605: const xmlChar *check = CUR_PTR;
4606: int cons = ctxt->input->consumed;
4607: int tok = ctxt->token;
4608:
4609: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4610: xmlParseConditionalSections(ctxt);
4611: } else if (IS_BLANK(CUR)) {
4612: NEXT;
4613: } else if (RAW == '%') {
4614: xmlParsePEReference(ctxt);
4615: } else
4616: xmlParseMarkupDecl(ctxt);
4617:
4618: /*
4619: * Pop-up of finished entities.
4620: */
4621: while ((RAW == 0) && (ctxt->inputNr > 1))
4622: xmlPopInput(ctxt);
4623:
4624: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4625: (tok == ctxt->token)) {
1.230 ! veillard 4626: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.165 daniel 4627: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4628: ctxt->sax->error(ctxt->userData,
4629: "Content error in the external subset\n");
4630: ctxt->wellFormed = 0;
1.180 daniel 4631: ctxt->disableSAX = 1;
1.165 daniel 4632: break;
4633: }
4634: }
1.220 veillard 4635: if (xmlParserDebugEntities) {
4636: if ((ctxt->input != NULL) && (ctxt->input->filename))
4637: fprintf(stderr, "%s(%d): ", ctxt->input->filename,
4638: ctxt->input->line);
4639: fprintf(stderr, "Leaving INCLUDE Conditional Section\n");
4640: }
4641:
1.168 daniel 4642: } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4643: (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
1.171 daniel 4644: int state;
4645:
1.168 daniel 4646: SKIP(6);
4647: SKIP_BLANKS;
4648: if (RAW != '[') {
1.230 ! veillard 4649: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
1.168 daniel 4650: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4651: ctxt->sax->error(ctxt->userData,
4652: "XML conditional section '[' expected\n");
4653: ctxt->wellFormed = 0;
1.180 daniel 4654: ctxt->disableSAX = 1;
1.168 daniel 4655: } else {
4656: NEXT;
4657: }
1.220 veillard 4658: if (xmlParserDebugEntities) {
4659: if ((ctxt->input != NULL) && (ctxt->input->filename))
4660: fprintf(stderr, "%s(%d): ", ctxt->input->filename,
4661: ctxt->input->line);
4662: fprintf(stderr, "Entering IGNORE Conditional Section\n");
4663: }
1.171 daniel 4664:
1.143 daniel 4665: /*
1.171 daniel 4666: * Parse up to the end of the conditionnal section
4667: * But disable SAX event generating DTD building in the meantime
1.143 daniel 4668: */
1.171 daniel 4669: state = ctxt->disableSAX;
1.220 veillard 4670: ctxt->disableSAX = 1;
1.165 daniel 4671: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4672: (NXT(2) != '>'))) {
1.171 daniel 4673: const xmlChar *check = CUR_PTR;
4674: int cons = ctxt->input->consumed;
4675: int tok = ctxt->token;
4676:
4677: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4678: xmlParseConditionalSections(ctxt);
4679: } else if (IS_BLANK(CUR)) {
4680: NEXT;
4681: } else if (RAW == '%') {
4682: xmlParsePEReference(ctxt);
4683: } else
4684: xmlParseMarkupDecl(ctxt);
4685:
1.165 daniel 4686: /*
4687: * Pop-up of finished entities.
4688: */
4689: while ((RAW == 0) && (ctxt->inputNr > 1))
4690: xmlPopInput(ctxt);
1.143 daniel 4691:
1.171 daniel 4692: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4693: (tok == ctxt->token)) {
1.230 ! veillard 4694: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.171 daniel 4695: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4696: ctxt->sax->error(ctxt->userData,
4697: "Content error in the external subset\n");
4698: ctxt->wellFormed = 0;
1.180 daniel 4699: ctxt->disableSAX = 1;
1.171 daniel 4700: break;
4701: }
1.165 daniel 4702: }
1.171 daniel 4703: ctxt->disableSAX = state;
1.220 veillard 4704: if (xmlParserDebugEntities) {
4705: if ((ctxt->input != NULL) && (ctxt->input->filename))
4706: fprintf(stderr, "%s(%d): ", ctxt->input->filename,
4707: ctxt->input->line);
4708: fprintf(stderr, "Leaving IGNORE Conditional Section\n");
4709: }
4710:
1.168 daniel 4711: } else {
1.230 ! veillard 4712: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
1.168 daniel 4713: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4714: ctxt->sax->error(ctxt->userData,
4715: "XML conditional section INCLUDE or IGNORE keyword expected\n");
4716: ctxt->wellFormed = 0;
1.180 daniel 4717: ctxt->disableSAX = 1;
1.143 daniel 4718: }
4719:
1.152 daniel 4720: if (RAW == 0)
1.143 daniel 4721: SHRINK;
4722:
1.152 daniel 4723: if (RAW == 0) {
1.230 ! veillard 4724: ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
1.76 daniel 4725: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4726: ctxt->sax->error(ctxt->userData,
4727: "XML conditional section not closed\n");
4728: ctxt->wellFormed = 0;
1.180 daniel 4729: ctxt->disableSAX = 1;
1.143 daniel 4730: } else {
4731: SKIP(3);
1.76 daniel 4732: }
4733: }
4734:
4735: /**
1.124 daniel 4736: * xmlParseExternalSubset:
1.76 daniel 4737: * @ctxt: an XML parser context
1.124 daniel 4738: * @ExternalID: the external identifier
4739: * @SystemID: the system identifier (or URL)
1.76 daniel 4740: *
4741: * parse Markup declarations from an external subset
4742: *
4743: * [30] extSubset ::= textDecl? extSubsetDecl
4744: *
4745: * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
4746: */
4747: void
1.123 daniel 4748: xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
4749: const xmlChar *SystemID) {
1.132 daniel 4750: GROW;
1.152 daniel 4751: if ((RAW == '<') && (NXT(1) == '?') &&
1.76 daniel 4752: (NXT(2) == 'x') && (NXT(3) == 'm') &&
4753: (NXT(4) == 'l')) {
1.172 daniel 4754: xmlParseTextDecl(ctxt);
1.193 daniel 4755: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4756: /*
4757: * The XML REC instructs us to stop parsing right here
4758: */
4759: ctxt->instate = XML_PARSER_EOF;
4760: return;
4761: }
1.76 daniel 4762: }
1.79 daniel 4763: if (ctxt->myDoc == NULL) {
1.116 daniel 4764: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
1.79 daniel 4765: }
4766: if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4767: xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4768:
1.96 daniel 4769: ctxt->instate = XML_PARSER_DTD;
1.101 daniel 4770: ctxt->external = 1;
1.152 daniel 4771: while (((RAW == '<') && (NXT(1) == '?')) ||
4772: ((RAW == '<') && (NXT(1) == '!')) ||
1.164 daniel 4773: IS_BLANK(CUR)) {
1.123 daniel 4774: const xmlChar *check = CUR_PTR;
1.115 daniel 4775: int cons = ctxt->input->consumed;
1.164 daniel 4776: int tok = ctxt->token;
1.115 daniel 4777:
1.221 veillard 4778: GROW;
1.152 daniel 4779: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
1.76 daniel 4780: xmlParseConditionalSections(ctxt);
4781: } else if (IS_BLANK(CUR)) {
4782: NEXT;
1.152 daniel 4783: } else if (RAW == '%') {
1.76 daniel 4784: xmlParsePEReference(ctxt);
4785: } else
4786: xmlParseMarkupDecl(ctxt);
1.77 daniel 4787:
4788: /*
4789: * Pop-up of finished entities.
4790: */
1.166 daniel 4791: while ((RAW == 0) && (ctxt->inputNr > 1))
1.77 daniel 4792: xmlPopInput(ctxt);
4793:
1.164 daniel 4794: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4795: (tok == ctxt->token)) {
1.230 ! veillard 4796: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.115 daniel 4797: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4798: ctxt->sax->error(ctxt->userData,
4799: "Content error in the external subset\n");
4800: ctxt->wellFormed = 0;
1.180 daniel 4801: ctxt->disableSAX = 1;
1.115 daniel 4802: break;
4803: }
1.76 daniel 4804: }
4805:
1.152 daniel 4806: if (RAW != 0) {
1.230 ! veillard 4807: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.76 daniel 4808: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4809: ctxt->sax->error(ctxt->userData,
4810: "Extra content at the end of the document\n");
4811: ctxt->wellFormed = 0;
1.180 daniel 4812: ctxt->disableSAX = 1;
1.76 daniel 4813: }
4814:
4815: }
4816:
4817: /**
1.77 daniel 4818: * xmlParseReference:
4819: * @ctxt: an XML parser context
4820: *
4821: * parse and handle entity references in content, depending on the SAX
4822: * interface, this may end-up in a call to character() if this is a
1.79 daniel 4823: * CharRef, a predefined entity, if there is no reference() callback.
4824: * or if the parser was asked to switch to that mode.
1.77 daniel 4825: *
4826: * [67] Reference ::= EntityRef | CharRef
4827: */
4828: void
4829: xmlParseReference(xmlParserCtxtPtr ctxt) {
4830: xmlEntityPtr ent;
1.123 daniel 4831: xmlChar *val;
1.152 daniel 4832: if (RAW != '&') return;
1.77 daniel 4833:
4834: if (NXT(1) == '#') {
1.152 daniel 4835: int i = 0;
1.153 daniel 4836: xmlChar out[10];
4837: int hex = NXT(2);
1.77 daniel 4838: int val = xmlParseCharRef(ctxt);
1.152 daniel 4839:
1.198 daniel 4840: if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
1.153 daniel 4841: /*
4842: * So we are using non-UTF-8 buffers
4843: * Check that the char fit on 8bits, if not
4844: * generate a CharRef.
4845: */
4846: if (val <= 0xFF) {
4847: out[0] = val;
4848: out[1] = 0;
1.171 daniel 4849: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4850: (!ctxt->disableSAX))
1.153 daniel 4851: ctxt->sax->characters(ctxt->userData, out, 1);
4852: } else {
4853: if ((hex == 'x') || (hex == 'X'))
4854: sprintf((char *)out, "#x%X", val);
4855: else
4856: sprintf((char *)out, "#%d", val);
1.171 daniel 4857: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4858: (!ctxt->disableSAX))
1.153 daniel 4859: ctxt->sax->reference(ctxt->userData, out);
4860: }
4861: } else {
4862: /*
4863: * Just encode the value in UTF-8
4864: */
4865: COPY_BUF(0 ,out, i, val);
4866: out[i] = 0;
1.171 daniel 4867: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4868: (!ctxt->disableSAX))
1.153 daniel 4869: ctxt->sax->characters(ctxt->userData, out, i);
4870: }
1.77 daniel 4871: } else {
4872: ent = xmlParseEntityRef(ctxt);
4873: if (ent == NULL) return;
4874: if ((ent->name != NULL) &&
1.159 daniel 4875: (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
1.180 daniel 4876: xmlNodePtr list = NULL;
4877: int ret;
4878:
4879:
4880: /*
4881: * The first reference to the entity trigger a parsing phase
4882: * where the ent->children is filled with the result from
4883: * the parsing.
4884: */
4885: if (ent->children == NULL) {
4886: xmlChar *value;
4887: value = ent->content;
4888:
4889: /*
4890: * Check that this entity is well formed
4891: */
4892: if ((value != NULL) &&
4893: (value[1] == 0) && (value[0] == '<') &&
4894: (!xmlStrcmp(ent->name, BAD_CAST "lt"))) {
4895: /*
1.222 veillard 4896: * DONE: get definite answer on this !!!
1.180 daniel 4897: * Lots of entity decls are used to declare a single
4898: * char
4899: * <!ENTITY lt "<">
4900: * Which seems to be valid since
4901: * 2.4: The ampersand character (&) and the left angle
4902: * bracket (<) may appear in their literal form only
4903: * when used ... They are also legal within the literal
4904: * entity value of an internal entity declaration;i
4905: * see "4.3.2 Well-Formed Parsed Entities".
4906: * IMHO 2.4 and 4.3.2 are directly in contradiction.
4907: * Looking at the OASIS test suite and James Clark
4908: * tests, this is broken. However the XML REC uses
4909: * it. Is the XML REC not well-formed ????
4910: * This is a hack to avoid this problem
1.222 veillard 4911: *
4912: * ANSWER: since lt gt amp .. are already defined,
4913: * this is a redefinition and hence the fact that the
4914: * contentis not well balanced is not a Wf error, this
4915: * is lousy but acceptable.
1.180 daniel 4916: */
4917: list = xmlNewDocText(ctxt->myDoc, value);
4918: if (list != NULL) {
4919: if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4920: (ent->children == NULL)) {
4921: ent->children = list;
4922: ent->last = list;
4923: list->parent = (xmlNodePtr) ent;
4924: } else {
4925: xmlFreeNodeList(list);
4926: }
4927: } else if (list != NULL) {
4928: xmlFreeNodeList(list);
4929: }
1.181 daniel 4930: } else {
1.180 daniel 4931: /*
4932: * 4.3.2: An internal general parsed entity is well-formed
4933: * if its replacement text matches the production labeled
4934: * content.
4935: */
1.185 daniel 4936: if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
4937: ctxt->depth++;
1.180 daniel 4938: ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
1.185 daniel 4939: ctxt->sax, NULL, ctxt->depth,
4940: value, &list);
4941: ctxt->depth--;
4942: } else if (ent->etype ==
4943: XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
4944: ctxt->depth++;
1.180 daniel 4945: ret = xmlParseExternalEntity(ctxt->myDoc,
1.185 daniel 4946: ctxt->sax, NULL, ctxt->depth,
1.228 veillard 4947: ent->URI, ent->ExternalID, &list);
1.185 daniel 4948: ctxt->depth--;
4949: } else {
1.180 daniel 4950: ret = -1;
4951: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4952: ctxt->sax->error(ctxt->userData,
4953: "Internal: invalid entity type\n");
4954: }
1.185 daniel 4955: if (ret == XML_ERR_ENTITY_LOOP) {
1.230 ! veillard 4956: ctxt->errNo = XML_ERR_ENTITY_LOOP;
1.185 daniel 4957: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4958: ctxt->sax->error(ctxt->userData,
4959: "Detected entity reference loop\n");
4960: ctxt->wellFormed = 0;
4961: ctxt->disableSAX = 1;
4962: } else if ((ret == 0) && (list != NULL)) {
1.180 daniel 4963: if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4964: (ent->children == NULL)) {
4965: ent->children = list;
4966: while (list != NULL) {
4967: list->parent = (xmlNodePtr) ent;
4968: if (list->next == NULL)
4969: ent->last = list;
4970: list = list->next;
4971: }
4972: } else {
4973: xmlFreeNodeList(list);
4974: }
4975: } else if (ret > 0) {
1.230 ! veillard 4976: ctxt->errNo = ret;
1.180 daniel 4977: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4978: ctxt->sax->error(ctxt->userData,
4979: "Entity value required\n");
4980: ctxt->wellFormed = 0;
4981: ctxt->disableSAX = 1;
4982: } else if (list != NULL) {
4983: xmlFreeNodeList(list);
4984: }
4985: }
4986: }
1.113 daniel 4987: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
1.171 daniel 4988: (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
1.113 daniel 4989: /*
4990: * Create a node.
4991: */
4992: ctxt->sax->reference(ctxt->userData, ent->name);
4993: return;
4994: } else if (ctxt->replaceEntities) {
1.222 veillard 4995: if ((ctxt->node != NULL) && (ent->children != NULL)) {
4996: /*
4997: * Seems we are generating the DOM content, do
4998: * a simple tree copy
4999: */
5000: xmlNodePtr new;
5001: new = xmlCopyNodeList(ent->children);
5002:
5003: xmlAddChildList(ctxt->node, new);
5004: /*
5005: * This is to avoid a nasty side effect, see
5006: * characters() in SAX.c
5007: */
5008: ctxt->nodemem = 0;
5009: ctxt->nodelen = 0;
5010: return;
5011: } else {
5012: /*
5013: * Probably running in SAX mode
5014: */
5015: xmlParserInputPtr input;
1.79 daniel 5016:
1.222 veillard 5017: input = xmlNewEntityInputStream(ctxt, ent);
5018: xmlPushInput(ctxt, input);
5019: if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5020: (RAW == '<') && (NXT(1) == '?') &&
5021: (NXT(2) == 'x') && (NXT(3) == 'm') &&
5022: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5023: xmlParseTextDecl(ctxt);
5024: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5025: /*
5026: * The XML REC instructs us to stop parsing right here
5027: */
5028: ctxt->instate = XML_PARSER_EOF;
5029: return;
5030: }
5031: if (input->standalone == 1) {
1.230 ! veillard 5032: ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
1.222 veillard 5033: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5034: ctxt->sax->error(ctxt->userData,
5035: "external parsed entities cannot be standalone\n");
5036: ctxt->wellFormed = 0;
5037: ctxt->disableSAX = 1;
5038: }
1.167 daniel 5039: }
1.222 veillard 5040: return;
1.167 daniel 5041: }
1.113 daniel 5042: }
1.222 veillard 5043: } else {
5044: val = ent->content;
5045: if (val == NULL) return;
5046: /*
5047: * inline the entity.
5048: */
5049: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5050: (!ctxt->disableSAX))
5051: ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
1.77 daniel 5052: }
5053: }
1.24 daniel 5054: }
5055:
1.50 daniel 5056: /**
5057: * xmlParseEntityRef:
5058: * @ctxt: an XML parser context
5059: *
5060: * parse ENTITY references declarations
1.24 daniel 5061: *
5062: * [68] EntityRef ::= '&' Name ';'
1.68 daniel 5063: *
1.98 daniel 5064: * [ WFC: Entity Declared ]
5065: * In a document without any DTD, a document with only an internal DTD
5066: * subset which contains no parameter entity references, or a document
5067: * with "standalone='yes'", the Name given in the entity reference
5068: * must match that in an entity declaration, except that well-formed
5069: * documents need not declare any of the following entities: amp, lt,
5070: * gt, apos, quot. The declaration of a parameter entity must precede
5071: * any reference to it. Similarly, the declaration of a general entity
5072: * must precede any reference to it which appears in a default value in an
5073: * attribute-list declaration. Note that if entities are declared in the
5074: * external subset or in external parameter entities, a non-validating
5075: * processor is not obligated to read and process their declarations;
5076: * for such documents, the rule that an entity must be declared is a
5077: * well-formedness constraint only if standalone='yes'.
5078: *
5079: * [ WFC: Parsed Entity ]
5080: * An entity reference must not contain the name of an unparsed entity
5081: *
1.77 daniel 5082: * Returns the xmlEntityPtr if found, or NULL otherwise.
1.24 daniel 5083: */
1.77 daniel 5084: xmlEntityPtr
1.55 daniel 5085: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1.123 daniel 5086: xmlChar *name;
1.72 daniel 5087: xmlEntityPtr ent = NULL;
1.24 daniel 5088:
1.91 daniel 5089: GROW;
1.111 daniel 5090:
1.152 daniel 5091: if (RAW == '&') {
1.40 daniel 5092: NEXT;
1.24 daniel 5093: name = xmlParseName(ctxt);
5094: if (name == NULL) {
1.230 ! veillard 5095: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 5096: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 5097: ctxt->sax->error(ctxt->userData,
5098: "xmlParseEntityRef: no name\n");
1.59 daniel 5099: ctxt->wellFormed = 0;
1.180 daniel 5100: ctxt->disableSAX = 1;
1.24 daniel 5101: } else {
1.152 daniel 5102: if (RAW == ';') {
1.40 daniel 5103: NEXT;
1.24 daniel 5104: /*
1.77 daniel 5105: * Ask first SAX for entity resolution, otherwise try the
5106: * predefined set.
5107: */
5108: if (ctxt->sax != NULL) {
5109: if (ctxt->sax->getEntity != NULL)
5110: ent = ctxt->sax->getEntity(ctxt->userData, name);
5111: if (ent == NULL)
5112: ent = xmlGetPredefinedEntity(name);
5113: }
5114: /*
1.98 daniel 5115: * [ WFC: Entity Declared ]
5116: * In a document without any DTD, a document with only an
5117: * internal DTD subset which contains no parameter entity
5118: * references, or a document with "standalone='yes'", the
5119: * Name given in the entity reference must match that in an
5120: * entity declaration, except that well-formed documents
5121: * need not declare any of the following entities: amp, lt,
5122: * gt, apos, quot.
5123: * The declaration of a parameter entity must precede any
5124: * reference to it.
5125: * Similarly, the declaration of a general entity must
5126: * precede any reference to it which appears in a default
5127: * value in an attribute-list declaration. Note that if
5128: * entities are declared in the external subset or in
5129: * external parameter entities, a non-validating processor
5130: * is not obligated to read and process their declarations;
5131: * for such documents, the rule that an entity must be
5132: * declared is a well-formedness constraint only if
5133: * standalone='yes'.
1.59 daniel 5134: */
1.77 daniel 5135: if (ent == NULL) {
1.98 daniel 5136: if ((ctxt->standalone == 1) ||
5137: ((ctxt->hasExternalSubset == 0) &&
5138: (ctxt->hasPErefs == 0))) {
1.230 ! veillard 5139: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.98 daniel 5140: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 5141: ctxt->sax->error(ctxt->userData,
5142: "Entity '%s' not defined\n", name);
5143: ctxt->wellFormed = 0;
1.180 daniel 5144: ctxt->disableSAX = 1;
1.77 daniel 5145: } else {
1.230 ! veillard 5146: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
1.98 daniel 5147: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5148: ctxt->sax->warning(ctxt->userData,
5149: "Entity '%s' not defined\n", name);
1.59 daniel 5150: }
1.77 daniel 5151: }
1.59 daniel 5152:
5153: /*
1.98 daniel 5154: * [ WFC: Parsed Entity ]
5155: * An entity reference must not contain the name of an
5156: * unparsed entity
5157: */
1.159 daniel 5158: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.230 ! veillard 5159: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 5160: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5161: ctxt->sax->error(ctxt->userData,
5162: "Entity reference to unparsed entity %s\n", name);
5163: ctxt->wellFormed = 0;
1.180 daniel 5164: ctxt->disableSAX = 1;
1.98 daniel 5165: }
5166:
5167: /*
5168: * [ WFC: No External Entity References ]
5169: * Attribute values cannot contain direct or indirect
5170: * entity references to external entities.
5171: */
5172: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 5173: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.230 ! veillard 5174: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
1.98 daniel 5175: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5176: ctxt->sax->error(ctxt->userData,
5177: "Attribute references external entity '%s'\n", name);
5178: ctxt->wellFormed = 0;
1.180 daniel 5179: ctxt->disableSAX = 1;
1.98 daniel 5180: }
5181: /*
5182: * [ WFC: No < in Attribute Values ]
5183: * The replacement text of any entity referred to directly or
5184: * indirectly in an attribute value (other than "<") must
5185: * not contain a <.
1.59 daniel 5186: */
1.98 daniel 5187: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.116 daniel 5188: (ent != NULL) &&
5189: (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
1.98 daniel 5190: (ent->content != NULL) &&
5191: (xmlStrchr(ent->content, '<'))) {
1.230 ! veillard 5192: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.98 daniel 5193: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5194: ctxt->sax->error(ctxt->userData,
5195: "'<' in entity '%s' is not allowed in attributes values\n", name);
5196: ctxt->wellFormed = 0;
1.180 daniel 5197: ctxt->disableSAX = 1;
1.98 daniel 5198: }
5199:
5200: /*
5201: * Internal check, no parameter entities here ...
5202: */
5203: else {
1.159 daniel 5204: switch (ent->etype) {
1.59 daniel 5205: case XML_INTERNAL_PARAMETER_ENTITY:
5206: case XML_EXTERNAL_PARAMETER_ENTITY:
1.230 ! veillard 5207: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
1.59 daniel 5208: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5209: ctxt->sax->error(ctxt->userData,
1.59 daniel 5210: "Attempt to reference the parameter entity '%s'\n", name);
5211: ctxt->wellFormed = 0;
1.180 daniel 5212: ctxt->disableSAX = 1;
5213: break;
5214: default:
1.59 daniel 5215: break;
5216: }
5217: }
5218:
5219: /*
1.98 daniel 5220: * [ WFC: No Recursion ]
1.229 veillard 5221: * A parsed entity must not contain a recursive reference
1.117 daniel 5222: * to itself, either directly or indirectly.
1.229 veillard 5223: * Done somewhere else
1.59 daniel 5224: */
1.77 daniel 5225:
1.24 daniel 5226: } else {
1.230 ! veillard 5227: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.55 daniel 5228: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5229: ctxt->sax->error(ctxt->userData,
1.59 daniel 5230: "xmlParseEntityRef: expecting ';'\n");
5231: ctxt->wellFormed = 0;
1.180 daniel 5232: ctxt->disableSAX = 1;
1.24 daniel 5233: }
1.119 daniel 5234: xmlFree(name);
1.24 daniel 5235: }
5236: }
1.77 daniel 5237: return(ent);
1.24 daniel 5238: }
1.229 veillard 5239:
1.135 daniel 5240: /**
5241: * xmlParseStringEntityRef:
5242: * @ctxt: an XML parser context
5243: * @str: a pointer to an index in the string
5244: *
5245: * parse ENTITY references declarations, but this version parses it from
5246: * a string value.
5247: *
5248: * [68] EntityRef ::= '&' Name ';'
5249: *
5250: * [ WFC: Entity Declared ]
5251: * In a document without any DTD, a document with only an internal DTD
5252: * subset which contains no parameter entity references, or a document
5253: * with "standalone='yes'", the Name given in the entity reference
5254: * must match that in an entity declaration, except that well-formed
5255: * documents need not declare any of the following entities: amp, lt,
5256: * gt, apos, quot. The declaration of a parameter entity must precede
5257: * any reference to it. Similarly, the declaration of a general entity
5258: * must precede any reference to it which appears in a default value in an
5259: * attribute-list declaration. Note that if entities are declared in the
5260: * external subset or in external parameter entities, a non-validating
5261: * processor is not obligated to read and process their declarations;
5262: * for such documents, the rule that an entity must be declared is a
5263: * well-formedness constraint only if standalone='yes'.
5264: *
5265: * [ WFC: Parsed Entity ]
5266: * An entity reference must not contain the name of an unparsed entity
5267: *
5268: * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5269: * is updated to the current location in the string.
5270: */
5271: xmlEntityPtr
5272: xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5273: xmlChar *name;
5274: const xmlChar *ptr;
5275: xmlChar cur;
5276: xmlEntityPtr ent = NULL;
5277:
1.156 daniel 5278: if ((str == NULL) || (*str == NULL))
5279: return(NULL);
1.135 daniel 5280: ptr = *str;
5281: cur = *ptr;
5282: if (cur == '&') {
5283: ptr++;
5284: cur = *ptr;
5285: name = xmlParseStringName(ctxt, &ptr);
5286: if (name == NULL) {
1.230 ! veillard 5287: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.135 daniel 5288: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5289: ctxt->sax->error(ctxt->userData,
5290: "xmlParseEntityRef: no name\n");
5291: ctxt->wellFormed = 0;
1.180 daniel 5292: ctxt->disableSAX = 1;
1.135 daniel 5293: } else {
1.185 daniel 5294: if (*ptr == ';') {
5295: ptr++;
1.135 daniel 5296: /*
5297: * Ask first SAX for entity resolution, otherwise try the
5298: * predefined set.
5299: */
5300: if (ctxt->sax != NULL) {
5301: if (ctxt->sax->getEntity != NULL)
5302: ent = ctxt->sax->getEntity(ctxt->userData, name);
5303: if (ent == NULL)
5304: ent = xmlGetPredefinedEntity(name);
5305: }
5306: /*
5307: * [ WFC: Entity Declared ]
5308: * In a document without any DTD, a document with only an
5309: * internal DTD subset which contains no parameter entity
5310: * references, or a document with "standalone='yes'", the
5311: * Name given in the entity reference must match that in an
5312: * entity declaration, except that well-formed documents
5313: * need not declare any of the following entities: amp, lt,
5314: * gt, apos, quot.
5315: * The declaration of a parameter entity must precede any
5316: * reference to it.
5317: * Similarly, the declaration of a general entity must
5318: * precede any reference to it which appears in a default
5319: * value in an attribute-list declaration. Note that if
5320: * entities are declared in the external subset or in
5321: * external parameter entities, a non-validating processor
5322: * is not obligated to read and process their declarations;
5323: * for such documents, the rule that an entity must be
5324: * declared is a well-formedness constraint only if
5325: * standalone='yes'.
5326: */
5327: if (ent == NULL) {
5328: if ((ctxt->standalone == 1) ||
5329: ((ctxt->hasExternalSubset == 0) &&
5330: (ctxt->hasPErefs == 0))) {
1.230 ! veillard 5331: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.135 daniel 5332: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5333: ctxt->sax->error(ctxt->userData,
5334: "Entity '%s' not defined\n", name);
5335: ctxt->wellFormed = 0;
1.180 daniel 5336: ctxt->disableSAX = 1;
1.135 daniel 5337: } else {
1.230 ! veillard 5338: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
1.135 daniel 5339: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5340: ctxt->sax->warning(ctxt->userData,
5341: "Entity '%s' not defined\n", name);
5342: }
5343: }
5344:
5345: /*
5346: * [ WFC: Parsed Entity ]
5347: * An entity reference must not contain the name of an
5348: * unparsed entity
5349: */
1.159 daniel 5350: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.230 ! veillard 5351: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.135 daniel 5352: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5353: ctxt->sax->error(ctxt->userData,
5354: "Entity reference to unparsed entity %s\n", name);
5355: ctxt->wellFormed = 0;
1.180 daniel 5356: ctxt->disableSAX = 1;
1.135 daniel 5357: }
5358:
5359: /*
5360: * [ WFC: No External Entity References ]
5361: * Attribute values cannot contain direct or indirect
5362: * entity references to external entities.
5363: */
5364: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 5365: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.230 ! veillard 5366: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
1.135 daniel 5367: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5368: ctxt->sax->error(ctxt->userData,
5369: "Attribute references external entity '%s'\n", name);
5370: ctxt->wellFormed = 0;
1.180 daniel 5371: ctxt->disableSAX = 1;
1.135 daniel 5372: }
5373: /*
5374: * [ WFC: No < in Attribute Values ]
5375: * The replacement text of any entity referred to directly or
5376: * indirectly in an attribute value (other than "<") must
5377: * not contain a <.
5378: */
5379: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5380: (ent != NULL) &&
5381: (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
5382: (ent->content != NULL) &&
5383: (xmlStrchr(ent->content, '<'))) {
1.230 ! veillard 5384: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.135 daniel 5385: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5386: ctxt->sax->error(ctxt->userData,
5387: "'<' in entity '%s' is not allowed in attributes values\n", name);
5388: ctxt->wellFormed = 0;
1.180 daniel 5389: ctxt->disableSAX = 1;
1.135 daniel 5390: }
5391:
5392: /*
5393: * Internal check, no parameter entities here ...
5394: */
5395: else {
1.159 daniel 5396: switch (ent->etype) {
1.135 daniel 5397: case XML_INTERNAL_PARAMETER_ENTITY:
5398: case XML_EXTERNAL_PARAMETER_ENTITY:
1.230 ! veillard 5399: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
1.135 daniel 5400: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5401: ctxt->sax->error(ctxt->userData,
5402: "Attempt to reference the parameter entity '%s'\n", name);
5403: ctxt->wellFormed = 0;
1.180 daniel 5404: ctxt->disableSAX = 1;
5405: break;
5406: default:
1.135 daniel 5407: break;
5408: }
5409: }
5410:
5411: /*
5412: * [ WFC: No Recursion ]
1.229 veillard 5413: * A parsed entity must not contain a recursive reference
1.135 daniel 5414: * to itself, either directly or indirectly.
1.229 veillard 5415: * Done somewhwere else
1.135 daniel 5416: */
5417:
5418: } else {
1.230 ! veillard 5419: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.135 daniel 5420: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5421: ctxt->sax->error(ctxt->userData,
5422: "xmlParseEntityRef: expecting ';'\n");
5423: ctxt->wellFormed = 0;
1.180 daniel 5424: ctxt->disableSAX = 1;
1.135 daniel 5425: }
5426: xmlFree(name);
5427: }
5428: }
1.185 daniel 5429: *str = ptr;
1.135 daniel 5430: return(ent);
5431: }
1.24 daniel 5432:
1.50 daniel 5433: /**
5434: * xmlParsePEReference:
5435: * @ctxt: an XML parser context
5436: *
5437: * parse PEReference declarations
1.77 daniel 5438: * The entity content is handled directly by pushing it's content as
5439: * a new input stream.
1.22 daniel 5440: *
5441: * [69] PEReference ::= '%' Name ';'
1.68 daniel 5442: *
1.98 daniel 5443: * [ WFC: No Recursion ]
1.229 veillard 5444: * A parsed entity must not contain a recursive
1.98 daniel 5445: * reference to itself, either directly or indirectly.
5446: *
5447: * [ WFC: Entity Declared ]
5448: * In a document without any DTD, a document with only an internal DTD
5449: * subset which contains no parameter entity references, or a document
5450: * with "standalone='yes'", ... ... The declaration of a parameter
5451: * entity must precede any reference to it...
5452: *
5453: * [ VC: Entity Declared ]
5454: * In a document with an external subset or external parameter entities
5455: * with "standalone='no'", ... ... The declaration of a parameter entity
5456: * must precede any reference to it...
5457: *
5458: * [ WFC: In DTD ]
5459: * Parameter-entity references may only appear in the DTD.
5460: * NOTE: misleading but this is handled.
1.22 daniel 5461: */
1.77 daniel 5462: void
1.55 daniel 5463: xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 5464: xmlChar *name;
1.72 daniel 5465: xmlEntityPtr entity = NULL;
1.50 daniel 5466: xmlParserInputPtr input;
1.22 daniel 5467:
1.152 daniel 5468: if (RAW == '%') {
1.40 daniel 5469: NEXT;
1.22 daniel 5470: name = xmlParseName(ctxt);
5471: if (name == NULL) {
1.230 ! veillard 5472: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 5473: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5474: ctxt->sax->error(ctxt->userData,
5475: "xmlParsePEReference: no name\n");
1.59 daniel 5476: ctxt->wellFormed = 0;
1.180 daniel 5477: ctxt->disableSAX = 1;
1.22 daniel 5478: } else {
1.152 daniel 5479: if (RAW == ';') {
1.40 daniel 5480: NEXT;
1.98 daniel 5481: if ((ctxt->sax != NULL) &&
5482: (ctxt->sax->getParameterEntity != NULL))
5483: entity = ctxt->sax->getParameterEntity(ctxt->userData,
5484: name);
1.45 daniel 5485: if (entity == NULL) {
1.98 daniel 5486: /*
5487: * [ WFC: Entity Declared ]
5488: * In a document without any DTD, a document with only an
5489: * internal DTD subset which contains no parameter entity
5490: * references, or a document with "standalone='yes'", ...
5491: * ... The declaration of a parameter entity must precede
5492: * any reference to it...
5493: */
5494: if ((ctxt->standalone == 1) ||
5495: ((ctxt->hasExternalSubset == 0) &&
5496: (ctxt->hasPErefs == 0))) {
1.230 ! veillard 5497: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.220 veillard 5498: if ((!ctxt->disableSAX) &&
5499: (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 5500: ctxt->sax->error(ctxt->userData,
5501: "PEReference: %%%s; not found\n", name);
5502: ctxt->wellFormed = 0;
1.180 daniel 5503: ctxt->disableSAX = 1;
1.98 daniel 5504: } else {
5505: /*
5506: * [ VC: Entity Declared ]
5507: * In a document with an external subset or external
5508: * parameter entities with "standalone='no'", ...
5509: * ... The declaration of a parameter entity must precede
5510: * any reference to it...
5511: */
1.220 veillard 5512: if ((!ctxt->disableSAX) &&
5513: (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1.98 daniel 5514: ctxt->sax->warning(ctxt->userData,
5515: "PEReference: %%%s; not found\n", name);
5516: ctxt->valid = 0;
5517: }
1.50 daniel 5518: } else {
1.98 daniel 5519: /*
5520: * Internal checking in case the entity quest barfed
5521: */
1.159 daniel 5522: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5523: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.98 daniel 5524: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5525: ctxt->sax->warning(ctxt->userData,
5526: "Internal: %%%s; is not a parameter entity\n", name);
5527: } else {
1.164 daniel 5528: /*
5529: * TODO !!!
5530: * handle the extra spaces added before and after
5531: * c.f. http://www.w3.org/TR/REC-xml#as-PE
5532: */
1.98 daniel 5533: input = xmlNewEntityInputStream(ctxt, entity);
5534: xmlPushInput(ctxt, input);
1.164 daniel 5535: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5536: (RAW == '<') && (NXT(1) == '?') &&
5537: (NXT(2) == 'x') && (NXT(3) == 'm') &&
5538: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 5539: xmlParseTextDecl(ctxt);
1.193 daniel 5540: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5541: /*
5542: * The XML REC instructs us to stop parsing
5543: * right here
5544: */
5545: ctxt->instate = XML_PARSER_EOF;
5546: xmlFree(name);
5547: return;
5548: }
1.164 daniel 5549: }
5550: if (ctxt->token == 0)
5551: ctxt->token = ' ';
1.98 daniel 5552: }
1.45 daniel 5553: }
1.98 daniel 5554: ctxt->hasPErefs = 1;
1.22 daniel 5555: } else {
1.230 ! veillard 5556: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.55 daniel 5557: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5558: ctxt->sax->error(ctxt->userData,
1.59 daniel 5559: "xmlParsePEReference: expecting ';'\n");
5560: ctxt->wellFormed = 0;
1.180 daniel 5561: ctxt->disableSAX = 1;
1.22 daniel 5562: }
1.119 daniel 5563: xmlFree(name);
1.3 veillard 5564: }
5565: }
5566: }
5567:
1.50 daniel 5568: /**
1.135 daniel 5569: * xmlParseStringPEReference:
5570: * @ctxt: an XML parser context
5571: * @str: a pointer to an index in the string
5572: *
5573: * parse PEReference declarations
5574: *
5575: * [69] PEReference ::= '%' Name ';'
5576: *
5577: * [ WFC: No Recursion ]
1.229 veillard 5578: * A parsed entity must not contain a recursive
1.135 daniel 5579: * reference to itself, either directly or indirectly.
5580: *
5581: * [ WFC: Entity Declared ]
5582: * In a document without any DTD, a document with only an internal DTD
5583: * subset which contains no parameter entity references, or a document
5584: * with "standalone='yes'", ... ... The declaration of a parameter
5585: * entity must precede any reference to it...
5586: *
5587: * [ VC: Entity Declared ]
5588: * In a document with an external subset or external parameter entities
5589: * with "standalone='no'", ... ... The declaration of a parameter entity
5590: * must precede any reference to it...
5591: *
5592: * [ WFC: In DTD ]
5593: * Parameter-entity references may only appear in the DTD.
5594: * NOTE: misleading but this is handled.
5595: *
5596: * Returns the string of the entity content.
5597: * str is updated to the current value of the index
5598: */
5599: xmlEntityPtr
5600: xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5601: const xmlChar *ptr;
5602: xmlChar cur;
5603: xmlChar *name;
5604: xmlEntityPtr entity = NULL;
5605:
5606: if ((str == NULL) || (*str == NULL)) return(NULL);
5607: ptr = *str;
5608: cur = *ptr;
5609: if (cur == '%') {
5610: ptr++;
5611: cur = *ptr;
5612: name = xmlParseStringName(ctxt, &ptr);
5613: if (name == NULL) {
1.230 ! veillard 5614: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.135 daniel 5615: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5616: ctxt->sax->error(ctxt->userData,
5617: "xmlParseStringPEReference: no name\n");
5618: ctxt->wellFormed = 0;
1.180 daniel 5619: ctxt->disableSAX = 1;
1.135 daniel 5620: } else {
5621: cur = *ptr;
5622: if (cur == ';') {
5623: ptr++;
5624: cur = *ptr;
5625: if ((ctxt->sax != NULL) &&
5626: (ctxt->sax->getParameterEntity != NULL))
5627: entity = ctxt->sax->getParameterEntity(ctxt->userData,
5628: name);
5629: if (entity == NULL) {
5630: /*
5631: * [ WFC: Entity Declared ]
5632: * In a document without any DTD, a document with only an
5633: * internal DTD subset which contains no parameter entity
5634: * references, or a document with "standalone='yes'", ...
5635: * ... The declaration of a parameter entity must precede
5636: * any reference to it...
5637: */
5638: if ((ctxt->standalone == 1) ||
5639: ((ctxt->hasExternalSubset == 0) &&
5640: (ctxt->hasPErefs == 0))) {
1.230 ! veillard 5641: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.135 daniel 5642: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5643: ctxt->sax->error(ctxt->userData,
5644: "PEReference: %%%s; not found\n", name);
5645: ctxt->wellFormed = 0;
1.180 daniel 5646: ctxt->disableSAX = 1;
1.135 daniel 5647: } else {
5648: /*
5649: * [ VC: Entity Declared ]
5650: * In a document with an external subset or external
5651: * parameter entities with "standalone='no'", ...
5652: * ... The declaration of a parameter entity must
5653: * precede any reference to it...
5654: */
5655: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5656: ctxt->sax->warning(ctxt->userData,
5657: "PEReference: %%%s; not found\n", name);
5658: ctxt->valid = 0;
5659: }
5660: } else {
5661: /*
5662: * Internal checking in case the entity quest barfed
5663: */
1.159 daniel 5664: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5665: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.135 daniel 5666: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5667: ctxt->sax->warning(ctxt->userData,
5668: "Internal: %%%s; is not a parameter entity\n", name);
5669: }
5670: }
5671: ctxt->hasPErefs = 1;
5672: } else {
1.230 ! veillard 5673: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.135 daniel 5674: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5675: ctxt->sax->error(ctxt->userData,
5676: "xmlParseStringPEReference: expecting ';'\n");
5677: ctxt->wellFormed = 0;
1.180 daniel 5678: ctxt->disableSAX = 1;
1.135 daniel 5679: }
5680: xmlFree(name);
5681: }
5682: }
5683: *str = ptr;
5684: return(entity);
5685: }
5686:
5687: /**
1.181 daniel 5688: * xmlParseDocTypeDecl:
1.50 daniel 5689: * @ctxt: an XML parser context
5690: *
5691: * parse a DOCTYPE declaration
1.21 daniel 5692: *
1.22 daniel 5693: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
5694: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.98 daniel 5695: *
5696: * [ VC: Root Element Type ]
1.99 daniel 5697: * The Name in the document type declaration must match the element
1.98 daniel 5698: * type of the root element.
1.21 daniel 5699: */
5700:
1.55 daniel 5701: void
5702: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.165 daniel 5703: xmlChar *name = NULL;
1.123 daniel 5704: xmlChar *ExternalID = NULL;
5705: xmlChar *URI = NULL;
1.21 daniel 5706:
5707: /*
5708: * We know that '<!DOCTYPE' has been detected.
5709: */
1.40 daniel 5710: SKIP(9);
1.21 daniel 5711:
1.42 daniel 5712: SKIP_BLANKS;
1.21 daniel 5713:
5714: /*
5715: * Parse the DOCTYPE name.
5716: */
5717: name = xmlParseName(ctxt);
5718: if (name == NULL) {
1.230 ! veillard 5719: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 5720: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5721: ctxt->sax->error(ctxt->userData,
5722: "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.59 daniel 5723: ctxt->wellFormed = 0;
1.180 daniel 5724: ctxt->disableSAX = 1;
1.21 daniel 5725: }
1.165 daniel 5726: ctxt->intSubName = name;
1.21 daniel 5727:
1.42 daniel 5728: SKIP_BLANKS;
1.21 daniel 5729:
5730: /*
1.22 daniel 5731: * Check for SystemID and ExternalID
5732: */
1.67 daniel 5733: URI = xmlParseExternalID(ctxt, &ExternalID, 1);
1.98 daniel 5734:
5735: if ((URI != NULL) || (ExternalID != NULL)) {
5736: ctxt->hasExternalSubset = 1;
5737: }
1.165 daniel 5738: ctxt->extSubURI = URI;
5739: ctxt->extSubSystem = ExternalID;
1.98 daniel 5740:
1.42 daniel 5741: SKIP_BLANKS;
1.36 daniel 5742:
1.76 daniel 5743: /*
1.165 daniel 5744: * Create and update the internal subset.
1.76 daniel 5745: */
1.171 daniel 5746: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
5747: (!ctxt->disableSAX))
1.74 daniel 5748: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
1.22 daniel 5749:
5750: /*
1.140 daniel 5751: * Is there any internal subset declarations ?
5752: * they are handled separately in xmlParseInternalSubset()
5753: */
1.152 daniel 5754: if (RAW == '[')
1.140 daniel 5755: return;
5756:
5757: /*
5758: * We should be at the end of the DOCTYPE declaration.
5759: */
1.152 daniel 5760: if (RAW != '>') {
1.230 ! veillard 5761: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
1.140 daniel 5762: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5763: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5764: ctxt->wellFormed = 0;
1.180 daniel 5765: ctxt->disableSAX = 1;
1.140 daniel 5766: }
5767: NEXT;
5768: }
5769:
5770: /**
1.181 daniel 5771: * xmlParseInternalsubset:
1.140 daniel 5772: * @ctxt: an XML parser context
5773: *
5774: * parse the internal subset declaration
5775: *
5776: * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5777: */
5778:
5779: void
5780: xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
5781: /*
1.22 daniel 5782: * Is there any DTD definition ?
5783: */
1.152 daniel 5784: if (RAW == '[') {
1.96 daniel 5785: ctxt->instate = XML_PARSER_DTD;
1.40 daniel 5786: NEXT;
1.22 daniel 5787: /*
5788: * Parse the succession of Markup declarations and
5789: * PEReferences.
5790: * Subsequence (markupdecl | PEReference | S)*
5791: */
1.152 daniel 5792: while (RAW != ']') {
1.123 daniel 5793: const xmlChar *check = CUR_PTR;
1.115 daniel 5794: int cons = ctxt->input->consumed;
1.22 daniel 5795:
1.42 daniel 5796: SKIP_BLANKS;
1.22 daniel 5797: xmlParseMarkupDecl(ctxt);
1.50 daniel 5798: xmlParsePEReference(ctxt);
1.22 daniel 5799:
1.115 daniel 5800: /*
5801: * Pop-up of finished entities.
5802: */
1.152 daniel 5803: while ((RAW == 0) && (ctxt->inputNr > 1))
1.115 daniel 5804: xmlPopInput(ctxt);
5805:
1.118 daniel 5806: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
1.230 ! veillard 5807: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 5808: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5809: ctxt->sax->error(ctxt->userData,
1.140 daniel 5810: "xmlParseInternalSubset: error detected in Markup declaration\n");
1.59 daniel 5811: ctxt->wellFormed = 0;
1.180 daniel 5812: ctxt->disableSAX = 1;
1.22 daniel 5813: break;
5814: }
5815: }
1.209 veillard 5816: if (RAW == ']') {
5817: NEXT;
5818: SKIP_BLANKS;
5819: }
1.22 daniel 5820: }
5821:
5822: /*
5823: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 5824: */
1.152 daniel 5825: if (RAW != '>') {
1.230 ! veillard 5826: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
1.55 daniel 5827: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5828: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
1.59 daniel 5829: ctxt->wellFormed = 0;
1.180 daniel 5830: ctxt->disableSAX = 1;
1.21 daniel 5831: }
1.40 daniel 5832: NEXT;
1.21 daniel 5833: }
5834:
1.50 daniel 5835: /**
5836: * xmlParseAttribute:
5837: * @ctxt: an XML parser context
1.123 daniel 5838: * @value: a xmlChar ** used to store the value of the attribute
1.50 daniel 5839: *
5840: * parse an attribute
1.3 veillard 5841: *
1.22 daniel 5842: * [41] Attribute ::= Name Eq AttValue
5843: *
1.98 daniel 5844: * [ WFC: No External Entity References ]
5845: * Attribute values cannot contain direct or indirect entity references
5846: * to external entities.
5847: *
5848: * [ WFC: No < in Attribute Values ]
5849: * The replacement text of any entity referred to directly or indirectly in
5850: * an attribute value (other than "<") must not contain a <.
5851: *
5852: * [ VC: Attribute Value Type ]
1.117 daniel 5853: * The attribute must have been declared; the value must be of the type
1.99 daniel 5854: * declared for it.
1.98 daniel 5855: *
1.22 daniel 5856: * [25] Eq ::= S? '=' S?
5857: *
1.29 daniel 5858: * With namespace:
5859: *
5860: * [NS 11] Attribute ::= QName Eq AttValue
1.43 daniel 5861: *
5862: * Also the case QName == xmlns:??? is handled independently as a namespace
5863: * definition.
1.69 daniel 5864: *
1.72 daniel 5865: * Returns the attribute name, and the value in *value.
1.3 veillard 5866: */
5867:
1.123 daniel 5868: xmlChar *
5869: xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
5870: xmlChar *name, *val;
1.3 veillard 5871:
1.72 daniel 5872: *value = NULL;
5873: name = xmlParseName(ctxt);
1.22 daniel 5874: if (name == NULL) {
1.230 ! veillard 5875: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 5876: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5877: ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
1.59 daniel 5878: ctxt->wellFormed = 0;
1.180 daniel 5879: ctxt->disableSAX = 1;
1.52 daniel 5880: return(NULL);
1.3 veillard 5881: }
5882:
5883: /*
1.29 daniel 5884: * read the value
1.3 veillard 5885: */
1.42 daniel 5886: SKIP_BLANKS;
1.152 daniel 5887: if (RAW == '=') {
1.40 daniel 5888: NEXT;
1.42 daniel 5889: SKIP_BLANKS;
1.72 daniel 5890: val = xmlParseAttValue(ctxt);
1.96 daniel 5891: ctxt->instate = XML_PARSER_CONTENT;
1.29 daniel 5892: } else {
1.230 ! veillard 5893: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
1.55 daniel 5894: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5895: ctxt->sax->error(ctxt->userData,
1.59 daniel 5896: "Specification mandate value for attribute %s\n", name);
5897: ctxt->wellFormed = 0;
1.180 daniel 5898: ctxt->disableSAX = 1;
1.170 daniel 5899: xmlFree(name);
1.52 daniel 5900: return(NULL);
1.43 daniel 5901: }
5902:
1.172 daniel 5903: /*
5904: * Check that xml:lang conforms to the specification
1.222 veillard 5905: * No more registered as an error, just generate a warning now
5906: * since this was deprecated in XML second edition
1.172 daniel 5907: */
1.229 veillard 5908: if ((ctxt->pedantic) && (!xmlStrcmp(name, BAD_CAST "xml:lang"))) {
1.172 daniel 5909: if (!xmlCheckLanguageID(val)) {
1.222 veillard 5910: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5911: ctxt->sax->warning(ctxt->userData,
5912: "Malformed value for xml:lang : %s\n", val);
1.172 daniel 5913: }
5914: }
5915:
1.176 daniel 5916: /*
5917: * Check that xml:space conforms to the specification
5918: */
5919: if (!xmlStrcmp(name, BAD_CAST "xml:space")) {
5920: if (!xmlStrcmp(val, BAD_CAST "default"))
5921: *(ctxt->space) = 0;
5922: else if (!xmlStrcmp(val, BAD_CAST "preserve"))
5923: *(ctxt->space) = 1;
5924: else {
1.230 ! veillard 5925: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
1.176 daniel 5926: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5927: ctxt->sax->error(ctxt->userData,
5928: "Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
5929: val);
5930: ctxt->wellFormed = 0;
1.180 daniel 5931: ctxt->disableSAX = 1;
1.176 daniel 5932: }
5933: }
5934:
1.72 daniel 5935: *value = val;
5936: return(name);
1.3 veillard 5937: }
5938:
1.50 daniel 5939: /**
5940: * xmlParseStartTag:
5941: * @ctxt: an XML parser context
5942: *
5943: * parse a start of tag either for rule element or
5944: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 5945: *
5946: * [40] STag ::= '<' Name (S Attribute)* S? '>'
5947: *
1.98 daniel 5948: * [ WFC: Unique Att Spec ]
5949: * No attribute name may appear more than once in the same start-tag or
5950: * empty-element tag.
5951: *
1.29 daniel 5952: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
5953: *
1.98 daniel 5954: * [ WFC: Unique Att Spec ]
5955: * No attribute name may appear more than once in the same start-tag or
5956: * empty-element tag.
5957: *
1.29 daniel 5958: * With namespace:
5959: *
5960: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
5961: *
5962: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.83 daniel 5963: *
1.192 daniel 5964: * Returns the element name parsed
1.2 veillard 5965: */
5966:
1.123 daniel 5967: xmlChar *
1.69 daniel 5968: xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 5969: xmlChar *name;
5970: xmlChar *attname;
5971: xmlChar *attvalue;
5972: const xmlChar **atts = NULL;
1.72 daniel 5973: int nbatts = 0;
5974: int maxatts = 0;
5975: int i;
1.2 veillard 5976:
1.152 daniel 5977: if (RAW != '<') return(NULL);
1.40 daniel 5978: NEXT;
1.3 veillard 5979:
1.72 daniel 5980: name = xmlParseName(ctxt);
1.59 daniel 5981: if (name == NULL) {
1.230 ! veillard 5982: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5983: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5984: ctxt->sax->error(ctxt->userData,
1.59 daniel 5985: "xmlParseStartTag: invalid element name\n");
5986: ctxt->wellFormed = 0;
1.180 daniel 5987: ctxt->disableSAX = 1;
1.83 daniel 5988: return(NULL);
1.50 daniel 5989: }
5990:
5991: /*
1.3 veillard 5992: * Now parse the attributes, it ends up with the ending
5993: *
5994: * (S Attribute)* S?
5995: */
1.42 daniel 5996: SKIP_BLANKS;
1.91 daniel 5997: GROW;
1.168 daniel 5998:
1.153 daniel 5999: while ((IS_CHAR(RAW)) &&
1.152 daniel 6000: (RAW != '>') &&
6001: ((RAW != '/') || (NXT(1) != '>'))) {
1.123 daniel 6002: const xmlChar *q = CUR_PTR;
1.91 daniel 6003: int cons = ctxt->input->consumed;
1.29 daniel 6004:
1.72 daniel 6005: attname = xmlParseAttribute(ctxt, &attvalue);
6006: if ((attname != NULL) && (attvalue != NULL)) {
6007: /*
1.98 daniel 6008: * [ WFC: Unique Att Spec ]
6009: * No attribute name may appear more than once in the same
6010: * start-tag or empty-element tag.
1.72 daniel 6011: */
6012: for (i = 0; i < nbatts;i += 2) {
6013: if (!xmlStrcmp(atts[i], attname)) {
1.230 ! veillard 6014: ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
1.72 daniel 6015: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 6016: ctxt->sax->error(ctxt->userData,
6017: "Attribute %s redefined\n",
6018: attname);
1.72 daniel 6019: ctxt->wellFormed = 0;
1.180 daniel 6020: ctxt->disableSAX = 1;
1.119 daniel 6021: xmlFree(attname);
6022: xmlFree(attvalue);
1.98 daniel 6023: goto failed;
1.72 daniel 6024: }
6025: }
6026:
6027: /*
6028: * Add the pair to atts
6029: */
6030: if (atts == NULL) {
6031: maxatts = 10;
1.123 daniel 6032: atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
1.72 daniel 6033: if (atts == NULL) {
1.86 daniel 6034: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 6035: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 6036: return(NULL);
1.72 daniel 6037: }
1.127 daniel 6038: } else if (nbatts + 4 > maxatts) {
1.72 daniel 6039: maxatts *= 2;
1.123 daniel 6040: atts = (const xmlChar **) xmlRealloc(atts,
6041: maxatts * sizeof(xmlChar *));
1.72 daniel 6042: if (atts == NULL) {
1.86 daniel 6043: fprintf(stderr, "realloc of %ld byte failed\n",
1.123 daniel 6044: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 6045: return(NULL);
1.72 daniel 6046: }
6047: }
6048: atts[nbatts++] = attname;
6049: atts[nbatts++] = attvalue;
6050: atts[nbatts] = NULL;
6051: atts[nbatts + 1] = NULL;
1.176 daniel 6052: } else {
6053: if (attname != NULL)
6054: xmlFree(attname);
6055: if (attvalue != NULL)
6056: xmlFree(attvalue);
1.72 daniel 6057: }
6058:
1.116 daniel 6059: failed:
1.168 daniel 6060:
6061: if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6062: break;
6063: if (!IS_BLANK(RAW)) {
1.230 ! veillard 6064: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.168 daniel 6065: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6066: ctxt->sax->error(ctxt->userData,
6067: "attributes construct error\n");
6068: ctxt->wellFormed = 0;
1.180 daniel 6069: ctxt->disableSAX = 1;
1.168 daniel 6070: }
1.42 daniel 6071: SKIP_BLANKS;
1.91 daniel 6072: if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
1.230 ! veillard 6073: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 6074: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6075: ctxt->sax->error(ctxt->userData,
1.31 daniel 6076: "xmlParseStartTag: problem parsing attributes\n");
1.59 daniel 6077: ctxt->wellFormed = 0;
1.180 daniel 6078: ctxt->disableSAX = 1;
1.29 daniel 6079: break;
1.3 veillard 6080: }
1.91 daniel 6081: GROW;
1.3 veillard 6082: }
6083:
1.43 daniel 6084: /*
1.72 daniel 6085: * SAX: Start of Element !
1.43 daniel 6086: */
1.171 daniel 6087: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6088: (!ctxt->disableSAX))
1.74 daniel 6089: ctxt->sax->startElement(ctxt->userData, name, atts);
1.43 daniel 6090:
1.72 daniel 6091: if (atts != NULL) {
1.123 daniel 6092: for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
1.119 daniel 6093: xmlFree(atts);
1.72 daniel 6094: }
1.83 daniel 6095: return(name);
1.3 veillard 6096: }
6097:
1.50 daniel 6098: /**
6099: * xmlParseEndTag:
6100: * @ctxt: an XML parser context
6101: *
6102: * parse an end of tag
1.27 daniel 6103: *
6104: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 6105: *
6106: * With namespace
6107: *
1.72 daniel 6108: * [NS 9] ETag ::= '</' QName S? '>'
1.7 veillard 6109: */
6110:
1.55 daniel 6111: void
1.140 daniel 6112: xmlParseEndTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 6113: xmlChar *name;
1.140 daniel 6114: xmlChar *oldname;
1.7 veillard 6115:
1.91 daniel 6116: GROW;
1.152 daniel 6117: if ((RAW != '<') || (NXT(1) != '/')) {
1.230 ! veillard 6118: ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
1.55 daniel 6119: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6120: ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
1.59 daniel 6121: ctxt->wellFormed = 0;
1.180 daniel 6122: ctxt->disableSAX = 1;
1.27 daniel 6123: return;
6124: }
1.40 daniel 6125: SKIP(2);
1.7 veillard 6126:
1.72 daniel 6127: name = xmlParseName(ctxt);
1.7 veillard 6128:
6129: /*
6130: * We should definitely be at the ending "S? '>'" part
6131: */
1.91 daniel 6132: GROW;
1.42 daniel 6133: SKIP_BLANKS;
1.153 daniel 6134: if ((!IS_CHAR(RAW)) || (RAW != '>')) {
1.230 ! veillard 6135: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.55 daniel 6136: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6137: ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
1.59 daniel 6138: ctxt->wellFormed = 0;
1.180 daniel 6139: ctxt->disableSAX = 1;
1.7 veillard 6140: } else
1.40 daniel 6141: NEXT;
1.7 veillard 6142:
1.72 daniel 6143: /*
1.98 daniel 6144: * [ WFC: Element Type Match ]
6145: * The Name in an element's end-tag must match the element type in the
6146: * start-tag.
6147: *
1.83 daniel 6148: */
1.147 daniel 6149: if ((name == NULL) || (ctxt->name == NULL) ||
6150: (xmlStrcmp(name, ctxt->name))) {
1.230 ! veillard 6151: ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
1.147 daniel 6152: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6153: if ((name != NULL) && (ctxt->name != NULL)) {
6154: ctxt->sax->error(ctxt->userData,
6155: "Opening and ending tag mismatch: %s and %s\n",
6156: ctxt->name, name);
6157: } else if (ctxt->name != NULL) {
6158: ctxt->sax->error(ctxt->userData,
6159: "Ending tag eror for: %s\n", ctxt->name);
6160: } else {
6161: ctxt->sax->error(ctxt->userData,
6162: "Ending tag error: internal error ???\n");
6163: }
1.122 daniel 6164:
1.147 daniel 6165: }
1.83 daniel 6166: ctxt->wellFormed = 0;
1.180 daniel 6167: ctxt->disableSAX = 1;
1.83 daniel 6168: }
6169:
6170: /*
1.72 daniel 6171: * SAX: End of Tag
6172: */
1.171 daniel 6173: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6174: (!ctxt->disableSAX))
1.74 daniel 6175: ctxt->sax->endElement(ctxt->userData, name);
1.72 daniel 6176:
6177: if (name != NULL)
1.119 daniel 6178: xmlFree(name);
1.140 daniel 6179: oldname = namePop(ctxt);
1.176 daniel 6180: spacePop(ctxt);
1.140 daniel 6181: if (oldname != NULL) {
6182: #ifdef DEBUG_STACK
6183: fprintf(stderr,"Close: popped %s\n", oldname);
6184: #endif
6185: xmlFree(oldname);
6186: }
1.7 veillard 6187: return;
6188: }
6189:
1.50 daniel 6190: /**
6191: * xmlParseCDSect:
6192: * @ctxt: an XML parser context
6193: *
6194: * Parse escaped pure raw content.
1.29 daniel 6195: *
6196: * [18] CDSect ::= CDStart CData CDEnd
6197: *
6198: * [19] CDStart ::= '<![CDATA['
6199: *
6200: * [20] Data ::= (Char* - (Char* ']]>' Char*))
6201: *
6202: * [21] CDEnd ::= ']]>'
1.3 veillard 6203: */
1.55 daniel 6204: void
6205: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.135 daniel 6206: xmlChar *buf = NULL;
6207: int len = 0;
1.140 daniel 6208: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 6209: int r, rl;
6210: int s, sl;
6211: int cur, l;
1.3 veillard 6212:
1.106 daniel 6213: if ((NXT(0) == '<') && (NXT(1) == '!') &&
1.40 daniel 6214: (NXT(2) == '[') && (NXT(3) == 'C') &&
6215: (NXT(4) == 'D') && (NXT(5) == 'A') &&
6216: (NXT(6) == 'T') && (NXT(7) == 'A') &&
6217: (NXT(8) == '[')) {
6218: SKIP(9);
1.29 daniel 6219: } else
1.45 daniel 6220: return;
1.109 daniel 6221:
6222: ctxt->instate = XML_PARSER_CDATA_SECTION;
1.152 daniel 6223: r = CUR_CHAR(rl);
6224: if (!IS_CHAR(r)) {
1.230 ! veillard 6225: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.55 daniel 6226: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6227: ctxt->sax->error(ctxt->userData,
1.135 daniel 6228: "CData section not finished\n");
1.59 daniel 6229: ctxt->wellFormed = 0;
1.180 daniel 6230: ctxt->disableSAX = 1;
1.109 daniel 6231: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 6232: return;
1.3 veillard 6233: }
1.152 daniel 6234: NEXTL(rl);
6235: s = CUR_CHAR(sl);
6236: if (!IS_CHAR(s)) {
1.230 ! veillard 6237: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.55 daniel 6238: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6239: ctxt->sax->error(ctxt->userData,
1.135 daniel 6240: "CData section not finished\n");
1.59 daniel 6241: ctxt->wellFormed = 0;
1.180 daniel 6242: ctxt->disableSAX = 1;
1.109 daniel 6243: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 6244: return;
1.3 veillard 6245: }
1.152 daniel 6246: NEXTL(sl);
6247: cur = CUR_CHAR(l);
1.135 daniel 6248: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6249: if (buf == NULL) {
6250: fprintf(stderr, "malloc of %d byte failed\n", size);
6251: return;
6252: }
1.108 veillard 6253: while (IS_CHAR(cur) &&
1.110 daniel 6254: ((r != ']') || (s != ']') || (cur != '>'))) {
1.152 daniel 6255: if (len + 5 >= size) {
1.135 daniel 6256: size *= 2;
1.204 veillard 6257: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 6258: if (buf == NULL) {
6259: fprintf(stderr, "realloc of %d byte failed\n", size);
6260: return;
6261: }
6262: }
1.152 daniel 6263: COPY_BUF(rl,buf,len,r);
1.110 daniel 6264: r = s;
1.152 daniel 6265: rl = sl;
1.110 daniel 6266: s = cur;
1.152 daniel 6267: sl = l;
6268: NEXTL(l);
6269: cur = CUR_CHAR(l);
1.3 veillard 6270: }
1.135 daniel 6271: buf[len] = 0;
1.109 daniel 6272: ctxt->instate = XML_PARSER_CONTENT;
1.152 daniel 6273: if (cur != '>') {
1.230 ! veillard 6274: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.55 daniel 6275: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6276: ctxt->sax->error(ctxt->userData,
1.135 daniel 6277: "CData section not finished\n%.50s\n", buf);
1.59 daniel 6278: ctxt->wellFormed = 0;
1.180 daniel 6279: ctxt->disableSAX = 1;
1.135 daniel 6280: xmlFree(buf);
1.45 daniel 6281: return;
1.3 veillard 6282: }
1.152 daniel 6283: NEXTL(l);
1.16 daniel 6284:
1.45 daniel 6285: /*
1.135 daniel 6286: * Ok the buffer is to be consumed as cdata.
1.45 daniel 6287: */
1.171 daniel 6288: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.107 daniel 6289: if (ctxt->sax->cdataBlock != NULL)
1.135 daniel 6290: ctxt->sax->cdataBlock(ctxt->userData, buf, len);
1.45 daniel 6291: }
1.135 daniel 6292: xmlFree(buf);
1.2 veillard 6293: }
6294:
1.50 daniel 6295: /**
6296: * xmlParseContent:
6297: * @ctxt: an XML parser context
6298: *
6299: * Parse a content:
1.2 veillard 6300: *
1.27 daniel 6301: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 6302: */
6303:
1.55 daniel 6304: void
6305: xmlParseContent(xmlParserCtxtPtr ctxt) {
1.97 daniel 6306: GROW;
1.176 daniel 6307: while (((RAW != 0) || (ctxt->token != 0)) &&
6308: ((RAW != '<') || (NXT(1) != '/'))) {
1.123 daniel 6309: const xmlChar *test = CUR_PTR;
1.91 daniel 6310: int cons = ctxt->input->consumed;
1.123 daniel 6311: xmlChar tok = ctxt->token;
1.27 daniel 6312:
6313: /*
1.152 daniel 6314: * Handle possible processed charrefs.
6315: */
6316: if (ctxt->token != 0) {
6317: xmlParseCharData(ctxt, 0);
6318: }
6319: /*
1.27 daniel 6320: * First case : a Processing Instruction.
6321: */
1.152 daniel 6322: else if ((RAW == '<') && (NXT(1) == '?')) {
1.27 daniel 6323: xmlParsePI(ctxt);
6324: }
1.72 daniel 6325:
1.27 daniel 6326: /*
6327: * Second case : a CDSection
6328: */
1.152 daniel 6329: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 6330: (NXT(2) == '[') && (NXT(3) == 'C') &&
6331: (NXT(4) == 'D') && (NXT(5) == 'A') &&
6332: (NXT(6) == 'T') && (NXT(7) == 'A') &&
6333: (NXT(8) == '[')) {
1.45 daniel 6334: xmlParseCDSect(ctxt);
1.27 daniel 6335: }
1.72 daniel 6336:
1.27 daniel 6337: /*
6338: * Third case : a comment
6339: */
1.152 daniel 6340: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 6341: (NXT(2) == '-') && (NXT(3) == '-')) {
1.114 daniel 6342: xmlParseComment(ctxt);
1.97 daniel 6343: ctxt->instate = XML_PARSER_CONTENT;
1.27 daniel 6344: }
1.72 daniel 6345:
1.27 daniel 6346: /*
6347: * Fourth case : a sub-element.
6348: */
1.152 daniel 6349: else if (RAW == '<') {
1.72 daniel 6350: xmlParseElement(ctxt);
1.45 daniel 6351: }
1.72 daniel 6352:
1.45 daniel 6353: /*
1.50 daniel 6354: * Fifth case : a reference. If if has not been resolved,
6355: * parsing returns it's Name, create the node
1.45 daniel 6356: */
1.97 daniel 6357:
1.152 daniel 6358: else if (RAW == '&') {
1.77 daniel 6359: xmlParseReference(ctxt);
1.27 daniel 6360: }
1.72 daniel 6361:
1.27 daniel 6362: /*
6363: * Last case, text. Note that References are handled directly.
6364: */
6365: else {
1.45 daniel 6366: xmlParseCharData(ctxt, 0);
1.3 veillard 6367: }
1.14 veillard 6368:
1.91 daniel 6369: GROW;
1.14 veillard 6370: /*
1.45 daniel 6371: * Pop-up of finished entities.
1.14 veillard 6372: */
1.152 daniel 6373: while ((RAW == 0) && (ctxt->inputNr > 1))
1.69 daniel 6374: xmlPopInput(ctxt);
1.135 daniel 6375: SHRINK;
1.45 daniel 6376:
1.113 daniel 6377: if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6378: (tok == ctxt->token)) {
1.230 ! veillard 6379: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 6380: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6381: ctxt->sax->error(ctxt->userData,
1.59 daniel 6382: "detected an error in element content\n");
6383: ctxt->wellFormed = 0;
1.180 daniel 6384: ctxt->disableSAX = 1;
1.224 veillard 6385: ctxt->instate = XML_PARSER_EOF;
1.29 daniel 6386: break;
6387: }
1.3 veillard 6388: }
1.2 veillard 6389: }
6390:
1.50 daniel 6391: /**
6392: * xmlParseElement:
6393: * @ctxt: an XML parser context
6394: *
6395: * parse an XML element, this is highly recursive
1.26 daniel 6396: *
6397: * [39] element ::= EmptyElemTag | STag content ETag
6398: *
1.98 daniel 6399: * [ WFC: Element Type Match ]
6400: * The Name in an element's end-tag must match the element type in the
6401: * start-tag.
6402: *
6403: * [ VC: Element Valid ]
1.117 daniel 6404: * An element is valid if there is a declaration matching elementdecl
1.99 daniel 6405: * where the Name matches the element type and one of the following holds:
6406: * - The declaration matches EMPTY and the element has no content.
6407: * - The declaration matches children and the sequence of child elements
6408: * belongs to the language generated by the regular expression in the
6409: * content model, with optional white space (characters matching the
6410: * nonterminal S) between each pair of child elements.
6411: * - The declaration matches Mixed and the content consists of character
6412: * data and child elements whose types match names in the content model.
6413: * - The declaration matches ANY, and the types of any child elements have
6414: * been declared.
1.2 veillard 6415: */
1.26 daniel 6416:
1.72 daniel 6417: void
1.69 daniel 6418: xmlParseElement(xmlParserCtxtPtr ctxt) {
1.123 daniel 6419: const xmlChar *openTag = CUR_PTR;
6420: xmlChar *name;
1.140 daniel 6421: xmlChar *oldname;
1.32 daniel 6422: xmlParserNodeInfo node_info;
1.118 daniel 6423: xmlNodePtr ret;
1.2 veillard 6424:
1.32 daniel 6425: /* Capture start position */
1.118 daniel 6426: if (ctxt->record_info) {
6427: node_info.begin_pos = ctxt->input->consumed +
6428: (CUR_PTR - ctxt->input->base);
6429: node_info.begin_line = ctxt->input->line;
6430: }
1.32 daniel 6431:
1.176 daniel 6432: if (ctxt->spaceNr == 0)
6433: spacePush(ctxt, -1);
6434: else
6435: spacePush(ctxt, *ctxt->space);
6436:
1.83 daniel 6437: name = xmlParseStartTag(ctxt);
6438: if (name == NULL) {
1.176 daniel 6439: spacePop(ctxt);
1.83 daniel 6440: return;
6441: }
1.140 daniel 6442: namePush(ctxt, name);
1.118 daniel 6443: ret = ctxt->node;
1.2 veillard 6444:
6445: /*
1.99 daniel 6446: * [ VC: Root Element Type ]
6447: * The Name in the document type declaration must match the element
6448: * type of the root element.
6449: */
1.105 daniel 6450: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 6451: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.102 daniel 6452: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
1.99 daniel 6453:
6454: /*
1.2 veillard 6455: * Check for an Empty Element.
6456: */
1.152 daniel 6457: if ((RAW == '/') && (NXT(1) == '>')) {
1.40 daniel 6458: SKIP(2);
1.171 daniel 6459: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6460: (!ctxt->disableSAX))
1.83 daniel 6461: ctxt->sax->endElement(ctxt->userData, name);
1.140 daniel 6462: oldname = namePop(ctxt);
1.176 daniel 6463: spacePop(ctxt);
1.140 daniel 6464: if (oldname != NULL) {
6465: #ifdef DEBUG_STACK
6466: fprintf(stderr,"Close: popped %s\n", oldname);
6467: #endif
6468: xmlFree(oldname);
1.211 veillard 6469: }
6470: if ( ret != NULL && ctxt->record_info ) {
6471: node_info.end_pos = ctxt->input->consumed +
6472: (CUR_PTR - ctxt->input->base);
6473: node_info.end_line = ctxt->input->line;
6474: node_info.node = ret;
6475: xmlParserAddNodeInfo(ctxt, &node_info);
1.140 daniel 6476: }
1.72 daniel 6477: return;
1.2 veillard 6478: }
1.152 daniel 6479: if (RAW == '>') {
1.91 daniel 6480: NEXT;
6481: } else {
1.230 ! veillard 6482: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.55 daniel 6483: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6484: ctxt->sax->error(ctxt->userData,
6485: "Couldn't find end of Start Tag\n%.30s\n",
1.57 daniel 6486: openTag);
1.59 daniel 6487: ctxt->wellFormed = 0;
1.180 daniel 6488: ctxt->disableSAX = 1;
1.45 daniel 6489:
6490: /*
6491: * end of parsing of this node.
6492: */
6493: nodePop(ctxt);
1.140 daniel 6494: oldname = namePop(ctxt);
1.176 daniel 6495: spacePop(ctxt);
1.140 daniel 6496: if (oldname != NULL) {
6497: #ifdef DEBUG_STACK
6498: fprintf(stderr,"Close: popped %s\n", oldname);
6499: #endif
6500: xmlFree(oldname);
6501: }
1.118 daniel 6502:
6503: /*
6504: * Capture end position and add node
6505: */
6506: if ( ret != NULL && ctxt->record_info ) {
6507: node_info.end_pos = ctxt->input->consumed +
6508: (CUR_PTR - ctxt->input->base);
6509: node_info.end_line = ctxt->input->line;
6510: node_info.node = ret;
6511: xmlParserAddNodeInfo(ctxt, &node_info);
6512: }
1.72 daniel 6513: return;
1.2 veillard 6514: }
6515:
6516: /*
6517: * Parse the content of the element:
6518: */
1.45 daniel 6519: xmlParseContent(ctxt);
1.153 daniel 6520: if (!IS_CHAR(RAW)) {
1.230 ! veillard 6521: ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
1.55 daniel 6522: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6523: ctxt->sax->error(ctxt->userData,
1.57 daniel 6524: "Premature end of data in tag %.30s\n", openTag);
1.59 daniel 6525: ctxt->wellFormed = 0;
1.180 daniel 6526: ctxt->disableSAX = 1;
1.45 daniel 6527:
6528: /*
6529: * end of parsing of this node.
6530: */
6531: nodePop(ctxt);
1.140 daniel 6532: oldname = namePop(ctxt);
1.176 daniel 6533: spacePop(ctxt);
1.140 daniel 6534: if (oldname != NULL) {
6535: #ifdef DEBUG_STACK
6536: fprintf(stderr,"Close: popped %s\n", oldname);
6537: #endif
6538: xmlFree(oldname);
6539: }
1.72 daniel 6540: return;
1.2 veillard 6541: }
6542:
6543: /*
1.27 daniel 6544: * parse the end of tag: '</' should be here.
1.2 veillard 6545: */
1.140 daniel 6546: xmlParseEndTag(ctxt);
1.118 daniel 6547:
6548: /*
6549: * Capture end position and add node
6550: */
6551: if ( ret != NULL && ctxt->record_info ) {
6552: node_info.end_pos = ctxt->input->consumed +
6553: (CUR_PTR - ctxt->input->base);
6554: node_info.end_line = ctxt->input->line;
6555: node_info.node = ret;
6556: xmlParserAddNodeInfo(ctxt, &node_info);
6557: }
1.2 veillard 6558: }
6559:
1.50 daniel 6560: /**
6561: * xmlParseVersionNum:
6562: * @ctxt: an XML parser context
6563: *
6564: * parse the XML version value.
1.29 daniel 6565: *
6566: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
1.68 daniel 6567: *
6568: * Returns the string giving the XML version number, or NULL
1.29 daniel 6569: */
1.123 daniel 6570: xmlChar *
1.55 daniel 6571: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.135 daniel 6572: xmlChar *buf = NULL;
6573: int len = 0;
6574: int size = 10;
6575: xmlChar cur;
1.29 daniel 6576:
1.135 daniel 6577: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6578: if (buf == NULL) {
6579: fprintf(stderr, "malloc of %d byte failed\n", size);
6580: return(NULL);
6581: }
6582: cur = CUR;
1.152 daniel 6583: while (((cur >= 'a') && (cur <= 'z')) ||
6584: ((cur >= 'A') && (cur <= 'Z')) ||
6585: ((cur >= '0') && (cur <= '9')) ||
6586: (cur == '_') || (cur == '.') ||
6587: (cur == ':') || (cur == '-')) {
1.135 daniel 6588: if (len + 1 >= size) {
6589: size *= 2;
1.204 veillard 6590: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 6591: if (buf == NULL) {
6592: fprintf(stderr, "realloc of %d byte failed\n", size);
6593: return(NULL);
6594: }
6595: }
6596: buf[len++] = cur;
6597: NEXT;
6598: cur=CUR;
6599: }
6600: buf[len] = 0;
6601: return(buf);
1.29 daniel 6602: }
6603:
1.50 daniel 6604: /**
6605: * xmlParseVersionInfo:
6606: * @ctxt: an XML parser context
6607: *
6608: * parse the XML version.
1.29 daniel 6609: *
6610: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6611: *
6612: * [25] Eq ::= S? '=' S?
1.50 daniel 6613: *
1.68 daniel 6614: * Returns the version string, e.g. "1.0"
1.29 daniel 6615: */
6616:
1.123 daniel 6617: xmlChar *
1.55 daniel 6618: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
1.123 daniel 6619: xmlChar *version = NULL;
6620: const xmlChar *q;
1.29 daniel 6621:
1.152 daniel 6622: if ((RAW == 'v') && (NXT(1) == 'e') &&
1.40 daniel 6623: (NXT(2) == 'r') && (NXT(3) == 's') &&
6624: (NXT(4) == 'i') && (NXT(5) == 'o') &&
6625: (NXT(6) == 'n')) {
6626: SKIP(7);
1.42 daniel 6627: SKIP_BLANKS;
1.152 daniel 6628: if (RAW != '=') {
1.230 ! veillard 6629: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.55 daniel 6630: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6631: ctxt->sax->error(ctxt->userData,
6632: "xmlParseVersionInfo : expected '='\n");
1.59 daniel 6633: ctxt->wellFormed = 0;
1.180 daniel 6634: ctxt->disableSAX = 1;
1.31 daniel 6635: return(NULL);
6636: }
1.40 daniel 6637: NEXT;
1.42 daniel 6638: SKIP_BLANKS;
1.152 daniel 6639: if (RAW == '"') {
1.40 daniel 6640: NEXT;
6641: q = CUR_PTR;
1.29 daniel 6642: version = xmlParseVersionNum(ctxt);
1.152 daniel 6643: if (RAW != '"') {
1.230 ! veillard 6644: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 6645: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6646: ctxt->sax->error(ctxt->userData,
6647: "String not closed\n%.50s\n", q);
1.59 daniel 6648: ctxt->wellFormed = 0;
1.180 daniel 6649: ctxt->disableSAX = 1;
1.55 daniel 6650: } else
1.40 daniel 6651: NEXT;
1.152 daniel 6652: } else if (RAW == '\''){
1.40 daniel 6653: NEXT;
6654: q = CUR_PTR;
1.29 daniel 6655: version = xmlParseVersionNum(ctxt);
1.152 daniel 6656: if (RAW != '\'') {
1.230 ! veillard 6657: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 6658: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6659: ctxt->sax->error(ctxt->userData,
6660: "String not closed\n%.50s\n", q);
1.59 daniel 6661: ctxt->wellFormed = 0;
1.180 daniel 6662: ctxt->disableSAX = 1;
1.55 daniel 6663: } else
1.40 daniel 6664: NEXT;
1.31 daniel 6665: } else {
1.230 ! veillard 6666: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.55 daniel 6667: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6668: ctxt->sax->error(ctxt->userData,
1.59 daniel 6669: "xmlParseVersionInfo : expected ' or \"\n");
1.122 daniel 6670: ctxt->wellFormed = 0;
1.180 daniel 6671: ctxt->disableSAX = 1;
1.29 daniel 6672: }
6673: }
6674: return(version);
6675: }
6676:
1.50 daniel 6677: /**
6678: * xmlParseEncName:
6679: * @ctxt: an XML parser context
6680: *
6681: * parse the XML encoding name
1.29 daniel 6682: *
6683: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1.50 daniel 6684: *
1.68 daniel 6685: * Returns the encoding name value or NULL
1.29 daniel 6686: */
1.123 daniel 6687: xmlChar *
1.55 daniel 6688: xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.135 daniel 6689: xmlChar *buf = NULL;
6690: int len = 0;
6691: int size = 10;
6692: xmlChar cur;
1.29 daniel 6693:
1.135 daniel 6694: cur = CUR;
6695: if (((cur >= 'a') && (cur <= 'z')) ||
6696: ((cur >= 'A') && (cur <= 'Z'))) {
6697: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6698: if (buf == NULL) {
6699: fprintf(stderr, "malloc of %d byte failed\n", size);
6700: return(NULL);
6701: }
6702:
6703: buf[len++] = cur;
1.40 daniel 6704: NEXT;
1.135 daniel 6705: cur = CUR;
1.152 daniel 6706: while (((cur >= 'a') && (cur <= 'z')) ||
6707: ((cur >= 'A') && (cur <= 'Z')) ||
6708: ((cur >= '0') && (cur <= '9')) ||
6709: (cur == '.') || (cur == '_') ||
6710: (cur == '-')) {
1.135 daniel 6711: if (len + 1 >= size) {
6712: size *= 2;
1.204 veillard 6713: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 6714: if (buf == NULL) {
6715: fprintf(stderr, "realloc of %d byte failed\n", size);
6716: return(NULL);
6717: }
6718: }
6719: buf[len++] = cur;
6720: NEXT;
6721: cur = CUR;
6722: if (cur == 0) {
6723: SHRINK;
6724: GROW;
6725: cur = CUR;
6726: }
6727: }
6728: buf[len] = 0;
1.29 daniel 6729: } else {
1.230 ! veillard 6730: ctxt->errNo = XML_ERR_ENCODING_NAME;
1.55 daniel 6731: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6732: ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
1.59 daniel 6733: ctxt->wellFormed = 0;
1.180 daniel 6734: ctxt->disableSAX = 1;
1.29 daniel 6735: }
1.135 daniel 6736: return(buf);
1.29 daniel 6737: }
6738:
1.50 daniel 6739: /**
6740: * xmlParseEncodingDecl:
6741: * @ctxt: an XML parser context
6742: *
6743: * parse the XML encoding declaration
1.29 daniel 6744: *
6745: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
1.50 daniel 6746: *
1.229 veillard 6747: * this setups the conversion filters.
1.50 daniel 6748: *
1.68 daniel 6749: * Returns the encoding value or NULL
1.29 daniel 6750: */
6751:
1.123 daniel 6752: xmlChar *
1.55 daniel 6753: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 6754: xmlChar *encoding = NULL;
6755: const xmlChar *q;
1.29 daniel 6756:
1.42 daniel 6757: SKIP_BLANKS;
1.152 daniel 6758: if ((RAW == 'e') && (NXT(1) == 'n') &&
1.40 daniel 6759: (NXT(2) == 'c') && (NXT(3) == 'o') &&
6760: (NXT(4) == 'd') && (NXT(5) == 'i') &&
6761: (NXT(6) == 'n') && (NXT(7) == 'g')) {
6762: SKIP(8);
1.42 daniel 6763: SKIP_BLANKS;
1.152 daniel 6764: if (RAW != '=') {
1.230 ! veillard 6765: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.55 daniel 6766: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6767: ctxt->sax->error(ctxt->userData,
6768: "xmlParseEncodingDecl : expected '='\n");
1.59 daniel 6769: ctxt->wellFormed = 0;
1.180 daniel 6770: ctxt->disableSAX = 1;
1.31 daniel 6771: return(NULL);
6772: }
1.40 daniel 6773: NEXT;
1.42 daniel 6774: SKIP_BLANKS;
1.152 daniel 6775: if (RAW == '"') {
1.40 daniel 6776: NEXT;
6777: q = CUR_PTR;
1.29 daniel 6778: encoding = xmlParseEncName(ctxt);
1.152 daniel 6779: if (RAW != '"') {
1.230 ! veillard 6780: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 6781: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6782: ctxt->sax->error(ctxt->userData,
6783: "String not closed\n%.50s\n", q);
1.59 daniel 6784: ctxt->wellFormed = 0;
1.180 daniel 6785: ctxt->disableSAX = 1;
1.55 daniel 6786: } else
1.40 daniel 6787: NEXT;
1.152 daniel 6788: } else if (RAW == '\''){
1.40 daniel 6789: NEXT;
6790: q = CUR_PTR;
1.29 daniel 6791: encoding = xmlParseEncName(ctxt);
1.152 daniel 6792: if (RAW != '\'') {
1.230 ! veillard 6793: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 6794: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6795: ctxt->sax->error(ctxt->userData,
6796: "String not closed\n%.50s\n", q);
1.59 daniel 6797: ctxt->wellFormed = 0;
1.180 daniel 6798: ctxt->disableSAX = 1;
1.55 daniel 6799: } else
1.40 daniel 6800: NEXT;
1.152 daniel 6801: } else if (RAW == '"'){
1.230 ! veillard 6802: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.55 daniel 6803: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6804: ctxt->sax->error(ctxt->userData,
1.59 daniel 6805: "xmlParseEncodingDecl : expected ' or \"\n");
6806: ctxt->wellFormed = 0;
1.180 daniel 6807: ctxt->disableSAX = 1;
1.29 daniel 6808: }
1.193 daniel 6809: if (encoding != NULL) {
6810: xmlCharEncoding enc;
6811: xmlCharEncodingHandlerPtr handler;
6812:
1.195 daniel 6813: if (ctxt->input->encoding != NULL)
6814: xmlFree((xmlChar *) ctxt->input->encoding);
6815: ctxt->input->encoding = encoding;
6816:
1.193 daniel 6817: enc = xmlParseCharEncoding((const char *) encoding);
6818: /*
6819: * registered set of known encodings
6820: */
6821: if (enc != XML_CHAR_ENCODING_ERROR) {
6822: xmlSwitchEncoding(ctxt, enc);
6823: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6824: xmlFree(encoding);
6825: return(NULL);
6826: }
6827: } else {
6828: /*
6829: * fallback for unknown encodings
6830: */
6831: handler = xmlFindCharEncodingHandler((const char *) encoding);
6832: if (handler != NULL) {
6833: xmlSwitchToEncoding(ctxt, handler);
6834: } else {
6835: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.208 veillard 6836: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6837: ctxt->sax->error(ctxt->userData,
6838: "Unsupported encoding %s\n", encoding);
1.193 daniel 6839: return(NULL);
6840: }
6841: }
6842: }
1.29 daniel 6843: }
6844: return(encoding);
6845: }
6846:
1.50 daniel 6847: /**
6848: * xmlParseSDDecl:
6849: * @ctxt: an XML parser context
6850: *
6851: * parse the XML standalone declaration
1.29 daniel 6852: *
6853: * [32] SDDecl ::= S 'standalone' Eq
6854: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
1.99 daniel 6855: *
6856: * [ VC: Standalone Document Declaration ]
6857: * TODO The standalone document declaration must have the value "no"
6858: * if any external markup declarations contain declarations of:
6859: * - attributes with default values, if elements to which these
6860: * attributes apply appear in the document without specifications
6861: * of values for these attributes, or
6862: * - entities (other than amp, lt, gt, apos, quot), if references
6863: * to those entities appear in the document, or
6864: * - attributes with values subject to normalization, where the
6865: * attribute appears in the document with a value which will change
6866: * as a result of normalization, or
6867: * - element types with element content, if white space occurs directly
6868: * within any instance of those types.
1.68 daniel 6869: *
6870: * Returns 1 if standalone, 0 otherwise
1.29 daniel 6871: */
6872:
1.55 daniel 6873: int
6874: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 6875: int standalone = -1;
6876:
1.42 daniel 6877: SKIP_BLANKS;
1.152 daniel 6878: if ((RAW == 's') && (NXT(1) == 't') &&
1.40 daniel 6879: (NXT(2) == 'a') && (NXT(3) == 'n') &&
6880: (NXT(4) == 'd') && (NXT(5) == 'a') &&
6881: (NXT(6) == 'l') && (NXT(7) == 'o') &&
6882: (NXT(8) == 'n') && (NXT(9) == 'e')) {
6883: SKIP(10);
1.81 daniel 6884: SKIP_BLANKS;
1.152 daniel 6885: if (RAW != '=') {
1.230 ! veillard 6886: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.55 daniel 6887: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6888: ctxt->sax->error(ctxt->userData,
1.59 daniel 6889: "XML standalone declaration : expected '='\n");
6890: ctxt->wellFormed = 0;
1.180 daniel 6891: ctxt->disableSAX = 1;
1.32 daniel 6892: return(standalone);
6893: }
1.40 daniel 6894: NEXT;
1.42 daniel 6895: SKIP_BLANKS;
1.152 daniel 6896: if (RAW == '\''){
1.40 daniel 6897: NEXT;
1.152 daniel 6898: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 6899: standalone = 0;
1.40 daniel 6900: SKIP(2);
1.152 daniel 6901: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 6902: (NXT(2) == 's')) {
1.29 daniel 6903: standalone = 1;
1.40 daniel 6904: SKIP(3);
1.29 daniel 6905: } else {
1.230 ! veillard 6906: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.55 daniel 6907: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6908: ctxt->sax->error(ctxt->userData,
6909: "standalone accepts only 'yes' or 'no'\n");
1.59 daniel 6910: ctxt->wellFormed = 0;
1.180 daniel 6911: ctxt->disableSAX = 1;
1.29 daniel 6912: }
1.152 daniel 6913: if (RAW != '\'') {
1.230 ! veillard 6914: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 6915: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6916: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 6917: ctxt->wellFormed = 0;
1.180 daniel 6918: ctxt->disableSAX = 1;
1.55 daniel 6919: } else
1.40 daniel 6920: NEXT;
1.152 daniel 6921: } else if (RAW == '"'){
1.40 daniel 6922: NEXT;
1.152 daniel 6923: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 6924: standalone = 0;
1.40 daniel 6925: SKIP(2);
1.152 daniel 6926: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 6927: (NXT(2) == 's')) {
1.29 daniel 6928: standalone = 1;
1.40 daniel 6929: SKIP(3);
1.29 daniel 6930: } else {
1.230 ! veillard 6931: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.55 daniel 6932: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6933: ctxt->sax->error(ctxt->userData,
1.59 daniel 6934: "standalone accepts only 'yes' or 'no'\n");
6935: ctxt->wellFormed = 0;
1.180 daniel 6936: ctxt->disableSAX = 1;
1.29 daniel 6937: }
1.152 daniel 6938: if (RAW != '"') {
1.230 ! veillard 6939: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 6940: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6941: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 6942: ctxt->wellFormed = 0;
1.180 daniel 6943: ctxt->disableSAX = 1;
1.55 daniel 6944: } else
1.40 daniel 6945: NEXT;
1.37 daniel 6946: } else {
1.230 ! veillard 6947: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.55 daniel 6948: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6949: ctxt->sax->error(ctxt->userData,
6950: "Standalone value not found\n");
1.59 daniel 6951: ctxt->wellFormed = 0;
1.180 daniel 6952: ctxt->disableSAX = 1;
1.37 daniel 6953: }
1.29 daniel 6954: }
6955: return(standalone);
6956: }
6957:
1.50 daniel 6958: /**
6959: * xmlParseXMLDecl:
6960: * @ctxt: an XML parser context
6961: *
6962: * parse an XML declaration header
1.29 daniel 6963: *
6964: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 6965: */
6966:
1.55 daniel 6967: void
6968: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 6969: xmlChar *version;
1.1 veillard 6970:
6971: /*
1.19 daniel 6972: * We know that '<?xml' is here.
1.1 veillard 6973: */
1.40 daniel 6974: SKIP(5);
1.1 veillard 6975:
1.153 daniel 6976: if (!IS_BLANK(RAW)) {
1.230 ! veillard 6977: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 6978: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6979: ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
1.59 daniel 6980: ctxt->wellFormed = 0;
1.180 daniel 6981: ctxt->disableSAX = 1;
1.59 daniel 6982: }
1.42 daniel 6983: SKIP_BLANKS;
1.1 veillard 6984:
6985: /*
1.29 daniel 6986: * We should have the VersionInfo here.
1.1 veillard 6987: */
1.29 daniel 6988: version = xmlParseVersionInfo(ctxt);
6989: if (version == NULL)
1.45 daniel 6990: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.72 daniel 6991: ctxt->version = xmlStrdup(version);
1.119 daniel 6992: xmlFree(version);
1.29 daniel 6993:
6994: /*
6995: * We may have the encoding declaration
6996: */
1.153 daniel 6997: if (!IS_BLANK(RAW)) {
1.152 daniel 6998: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 6999: SKIP(2);
7000: return;
7001: }
1.230 ! veillard 7002: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 7003: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7004: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 7005: ctxt->wellFormed = 0;
1.180 daniel 7006: ctxt->disableSAX = 1;
1.59 daniel 7007: }
1.195 daniel 7008: xmlParseEncodingDecl(ctxt);
1.193 daniel 7009: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7010: /*
7011: * The XML REC instructs us to stop parsing right here
7012: */
7013: return;
7014: }
1.1 veillard 7015:
7016: /*
1.29 daniel 7017: * We may have the standalone status.
1.1 veillard 7018: */
1.164 daniel 7019: if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
1.152 daniel 7020: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 7021: SKIP(2);
7022: return;
7023: }
1.230 ! veillard 7024: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 7025: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7026: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 7027: ctxt->wellFormed = 0;
1.180 daniel 7028: ctxt->disableSAX = 1;
1.59 daniel 7029: }
7030: SKIP_BLANKS;
1.167 daniel 7031: ctxt->input->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 7032:
1.42 daniel 7033: SKIP_BLANKS;
1.152 daniel 7034: if ((RAW == '?') && (NXT(1) == '>')) {
1.40 daniel 7035: SKIP(2);
1.152 daniel 7036: } else if (RAW == '>') {
1.31 daniel 7037: /* Deprecated old WD ... */
1.230 ! veillard 7038: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.55 daniel 7039: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7040: ctxt->sax->error(ctxt->userData,
7041: "XML declaration must end-up with '?>'\n");
1.59 daniel 7042: ctxt->wellFormed = 0;
1.180 daniel 7043: ctxt->disableSAX = 1;
1.40 daniel 7044: NEXT;
1.29 daniel 7045: } else {
1.230 ! veillard 7046: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.55 daniel 7047: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 7048: ctxt->sax->error(ctxt->userData,
7049: "parsing XML declaration: '?>' expected\n");
1.59 daniel 7050: ctxt->wellFormed = 0;
1.180 daniel 7051: ctxt->disableSAX = 1;
1.40 daniel 7052: MOVETO_ENDTAG(CUR_PTR);
7053: NEXT;
1.29 daniel 7054: }
1.1 veillard 7055: }
7056:
1.50 daniel 7057: /**
7058: * xmlParseMisc:
7059: * @ctxt: an XML parser context
7060: *
7061: * parse an XML Misc* optionnal field.
1.21 daniel 7062: *
1.22 daniel 7063: * [27] Misc ::= Comment | PI | S
1.1 veillard 7064: */
7065:
1.55 daniel 7066: void
7067: xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.152 daniel 7068: while (((RAW == '<') && (NXT(1) == '?')) ||
7069: ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7070: (NXT(2) == '-') && (NXT(3) == '-')) ||
7071: IS_BLANK(CUR)) {
1.152 daniel 7072: if ((RAW == '<') && (NXT(1) == '?')) {
1.16 daniel 7073: xmlParsePI(ctxt);
1.40 daniel 7074: } else if (IS_BLANK(CUR)) {
7075: NEXT;
1.1 veillard 7076: } else
1.114 daniel 7077: xmlParseComment(ctxt);
1.1 veillard 7078: }
7079: }
7080:
1.50 daniel 7081: /**
1.181 daniel 7082: * xmlParseDocument:
1.50 daniel 7083: * @ctxt: an XML parser context
7084: *
7085: * parse an XML document (and build a tree if using the standard SAX
7086: * interface).
1.21 daniel 7087: *
1.22 daniel 7088: * [1] document ::= prolog element Misc*
1.29 daniel 7089: *
7090: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.50 daniel 7091: *
1.68 daniel 7092: * Returns 0, -1 in case of error. the parser context is augmented
1.50 daniel 7093: * as a result of the parsing.
1.1 veillard 7094: */
7095:
1.55 daniel 7096: int
7097: xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.156 daniel 7098: xmlChar start[4];
7099: xmlCharEncoding enc;
7100:
1.45 daniel 7101: xmlDefaultSAXHandlerInit();
7102:
1.91 daniel 7103: GROW;
7104:
1.14 veillard 7105: /*
1.44 daniel 7106: * SAX: beginning of the document processing.
7107: */
1.72 daniel 7108: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
1.74 daniel 7109: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
1.44 daniel 7110:
1.156 daniel 7111: /*
7112: * Get the 4 first bytes and decode the charset
7113: * if enc != XML_CHAR_ENCODING_NONE
7114: * plug some encoding conversion routines.
7115: */
7116: start[0] = RAW;
7117: start[1] = NXT(1);
7118: start[2] = NXT(2);
7119: start[3] = NXT(3);
7120: enc = xmlDetectCharEncoding(start, 4);
7121: if (enc != XML_CHAR_ENCODING_NONE) {
7122: xmlSwitchEncoding(ctxt, enc);
7123: }
7124:
1.1 veillard 7125:
1.59 daniel 7126: if (CUR == 0) {
1.230 ! veillard 7127: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 7128: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7129: ctxt->sax->error(ctxt->userData, "Document is empty\n");
1.59 daniel 7130: ctxt->wellFormed = 0;
1.180 daniel 7131: ctxt->disableSAX = 1;
1.59 daniel 7132: }
1.1 veillard 7133:
7134: /*
7135: * Check for the XMLDecl in the Prolog.
7136: */
1.91 daniel 7137: GROW;
1.152 daniel 7138: if ((RAW == '<') && (NXT(1) == '?') &&
1.40 daniel 7139: (NXT(2) == 'x') && (NXT(3) == 'm') &&
1.142 daniel 7140: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.196 daniel 7141:
7142: /*
7143: * Note that we will switch encoding on the fly.
7144: */
1.19 daniel 7145: xmlParseXMLDecl(ctxt);
1.193 daniel 7146: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7147: /*
7148: * The XML REC instructs us to stop parsing right here
7149: */
7150: return(-1);
7151: }
1.167 daniel 7152: ctxt->standalone = ctxt->input->standalone;
1.42 daniel 7153: SKIP_BLANKS;
1.1 veillard 7154: } else {
1.72 daniel 7155: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.1 veillard 7156: }
1.171 daniel 7157: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
1.74 daniel 7158: ctxt->sax->startDocument(ctxt->userData);
1.1 veillard 7159:
7160: /*
7161: * The Misc part of the Prolog
7162: */
1.91 daniel 7163: GROW;
1.16 daniel 7164: xmlParseMisc(ctxt);
1.1 veillard 7165:
7166: /*
1.29 daniel 7167: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 7168: * (doctypedecl Misc*)?
7169: */
1.91 daniel 7170: GROW;
1.152 daniel 7171: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7172: (NXT(2) == 'D') && (NXT(3) == 'O') &&
7173: (NXT(4) == 'C') && (NXT(5) == 'T') &&
7174: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7175: (NXT(8) == 'E')) {
1.165 daniel 7176:
1.166 daniel 7177: ctxt->inSubset = 1;
1.22 daniel 7178: xmlParseDocTypeDecl(ctxt);
1.152 daniel 7179: if (RAW == '[') {
1.140 daniel 7180: ctxt->instate = XML_PARSER_DTD;
7181: xmlParseInternalSubset(ctxt);
7182: }
1.165 daniel 7183:
7184: /*
7185: * Create and update the external subset.
7186: */
1.166 daniel 7187: ctxt->inSubset = 2;
1.171 daniel 7188: if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7189: (!ctxt->disableSAX))
1.165 daniel 7190: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7191: ctxt->extSubSystem, ctxt->extSubURI);
1.166 daniel 7192: ctxt->inSubset = 0;
1.165 daniel 7193:
7194:
1.96 daniel 7195: ctxt->instate = XML_PARSER_PROLOG;
1.22 daniel 7196: xmlParseMisc(ctxt);
1.21 daniel 7197: }
7198:
7199: /*
7200: * Time to start parsing the tree itself
1.1 veillard 7201: */
1.91 daniel 7202: GROW;
1.152 daniel 7203: if (RAW != '<') {
1.230 ! veillard 7204: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 7205: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7206: ctxt->sax->error(ctxt->userData,
1.151 daniel 7207: "Start tag expected, '<' not found\n");
1.59 daniel 7208: ctxt->wellFormed = 0;
1.180 daniel 7209: ctxt->disableSAX = 1;
1.140 daniel 7210: ctxt->instate = XML_PARSER_EOF;
7211: } else {
7212: ctxt->instate = XML_PARSER_CONTENT;
7213: xmlParseElement(ctxt);
7214: ctxt->instate = XML_PARSER_EPILOG;
7215:
7216:
7217: /*
7218: * The Misc part at the end
7219: */
7220: xmlParseMisc(ctxt);
7221:
1.152 daniel 7222: if (RAW != 0) {
1.230 ! veillard 7223: ctxt->errNo = XML_ERR_DOCUMENT_END;
1.140 daniel 7224: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7225: ctxt->sax->error(ctxt->userData,
7226: "Extra content at the end of the document\n");
7227: ctxt->wellFormed = 0;
1.180 daniel 7228: ctxt->disableSAX = 1;
1.140 daniel 7229: }
7230: ctxt->instate = XML_PARSER_EOF;
1.59 daniel 7231: }
7232:
1.44 daniel 7233: /*
7234: * SAX: end of the document processing.
7235: */
1.171 daniel 7236: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7237: (!ctxt->disableSAX))
1.74 daniel 7238: ctxt->sax->endDocument(ctxt->userData);
1.151 daniel 7239:
1.59 daniel 7240: if (! ctxt->wellFormed) return(-1);
1.16 daniel 7241: return(0);
7242: }
7243:
1.229 veillard 7244: /**
7245: * xmlParseExtParsedEnt:
7246: * @ctxt: an XML parser context
7247: *
7248: * parse a genreral parsed entity
7249: * An external general parsed entity is well-formed if it matches the
7250: * production labeled extParsedEnt.
7251: *
7252: * [78] extParsedEnt ::= TextDecl? content
7253: *
7254: * Returns 0, -1 in case of error. the parser context is augmented
7255: * as a result of the parsing.
7256: */
7257:
7258: int
7259: xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7260: xmlChar start[4];
7261: xmlCharEncoding enc;
7262:
7263: xmlDefaultSAXHandlerInit();
7264:
7265: GROW;
7266:
7267: /*
7268: * SAX: beginning of the document processing.
7269: */
7270: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7271: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7272:
7273: /*
7274: * Get the 4 first bytes and decode the charset
7275: * if enc != XML_CHAR_ENCODING_NONE
7276: * plug some encoding conversion routines.
7277: */
7278: start[0] = RAW;
7279: start[1] = NXT(1);
7280: start[2] = NXT(2);
7281: start[3] = NXT(3);
7282: enc = xmlDetectCharEncoding(start, 4);
7283: if (enc != XML_CHAR_ENCODING_NONE) {
7284: xmlSwitchEncoding(ctxt, enc);
7285: }
7286:
7287:
7288: if (CUR == 0) {
1.230 ! veillard 7289: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.229 veillard 7290: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7291: ctxt->sax->error(ctxt->userData, "Document is empty\n");
7292: ctxt->wellFormed = 0;
7293: ctxt->disableSAX = 1;
7294: }
7295:
7296: /*
7297: * Check for the XMLDecl in the Prolog.
7298: */
7299: GROW;
7300: if ((RAW == '<') && (NXT(1) == '?') &&
7301: (NXT(2) == 'x') && (NXT(3) == 'm') &&
7302: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7303:
7304: /*
7305: * Note that we will switch encoding on the fly.
7306: */
7307: xmlParseXMLDecl(ctxt);
7308: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7309: /*
7310: * The XML REC instructs us to stop parsing right here
7311: */
7312: return(-1);
7313: }
7314: SKIP_BLANKS;
7315: } else {
7316: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7317: }
7318: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7319: ctxt->sax->startDocument(ctxt->userData);
7320:
7321: /*
7322: * Doing validity checking on chunk doesn't make sense
7323: */
7324: ctxt->instate = XML_PARSER_CONTENT;
7325: ctxt->validate = 0;
7326: ctxt->depth = 0;
7327:
7328: xmlParseContent(ctxt);
7329:
7330: if ((RAW == '<') && (NXT(1) == '/')) {
1.230 ! veillard 7331: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.229 veillard 7332: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7333: ctxt->sax->error(ctxt->userData,
7334: "chunk is not well balanced\n");
7335: ctxt->wellFormed = 0;
7336: ctxt->disableSAX = 1;
7337: } else if (RAW != 0) {
1.230 ! veillard 7338: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
1.229 veillard 7339: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7340: ctxt->sax->error(ctxt->userData,
7341: "extra content at the end of well balanced chunk\n");
7342: ctxt->wellFormed = 0;
7343: ctxt->disableSAX = 1;
7344: }
7345:
7346: /*
7347: * SAX: end of the document processing.
7348: */
7349: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7350: (!ctxt->disableSAX))
7351: ctxt->sax->endDocument(ctxt->userData);
7352:
7353: if (! ctxt->wellFormed) return(-1);
7354: return(0);
7355: }
7356:
1.98 daniel 7357: /************************************************************************
7358: * *
1.128 daniel 7359: * Progressive parsing interfaces *
7360: * *
7361: ************************************************************************/
7362:
7363: /**
7364: * xmlParseLookupSequence:
7365: * @ctxt: an XML parser context
7366: * @first: the first char to lookup
1.140 daniel 7367: * @next: the next char to lookup or zero
7368: * @third: the next char to lookup or zero
1.128 daniel 7369: *
1.140 daniel 7370: * Try to find if a sequence (first, next, third) or just (first next) or
7371: * (first) is available in the input stream.
7372: * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7373: * to avoid rescanning sequences of bytes, it DOES change the state of the
7374: * parser, do not use liberally.
1.128 daniel 7375: *
1.140 daniel 7376: * Returns the index to the current parsing point if the full sequence
7377: * is available, -1 otherwise.
1.128 daniel 7378: */
7379: int
1.140 daniel 7380: xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7381: xmlChar next, xmlChar third) {
7382: int base, len;
7383: xmlParserInputPtr in;
7384: const xmlChar *buf;
7385:
7386: in = ctxt->input;
7387: if (in == NULL) return(-1);
7388: base = in->cur - in->base;
7389: if (base < 0) return(-1);
7390: if (ctxt->checkIndex > base)
7391: base = ctxt->checkIndex;
7392: if (in->buf == NULL) {
7393: buf = in->base;
7394: len = in->length;
7395: } else {
7396: buf = in->buf->buffer->content;
7397: len = in->buf->buffer->use;
7398: }
7399: /* take into account the sequence length */
7400: if (third) len -= 2;
7401: else if (next) len --;
7402: for (;base < len;base++) {
7403: if (buf[base] == first) {
7404: if (third != 0) {
7405: if ((buf[base + 1] != next) ||
7406: (buf[base + 2] != third)) continue;
7407: } else if (next != 0) {
7408: if (buf[base + 1] != next) continue;
7409: }
7410: ctxt->checkIndex = 0;
7411: #ifdef DEBUG_PUSH
7412: if (next == 0)
7413: fprintf(stderr, "PP: lookup '%c' found at %d\n",
7414: first, base);
7415: else if (third == 0)
7416: fprintf(stderr, "PP: lookup '%c%c' found at %d\n",
7417: first, next, base);
7418: else
7419: fprintf(stderr, "PP: lookup '%c%c%c' found at %d\n",
7420: first, next, third, base);
7421: #endif
7422: return(base - (in->cur - in->base));
7423: }
7424: }
7425: ctxt->checkIndex = base;
7426: #ifdef DEBUG_PUSH
7427: if (next == 0)
7428: fprintf(stderr, "PP: lookup '%c' failed\n", first);
7429: else if (third == 0)
7430: fprintf(stderr, "PP: lookup '%c%c' failed\n", first, next);
7431: else
7432: fprintf(stderr, "PP: lookup '%c%c%c' failed\n", first, next, third);
7433: #endif
7434: return(-1);
1.128 daniel 7435: }
7436:
7437: /**
1.143 daniel 7438: * xmlParseTryOrFinish:
1.128 daniel 7439: * @ctxt: an XML parser context
1.143 daniel 7440: * @terminate: last chunk indicator
1.128 daniel 7441: *
7442: * Try to progress on parsing
7443: *
7444: * Returns zero if no parsing was possible
7445: */
7446: int
1.143 daniel 7447: xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
1.128 daniel 7448: int ret = 0;
1.140 daniel 7449: int avail;
7450: xmlChar cur, next;
7451:
7452: #ifdef DEBUG_PUSH
7453: switch (ctxt->instate) {
7454: case XML_PARSER_EOF:
7455: fprintf(stderr, "PP: try EOF\n"); break;
7456: case XML_PARSER_START:
7457: fprintf(stderr, "PP: try START\n"); break;
7458: case XML_PARSER_MISC:
7459: fprintf(stderr, "PP: try MISC\n");break;
7460: case XML_PARSER_COMMENT:
7461: fprintf(stderr, "PP: try COMMENT\n");break;
7462: case XML_PARSER_PROLOG:
7463: fprintf(stderr, "PP: try PROLOG\n");break;
7464: case XML_PARSER_START_TAG:
7465: fprintf(stderr, "PP: try START_TAG\n");break;
7466: case XML_PARSER_CONTENT:
7467: fprintf(stderr, "PP: try CONTENT\n");break;
7468: case XML_PARSER_CDATA_SECTION:
7469: fprintf(stderr, "PP: try CDATA_SECTION\n");break;
7470: case XML_PARSER_END_TAG:
7471: fprintf(stderr, "PP: try END_TAG\n");break;
7472: case XML_PARSER_ENTITY_DECL:
7473: fprintf(stderr, "PP: try ENTITY_DECL\n");break;
7474: case XML_PARSER_ENTITY_VALUE:
7475: fprintf(stderr, "PP: try ENTITY_VALUE\n");break;
7476: case XML_PARSER_ATTRIBUTE_VALUE:
7477: fprintf(stderr, "PP: try ATTRIBUTE_VALUE\n");break;
7478: case XML_PARSER_DTD:
7479: fprintf(stderr, "PP: try DTD\n");break;
7480: case XML_PARSER_EPILOG:
7481: fprintf(stderr, "PP: try EPILOG\n");break;
7482: case XML_PARSER_PI:
7483: fprintf(stderr, "PP: try PI\n");break;
7484: }
7485: #endif
1.128 daniel 7486:
7487: while (1) {
1.140 daniel 7488: /*
7489: * Pop-up of finished entities.
7490: */
1.152 daniel 7491: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 7492: xmlPopInput(ctxt);
7493:
1.184 daniel 7494: if (ctxt->input ==NULL) break;
7495: if (ctxt->input->buf == NULL)
7496: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7497: else
1.184 daniel 7498: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7499: if (avail < 1)
7500: goto done;
1.128 daniel 7501: switch (ctxt->instate) {
7502: case XML_PARSER_EOF:
1.140 daniel 7503: /*
7504: * Document parsing is done !
7505: */
7506: goto done;
7507: case XML_PARSER_START:
7508: /*
7509: * Very first chars read from the document flow.
7510: */
1.184 daniel 7511: cur = ctxt->input->cur[0];
1.140 daniel 7512: if (IS_BLANK(cur)) {
7513: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7514: ctxt->sax->setDocumentLocator(ctxt->userData,
7515: &xmlDefaultSAXLocator);
1.230 ! veillard 7516: ctxt->errNo = XML_ERR_DOCUMENT_START;
1.140 daniel 7517: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7518: ctxt->sax->error(ctxt->userData,
7519: "Extra spaces at the beginning of the document are not allowed\n");
7520: ctxt->wellFormed = 0;
1.180 daniel 7521: ctxt->disableSAX = 1;
1.140 daniel 7522: SKIP_BLANKS;
7523: ret++;
1.184 daniel 7524: if (ctxt->input->buf == NULL)
7525: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7526: else
1.184 daniel 7527: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7528: }
7529: if (avail < 2)
7530: goto done;
7531:
1.184 daniel 7532: cur = ctxt->input->cur[0];
7533: next = ctxt->input->cur[1];
1.140 daniel 7534: if (cur == 0) {
7535: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7536: ctxt->sax->setDocumentLocator(ctxt->userData,
7537: &xmlDefaultSAXLocator);
1.230 ! veillard 7538: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.140 daniel 7539: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7540: ctxt->sax->error(ctxt->userData, "Document is empty\n");
7541: ctxt->wellFormed = 0;
1.180 daniel 7542: ctxt->disableSAX = 1;
1.140 daniel 7543: ctxt->instate = XML_PARSER_EOF;
7544: #ifdef DEBUG_PUSH
7545: fprintf(stderr, "PP: entering EOF\n");
7546: #endif
7547: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7548: ctxt->sax->endDocument(ctxt->userData);
7549: goto done;
7550: }
7551: if ((cur == '<') && (next == '?')) {
7552: /* PI or XML decl */
7553: if (avail < 5) return(ret);
1.143 daniel 7554: if ((!terminate) &&
7555: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 7556: return(ret);
7557: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7558: ctxt->sax->setDocumentLocator(ctxt->userData,
7559: &xmlDefaultSAXLocator);
1.184 daniel 7560: if ((ctxt->input->cur[2] == 'x') &&
7561: (ctxt->input->cur[3] == 'm') &&
7562: (ctxt->input->cur[4] == 'l') &&
7563: (IS_BLANK(ctxt->input->cur[5]))) {
1.140 daniel 7564: ret += 5;
7565: #ifdef DEBUG_PUSH
7566: fprintf(stderr, "PP: Parsing XML Decl\n");
7567: #endif
7568: xmlParseXMLDecl(ctxt);
1.193 daniel 7569: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7570: /*
7571: * The XML REC instructs us to stop parsing right
7572: * here
7573: */
7574: ctxt->instate = XML_PARSER_EOF;
7575: return(0);
7576: }
1.167 daniel 7577: ctxt->standalone = ctxt->input->standalone;
1.164 daniel 7578: if ((ctxt->encoding == NULL) &&
7579: (ctxt->input->encoding != NULL))
7580: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1.171 daniel 7581: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7582: (!ctxt->disableSAX))
1.140 daniel 7583: ctxt->sax->startDocument(ctxt->userData);
7584: ctxt->instate = XML_PARSER_MISC;
7585: #ifdef DEBUG_PUSH
7586: fprintf(stderr, "PP: entering MISC\n");
7587: #endif
7588: } else {
7589: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 daniel 7590: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7591: (!ctxt->disableSAX))
1.140 daniel 7592: ctxt->sax->startDocument(ctxt->userData);
7593: ctxt->instate = XML_PARSER_MISC;
7594: #ifdef DEBUG_PUSH
7595: fprintf(stderr, "PP: entering MISC\n");
7596: #endif
7597: }
7598: } else {
7599: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7600: ctxt->sax->setDocumentLocator(ctxt->userData,
7601: &xmlDefaultSAXLocator);
7602: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 daniel 7603: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7604: (!ctxt->disableSAX))
1.140 daniel 7605: ctxt->sax->startDocument(ctxt->userData);
7606: ctxt->instate = XML_PARSER_MISC;
7607: #ifdef DEBUG_PUSH
7608: fprintf(stderr, "PP: entering MISC\n");
7609: #endif
7610: }
7611: break;
7612: case XML_PARSER_MISC:
7613: SKIP_BLANKS;
1.184 daniel 7614: if (ctxt->input->buf == NULL)
7615: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7616: else
1.184 daniel 7617: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7618: if (avail < 2)
7619: goto done;
1.184 daniel 7620: cur = ctxt->input->cur[0];
7621: next = ctxt->input->cur[1];
1.140 daniel 7622: if ((cur == '<') && (next == '?')) {
1.143 daniel 7623: if ((!terminate) &&
7624: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 7625: goto done;
7626: #ifdef DEBUG_PUSH
7627: fprintf(stderr, "PP: Parsing PI\n");
7628: #endif
7629: xmlParsePI(ctxt);
7630: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 7631: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 7632: if ((!terminate) &&
7633: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 7634: goto done;
7635: #ifdef DEBUG_PUSH
7636: fprintf(stderr, "PP: Parsing Comment\n");
7637: #endif
7638: xmlParseComment(ctxt);
7639: ctxt->instate = XML_PARSER_MISC;
7640: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 7641: (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
7642: (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
7643: (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
7644: (ctxt->input->cur[8] == 'E')) {
1.143 daniel 7645: if ((!terminate) &&
7646: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 7647: goto done;
7648: #ifdef DEBUG_PUSH
7649: fprintf(stderr, "PP: Parsing internal subset\n");
7650: #endif
1.166 daniel 7651: ctxt->inSubset = 1;
1.140 daniel 7652: xmlParseDocTypeDecl(ctxt);
1.152 daniel 7653: if (RAW == '[') {
1.140 daniel 7654: ctxt->instate = XML_PARSER_DTD;
7655: #ifdef DEBUG_PUSH
7656: fprintf(stderr, "PP: entering DTD\n");
7657: #endif
7658: } else {
1.166 daniel 7659: /*
7660: * Create and update the external subset.
7661: */
7662: ctxt->inSubset = 2;
1.171 daniel 7663: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 7664: (ctxt->sax->externalSubset != NULL))
7665: ctxt->sax->externalSubset(ctxt->userData,
7666: ctxt->intSubName, ctxt->extSubSystem,
7667: ctxt->extSubURI);
7668: ctxt->inSubset = 0;
1.140 daniel 7669: ctxt->instate = XML_PARSER_PROLOG;
7670: #ifdef DEBUG_PUSH
7671: fprintf(stderr, "PP: entering PROLOG\n");
7672: #endif
7673: }
7674: } else if ((cur == '<') && (next == '!') &&
7675: (avail < 9)) {
7676: goto done;
7677: } else {
7678: ctxt->instate = XML_PARSER_START_TAG;
7679: #ifdef DEBUG_PUSH
7680: fprintf(stderr, "PP: entering START_TAG\n");
7681: #endif
7682: }
7683: break;
1.128 daniel 7684: case XML_PARSER_PROLOG:
1.140 daniel 7685: SKIP_BLANKS;
1.184 daniel 7686: if (ctxt->input->buf == NULL)
7687: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7688: else
1.184 daniel 7689: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7690: if (avail < 2)
7691: goto done;
1.184 daniel 7692: cur = ctxt->input->cur[0];
7693: next = ctxt->input->cur[1];
1.140 daniel 7694: if ((cur == '<') && (next == '?')) {
1.143 daniel 7695: if ((!terminate) &&
7696: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 7697: goto done;
7698: #ifdef DEBUG_PUSH
7699: fprintf(stderr, "PP: Parsing PI\n");
7700: #endif
7701: xmlParsePI(ctxt);
7702: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 7703: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 7704: if ((!terminate) &&
7705: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 7706: goto done;
7707: #ifdef DEBUG_PUSH
7708: fprintf(stderr, "PP: Parsing Comment\n");
7709: #endif
7710: xmlParseComment(ctxt);
7711: ctxt->instate = XML_PARSER_PROLOG;
7712: } else if ((cur == '<') && (next == '!') &&
7713: (avail < 4)) {
7714: goto done;
7715: } else {
7716: ctxt->instate = XML_PARSER_START_TAG;
7717: #ifdef DEBUG_PUSH
7718: fprintf(stderr, "PP: entering START_TAG\n");
7719: #endif
7720: }
7721: break;
7722: case XML_PARSER_EPILOG:
7723: SKIP_BLANKS;
1.184 daniel 7724: if (ctxt->input->buf == NULL)
7725: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7726: else
1.184 daniel 7727: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7728: if (avail < 2)
7729: goto done;
1.184 daniel 7730: cur = ctxt->input->cur[0];
7731: next = ctxt->input->cur[1];
1.140 daniel 7732: if ((cur == '<') && (next == '?')) {
1.143 daniel 7733: if ((!terminate) &&
7734: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 7735: goto done;
7736: #ifdef DEBUG_PUSH
7737: fprintf(stderr, "PP: Parsing PI\n");
7738: #endif
7739: xmlParsePI(ctxt);
7740: ctxt->instate = XML_PARSER_EPILOG;
7741: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 7742: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 7743: if ((!terminate) &&
7744: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 7745: goto done;
7746: #ifdef DEBUG_PUSH
7747: fprintf(stderr, "PP: Parsing Comment\n");
7748: #endif
7749: xmlParseComment(ctxt);
7750: ctxt->instate = XML_PARSER_EPILOG;
7751: } else if ((cur == '<') && (next == '!') &&
7752: (avail < 4)) {
7753: goto done;
7754: } else {
1.230 ! veillard 7755: ctxt->errNo = XML_ERR_DOCUMENT_END;
1.140 daniel 7756: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7757: ctxt->sax->error(ctxt->userData,
7758: "Extra content at the end of the document\n");
7759: ctxt->wellFormed = 0;
1.180 daniel 7760: ctxt->disableSAX = 1;
1.140 daniel 7761: ctxt->instate = XML_PARSER_EOF;
7762: #ifdef DEBUG_PUSH
7763: fprintf(stderr, "PP: entering EOF\n");
7764: #endif
1.171 daniel 7765: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7766: (!ctxt->disableSAX))
1.140 daniel 7767: ctxt->sax->endDocument(ctxt->userData);
7768: goto done;
7769: }
7770: break;
7771: case XML_PARSER_START_TAG: {
7772: xmlChar *name, *oldname;
7773:
1.184 daniel 7774: if ((avail < 2) && (ctxt->inputNr == 1))
1.140 daniel 7775: goto done;
1.184 daniel 7776: cur = ctxt->input->cur[0];
1.140 daniel 7777: if (cur != '<') {
1.230 ! veillard 7778: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.140 daniel 7779: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7780: ctxt->sax->error(ctxt->userData,
7781: "Start tag expect, '<' not found\n");
7782: ctxt->wellFormed = 0;
1.180 daniel 7783: ctxt->disableSAX = 1;
1.140 daniel 7784: ctxt->instate = XML_PARSER_EOF;
7785: #ifdef DEBUG_PUSH
7786: fprintf(stderr, "PP: entering EOF\n");
7787: #endif
1.171 daniel 7788: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7789: (!ctxt->disableSAX))
1.140 daniel 7790: ctxt->sax->endDocument(ctxt->userData);
7791: goto done;
7792: }
1.143 daniel 7793: if ((!terminate) &&
7794: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 7795: goto done;
1.176 daniel 7796: if (ctxt->spaceNr == 0)
7797: spacePush(ctxt, -1);
7798: else
7799: spacePush(ctxt, *ctxt->space);
1.140 daniel 7800: name = xmlParseStartTag(ctxt);
7801: if (name == NULL) {
1.176 daniel 7802: spacePop(ctxt);
1.140 daniel 7803: ctxt->instate = XML_PARSER_EOF;
7804: #ifdef DEBUG_PUSH
7805: fprintf(stderr, "PP: entering EOF\n");
7806: #endif
1.171 daniel 7807: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7808: (!ctxt->disableSAX))
1.140 daniel 7809: ctxt->sax->endDocument(ctxt->userData);
7810: goto done;
7811: }
7812: namePush(ctxt, xmlStrdup(name));
7813:
7814: /*
7815: * [ VC: Root Element Type ]
7816: * The Name in the document type declaration must match
7817: * the element type of the root element.
7818: */
7819: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 7820: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.140 daniel 7821: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7822:
7823: /*
7824: * Check for an Empty Element.
7825: */
1.152 daniel 7826: if ((RAW == '/') && (NXT(1) == '>')) {
1.140 daniel 7827: SKIP(2);
1.171 daniel 7828: if ((ctxt->sax != NULL) &&
7829: (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
1.140 daniel 7830: ctxt->sax->endElement(ctxt->userData, name);
7831: xmlFree(name);
7832: oldname = namePop(ctxt);
1.176 daniel 7833: spacePop(ctxt);
1.140 daniel 7834: if (oldname != NULL) {
7835: #ifdef DEBUG_STACK
7836: fprintf(stderr,"Close: popped %s\n", oldname);
7837: #endif
7838: xmlFree(oldname);
7839: }
7840: if (ctxt->name == NULL) {
7841: ctxt->instate = XML_PARSER_EPILOG;
7842: #ifdef DEBUG_PUSH
7843: fprintf(stderr, "PP: entering EPILOG\n");
7844: #endif
7845: } else {
7846: ctxt->instate = XML_PARSER_CONTENT;
7847: #ifdef DEBUG_PUSH
7848: fprintf(stderr, "PP: entering CONTENT\n");
7849: #endif
7850: }
7851: break;
7852: }
1.152 daniel 7853: if (RAW == '>') {
1.140 daniel 7854: NEXT;
7855: } else {
1.230 ! veillard 7856: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.140 daniel 7857: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7858: ctxt->sax->error(ctxt->userData,
7859: "Couldn't find end of Start Tag %s\n",
7860: name);
7861: ctxt->wellFormed = 0;
1.180 daniel 7862: ctxt->disableSAX = 1;
1.140 daniel 7863:
7864: /*
7865: * end of parsing of this node.
7866: */
7867: nodePop(ctxt);
7868: oldname = namePop(ctxt);
1.176 daniel 7869: spacePop(ctxt);
1.140 daniel 7870: if (oldname != NULL) {
7871: #ifdef DEBUG_STACK
7872: fprintf(stderr,"Close: popped %s\n", oldname);
7873: #endif
7874: xmlFree(oldname);
7875: }
7876: }
7877: xmlFree(name);
7878: ctxt->instate = XML_PARSER_CONTENT;
7879: #ifdef DEBUG_PUSH
7880: fprintf(stderr, "PP: entering CONTENT\n");
7881: #endif
7882: break;
7883: }
1.224 veillard 7884: case XML_PARSER_CONTENT: {
7885: const xmlChar *test;
7886: int cons;
7887: xmlChar tok;
7888:
1.140 daniel 7889: /*
7890: * Handle preparsed entities and charRef
7891: */
7892: if (ctxt->token != 0) {
7893: xmlChar cur[2] = { 0 , 0 } ;
7894:
7895: cur[0] = (xmlChar) ctxt->token;
1.171 daniel 7896: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
7897: (ctxt->sax->characters != NULL))
1.140 daniel 7898: ctxt->sax->characters(ctxt->userData, cur, 1);
7899: ctxt->token = 0;
7900: }
1.184 daniel 7901: if ((avail < 2) && (ctxt->inputNr == 1))
1.140 daniel 7902: goto done;
1.184 daniel 7903: cur = ctxt->input->cur[0];
7904: next = ctxt->input->cur[1];
1.224 veillard 7905:
7906: test = CUR_PTR;
7907: cons = ctxt->input->consumed;
7908: tok = ctxt->token;
1.140 daniel 7909: if ((cur == '<') && (next == '?')) {
1.143 daniel 7910: if ((!terminate) &&
7911: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 7912: goto done;
7913: #ifdef DEBUG_PUSH
7914: fprintf(stderr, "PP: Parsing PI\n");
7915: #endif
7916: xmlParsePI(ctxt);
7917: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 7918: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 7919: if ((!terminate) &&
7920: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 7921: goto done;
7922: #ifdef DEBUG_PUSH
7923: fprintf(stderr, "PP: Parsing Comment\n");
7924: #endif
7925: xmlParseComment(ctxt);
7926: ctxt->instate = XML_PARSER_CONTENT;
1.184 daniel 7927: } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
7928: (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
7929: (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
7930: (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
7931: (ctxt->input->cur[8] == '[')) {
1.140 daniel 7932: SKIP(9);
7933: ctxt->instate = XML_PARSER_CDATA_SECTION;
7934: #ifdef DEBUG_PUSH
7935: fprintf(stderr, "PP: entering CDATA_SECTION\n");
7936: #endif
7937: break;
7938: } else if ((cur == '<') && (next == '!') &&
7939: (avail < 9)) {
7940: goto done;
7941: } else if ((cur == '<') && (next == '/')) {
7942: ctxt->instate = XML_PARSER_END_TAG;
7943: #ifdef DEBUG_PUSH
7944: fprintf(stderr, "PP: entering END_TAG\n");
7945: #endif
7946: break;
7947: } else if (cur == '<') {
7948: ctxt->instate = XML_PARSER_START_TAG;
7949: #ifdef DEBUG_PUSH
7950: fprintf(stderr, "PP: entering START_TAG\n");
7951: #endif
7952: break;
7953: } else if (cur == '&') {
1.143 daniel 7954: if ((!terminate) &&
7955: (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
1.140 daniel 7956: goto done;
7957: #ifdef DEBUG_PUSH
7958: fprintf(stderr, "PP: Parsing Reference\n");
7959: #endif
7960: xmlParseReference(ctxt);
7961: } else {
1.156 daniel 7962: /* TODO Avoid the extra copy, handle directly !!! */
1.140 daniel 7963: /*
1.181 daniel 7964: * Goal of the following test is:
1.140 daniel 7965: * - minimize calls to the SAX 'character' callback
7966: * when they are mergeable
7967: * - handle an problem for isBlank when we only parse
7968: * a sequence of blank chars and the next one is
7969: * not available to check against '<' presence.
7970: * - tries to homogenize the differences in SAX
7971: * callbacks beween the push and pull versions
7972: * of the parser.
7973: */
7974: if ((ctxt->inputNr == 1) &&
7975: (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
1.143 daniel 7976: if ((!terminate) &&
7977: (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
1.140 daniel 7978: goto done;
7979: }
7980: ctxt->checkIndex = 0;
7981: #ifdef DEBUG_PUSH
7982: fprintf(stderr, "PP: Parsing char data\n");
7983: #endif
7984: xmlParseCharData(ctxt, 0);
7985: }
7986: /*
7987: * Pop-up of finished entities.
7988: */
1.152 daniel 7989: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 7990: xmlPopInput(ctxt);
1.224 veillard 7991: if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
7992: (tok == ctxt->token)) {
1.230 ! veillard 7993: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.224 veillard 7994: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7995: ctxt->sax->error(ctxt->userData,
7996: "detected an error in element content\n");
7997: ctxt->wellFormed = 0;
7998: ctxt->disableSAX = 1;
7999: ctxt->instate = XML_PARSER_EOF;
8000: break;
8001: }
1.140 daniel 8002: break;
1.224 veillard 8003: }
1.140 daniel 8004: case XML_PARSER_CDATA_SECTION: {
8005: /*
8006: * The Push mode need to have the SAX callback for
8007: * cdataBlock merge back contiguous callbacks.
8008: */
8009: int base;
8010:
8011: base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8012: if (base < 0) {
8013: if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
1.171 daniel 8014: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.140 daniel 8015: if (ctxt->sax->cdataBlock != NULL)
1.184 daniel 8016: ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
1.140 daniel 8017: XML_PARSER_BIG_BUFFER_SIZE);
8018: }
8019: SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8020: ctxt->checkIndex = 0;
8021: }
8022: goto done;
8023: } else {
1.171 daniel 8024: if ((ctxt->sax != NULL) && (base > 0) &&
8025: (!ctxt->disableSAX)) {
1.140 daniel 8026: if (ctxt->sax->cdataBlock != NULL)
8027: ctxt->sax->cdataBlock(ctxt->userData,
1.184 daniel 8028: ctxt->input->cur, base);
1.140 daniel 8029: }
8030: SKIP(base + 3);
8031: ctxt->checkIndex = 0;
8032: ctxt->instate = XML_PARSER_CONTENT;
8033: #ifdef DEBUG_PUSH
8034: fprintf(stderr, "PP: entering CONTENT\n");
8035: #endif
8036: }
8037: break;
8038: }
1.141 daniel 8039: case XML_PARSER_END_TAG:
1.140 daniel 8040: if (avail < 2)
8041: goto done;
1.143 daniel 8042: if ((!terminate) &&
8043: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 8044: goto done;
8045: xmlParseEndTag(ctxt);
8046: if (ctxt->name == NULL) {
8047: ctxt->instate = XML_PARSER_EPILOG;
8048: #ifdef DEBUG_PUSH
8049: fprintf(stderr, "PP: entering EPILOG\n");
8050: #endif
8051: } else {
8052: ctxt->instate = XML_PARSER_CONTENT;
8053: #ifdef DEBUG_PUSH
8054: fprintf(stderr, "PP: entering CONTENT\n");
8055: #endif
8056: }
8057: break;
8058: case XML_PARSER_DTD: {
8059: /*
8060: * Sorry but progressive parsing of the internal subset
8061: * is not expected to be supported. We first check that
8062: * the full content of the internal subset is available and
8063: * the parsing is launched only at that point.
8064: * Internal subset ends up with "']' S? '>'" in an unescaped
8065: * section and not in a ']]>' sequence which are conditional
8066: * sections (whoever argued to keep that crap in XML deserve
8067: * a place in hell !).
8068: */
8069: int base, i;
8070: xmlChar *buf;
8071: xmlChar quote = 0;
8072:
1.184 daniel 8073: base = ctxt->input->cur - ctxt->input->base;
1.140 daniel 8074: if (base < 0) return(0);
8075: if (ctxt->checkIndex > base)
8076: base = ctxt->checkIndex;
1.184 daniel 8077: buf = ctxt->input->buf->buffer->content;
1.202 daniel 8078: for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8079: base++) {
1.140 daniel 8080: if (quote != 0) {
8081: if (buf[base] == quote)
8082: quote = 0;
8083: continue;
8084: }
8085: if (buf[base] == '"') {
8086: quote = '"';
8087: continue;
8088: }
8089: if (buf[base] == '\'') {
8090: quote = '\'';
8091: continue;
8092: }
8093: if (buf[base] == ']') {
1.202 daniel 8094: if ((unsigned int) base +1 >=
8095: ctxt->input->buf->buffer->use)
1.140 daniel 8096: break;
8097: if (buf[base + 1] == ']') {
8098: /* conditional crap, skip both ']' ! */
8099: base++;
8100: continue;
8101: }
1.202 daniel 8102: for (i = 0;
8103: (unsigned int) base + i < ctxt->input->buf->buffer->use;
8104: i++) {
1.140 daniel 8105: if (buf[base + i] == '>')
8106: goto found_end_int_subset;
8107: }
8108: break;
8109: }
8110: }
8111: /*
8112: * We didn't found the end of the Internal subset
8113: */
8114: if (quote == 0)
8115: ctxt->checkIndex = base;
8116: #ifdef DEBUG_PUSH
8117: if (next == 0)
8118: fprintf(stderr, "PP: lookup of int subset end filed\n");
8119: #endif
8120: goto done;
8121:
8122: found_end_int_subset:
8123: xmlParseInternalSubset(ctxt);
1.166 daniel 8124: ctxt->inSubset = 2;
1.171 daniel 8125: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 8126: (ctxt->sax->externalSubset != NULL))
8127: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8128: ctxt->extSubSystem, ctxt->extSubURI);
8129: ctxt->inSubset = 0;
1.140 daniel 8130: ctxt->instate = XML_PARSER_PROLOG;
8131: ctxt->checkIndex = 0;
8132: #ifdef DEBUG_PUSH
8133: fprintf(stderr, "PP: entering PROLOG\n");
8134: #endif
8135: break;
8136: }
8137: case XML_PARSER_COMMENT:
8138: fprintf(stderr, "PP: internal error, state == COMMENT\n");
8139: ctxt->instate = XML_PARSER_CONTENT;
8140: #ifdef DEBUG_PUSH
8141: fprintf(stderr, "PP: entering CONTENT\n");
8142: #endif
8143: break;
8144: case XML_PARSER_PI:
8145: fprintf(stderr, "PP: internal error, state == PI\n");
8146: ctxt->instate = XML_PARSER_CONTENT;
8147: #ifdef DEBUG_PUSH
8148: fprintf(stderr, "PP: entering CONTENT\n");
8149: #endif
8150: break;
1.128 daniel 8151: case XML_PARSER_ENTITY_DECL:
1.140 daniel 8152: fprintf(stderr, "PP: internal error, state == ENTITY_DECL\n");
8153: ctxt->instate = XML_PARSER_DTD;
8154: #ifdef DEBUG_PUSH
8155: fprintf(stderr, "PP: entering DTD\n");
8156: #endif
8157: break;
1.128 daniel 8158: case XML_PARSER_ENTITY_VALUE:
1.140 daniel 8159: fprintf(stderr, "PP: internal error, state == ENTITY_VALUE\n");
8160: ctxt->instate = XML_PARSER_CONTENT;
8161: #ifdef DEBUG_PUSH
8162: fprintf(stderr, "PP: entering DTD\n");
8163: #endif
8164: break;
1.128 daniel 8165: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 8166: fprintf(stderr, "PP: internal error, state == ATTRIBUTE_VALUE\n");
1.168 daniel 8167: ctxt->instate = XML_PARSER_START_TAG;
8168: #ifdef DEBUG_PUSH
8169: fprintf(stderr, "PP: entering START_TAG\n");
8170: #endif
8171: break;
8172: case XML_PARSER_SYSTEM_LITERAL:
8173: fprintf(stderr, "PP: internal error, state == SYSTEM_LITERAL\n");
1.140 daniel 8174: ctxt->instate = XML_PARSER_START_TAG;
8175: #ifdef DEBUG_PUSH
8176: fprintf(stderr, "PP: entering START_TAG\n");
8177: #endif
8178: break;
1.128 daniel 8179: }
8180: }
1.140 daniel 8181: done:
8182: #ifdef DEBUG_PUSH
8183: fprintf(stderr, "PP: done %d\n", ret);
8184: #endif
1.128 daniel 8185: return(ret);
8186: }
8187:
8188: /**
1.143 daniel 8189: * xmlParseTry:
8190: * @ctxt: an XML parser context
8191: *
8192: * Try to progress on parsing
8193: *
8194: * Returns zero if no parsing was possible
8195: */
8196: int
8197: xmlParseTry(xmlParserCtxtPtr ctxt) {
8198: return(xmlParseTryOrFinish(ctxt, 0));
8199: }
8200:
8201: /**
1.128 daniel 8202: * xmlParseChunk:
8203: * @ctxt: an XML parser context
8204: * @chunk: an char array
8205: * @size: the size in byte of the chunk
8206: * @terminate: last chunk indicator
8207: *
8208: * Parse a Chunk of memory
8209: *
8210: * Returns zero if no error, the xmlParserErrors otherwise.
8211: */
1.140 daniel 8212: int
1.128 daniel 8213: xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8214: int terminate) {
1.132 daniel 8215: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
1.140 daniel 8216: (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8217: int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8218: int cur = ctxt->input->cur - ctxt->input->base;
8219:
1.132 daniel 8220: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
1.140 daniel 8221: ctxt->input->base = ctxt->input->buf->buffer->content + base;
8222: ctxt->input->cur = ctxt->input->base + cur;
8223: #ifdef DEBUG_PUSH
8224: fprintf(stderr, "PP: pushed %d\n", size);
8225: #endif
8226:
1.150 daniel 8227: if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8228: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 8229: } else if (ctxt->instate != XML_PARSER_EOF)
1.143 daniel 8230: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 8231: if (terminate) {
1.151 daniel 8232: /*
8233: * Check for termination
8234: */
1.140 daniel 8235: if ((ctxt->instate != XML_PARSER_EOF) &&
8236: (ctxt->instate != XML_PARSER_EPILOG)) {
1.230 ! veillard 8237: ctxt->errNo = XML_ERR_DOCUMENT_END;
1.140 daniel 8238: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8239: ctxt->sax->error(ctxt->userData,
8240: "Extra content at the end of the document\n");
8241: ctxt->wellFormed = 0;
1.180 daniel 8242: ctxt->disableSAX = 1;
1.140 daniel 8243: }
8244: if (ctxt->instate != XML_PARSER_EOF) {
1.171 daniel 8245: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8246: (!ctxt->disableSAX))
1.140 daniel 8247: ctxt->sax->endDocument(ctxt->userData);
8248: }
8249: ctxt->instate = XML_PARSER_EOF;
1.128 daniel 8250: }
8251: return((xmlParserErrors) ctxt->errNo);
8252: }
8253:
8254: /************************************************************************
8255: * *
1.98 daniel 8256: * I/O front end functions to the parser *
8257: * *
8258: ************************************************************************/
1.201 daniel 8259:
8260: /**
1.229 veillard 8261: * xmlStopParser:
1.201 daniel 8262: * @ctxt: an XML parser context
8263: *
8264: * Blocks further parser processing
8265: */
8266: void
8267: xmlStopParser(xmlParserCtxtPtr ctxt) {
8268: ctxt->instate = XML_PARSER_EOF;
8269: if (ctxt->input != NULL)
8270: ctxt->input->cur = BAD_CAST"";
8271: }
1.98 daniel 8272:
1.50 daniel 8273: /**
1.181 daniel 8274: * xmlCreatePushParserCtxt:
1.140 daniel 8275: * @sax: a SAX handler
8276: * @user_data: The user data returned on SAX callbacks
8277: * @chunk: a pointer to an array of chars
8278: * @size: number of chars in the array
8279: * @filename: an optional file name or URI
8280: *
8281: * Create a parser context for using the XML parser in push mode
8282: * To allow content encoding detection, @size should be >= 4
8283: * The value of @filename is used for fetching external entities
8284: * and error/warning reports.
8285: *
8286: * Returns the new parser context or NULL
8287: */
8288: xmlParserCtxtPtr
8289: xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8290: const char *chunk, int size, const char *filename) {
8291: xmlParserCtxtPtr ctxt;
8292: xmlParserInputPtr inputStream;
8293: xmlParserInputBufferPtr buf;
8294: xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8295:
8296: /*
1.156 daniel 8297: * plug some encoding conversion routines
1.140 daniel 8298: */
8299: if ((chunk != NULL) && (size >= 4))
1.156 daniel 8300: enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
1.140 daniel 8301:
8302: buf = xmlAllocParserInputBuffer(enc);
8303: if (buf == NULL) return(NULL);
8304:
8305: ctxt = xmlNewParserCtxt();
8306: if (ctxt == NULL) {
8307: xmlFree(buf);
8308: return(NULL);
8309: }
8310: if (sax != NULL) {
8311: if (ctxt->sax != &xmlDefaultSAXHandler)
8312: xmlFree(ctxt->sax);
8313: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8314: if (ctxt->sax == NULL) {
8315: xmlFree(buf);
8316: xmlFree(ctxt);
8317: return(NULL);
8318: }
8319: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8320: if (user_data != NULL)
8321: ctxt->userData = user_data;
8322: }
8323: if (filename == NULL) {
8324: ctxt->directory = NULL;
8325: } else {
8326: ctxt->directory = xmlParserGetDirectory(filename);
8327: }
8328:
8329: inputStream = xmlNewInputStream(ctxt);
8330: if (inputStream == NULL) {
8331: xmlFreeParserCtxt(ctxt);
8332: return(NULL);
8333: }
8334:
8335: if (filename == NULL)
8336: inputStream->filename = NULL;
8337: else
8338: inputStream->filename = xmlMemStrdup(filename);
8339: inputStream->buf = buf;
8340: inputStream->base = inputStream->buf->buffer->content;
8341: inputStream->cur = inputStream->buf->buffer->content;
1.156 daniel 8342: if (enc != XML_CHAR_ENCODING_NONE) {
8343: xmlSwitchEncoding(ctxt, enc);
8344: }
1.140 daniel 8345:
8346: inputPush(ctxt, inputStream);
8347:
8348: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8349: (ctxt->input->buf != NULL)) {
8350: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8351: #ifdef DEBUG_PUSH
8352: fprintf(stderr, "PP: pushed %d\n", size);
8353: #endif
8354: }
1.190 daniel 8355:
8356: return(ctxt);
8357: }
8358:
8359: /**
8360: * xmlCreateIOParserCtxt:
8361: * @sax: a SAX handler
8362: * @user_data: The user data returned on SAX callbacks
8363: * @ioread: an I/O read function
8364: * @ioclose: an I/O close function
8365: * @ioctx: an I/O handler
8366: * @enc: the charset encoding if known
8367: *
8368: * Create a parser context for using the XML parser with an existing
8369: * I/O stream
8370: *
8371: * Returns the new parser context or NULL
8372: */
8373: xmlParserCtxtPtr
8374: xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8375: xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8376: void *ioctx, xmlCharEncoding enc) {
8377: xmlParserCtxtPtr ctxt;
8378: xmlParserInputPtr inputStream;
8379: xmlParserInputBufferPtr buf;
8380:
8381: buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8382: if (buf == NULL) return(NULL);
8383:
8384: ctxt = xmlNewParserCtxt();
8385: if (ctxt == NULL) {
8386: xmlFree(buf);
8387: return(NULL);
8388: }
8389: if (sax != NULL) {
8390: if (ctxt->sax != &xmlDefaultSAXHandler)
8391: xmlFree(ctxt->sax);
8392: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8393: if (ctxt->sax == NULL) {
8394: xmlFree(buf);
8395: xmlFree(ctxt);
8396: return(NULL);
8397: }
8398: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8399: if (user_data != NULL)
8400: ctxt->userData = user_data;
8401: }
8402:
1.229 veillard 8403: inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8404: if (inputStream == NULL) {
8405: xmlFreeParserCtxt(ctxt);
8406: return(NULL);
1.74 daniel 8407: }
1.229 veillard 8408: inputPush(ctxt, inputStream);
1.69 daniel 8409:
1.229 veillard 8410: return(ctxt);
1.1 veillard 8411: }
8412:
1.229 veillard 8413: /************************************************************************
8414: * *
8415: * Front ends when parsing a Dtd *
8416: * *
8417: ************************************************************************/
1.76 daniel 8418:
8419: /**
1.181 daniel 8420: * xmlSAXParseDTD:
1.76 daniel 8421: * @sax: the SAX handler block
8422: * @ExternalID: a NAME* containing the External ID of the DTD
8423: * @SystemID: a NAME* containing the URL to the DTD
8424: *
8425: * Load and parse an external subset.
8426: *
8427: * Returns the resulting xmlDtdPtr or NULL in case of error.
8428: */
8429:
8430: xmlDtdPtr
1.123 daniel 8431: xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8432: const xmlChar *SystemID) {
1.76 daniel 8433: xmlDtdPtr ret = NULL;
8434: xmlParserCtxtPtr ctxt;
1.83 daniel 8435: xmlParserInputPtr input = NULL;
1.76 daniel 8436: xmlCharEncoding enc;
8437:
8438: if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
8439:
1.97 daniel 8440: ctxt = xmlNewParserCtxt();
1.76 daniel 8441: if (ctxt == NULL) {
8442: return(NULL);
8443: }
8444:
8445: /*
8446: * Set-up the SAX context
8447: */
8448: if (sax != NULL) {
1.93 veillard 8449: if (ctxt->sax != NULL)
1.119 daniel 8450: xmlFree(ctxt->sax);
1.76 daniel 8451: ctxt->sax = sax;
8452: ctxt->userData = NULL;
8453: }
8454:
8455: /*
8456: * Ask the Entity resolver to load the damn thing
8457: */
8458:
8459: if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
8460: input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
8461: if (input == NULL) {
1.86 daniel 8462: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 8463: xmlFreeParserCtxt(ctxt);
8464: return(NULL);
8465: }
8466:
8467: /*
1.156 daniel 8468: * plug some encoding conversion routines here.
1.76 daniel 8469: */
8470: xmlPushInput(ctxt, input);
1.156 daniel 8471: enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
1.76 daniel 8472: xmlSwitchEncoding(ctxt, enc);
8473:
1.95 veillard 8474: if (input->filename == NULL)
1.156 daniel 8475: input->filename = (char *) xmlStrdup(SystemID);
1.76 daniel 8476: input->line = 1;
8477: input->col = 1;
8478: input->base = ctxt->input->cur;
8479: input->cur = ctxt->input->cur;
8480: input->free = NULL;
8481:
8482: /*
8483: * let's parse that entity knowing it's an external subset.
8484: */
1.191 daniel 8485: ctxt->inSubset = 2;
8486: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8487: ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8488: ExternalID, SystemID);
1.79 daniel 8489: xmlParseExternalSubset(ctxt, ExternalID, SystemID);
1.76 daniel 8490:
8491: if (ctxt->myDoc != NULL) {
8492: if (ctxt->wellFormed) {
1.191 daniel 8493: ret = ctxt->myDoc->extSubset;
8494: ctxt->myDoc->extSubset = NULL;
1.76 daniel 8495: } else {
8496: ret = NULL;
8497: }
8498: xmlFreeDoc(ctxt->myDoc);
8499: ctxt->myDoc = NULL;
8500: }
1.86 daniel 8501: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 8502: xmlFreeParserCtxt(ctxt);
8503:
8504: return(ret);
8505: }
8506:
8507: /**
1.181 daniel 8508: * xmlParseDTD:
1.76 daniel 8509: * @ExternalID: a NAME* containing the External ID of the DTD
8510: * @SystemID: a NAME* containing the URL to the DTD
8511: *
8512: * Load and parse an external subset.
8513: *
8514: * Returns the resulting xmlDtdPtr or NULL in case of error.
8515: */
8516:
8517: xmlDtdPtr
1.123 daniel 8518: xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
1.76 daniel 8519: return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
1.59 daniel 8520: }
8521:
1.229 veillard 8522: /************************************************************************
8523: * *
8524: * Front ends when parsing an Entity *
8525: * *
8526: ************************************************************************/
8527:
1.59 daniel 8528: /**
1.181 daniel 8529: * xmlSAXParseBalancedChunk:
1.144 daniel 8530: * @ctx: an XML parser context (possibly NULL)
8531: * @sax: the SAX handler bloc (possibly NULL)
8532: * @user_data: The user data returned on SAX callbacks (possibly NULL)
8533: * @input: a parser input stream
8534: * @enc: the encoding
8535: *
8536: * Parse a well-balanced chunk of an XML document
8537: * The user has to provide SAX callback block whose routines will be
8538: * called by the parser
8539: * The allowed sequence for the Well Balanced Chunk is the one defined by
8540: * the content production in the XML grammar:
8541: *
8542: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8543: *
1.176 daniel 8544: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
1.144 daniel 8545: * the error code otherwise
8546: */
8547:
8548: int
8549: xmlSAXParseBalancedChunk(xmlParserCtxtPtr ctx, xmlSAXHandlerPtr sax,
8550: void *user_data, xmlParserInputPtr input,
8551: xmlCharEncoding enc) {
8552: xmlParserCtxtPtr ctxt;
8553: int ret;
8554:
8555: if (input == NULL) return(-1);
8556:
8557: if (ctx != NULL)
8558: ctxt = ctx;
8559: else {
8560: ctxt = xmlNewParserCtxt();
8561: if (ctxt == NULL)
8562: return(-1);
8563: if (sax == NULL)
8564: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8565: }
8566:
8567: /*
8568: * Set-up the SAX context
8569: */
8570: if (sax != NULL) {
8571: if (ctxt->sax != NULL)
8572: xmlFree(ctxt->sax);
8573: ctxt->sax = sax;
8574: ctxt->userData = user_data;
8575: }
8576:
8577: /*
8578: * plug some encoding conversion routines here.
8579: */
8580: xmlPushInput(ctxt, input);
8581: if (enc != XML_CHAR_ENCODING_NONE)
8582: xmlSwitchEncoding(ctxt, enc);
8583:
8584: /*
8585: * let's parse that entity knowing it's an external subset.
8586: */
8587: xmlParseContent(ctxt);
8588: ret = ctxt->errNo;
8589:
8590: if (ctx == NULL) {
8591: if (sax != NULL)
8592: ctxt->sax = NULL;
8593: else
8594: xmlFreeDoc(ctxt->myDoc);
8595: xmlFreeParserCtxt(ctxt);
8596: }
8597: return(ret);
8598: }
8599:
8600: /**
1.213 veillard 8601: * xmlParseCtxtExternalEntity:
8602: * @ctx: the existing parsing context
8603: * @URL: the URL for the entity to load
8604: * @ID: the System ID for the entity to load
8605: * @list: the return value for the set of parsed nodes
8606: *
8607: * Parse an external general entity within an existing parsing context
8608: * An external general parsed entity is well-formed if it matches the
8609: * production labeled extParsedEnt.
8610: *
8611: * [78] extParsedEnt ::= TextDecl? content
8612: *
8613: * Returns 0 if the entity is well formed, -1 in case of args problem and
8614: * the parser error code otherwise
8615: */
8616:
8617: int
8618: xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
8619: const xmlChar *ID, xmlNodePtr *list) {
8620: xmlParserCtxtPtr ctxt;
8621: xmlDocPtr newDoc;
8622: xmlSAXHandlerPtr oldsax = NULL;
8623: int ret = 0;
8624:
8625: if (ctx->depth > 40) {
8626: return(XML_ERR_ENTITY_LOOP);
8627: }
8628:
8629: if (list != NULL)
8630: *list = NULL;
8631: if ((URL == NULL) && (ID == NULL))
8632: return(-1);
8633: if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
8634: return(-1);
8635:
8636:
1.228 veillard 8637: ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
1.213 veillard 8638: if (ctxt == NULL) return(-1);
8639: ctxt->userData = ctxt;
8640: oldsax = ctxt->sax;
8641: ctxt->sax = ctx->sax;
8642: newDoc = xmlNewDoc(BAD_CAST "1.0");
8643: if (newDoc == NULL) {
8644: xmlFreeParserCtxt(ctxt);
8645: return(-1);
8646: }
8647: if (ctx->myDoc != NULL) {
8648: newDoc->intSubset = ctx->myDoc->intSubset;
8649: newDoc->extSubset = ctx->myDoc->extSubset;
8650: }
8651: if (ctx->myDoc->URL != NULL) {
8652: newDoc->URL = xmlStrdup(ctx->myDoc->URL);
8653: }
8654: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8655: if (newDoc->children == NULL) {
8656: ctxt->sax = oldsax;
8657: xmlFreeParserCtxt(ctxt);
8658: newDoc->intSubset = NULL;
8659: newDoc->extSubset = NULL;
8660: xmlFreeDoc(newDoc);
8661: return(-1);
8662: }
8663: nodePush(ctxt, newDoc->children);
8664: if (ctx->myDoc == NULL) {
8665: ctxt->myDoc = newDoc;
8666: } else {
8667: ctxt->myDoc = ctx->myDoc;
8668: newDoc->children->doc = ctx->myDoc;
8669: }
8670:
8671: /*
8672: * Parse a possible text declaration first
8673: */
8674: GROW;
8675: if ((RAW == '<') && (NXT(1) == '?') &&
8676: (NXT(2) == 'x') && (NXT(3) == 'm') &&
8677: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8678: xmlParseTextDecl(ctxt);
8679: }
8680:
8681: /*
8682: * Doing validity checking on chunk doesn't make sense
8683: */
8684: ctxt->instate = XML_PARSER_CONTENT;
8685: ctxt->validate = ctx->validate;
8686: ctxt->depth = ctx->depth + 1;
8687: ctxt->replaceEntities = ctx->replaceEntities;
8688: if (ctxt->validate) {
8689: ctxt->vctxt.error = ctx->vctxt.error;
8690: ctxt->vctxt.warning = ctx->vctxt.warning;
8691: /* Allocate the Node stack */
8692: ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
1.228 veillard 8693: if (ctxt->vctxt.nodeTab == NULL) {
8694: fprintf(stderr, "xmlParseCtxtExternalEntity: out of memory\n");
8695: ctxt->validate = 0;
8696: ctxt->vctxt.error = NULL;
8697: ctxt->vctxt.warning = NULL;
8698: } else {
8699: ctxt->vctxt.nodeNr = 0;
8700: ctxt->vctxt.nodeMax = 4;
8701: ctxt->vctxt.node = NULL;
8702: }
1.213 veillard 8703: } else {
8704: ctxt->vctxt.error = NULL;
8705: ctxt->vctxt.warning = NULL;
8706: }
8707:
8708: xmlParseContent(ctxt);
8709:
8710: if ((RAW == '<') && (NXT(1) == '/')) {
1.230 ! veillard 8711: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.213 veillard 8712: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8713: ctxt->sax->error(ctxt->userData,
8714: "chunk is not well balanced\n");
8715: ctxt->wellFormed = 0;
8716: ctxt->disableSAX = 1;
8717: } else if (RAW != 0) {
1.230 ! veillard 8718: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
1.213 veillard 8719: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8720: ctxt->sax->error(ctxt->userData,
8721: "extra content at the end of well balanced chunk\n");
8722: ctxt->wellFormed = 0;
8723: ctxt->disableSAX = 1;
8724: }
8725: if (ctxt->node != newDoc->children) {
1.230 ! veillard 8726: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.213 veillard 8727: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8728: ctxt->sax->error(ctxt->userData,
8729: "chunk is not well balanced\n");
8730: ctxt->wellFormed = 0;
8731: ctxt->disableSAX = 1;
8732: }
8733:
8734: if (!ctxt->wellFormed) {
8735: if (ctxt->errNo == 0)
8736: ret = 1;
8737: else
8738: ret = ctxt->errNo;
8739: } else {
8740: if (list != NULL) {
8741: xmlNodePtr cur;
8742:
8743: /*
8744: * Return the newly created nodeset after unlinking it from
8745: * they pseudo parent.
8746: */
8747: cur = newDoc->children->children;
8748: *list = cur;
8749: while (cur != NULL) {
8750: cur->parent = NULL;
8751: cur = cur->next;
8752: }
8753: newDoc->children->children = NULL;
8754: }
8755: ret = 0;
8756: }
8757: ctxt->sax = oldsax;
8758: xmlFreeParserCtxt(ctxt);
8759: newDoc->intSubset = NULL;
8760: newDoc->extSubset = NULL;
8761: xmlFreeDoc(newDoc);
8762:
8763: return(ret);
8764: }
8765:
8766: /**
1.181 daniel 8767: * xmlParseExternalEntity:
8768: * @doc: the document the chunk pertains to
8769: * @sax: the SAX handler bloc (possibly NULL)
8770: * @user_data: The user data returned on SAX callbacks (possibly NULL)
1.185 daniel 8771: * @depth: Used for loop detection, use 0
1.181 daniel 8772: * @URL: the URL for the entity to load
8773: * @ID: the System ID for the entity to load
8774: * @list: the return value for the set of parsed nodes
8775: *
8776: * Parse an external general entity
8777: * An external general parsed entity is well-formed if it matches the
8778: * production labeled extParsedEnt.
8779: *
8780: * [78] extParsedEnt ::= TextDecl? content
8781: *
8782: * Returns 0 if the entity is well formed, -1 in case of args problem and
8783: * the parser error code otherwise
8784: */
8785:
8786: int
8787: xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
1.185 daniel 8788: int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
1.181 daniel 8789: xmlParserCtxtPtr ctxt;
8790: xmlDocPtr newDoc;
8791: xmlSAXHandlerPtr oldsax = NULL;
8792: int ret = 0;
8793:
1.185 daniel 8794: if (depth > 40) {
8795: return(XML_ERR_ENTITY_LOOP);
8796: }
8797:
8798:
1.181 daniel 8799:
8800: if (list != NULL)
8801: *list = NULL;
8802: if ((URL == NULL) && (ID == NULL))
1.213 veillard 8803: return(-1);
8804: if (doc == NULL) /* @@ relax but check for dereferences */
1.181 daniel 8805: return(-1);
8806:
8807:
1.228 veillard 8808: ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
1.181 daniel 8809: if (ctxt == NULL) return(-1);
8810: ctxt->userData = ctxt;
8811: if (sax != NULL) {
8812: oldsax = ctxt->sax;
8813: ctxt->sax = sax;
8814: if (user_data != NULL)
8815: ctxt->userData = user_data;
8816: }
8817: newDoc = xmlNewDoc(BAD_CAST "1.0");
8818: if (newDoc == NULL) {
8819: xmlFreeParserCtxt(ctxt);
8820: return(-1);
8821: }
8822: if (doc != NULL) {
8823: newDoc->intSubset = doc->intSubset;
8824: newDoc->extSubset = doc->extSubset;
8825: }
8826: if (doc->URL != NULL) {
8827: newDoc->URL = xmlStrdup(doc->URL);
8828: }
8829: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8830: if (newDoc->children == NULL) {
8831: if (sax != NULL)
8832: ctxt->sax = oldsax;
8833: xmlFreeParserCtxt(ctxt);
8834: newDoc->intSubset = NULL;
8835: newDoc->extSubset = NULL;
8836: xmlFreeDoc(newDoc);
8837: return(-1);
8838: }
8839: nodePush(ctxt, newDoc->children);
8840: if (doc == NULL) {
8841: ctxt->myDoc = newDoc;
8842: } else {
8843: ctxt->myDoc = doc;
8844: newDoc->children->doc = doc;
8845: }
8846:
8847: /*
8848: * Parse a possible text declaration first
8849: */
8850: GROW;
8851: if ((RAW == '<') && (NXT(1) == '?') &&
8852: (NXT(2) == 'x') && (NXT(3) == 'm') &&
8853: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8854: xmlParseTextDecl(ctxt);
8855: }
8856:
8857: /*
8858: * Doing validity checking on chunk doesn't make sense
8859: */
8860: ctxt->instate = XML_PARSER_CONTENT;
8861: ctxt->validate = 0;
1.185 daniel 8862: ctxt->depth = depth;
1.181 daniel 8863:
8864: xmlParseContent(ctxt);
8865:
8866: if ((RAW == '<') && (NXT(1) == '/')) {
1.230 ! veillard 8867: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.181 daniel 8868: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8869: ctxt->sax->error(ctxt->userData,
8870: "chunk is not well balanced\n");
8871: ctxt->wellFormed = 0;
8872: ctxt->disableSAX = 1;
8873: } else if (RAW != 0) {
1.230 ! veillard 8874: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
1.181 daniel 8875: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8876: ctxt->sax->error(ctxt->userData,
8877: "extra content at the end of well balanced chunk\n");
8878: ctxt->wellFormed = 0;
8879: ctxt->disableSAX = 1;
8880: }
8881: if (ctxt->node != newDoc->children) {
1.230 ! veillard 8882: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.181 daniel 8883: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8884: ctxt->sax->error(ctxt->userData,
8885: "chunk is not well balanced\n");
8886: ctxt->wellFormed = 0;
8887: ctxt->disableSAX = 1;
8888: }
8889:
8890: if (!ctxt->wellFormed) {
8891: if (ctxt->errNo == 0)
8892: ret = 1;
8893: else
8894: ret = ctxt->errNo;
8895: } else {
8896: if (list != NULL) {
8897: xmlNodePtr cur;
8898:
8899: /*
8900: * Return the newly created nodeset after unlinking it from
8901: * they pseudo parent.
8902: */
8903: cur = newDoc->children->children;
8904: *list = cur;
8905: while (cur != NULL) {
8906: cur->parent = NULL;
8907: cur = cur->next;
8908: }
8909: newDoc->children->children = NULL;
8910: }
8911: ret = 0;
8912: }
8913: if (sax != NULL)
8914: ctxt->sax = oldsax;
8915: xmlFreeParserCtxt(ctxt);
8916: newDoc->intSubset = NULL;
8917: newDoc->extSubset = NULL;
8918: xmlFreeDoc(newDoc);
8919:
8920: return(ret);
8921: }
8922:
8923: /**
 * xmlParseBalancedChunkMemory:
1.176 daniel 8925: * @doc: the document the chunk pertains to
8926: * @sax: the SAX handler bloc (possibly NULL)
8927: * @user_data: The user data returned on SAX callbacks (possibly NULL)
1.185 daniel 8928: * @depth: Used for loop detection, use 0
1.176 daniel 8929: * @string: the input string in UTF8 or ISO-Latin (zero terminated)
8930: * @list: the return value for the set of parsed nodes
8931: *
8932: * Parse a well-balanced chunk of an XML document
8933: * called by the parser
8934: * The allowed sequence for the Well Balanced Chunk is the one defined by
8935: * the content production in the XML grammar:
1.144 daniel 8936: *
1.175 daniel 8937: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8938: *
1.176 daniel 8939: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
8940: * the parser error code otherwise
1.144 daniel 8941: */
8942:
1.175 daniel 8943: int
8944: xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
1.185 daniel 8945: void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
1.176 daniel 8946: xmlParserCtxtPtr ctxt;
1.175 daniel 8947: xmlDocPtr newDoc;
1.181 daniel 8948: xmlSAXHandlerPtr oldsax = NULL;
1.175 daniel 8949: int size;
1.176 daniel 8950: int ret = 0;
1.175 daniel 8951:
1.185 daniel 8952: if (depth > 40) {
8953: return(XML_ERR_ENTITY_LOOP);
8954: }
8955:
1.175 daniel 8956:
1.176 daniel 8957: if (list != NULL)
8958: *list = NULL;
8959: if (string == NULL)
8960: return(-1);
8961:
8962: size = xmlStrlen(string);
8963:
1.183 daniel 8964: ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
1.176 daniel 8965: if (ctxt == NULL) return(-1);
8966: ctxt->userData = ctxt;
1.175 daniel 8967: if (sax != NULL) {
1.176 daniel 8968: oldsax = ctxt->sax;
8969: ctxt->sax = sax;
8970: if (user_data != NULL)
8971: ctxt->userData = user_data;
1.175 daniel 8972: }
8973: newDoc = xmlNewDoc(BAD_CAST "1.0");
1.176 daniel 8974: if (newDoc == NULL) {
8975: xmlFreeParserCtxt(ctxt);
8976: return(-1);
8977: }
1.175 daniel 8978: if (doc != NULL) {
8979: newDoc->intSubset = doc->intSubset;
8980: newDoc->extSubset = doc->extSubset;
8981: }
1.176 daniel 8982: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8983: if (newDoc->children == NULL) {
8984: if (sax != NULL)
8985: ctxt->sax = oldsax;
8986: xmlFreeParserCtxt(ctxt);
8987: newDoc->intSubset = NULL;
8988: newDoc->extSubset = NULL;
8989: xmlFreeDoc(newDoc);
8990: return(-1);
8991: }
8992: nodePush(ctxt, newDoc->children);
8993: if (doc == NULL) {
8994: ctxt->myDoc = newDoc;
8995: } else {
8996: ctxt->myDoc = doc;
8997: newDoc->children->doc = doc;
8998: }
8999: ctxt->instate = XML_PARSER_CONTENT;
1.185 daniel 9000: ctxt->depth = depth;
1.176 daniel 9001:
9002: /*
9003: * Doing validity checking on chunk doesn't make sense
9004: */
9005: ctxt->validate = 0;
9006:
1.175 daniel 9007: xmlParseContent(ctxt);
1.176 daniel 9008:
9009: if ((RAW == '<') && (NXT(1) == '/')) {
1.230 ! veillard 9010: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.176 daniel 9011: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9012: ctxt->sax->error(ctxt->userData,
9013: "chunk is not well balanced\n");
9014: ctxt->wellFormed = 0;
1.180 daniel 9015: ctxt->disableSAX = 1;
1.176 daniel 9016: } else if (RAW != 0) {
1.230 ! veillard 9017: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
1.176 daniel 9018: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9019: ctxt->sax->error(ctxt->userData,
9020: "extra content at the end of well balanced chunk\n");
9021: ctxt->wellFormed = 0;
1.180 daniel 9022: ctxt->disableSAX = 1;
1.176 daniel 9023: }
9024: if (ctxt->node != newDoc->children) {
1.230 ! veillard 9025: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.176 daniel 9026: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9027: ctxt->sax->error(ctxt->userData,
9028: "chunk is not well balanced\n");
9029: ctxt->wellFormed = 0;
1.180 daniel 9030: ctxt->disableSAX = 1;
1.176 daniel 9031: }
1.175 daniel 9032:
1.176 daniel 9033: if (!ctxt->wellFormed) {
9034: if (ctxt->errNo == 0)
9035: ret = 1;
9036: else
9037: ret = ctxt->errNo;
9038: } else {
9039: if (list != NULL) {
9040: xmlNodePtr cur;
1.175 daniel 9041:
1.176 daniel 9042: /*
9043: * Return the newly created nodeset after unlinking it from
9044: * they pseudo parent.
9045: */
9046: cur = newDoc->children->children;
9047: *list = cur;
9048: while (cur != NULL) {
9049: cur->parent = NULL;
9050: cur = cur->next;
9051: }
9052: newDoc->children->children = NULL;
9053: }
9054: ret = 0;
1.175 daniel 9055: }
1.176 daniel 9056: if (sax != NULL)
9057: ctxt->sax = oldsax;
1.175 daniel 9058: xmlFreeParserCtxt(ctxt);
9059: newDoc->intSubset = NULL;
9060: newDoc->extSubset = NULL;
1.176 daniel 9061: xmlFreeDoc(newDoc);
1.175 daniel 9062:
1.176 daniel 9063: return(ret);
1.144 daniel 9064: }
9065:
9066: /**
1.229 veillard 9067: * xmlSAXParseEntity:
9068: * @sax: the SAX handler block
9069: * @filename: the filename
9070: *
9071: * parse an XML external entity out of context and build a tree.
9072: * It use the given SAX function block to handle the parsing callback.
9073: * If sax is NULL, fallback to the default DOM tree building routines.
9074: *
9075: * [78] extParsedEnt ::= TextDecl? content
9076: *
9077: * This correspond to a "Well Balanced" chunk
1.144 daniel 9078: *
1.229 veillard 9079: * Returns the resulting document tree
1.144 daniel 9080: */
9081:
1.229 veillard 9082: xmlDocPtr
9083: xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9084: xmlDocPtr ret;
9085: xmlParserCtxtPtr ctxt;
9086: char *directory = NULL;
9087:
9088: ctxt = xmlCreateFileParserCtxt(filename);
9089: if (ctxt == NULL) {
9090: return(NULL);
9091: }
9092: if (sax != NULL) {
9093: if (ctxt->sax != NULL)
9094: xmlFree(ctxt->sax);
9095: ctxt->sax = sax;
9096: ctxt->userData = NULL;
9097: }
9098:
9099: if ((ctxt->directory == NULL) && (directory == NULL))
9100: directory = xmlParserGetDirectory(filename);
9101:
9102: xmlParseExtParsedEnt(ctxt);
9103:
9104: if (ctxt->wellFormed)
9105: ret = ctxt->myDoc;
9106: else {
9107: ret = NULL;
9108: xmlFreeDoc(ctxt->myDoc);
9109: ctxt->myDoc = NULL;
9110: }
9111: if (sax != NULL)
9112: ctxt->sax = NULL;
9113: xmlFreeParserCtxt(ctxt);
9114:
9115: return(ret);
1.144 daniel 9116: }
9117:
9118: /**
1.229 veillard 9119: * xmlParseEntity:
9120: * @filename: the filename
9121: *
9122: * parse an XML external entity out of context and build a tree.
9123: *
9124: * [78] extParsedEnt ::= TextDecl? content
9125: *
9126: * This correspond to a "Well Balanced" chunk
1.59 daniel 9127: *
1.68 daniel 9128: * Returns the resulting document tree
1.59 daniel 9129: */
9130:
1.69 daniel 9131: xmlDocPtr
1.229 veillard 9132: xmlParseEntity(const char *filename) {
9133: return(xmlSAXParseEntity(NULL, filename));
1.55 daniel 9134: }
9135:
9136: /**
1.181 daniel 9137: * xmlCreateEntityParserCtxt:
9138: * @URL: the entity URL
9139: * @ID: the entity PUBLIC ID
9140: * @base: a posible base for the target URI
9141: *
9142: * Create a parser context for an external entity
9143: * Automatic support for ZLIB/Compress compressed document is provided
9144: * by default if found at compile-time.
9145: *
9146: * Returns the new parser context or NULL
9147: */
9148: xmlParserCtxtPtr
9149: xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9150: const xmlChar *base) {
9151: xmlParserCtxtPtr ctxt;
9152: xmlParserInputPtr inputStream;
9153: char *directory = NULL;
1.210 veillard 9154: xmlChar *uri;
9155:
1.181 daniel 9156: ctxt = xmlNewParserCtxt();
9157: if (ctxt == NULL) {
9158: return(NULL);
9159: }
9160:
1.210 veillard 9161: uri = xmlBuildURI(URL, base);
9162:
9163: if (uri == NULL) {
9164: inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9165: if (inputStream == NULL) {
9166: xmlFreeParserCtxt(ctxt);
9167: return(NULL);
9168: }
9169:
9170: inputPush(ctxt, inputStream);
9171:
9172: if ((ctxt->directory == NULL) && (directory == NULL))
9173: directory = xmlParserGetDirectory((char *)URL);
9174: if ((ctxt->directory == NULL) && (directory != NULL))
9175: ctxt->directory = directory;
9176: } else {
9177: inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9178: if (inputStream == NULL) {
9179: xmlFreeParserCtxt(ctxt);
9180: return(NULL);
9181: }
1.181 daniel 9182:
1.210 veillard 9183: inputPush(ctxt, inputStream);
1.181 daniel 9184:
1.210 veillard 9185: if ((ctxt->directory == NULL) && (directory == NULL))
9186: directory = xmlParserGetDirectory((char *)uri);
9187: if ((ctxt->directory == NULL) && (directory != NULL))
9188: ctxt->directory = directory;
9189: xmlFree(uri);
9190: }
1.181 daniel 9191:
9192: return(ctxt);
9193: }
9194:
1.229 veillard 9195: /************************************************************************
9196: * *
9197: * Front ends when parsing from a file *
9198: * *
9199: ************************************************************************/
9200:
1.181 daniel 9201: /**
9202: * xmlCreateFileParserCtxt:
1.50 daniel 9203: * @filename: the filename
9204: *
1.69 daniel 9205: * Create a parser context for a file content.
9206: * Automatic support for ZLIB/Compress compressed document is provided
9207: * by default if found at compile-time.
1.50 daniel 9208: *
1.69 daniel 9209: * Returns the new parser context or NULL
1.9 httpng 9210: */
1.69 daniel 9211: xmlParserCtxtPtr
9212: xmlCreateFileParserCtxt(const char *filename)
9213: {
9214: xmlParserCtxtPtr ctxt;
1.40 daniel 9215: xmlParserInputPtr inputStream;
1.91 daniel 9216: xmlParserInputBufferPtr buf;
1.111 daniel 9217: char *directory = NULL;
1.9 httpng 9218:
1.91 daniel 9219: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1.228 veillard 9220: if (buf == NULL) {
9221: return(NULL);
9222: }
1.9 httpng 9223:
1.97 daniel 9224: ctxt = xmlNewParserCtxt();
1.16 daniel 9225: if (ctxt == NULL) {
1.228 veillard 9226: if (xmlDefaultSAXHandler.error != NULL) {
9227: xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9228: }
1.16 daniel 9229: return(NULL);
9230: }
1.97 daniel 9231:
1.96 daniel 9232: inputStream = xmlNewInputStream(ctxt);
1.40 daniel 9233: if (inputStream == NULL) {
1.97 daniel 9234: xmlFreeParserCtxt(ctxt);
1.40 daniel 9235: return(NULL);
9236: }
9237:
1.119 daniel 9238: inputStream->filename = xmlMemStrdup(filename);
1.91 daniel 9239: inputStream->buf = buf;
9240: inputStream->base = inputStream->buf->buffer->content;
9241: inputStream->cur = inputStream->buf->buffer->content;
1.16 daniel 9242:
1.40 daniel 9243: inputPush(ctxt, inputStream);
1.110 daniel 9244: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 9245: directory = xmlParserGetDirectory(filename);
9246: if ((ctxt->directory == NULL) && (directory != NULL))
1.110 daniel 9247: ctxt->directory = directory;
1.106 daniel 9248:
1.69 daniel 9249: return(ctxt);
9250: }
9251:
9252: /**
1.181 daniel 9253: * xmlSAXParseFile:
1.69 daniel 9254: * @sax: the SAX handler block
9255: * @filename: the filename
9256: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9257: * documents
9258: *
9259: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9260: * compressed document is provided by default if found at compile-time.
9261: * It use the given SAX function block to handle the parsing callback.
9262: * If sax is NULL, fallback to the default DOM tree building routines.
9263: *
9264: * Returns the resulting document tree
9265: */
9266:
1.79 daniel 9267: xmlDocPtr
9268: xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
1.69 daniel 9269: int recovery) {
9270: xmlDocPtr ret;
1.229 veillard 9271: xmlParserCtxtPtr ctxt;
9272: char *directory = NULL;
9273:
9274: ctxt = xmlCreateFileParserCtxt(filename);
9275: if (ctxt == NULL) {
9276: return(NULL);
9277: }
9278: if (sax != NULL) {
9279: if (ctxt->sax != NULL)
9280: xmlFree(ctxt->sax);
9281: ctxt->sax = sax;
9282: ctxt->userData = NULL;
9283: }
9284:
9285: if ((ctxt->directory == NULL) && (directory == NULL))
9286: directory = xmlParserGetDirectory(filename);
9287: if ((ctxt->directory == NULL) && (directory != NULL))
9288: ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9289:
9290: xmlParseDocument(ctxt);
9291:
9292: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9293: else {
9294: ret = NULL;
9295: xmlFreeDoc(ctxt->myDoc);
9296: ctxt->myDoc = NULL;
9297: }
9298: if (sax != NULL)
9299: ctxt->sax = NULL;
9300: xmlFreeParserCtxt(ctxt);
9301:
9302: return(ret);
9303: }
9304:
9305: /**
9306: * xmlRecoverDoc:
9307: * @cur: a pointer to an array of xmlChar
9308: *
9309: * parse an XML in-memory document and build a tree.
9310: * In the case the document is not Well Formed, a tree is built anyway
9311: *
9312: * Returns the resulting document tree
9313: */
9314:
9315: xmlDocPtr
9316: xmlRecoverDoc(xmlChar *cur) {
9317: return(xmlSAXParseDoc(NULL, cur, 1));
9318: }
9319:
9320: /**
9321: * xmlParseFile:
9322: * @filename: the filename
9323: *
9324: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9325: * compressed document is provided by default if found at compile-time.
9326: *
9327: * Returns the resulting document tree
9328: */
9329:
9330: xmlDocPtr
9331: xmlParseFile(const char *filename) {
9332: return(xmlSAXParseFile(NULL, filename, 0));
9333: }
9334:
9335: /**
9336: * xmlRecoverFile:
9337: * @filename: the filename
9338: *
9339: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9340: * compressed document is provided by default if found at compile-time.
9341: * In the case the document is not Well Formed, a tree is built anyway
9342: *
9343: * Returns the resulting document tree
9344: */
9345:
9346: xmlDocPtr
9347: xmlRecoverFile(const char *filename) {
9348: return(xmlSAXParseFile(NULL, filename, 1));
9349: }
9350:
9351:
9352: /**
9353: * xmlSetupParserForBuffer:
9354: * @ctxt: an XML parser context
9355: * @buffer: a xmlChar * buffer
9356: * @filename: a file name
9357: *
9358: * Setup the parser context to parse a new buffer; Clears any prior
9359: * contents from the parser context. The buffer parameter must not be
9360: * NULL, but the filename parameter can be
9361: */
9362: void
9363: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9364: const char* filename)
9365: {
9366: xmlParserInputPtr input;
9367:
9368: input = xmlNewInputStream(ctxt);
9369: if (input == NULL) {
9370: perror("malloc");
9371: xmlFree(ctxt);
9372: return;
9373: }
9374:
9375: xmlClearParserCtxt(ctxt);
9376: if (filename != NULL)
9377: input->filename = xmlMemStrdup(filename);
9378: input->base = buffer;
9379: input->cur = buffer;
9380: inputPush(ctxt, input);
9381: }
9382:
9383: /**
9384: * xmlSAXUserParseFile:
9385: * @sax: a SAX handler
9386: * @user_data: The user data returned on SAX callbacks
9387: * @filename: a file name
9388: *
9389: * parse an XML file and call the given SAX handler routines.
9390: * Automatic support for ZLIB/Compress compressed document is provided
9391: *
9392: * Returns 0 in case of success or a error number otherwise
9393: */
9394: int
9395: xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9396: const char *filename) {
9397: int ret = 0;
9398: xmlParserCtxtPtr ctxt;
9399:
9400: ctxt = xmlCreateFileParserCtxt(filename);
9401: if (ctxt == NULL) return -1;
9402: if (ctxt->sax != &xmlDefaultSAXHandler)
9403: xmlFree(ctxt->sax);
9404: ctxt->sax = sax;
9405: if (user_data != NULL)
9406: ctxt->userData = user_data;
9407:
1.16 daniel 9408: xmlParseDocument(ctxt);
1.229 veillard 9409:
9410: if (ctxt->wellFormed)
9411: ret = 0;
1.59 daniel 9412: else {
1.229 veillard 9413: if (ctxt->errNo != 0)
9414: ret = ctxt->errNo;
9415: else
9416: ret = -1;
1.59 daniel 9417: }
1.86 daniel 9418: if (sax != NULL)
1.229 veillard 9419: ctxt->sax = NULL;
1.69 daniel 9420: xmlFreeParserCtxt(ctxt);
1.20 daniel 9421:
1.229 veillard 9422: return ret;
1.20 daniel 9423: }
9424:
1.229 veillard 9425: /************************************************************************
9426: * *
9427: * Front ends when parsing from memory *
9428: * *
9429: ************************************************************************/
1.32 daniel 9430:
1.50 daniel 9431: /**
1.181 daniel 9432: * xmlCreateMemoryParserCtxt:
1.229 veillard 9433: * @buffer: a pointer to a char array
9434: * @size: the size of the array
1.50 daniel 9435: *
1.69 daniel 9436: * Create a parser context for an XML in-memory document.
1.50 daniel 9437: *
1.69 daniel 9438: * Returns the new parser context or NULL
1.20 daniel 9439: */
1.69 daniel 9440: xmlParserCtxtPtr
9441: xmlCreateMemoryParserCtxt(char *buffer, int size) {
1.20 daniel 9442: xmlParserCtxtPtr ctxt;
1.40 daniel 9443: xmlParserInputPtr input;
1.209 veillard 9444: xmlParserInputBufferPtr buf;
1.40 daniel 9445:
1.229 veillard 9446: if (buffer == NULL)
9447: return(NULL);
9448: if (size <= 0)
1.181 daniel 9449: return(NULL);
1.40 daniel 9450:
1.97 daniel 9451: ctxt = xmlNewParserCtxt();
1.181 daniel 9452: if (ctxt == NULL)
1.20 daniel 9453: return(NULL);
1.97 daniel 9454:
1.209 veillard 9455: buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
9456: if (buf == NULL) return(NULL);
9457:
1.96 daniel 9458: input = xmlNewInputStream(ctxt);
1.40 daniel 9459: if (input == NULL) {
1.97 daniel 9460: xmlFreeParserCtxt(ctxt);
1.40 daniel 9461: return(NULL);
9462: }
1.20 daniel 9463:
1.40 daniel 9464: input->filename = NULL;
1.209 veillard 9465: input->buf = buf;
9466: input->base = input->buf->buffer->content;
9467: input->cur = input->buf->buffer->content;
1.20 daniel 9468:
1.40 daniel 9469: inputPush(ctxt, input);
1.69 daniel 9470: return(ctxt);
9471: }
9472:
9473: /**
1.181 daniel 9474: * xmlSAXParseMemory:
1.69 daniel 9475: * @sax: the SAX handler block
9476: * @buffer: an pointer to a char array
1.127 daniel 9477: * @size: the size of the array
9478: * @recovery: work in recovery mode, i.e. tries to read not Well Formed
1.69 daniel 9479: * documents
9480: *
9481: * parse an XML in-memory block and use the given SAX function block
9482: * to handle the parsing callback. If sax is NULL, fallback to the default
9483: * DOM tree building routines.
9484: *
9485: * Returns the resulting document tree
9486: */
9487: xmlDocPtr
9488: xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
9489: xmlDocPtr ret;
9490: xmlParserCtxtPtr ctxt;
9491:
9492: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9493: if (ctxt == NULL) return(NULL);
1.74 daniel 9494: if (sax != NULL) {
9495: ctxt->sax = sax;
9496: ctxt->userData = NULL;
9497: }
1.20 daniel 9498:
9499: xmlParseDocument(ctxt);
1.40 daniel 9500:
1.72 daniel 9501: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 9502: else {
9503: ret = NULL;
1.72 daniel 9504: xmlFreeDoc(ctxt->myDoc);
9505: ctxt->myDoc = NULL;
1.59 daniel 9506: }
1.86 daniel 9507: if (sax != NULL)
9508: ctxt->sax = NULL;
1.69 daniel 9509: xmlFreeParserCtxt(ctxt);
1.16 daniel 9510:
1.9 httpng 9511: return(ret);
1.17 daniel 9512: }
9513:
1.55 daniel 9514: /**
1.181 daniel 9515: * xmlParseMemory:
1.68 daniel 9516: * @buffer: an pointer to a char array
1.55 daniel 9517: * @size: the size of the array
9518: *
9519: * parse an XML in-memory block and build a tree.
9520: *
1.68 daniel 9521: * Returns the resulting document tree
1.55 daniel 9522: */
9523:
9524: xmlDocPtr xmlParseMemory(char *buffer, int size) {
1.59 daniel 9525: return(xmlSAXParseMemory(NULL, buffer, size, 0));
9526: }
9527:
9528: /**
1.181 daniel 9529: * xmlRecoverMemory:
1.68 daniel 9530: * @buffer: an pointer to a char array
1.59 daniel 9531: * @size: the size of the array
9532: *
9533: * parse an XML in-memory block and build a tree.
9534: * In the case the document is not Well Formed, a tree is built anyway
9535: *
1.68 daniel 9536: * Returns the resulting document tree
1.59 daniel 9537: */
9538:
9539: xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
9540: return(xmlSAXParseMemory(NULL, buffer, size, 1));
1.17 daniel 9541: }
9542:
1.123 daniel 9543: /**
9544: * xmlSAXUserParseMemory:
9545: * @sax: a SAX handler
9546: * @user_data: The user data returned on SAX callbacks
9547: * @buffer: an in-memory XML document input
1.127 daniel 9548: * @size: the length of the XML document in bytes
1.123 daniel 9549: *
9550: * A better SAX parsing routine.
9551: * parse an XML in-memory buffer and call the given SAX handler routines.
9552: *
9553: * Returns 0 in case of success or a error number otherwise
9554: */
9555: int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
9556: char *buffer, int size) {
9557: int ret = 0;
9558: xmlParserCtxtPtr ctxt;
1.218 veillard 9559: xmlSAXHandlerPtr oldsax = NULL;
1.123 daniel 9560:
9561: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9562: if (ctxt == NULL) return -1;
1.216 veillard 9563: if (sax != NULL) {
9564: oldsax = ctxt->sax;
9565: ctxt->sax = sax;
9566: }
1.123 daniel 9567: ctxt->userData = user_data;
9568:
9569: xmlParseDocument(ctxt);
9570:
9571: if (ctxt->wellFormed)
9572: ret = 0;
9573: else {
9574: if (ctxt->errNo != 0)
9575: ret = ctxt->errNo;
9576: else
9577: ret = -1;
9578: }
1.216 veillard 9579: if (sax != NULL) {
9580: ctxt->sax = oldsax;
9581: }
1.123 daniel 9582: xmlFreeParserCtxt(ctxt);
9583:
9584: return ret;
9585: }
9586:
1.132 daniel 9587: /**
1.229 veillard 9588: * xmlCreateDocParserCtxt:
9589: * @cur: a pointer to an array of xmlChar
9590: *
9591: * Creates a parser context for an XML in-memory document.
1.132 daniel 9592: *
1.229 veillard 9593: * Returns the new parser context or NULL
1.132 daniel 9594: */
1.229 veillard 9595: xmlParserCtxtPtr
9596: xmlCreateDocParserCtxt(xmlChar *cur) {
9597: int len;
1.132 daniel 9598:
1.229 veillard 9599: if (cur == NULL)
9600: return(NULL);
9601: len = xmlStrlen(cur);
9602: return(xmlCreateMemoryParserCtxt((char *)cur, len));
1.132 daniel 9603: }
1.98 daniel 9604:
1.50 daniel 9605: /**
1.229 veillard 9606: * xmlSAXParseDoc:
9607: * @sax: the SAX handler block
9608: * @cur: a pointer to an array of xmlChar
9609: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9610: * documents
1.50 daniel 9611: *
1.229 veillard 9612: * parse an XML in-memory document and build a tree.
9613: * It use the given SAX function block to handle the parsing callback.
9614: * If sax is NULL, fallback to the default DOM tree building routines.
1.50 daniel 9615: *
1.229 veillard 9616: * Returns the resulting document tree
1.32 daniel 9617: */
9618:
1.229 veillard 9619: xmlDocPtr
9620: xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
9621: xmlDocPtr ret;
9622: xmlParserCtxtPtr ctxt;
9623:
9624: if (cur == NULL) return(NULL);
1.32 daniel 9625:
9626:
1.229 veillard 9627: ctxt = xmlCreateDocParserCtxt(cur);
9628: if (ctxt == NULL) return(NULL);
9629: if (sax != NULL) {
9630: ctxt->sax = sax;
9631: ctxt->userData = NULL;
9632: }
1.32 daniel 9633:
1.229 veillard 9634: xmlParseDocument(ctxt);
9635: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9636: else {
9637: ret = NULL;
9638: xmlFreeDoc(ctxt->myDoc);
9639: ctxt->myDoc = NULL;
9640: }
9641: if (sax != NULL)
9642: ctxt->sax = NULL;
9643: xmlFreeParserCtxt(ctxt);
9644:
9645: return(ret);
1.32 daniel 9646: }
9647:
1.50 daniel 9648: /**
1.229 veillard 9649: * xmlParseDoc:
9650: * @cur: a pointer to an array of xmlChar
1.50 daniel 9651: *
1.229 veillard 9652: * parse an XML in-memory document and build a tree.
1.50 daniel 9653: *
1.229 veillard 9654: * Returns the resulting document tree
1.32 daniel 9655: */
9656:
1.229 veillard 9657: xmlDocPtr
9658: xmlParseDoc(xmlChar *cur) {
9659: return(xmlSAXParseDoc(NULL, cur, 0));
9660: }
1.32 daniel 9661:
9662:
1.229 veillard 9663: /************************************************************************
9664: * *
9665: * Miscellaneous *
9666: * *
9667: ************************************************************************/
1.32 daniel 9668:
/**
 * xmlCleanupParser:
 *
 * Cleanup function for the XML parser. It tries to reclaim all
 * parsing related global memory allocated for the parser processing,
 * by cleaning up the character encoding handlers and then the
 * predefined entities.
 * It doesn't deallocate any document related memory. Calling this
 * function should not prevent reusing the parser.
 */

void
xmlCleanupParser(void)
{
    /* global encoding handlers first, predefined entities second */
    xmlCleanupCharEncodingHandlers();
    xmlCleanupPredefinedEntities();
}
1.220 veillard 9683:
9684: /**
9685: * xmlPedanticParserDefault:
9686: * @val: int 0 or 1
9687: *
9688: * Set and return the previous value for enabling pedantic warnings.
9689: *
9690: * Returns the last value for 0 for no substitution, 1 for substitution.
9691: */
9692:
9693: int
9694: xmlPedanticParserDefault(int val) {
9695: int old = xmlPedanticParserDefaultValue;
9696:
9697: xmlPedanticParserDefaultValue = val;
9698: return(old);
9699: }
1.98 daniel 9700:
9701: /**
1.181 daniel 9702: * xmlSubstituteEntitiesDefault:
1.98 daniel 9703: * @val: int 0 or 1
9704: *
9705: * Set and return the previous value for default entity support.
9706: * Initially the parser always keep entity references instead of substituting
9707: * entity values in the output. This function has to be used to change the
9708: * default parser behaviour
9709: * SAX::subtituteEntities() has to be used for changing that on a file by
9710: * file basis.
9711: *
9712: * Returns the last value for 0 for no substitution, 1 for substitution.
9713: */
9714:
9715: int
9716: xmlSubstituteEntitiesDefault(int val) {
9717: int old = xmlSubstituteEntitiesDefaultValue;
9718:
9719: xmlSubstituteEntitiesDefaultValue = val;
1.180 daniel 9720: return(old);
9721: }
9722:
9723: /**
9724: * xmlKeepBlanksDefault:
9725: * @val: int 0 or 1
9726: *
9727: * Set and return the previous value for default blanks text nodes support.
9728: * The 1.x version of the parser used an heuristic to try to detect
9729: * ignorable white spaces. As a result the SAX callback was generating
9730: * ignorableWhitespace() callbacks instead of characters() one, and when
9731: * using the DOM output text nodes containing those blanks were not generated.
9732: * The 2.x and later version will switch to the XML standard way and
9733: * ignorableWhitespace() are only generated when running the parser in
9734: * validating mode and when the current element doesn't allow CDATA or
9735: * mixed content.
9736: * This function is provided as a way to force the standard behaviour
9737: * on 1.X libs and to switch back to the old mode for compatibility when
9738: * running 1.X client code on 2.X . Upgrade of 1.X code should be done
9739: * by using xmlIsBlankNode() commodity function to detect the "empty"
9740: * nodes generated.
9741: * This value also affect autogeneration of indentation when saving code
9742: * if blanks sections are kept, indentation is not generated.
9743: *
9744: * Returns the last value for 0 for no substitution, 1 for substitution.
9745: */
9746:
9747: int
9748: xmlKeepBlanksDefault(int val) {
9749: int old = xmlKeepBlanksDefaultValue;
9750:
9751: xmlKeepBlanksDefaultValue = val;
9752: xmlIndentTreeOutput = !val;
1.98 daniel 9753: return(old);
9754: }
1.77 daniel 9755:
Webmaster