Annotation of XML/parser.c, revision 1.237
1.1 veillard 1: /*
1.229 veillard 2: * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3: * implemented on top of the SAX interfaces
1.15 veillard 4: *
1.222 veillard 5: * References:
6: * The XML specification:
7: * http://www.w3.org/TR/REC-xml
8: * Original 1.0 version:
9: * http://www.w3.org/TR/1998/REC-xml-19980210
10: * XML second edition working draft
11: * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12: *
1.229 veillard 13: * Okay this is a big file, the parser core is around 7000 lines, then it
14: * is followed by the progressive parser top routines, then the various
15: * high level APIs to call the parser and a few miscellaneous functions.
16: * A number of helper functions and deprecated ones have been moved to
17: * parserInternals.c to reduce this file size.
18: * As much as possible the functions are associated with their relative
19: * production in the XML specification. A few productions defining the
20: * different ranges of character are actually implemented either in
21: * parserInternals.h or parserInternals.c
22: * The DOM tree build is realized from the default SAX callbacks in
23: * the module SAX.c.
24: * The routines doing the validation checks are in valid.c and called either
25: * from the SAX callbacks or as standalone functions using a preparsed
26: * document.
27: *
1.15 veillard 28: * See Copyright for the status of this software.
29: *
1.60 daniel 30: * Daniel.Veillard@w3.org
1.1 veillard 31: */
32:
1.26 daniel 33: #ifdef WIN32
1.138 daniel 34: #include "win32config.h"
1.226 veillard 35: #define XML_DIR_SEP '\\'
1.26 daniel 36: #else
1.121 daniel 37: #include "config.h"
1.226 veillard 38: #define XML_DIR_SEP '/'
1.26 daniel 39: #endif
1.121 daniel 40:
1.1 veillard 41: #include <stdio.h>
1.204 veillard 42: #include <string.h>
1.121 daniel 43: #ifdef HAVE_CTYPE_H
1.1 veillard 44: #include <ctype.h>
1.121 daniel 45: #endif
46: #ifdef HAVE_STDLIB_H
1.50 daniel 47: #include <stdlib.h>
1.121 daniel 48: #endif
49: #ifdef HAVE_SYS_STAT_H
1.9 httpng 50: #include <sys/stat.h>
1.121 daniel 51: #endif
1.9 httpng 52: #ifdef HAVE_FCNTL_H
53: #include <fcntl.h>
54: #endif
1.10 httpng 55: #ifdef HAVE_UNISTD_H
56: #include <unistd.h>
57: #endif
1.20 daniel 58: #ifdef HAVE_ZLIB_H
59: #include <zlib.h>
60: #endif
1.1 veillard 61:
1.188 daniel 62: #include <libxml/xmlmemory.h>
63: #include <libxml/tree.h>
64: #include <libxml/parser.h>
65: #include <libxml/entities.h>
66: #include <libxml/encoding.h>
67: #include <libxml/valid.h>
68: #include <libxml/parserInternals.h>
69: #include <libxml/xmlIO.h>
1.193 daniel 70: #include <libxml/uri.h>
1.122 daniel 71: #include "xml-error.h"
1.1 veillard 72:
/*
 * Working-buffer sizes, counted in xmlChar units.
 * XML_PARSER_BIG_BUFFER_SIZE is the initial size of the translation buffer
 * allocated by xmlStringDecodeEntities(); XML_PARSER_BUFFER_SIZE is the
 * headroom that function keeps free before it grows the buffer.
 */
#define XML_PARSER_BIG_BUFFER_SIZE 1000
#define XML_PARSER_BUFFER_SIZE 100

/*
 * Various global defaults for parsing
 */
int xmlGetWarningsDefaultValue = 1;
/* When non-zero, entity input push/pop and reference decoding are traced
 * on stderr (see xmlPopInput, xmlPushInput, xmlStringDecodeEntities). */
int xmlParserDebugEntities = 0;
int xmlSubstituteEntitiesDefaultValue = 0;
int xmlDoValidityCheckingDefaultValue = 0;
int xmlPedanticParserDefaultValue = 0;
int xmlKeepBlanksDefaultValue = 1;

/*
 * List of XML prefixed PI allowed by W3C specs
 * The array is terminated by a NULL entry.
 */

const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
1.91 daniel 94:
1.229 veillard 95: /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
1.151 daniel 96: void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
97: xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
98: const xmlChar **str);
1.91 daniel 99:
100:
1.45 daniel 101: /************************************************************************
102: * *
103: * Parser stacks related functions and macros *
104: * *
105: ************************************************************************/
1.79 daniel 106:
1.135 daniel 107: xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
108: const xmlChar ** str);
1.79 daniel 109:
1.1 veillard 110: /*
1.40 daniel 111: * Generic function for accessing stacks in the Parser Context
1.1 veillard 112: */
113:
1.140 daniel 114: #define PUSH_AND_POP(scope, type, name) \
115: scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
1.31 daniel 116: if (ctxt->name##Nr >= ctxt->name##Max) { \
117: ctxt->name##Max *= 2; \
1.204 veillard 118: ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
1.40 daniel 119: ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
120: if (ctxt->name##Tab == NULL) { \
1.31 daniel 121: fprintf(stderr, "realloc failed !\n"); \
1.145 daniel 122: return(0); \
1.31 daniel 123: } \
124: } \
1.40 daniel 125: ctxt->name##Tab[ctxt->name##Nr] = value; \
126: ctxt->name = value; \
127: return(ctxt->name##Nr++); \
1.31 daniel 128: } \
1.140 daniel 129: scope type name##Pop(xmlParserCtxtPtr ctxt) { \
1.69 daniel 130: type ret; \
1.40 daniel 131: if (ctxt->name##Nr <= 0) return(0); \
132: ctxt->name##Nr--; \
1.50 daniel 133: if (ctxt->name##Nr > 0) \
134: ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
135: else \
136: ctxt->name = NULL; \
1.69 daniel 137: ret = ctxt->name##Tab[ctxt->name##Nr]; \
138: ctxt->name##Tab[ctxt->name##Nr] = 0; \
139: return(ret); \
1.31 daniel 140: } \
141:
1.229 veillard 142: /*
143: * Those macros actually generate the functions
144: */
1.140 daniel 145: PUSH_AND_POP(extern, xmlParserInputPtr, input)
146: PUSH_AND_POP(extern, xmlNodePtr, node)
147: PUSH_AND_POP(extern, xmlChar*, name)
1.40 daniel 148:
1.176 daniel 149: int spacePush(xmlParserCtxtPtr ctxt, int val) {
150: if (ctxt->spaceNr >= ctxt->spaceMax) {
151: ctxt->spaceMax *= 2;
1.204 veillard 152: ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1.176 daniel 153: ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
154: if (ctxt->spaceTab == NULL) {
155: fprintf(stderr, "realloc failed !\n");
156: return(0);
157: }
158: }
159: ctxt->spaceTab[ctxt->spaceNr] = val;
160: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
161: return(ctxt->spaceNr++);
162: }
163:
164: int spacePop(xmlParserCtxtPtr ctxt) {
165: int ret;
166: if (ctxt->spaceNr <= 0) return(0);
167: ctxt->spaceNr--;
168: if (ctxt->spaceNr > 0)
169: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
170: else
171: ctxt->space = NULL;
172: ret = ctxt->spaceTab[ctxt->spaceNr];
173: ctxt->spaceTab[ctxt->spaceNr] = -1;
174: return(ret);
175: }
176:
1.55 daniel 177: /*
178: * Macros for accessing the content. Those should be used only by the parser,
179: * and not exported.
180: *
1.229 veillard 181: * Dirty macros, i.e. one often need to make assumption on the context to
182: * use them
1.55 daniel 183: *
1.123 daniel 184: * CUR_PTR return the current pointer to the xmlChar to be parsed.
1.151 daniel 185: * To be used with extreme caution since operations consuming
186: * characters may move the input buffer to a different location !
1.123 daniel 187: * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1.151 daniel 188: * This should be used internally by the parser
1.55 daniel 189: * only to compare to ASCII values otherwise it would break when
190: * running with UTF-8 encoding.
1.229 veillard 191: * RAW same as CUR but in the input buffer, bypass any token
192: * extraction that may have been done
1.123 daniel 193: * NXT(n) returns the n'th next xmlChar. Same as CUR it should be used only
1.55 daniel 194: * to compare on ASCII based substring.
1.123 daniel 195: * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1.55 daniel 196: * strings within the parser.
197: *
1.77 daniel 198: * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1.55 daniel 199: *
200: * NEXT Skip to the next character, this does the proper decoding
201: * in UTF-8 mode. It also pop-up unfinished entities on the fly.
1.229 veillard 202: * NEXTL(l) Skip l xmlChars in the input buffer
203: * CUR_CHAR(l) returns the current unicode character (int), set l
204: * to the number of xmlChars used for the encoding [0-5].
205: * CUR_SCHAR same but operate on a string instead of the context
206: * COPY_BUF copy the current unicode char to the target buffer, increment
207: * the index
208: * GROW, SHRINK handling of input buffers
1.55 daniel 209: */
1.45 daniel 210:
/*
 * All of the macros below implicitly use a local variable named `ctxt'.
 *
 * RAW evaluates to -1 while ctxt->token is set, CUR evaluates to that
 * pending token instead; both read the byte under ctxt->input->cur when no
 * token is pending. NXT and CUR_PTR always address the input buffer
 * directly and ignore ctxt->token.
 */
#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * SKIP, SHRINK and GROW expand to SEVERAL statements and are not wrapped
 * in a do { } while (0): used as the body of an unbraced if/else/loop only
 * their first statement would be controlled. The final statement has no
 * trailing ';' so the caller's own ';' terminates it.
 *
 * SKIP advances nbChars and the cursor by val, hands a '%' found at the
 * new position to xmlParserHandlePEReference(), and pops the input when
 * the cursor sits on a 0 byte and no more data can be read.
 */
#define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val);	\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    /* DEPR if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); */\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt)

/* Shrink the current input, then pop it if it turns out to be exhausted. */
#define SHRINK xmlParserInputShrink(ctxt->input);			\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt)

/* Grow the current input, then pop it if it turns out to be exhausted. */
#define GROW xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt)

/* NOTE: the expansions of SKIP_BLANKS and NEXT already end with ';'. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt);

#define NEXT xmlNextChar(ctxt);

/*
 * NEXTL updates line/col from the byte currently under the cursor, clears
 * any pending token, advances the cursor by l and hands a '%' found at the
 * new position to xmlParserHandlePEReference(). Multi-statement, see the
 * warning on SKIP above.
 */
#define NEXTL(l)							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->token = 0; ctxt->input->cur += l;				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    /* DEPR if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); */

/*
 * l must be an lvalue: its address is passed to the helper.
 * NOTE: both expansions already end with ';'.
 */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l);
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l);

/*
 * Append value v to buffer b at index i: stored directly as one xmlChar
 * when l == 1, otherwise through xmlCopyChar() whose return value is added
 * to i. Expands to a complete if/else statement ending with ';'. No bounds
 * check is done here, the caller must guarantee there is room in b.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyChar(l,&b[i],v);
1.151 daniel 251:
/**
 * xmlSkipBlankChars:
 * @ctxt:  the XML parser context
 *
 * skip all blanks character found at that point in the input streams.
 * It pops up finished entities in the process if allowable at that point:
 * an exhausted input is popped only while more than one input is stacked
 * and the parser is not inside a comment.
 *
 * Returns the number of space chars skipped
 */

int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
    int cur, res = 0;

    /*
     * It's Okay to use CUR/NEXT here since all the blanks are on
     * the ASCII range.
     */
    do {
	cur = CUR;
	/* consume the run of blanks, counting each one */
	while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
	    NEXT;
	    cur = CUR;
	    res++;
	}
	/* end of a nested input reached: go back to the enclosing one(s) */
	while ((cur == 0) && (ctxt->inputNr > 1) &&
	       (ctxt->instate != XML_PARSER_COMMENT)) {
	    xmlPopInput(ctxt);
	    cur = CUR;
	}
	/*
	 * Need to handle support of entities branching here
	 */
	if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
	/* DEPR if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); */
	/* NOTE: cur is not re-read after the PE reference handling above */
    } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
    return(res);
}
290:
1.97 daniel 291: /************************************************************************
292: * *
1.229 veillard 293: * Commodity functions to handle entities *
1.97 daniel 294: * *
295: ************************************************************************/
1.40 daniel 296:
1.50 daniel 297: /**
298: * xmlPopInput:
299: * @ctxt: an XML parser context
300: *
1.40 daniel 301: * xmlPopInput: the current input pointed by ctxt->input came to an end
302: * pop it and return the next char.
1.45 daniel 303: *
1.123 daniel 304: * Returns the current xmlChar in the parser context
1.40 daniel 305: */
1.123 daniel 306: xmlChar
1.55 daniel 307: xmlPopInput(xmlParserCtxtPtr ctxt) {
1.40 daniel 308: if (ctxt->inputNr == 1) return(0); /* End of main Input */
1.220 veillard 309: if (xmlParserDebugEntities)
310: fprintf(stderr, "Popping input %d\n", ctxt->inputNr);
1.69 daniel 311: xmlFreeInputStream(inputPop(ctxt));
1.97 daniel 312: if ((*ctxt->input->cur == 0) &&
313: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
314: return(xmlPopInput(ctxt));
1.40 daniel 315: return(CUR);
316: }
317:
1.50 daniel 318: /**
1.229 veillard 319: * xmlPushInput:
1.174 daniel 320: * @ctxt: an XML parser context
1.229 veillard 321: * @input: an XML parser input fragment (entity, XML fragment ...).
1.174 daniel 322: *
1.229 veillard 323: * xmlPushInput: switch to a new input stream which is stacked on top
324: * of the previous one(s).
1.174 daniel 325: */
1.229 veillard 326: void
327: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
328: if (input == NULL) return;
1.174 daniel 329:
1.229 veillard 330: if (xmlParserDebugEntities) {
331: if ((ctxt->input != NULL) && (ctxt->input->filename))
332: fprintf(stderr, "%s(%d): ", ctxt->input->filename,
333: ctxt->input->line);
334: fprintf(stderr, "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
335: }
336: inputPush(ctxt, input);
337: GROW;
1.174 daniel 338: }
1.97 daniel 339:
340: /**
341: * xmlParseCharRef:
342: * @ctxt: an XML parser context
343: *
344: * parse Reference declarations
345: *
346: * [66] CharRef ::= '&#' [0-9]+ ';' |
347: * '&#x' [0-9a-fA-F]+ ';'
348: *
1.98 daniel 349: * [ WFC: Legal Character ]
350: * Characters referred to using character references must match the
351: * production for Char.
352: *
1.135 daniel 353: * Returns the value parsed (as an int), 0 in case of error
1.77 daniel 354: */
1.97 daniel 355: int
356: xmlParseCharRef(xmlParserCtxtPtr ctxt) {
357: int val = 0;
1.222 veillard 358: int count = 0;
1.97 daniel 359:
1.111 daniel 360: if (ctxt->token != 0) {
361: val = ctxt->token;
362: ctxt->token = 0;
363: return(val);
364: }
1.222 veillard 365: /*
366: * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
367: */
1.152 daniel 368: if ((RAW == '&') && (NXT(1) == '#') &&
1.97 daniel 369: (NXT(2) == 'x')) {
370: SKIP(3);
1.222 veillard 371: GROW;
372: while (RAW != ';') { /* loop blocked by count */
373: if ((RAW >= '0') && (RAW <= '9') && (count < 20))
1.97 daniel 374: val = val * 16 + (CUR - '0');
1.222 veillard 375: else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1.97 daniel 376: val = val * 16 + (CUR - 'a') + 10;
1.222 veillard 377: else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1.97 daniel 378: val = val * 16 + (CUR - 'A') + 10;
379: else {
1.123 daniel 380: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1.97 daniel 381: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
382: ctxt->sax->error(ctxt->userData,
383: "xmlParseCharRef: invalid hexadecimal value\n");
384: ctxt->wellFormed = 0;
1.180 daniel 385: ctxt->disableSAX = 1;
1.97 daniel 386: val = 0;
387: break;
388: }
389: NEXT;
1.222 veillard 390: count++;
1.97 daniel 391: }
1.164 daniel 392: if (RAW == ';') {
393: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
394: ctxt->nbChars ++;
395: ctxt->input->cur++;
396: }
1.152 daniel 397: } else if ((RAW == '&') && (NXT(1) == '#')) {
1.97 daniel 398: SKIP(2);
1.222 veillard 399: GROW;
400: while (RAW != ';') { /* loop blocked by count */
401: if ((RAW >= '0') && (RAW <= '9') && (count < 20))
1.97 daniel 402: val = val * 10 + (CUR - '0');
403: else {
1.123 daniel 404: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1.97 daniel 405: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
406: ctxt->sax->error(ctxt->userData,
407: "xmlParseCharRef: invalid decimal value\n");
408: ctxt->wellFormed = 0;
1.180 daniel 409: ctxt->disableSAX = 1;
1.97 daniel 410: val = 0;
411: break;
412: }
413: NEXT;
1.222 veillard 414: count++;
1.97 daniel 415: }
1.164 daniel 416: if (RAW == ';') {
417: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
418: ctxt->nbChars ++;
419: ctxt->input->cur++;
420: }
1.97 daniel 421: } else {
1.123 daniel 422: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1.97 daniel 423: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.229 veillard 424: ctxt->sax->error(ctxt->userData,
425: "xmlParseCharRef: invalid value\n");
1.97 daniel 426: ctxt->wellFormed = 0;
1.180 daniel 427: ctxt->disableSAX = 1;
1.97 daniel 428: }
1.229 veillard 429:
430: /*
431: * [ WFC: Legal Character ]
432: * Characters referred to using character references must match the
433: * production for Char.
434: */
435: if (IS_CHAR(val)) {
436: return(val);
437: } else {
438: ctxt->errNo = XML_ERR_INVALID_CHAR;
1.97 daniel 439: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.229 veillard 440: ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
441: val);
1.97 daniel 442: ctxt->wellFormed = 0;
1.180 daniel 443: ctxt->disableSAX = 1;
1.97 daniel 444: }
1.229 veillard 445: return(0);
446: }
447:
448: /**
449: * xmlParseStringCharRef:
450: * @ctxt: an XML parser context
451: * @str: a pointer to an index in the string
452: *
453: * parse Reference declarations, variant parsing from a string rather
454: * than an an input flow.
455: *
456: * [66] CharRef ::= '&#' [0-9]+ ';' |
457: * '&#x' [0-9a-fA-F]+ ';'
458: *
459: * [ WFC: Legal Character ]
460: * Characters referred to using character references must match the
461: * production for Char.
462: *
463: * Returns the value parsed (as an int), 0 in case of error, str will be
464: * updated to the current value of the index
465: */
466: int
467: xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
468: const xmlChar *ptr;
469: xmlChar cur;
470: int val = 0;
1.98 daniel 471:
1.229 veillard 472: if ((str == NULL) || (*str == NULL)) return(0);
473: ptr = *str;
474: cur = *ptr;
475: if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
476: ptr += 3;
477: cur = *ptr;
478: while (cur != ';') { /* Non input consuming loop */
479: if ((cur >= '0') && (cur <= '9'))
480: val = val * 16 + (cur - '0');
481: else if ((cur >= 'a') && (cur <= 'f'))
482: val = val * 16 + (cur - 'a') + 10;
483: else if ((cur >= 'A') && (cur <= 'F'))
484: val = val * 16 + (cur - 'A') + 10;
485: else {
486: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
487: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
488: ctxt->sax->error(ctxt->userData,
489: "xmlParseStringCharRef: invalid hexadecimal value\n");
490: ctxt->wellFormed = 0;
491: ctxt->disableSAX = 1;
492: val = 0;
493: break;
494: }
495: ptr++;
496: cur = *ptr;
497: }
498: if (cur == ';')
499: ptr++;
500: } else if ((cur == '&') && (ptr[1] == '#')){
501: ptr += 2;
502: cur = *ptr;
503: while (cur != ';') { /* Non input consuming loops */
504: if ((cur >= '0') && (cur <= '9'))
505: val = val * 10 + (cur - '0');
506: else {
507: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
508: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
509: ctxt->sax->error(ctxt->userData,
510: "xmlParseStringCharRef: invalid decimal value\n");
511: ctxt->wellFormed = 0;
512: ctxt->disableSAX = 1;
513: val = 0;
514: break;
515: }
516: ptr++;
517: cur = *ptr;
518: }
519: if (cur == ';')
520: ptr++;
521: } else {
522: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1.97 daniel 523: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.229 veillard 524: ctxt->sax->error(ctxt->userData,
525: "xmlParseCharRef: invalid value\n");
1.97 daniel 526: ctxt->wellFormed = 0;
1.180 daniel 527: ctxt->disableSAX = 1;
1.229 veillard 528: return(0);
1.97 daniel 529: }
1.229 veillard 530: *str = ptr;
1.98 daniel 531:
532: /*
1.229 veillard 533: * [ WFC: Legal Character ]
534: * Characters referred to using character references must match the
535: * production for Char.
1.98 daniel 536: */
1.229 veillard 537: if (IS_CHAR(val)) {
538: return(val);
539: } else {
540: ctxt->errNo = XML_ERR_INVALID_CHAR;
1.98 daniel 541: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.229 veillard 542: ctxt->sax->error(ctxt->userData,
543: "CharRef: invalid xmlChar value %d\n", val);
1.98 daniel 544: ctxt->wellFormed = 0;
1.180 daniel 545: ctxt->disableSAX = 1;
1.98 daniel 546: }
1.229 veillard 547: return(0);
1.96 daniel 548: }
549:
550: /**
551: * xmlParserHandlePEReference:
552: * @ctxt: the parser context
553: *
554: * [69] PEReference ::= '%' Name ';'
555: *
1.98 daniel 556: * [ WFC: No Recursion ]
1.229 veillard 557: * A parsed entity must not contain a recursive
1.98 daniel 558: * reference to itself, either directly or indirectly.
559: *
560: * [ WFC: Entity Declared ]
561: * In a document without any DTD, a document with only an internal DTD
562: * subset which contains no parameter entity references, or a document
563: * with "standalone='yes'", ... ... The declaration of a parameter
564: * entity must precede any reference to it...
565: *
566: * [ VC: Entity Declared ]
567: * In a document with an external subset or external parameter entities
568: * with "standalone='no'", ... ... The declaration of a parameter entity
569: * must precede any reference to it...
570: *
571: * [ WFC: In DTD ]
572: * Parameter-entity references may only appear in the DTD.
573: * NOTE: misleading but this is handled.
574: *
575: * A PEReference may have been detected in the current input stream
1.96 daniel 576: * the handling is done accordingly to
577: * http://www.w3.org/TR/REC-xml#entproc
578: * i.e.
579: * - Included in literal in entity values
580: * - Included as Paraemeter Entity reference within DTDs
581: */
582: void
583: xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 584: xmlChar *name;
1.96 daniel 585: xmlEntityPtr entity = NULL;
586: xmlParserInputPtr input;
587:
1.126 daniel 588: if (ctxt->token != 0) {
589: return;
590: }
1.152 daniel 591: if (RAW != '%') return;
1.96 daniel 592: switch(ctxt->instate) {
1.109 daniel 593: case XML_PARSER_CDATA_SECTION:
594: return;
1.97 daniel 595: case XML_PARSER_COMMENT:
596: return;
1.140 daniel 597: case XML_PARSER_START_TAG:
598: return;
599: case XML_PARSER_END_TAG:
600: return;
1.96 daniel 601: case XML_PARSER_EOF:
1.123 daniel 602: ctxt->errNo = XML_ERR_PEREF_AT_EOF;
1.96 daniel 603: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
604: ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
605: ctxt->wellFormed = 0;
1.180 daniel 606: ctxt->disableSAX = 1;
1.96 daniel 607: return;
608: case XML_PARSER_PROLOG:
1.140 daniel 609: case XML_PARSER_START:
610: case XML_PARSER_MISC:
1.123 daniel 611: ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
1.96 daniel 612: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
613: ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
614: ctxt->wellFormed = 0;
1.180 daniel 615: ctxt->disableSAX = 1;
1.96 daniel 616: return;
1.97 daniel 617: case XML_PARSER_ENTITY_DECL:
1.96 daniel 618: case XML_PARSER_CONTENT:
619: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 620: case XML_PARSER_PI:
1.168 daniel 621: case XML_PARSER_SYSTEM_LITERAL:
1.96 daniel 622: /* we just ignore it there */
623: return;
624: case XML_PARSER_EPILOG:
1.123 daniel 625: ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
1.96 daniel 626: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.97 daniel 627: ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
1.96 daniel 628: ctxt->wellFormed = 0;
1.180 daniel 629: ctxt->disableSAX = 1;
1.96 daniel 630: return;
1.97 daniel 631: case XML_PARSER_ENTITY_VALUE:
632: /*
633: * NOTE: in the case of entity values, we don't do the
1.127 daniel 634: * substitution here since we need the literal
1.97 daniel 635: * entity value to be able to save the internal
636: * subset of the document.
1.222 veillard 637: * This will be handled by xmlStringDecodeEntities
1.97 daniel 638: */
639: return;
1.96 daniel 640: case XML_PARSER_DTD:
1.98 daniel 641: /*
642: * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
643: * In the internal DTD subset, parameter-entity references
644: * can occur only where markup declarations can occur, not
645: * within markup declarations.
646: * In that case this is handled in xmlParseMarkupDecl
647: */
648: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
649: return;
1.96 daniel 650: }
651:
652: NEXT;
653: name = xmlParseName(ctxt);
1.220 veillard 654: if (xmlParserDebugEntities)
655: fprintf(stderr, "PE Reference: %s\n", name);
1.96 daniel 656: if (name == NULL) {
1.123 daniel 657: ctxt->errNo = XML_ERR_PEREF_NO_NAME;
1.96 daniel 658: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
659: ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
660: ctxt->wellFormed = 0;
1.180 daniel 661: ctxt->disableSAX = 1;
1.96 daniel 662: } else {
1.152 daniel 663: if (RAW == ';') {
1.96 daniel 664: NEXT;
1.98 daniel 665: if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
666: entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.96 daniel 667: if (entity == NULL) {
1.98 daniel 668:
669: /*
670: * [ WFC: Entity Declared ]
671: * In a document without any DTD, a document with only an
672: * internal DTD subset which contains no parameter entity
673: * references, or a document with "standalone='yes'", ...
674: * ... The declaration of a parameter entity must precede
675: * any reference to it...
676: */
677: if ((ctxt->standalone == 1) ||
678: ((ctxt->hasExternalSubset == 0) &&
679: (ctxt->hasPErefs == 0))) {
680: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
681: ctxt->sax->error(ctxt->userData,
682: "PEReference: %%%s; not found\n", name);
683: ctxt->wellFormed = 0;
1.180 daniel 684: ctxt->disableSAX = 1;
1.98 daniel 685: } else {
686: /*
687: * [ VC: Entity Declared ]
688: * In a document with an external subset or external
689: * parameter entities with "standalone='no'", ...
690: * ... The declaration of a parameter entity must precede
691: * any reference to it...
692: */
1.220 veillard 693: if ((!ctxt->disableSAX) &&
694: (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
1.212 veillard 695: ctxt->vctxt.error(ctxt->vctxt.userData,
696: "PEReference: %%%s; not found\n", name);
1.220 veillard 697: } else if ((!ctxt->disableSAX) &&
698: (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1.98 daniel 699: ctxt->sax->warning(ctxt->userData,
700: "PEReference: %%%s; not found\n", name);
701: ctxt->valid = 0;
702: }
1.96 daniel 703: } else {
1.159 daniel 704: if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
705: (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
1.96 daniel 706: /*
1.229 veillard 707: * handle the extra spaces added before and after
1.96 daniel 708: * c.f. http://www.w3.org/TR/REC-xml#as-PE
1.229 veillard 709: * this is done independantly.
1.96 daniel 710: */
711: input = xmlNewEntityInputStream(ctxt, entity);
712: xmlPushInput(ctxt, input);
1.164 daniel 713: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
714: (RAW == '<') && (NXT(1) == '?') &&
715: (NXT(2) == 'x') && (NXT(3) == 'm') &&
716: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 717: xmlParseTextDecl(ctxt);
1.164 daniel 718: }
719: if (ctxt->token == 0)
720: ctxt->token = ' ';
1.96 daniel 721: } else {
722: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
723: ctxt->sax->error(ctxt->userData,
724: "xmlHandlePEReference: %s is not a parameter entity\n",
725: name);
726: ctxt->wellFormed = 0;
1.180 daniel 727: ctxt->disableSAX = 1;
1.96 daniel 728: }
729: }
730: } else {
1.123 daniel 731: ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
1.96 daniel 732: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
733: ctxt->sax->error(ctxt->userData,
734: "xmlHandlePEReference: expecting ';'\n");
735: ctxt->wellFormed = 0;
1.180 daniel 736: ctxt->disableSAX = 1;
1.96 daniel 737: }
1.119 daniel 738: xmlFree(name);
1.97 daniel 739: }
740: }
741:
/*
 * Macro used to grow the current buffer.
 *
 * Expects an xmlChar * variable `buffer' together with an integer variable
 * named `<buffer>_size'; the size is doubled and the buffer reallocated
 * accordingly. It must be used inside a function returning a pointer: on
 * allocation failure the enclosing function returns NULL. The old buffer
 * is released before that return, since nothing else will be able to reach
 * it afterwards, and `buffer' is only reassigned once xmlRealloc()
 * succeeded.
 */
#define growBuffer(buffer) {						\
    xmlChar *growBuffer_tmp;						\
    buffer##_size *= 2;							\
    growBuffer_tmp = (xmlChar *)					\
    		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (growBuffer_tmp == NULL) {					\
	perror("realloc failed");					\
	xmlFree(buffer);						\
	return(NULL);							\
    }									\
    buffer = growBuffer_tmp;						\
}
1.77 daniel 754:
755: /**
1.135 daniel 756: * xmlStringDecodeEntities:
757: * @ctxt: the parser context
758: * @str: the input string
759: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
760: * @end: an end marker xmlChar, 0 if none
761: * @end2: an end marker xmlChar, 0 if none
762: * @end3: an end marker xmlChar, 0 if none
763: *
1.222 veillard 764: * Takes a entity string content and process to do the adequate subtitutions.
765: *
1.135 daniel 766: * [67] Reference ::= EntityRef | CharRef
767: *
768: * [69] PEReference ::= '%' Name ';'
769: *
770: * Returns A newly allocated string with the substitution done. The caller
771: * must deallocate it !
772: */
773: xmlChar *
774: xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
775: xmlChar end, xmlChar end2, xmlChar end3) {
776: xmlChar *buffer = NULL;
777: int buffer_size = 0;
778:
779: xmlChar *current = NULL;
780: xmlEntityPtr ent;
1.176 daniel 781: int c,l;
782: int nbchars = 0;
1.135 daniel 783:
1.211 veillard 784: if (str == NULL)
785: return(NULL);
786:
1.185 daniel 787: if (ctxt->depth > 40) {
1.230 veillard 788: ctxt->errNo = XML_ERR_ENTITY_LOOP;
1.185 daniel 789: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
790: ctxt->sax->error(ctxt->userData,
791: "Detected entity reference loop\n");
792: ctxt->wellFormed = 0;
793: ctxt->disableSAX = 1;
794: return(NULL);
795: }
796:
1.135 daniel 797: /*
798: * allocate a translation buffer.
799: */
1.140 daniel 800: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.135 daniel 801: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
802: if (buffer == NULL) {
803: perror("xmlDecodeEntities: malloc failed");
804: return(NULL);
805: }
806:
807: /*
808: * Ok loop until we reach one of the ending char or a size limit.
1.222 veillard 809: * we are operating on already parsed values.
1.135 daniel 810: */
1.176 daniel 811: c = CUR_SCHAR(str, l);
1.222 veillard 812: while ((c != 0) && (c != end) && /* non input consuming loop */
813: (c != end2) && (c != end3)) {
1.135 daniel 814:
1.176 daniel 815: if (c == 0) break;
816: if ((c == '&') && (str[1] == '#')) {
1.135 daniel 817: int val = xmlParseStringCharRef(ctxt, &str);
1.176 daniel 818: if (val != 0) {
819: COPY_BUF(0,buffer,nbchars,val);
820: }
821: } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1.220 veillard 822: if (xmlParserDebugEntities)
823: fprintf(stderr, "String decoding Entity Reference: %.30s\n",
824: str);
1.135 daniel 825: ent = xmlParseStringEntityRef(ctxt, &str);
1.222 veillard 826: if ((ent != NULL) &&
827: (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1.219 veillard 828: if (ent->content != NULL) {
829: COPY_BUF(0,buffer,nbchars,ent->content[0]);
830: } else {
831: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
832: ctxt->sax->error(ctxt->userData,
833: "internal error entity has no content\n");
834: }
835: } else if ((ent != NULL) && (ent->content != NULL)) {
1.185 daniel 836: xmlChar *rep;
837:
838: ctxt->depth++;
839: rep = xmlStringDecodeEntities(ctxt, ent->content, what,
840: 0, 0, 0);
841: ctxt->depth--;
842: if (rep != NULL) {
843: current = rep;
1.222 veillard 844: while (*current != 0) { /* non input consuming loop */
1.185 daniel 845: buffer[nbchars++] = *current++;
846: if (nbchars >
847: buffer_size - XML_PARSER_BUFFER_SIZE) {
848: growBuffer(buffer);
849: }
1.135 daniel 850: }
1.185 daniel 851: xmlFree(rep);
1.135 daniel 852: }
853: } else if (ent != NULL) {
854: int i = xmlStrlen(ent->name);
855: const xmlChar *cur = ent->name;
856:
1.176 daniel 857: buffer[nbchars++] = '&';
858: if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 859: growBuffer(buffer);
860: }
861: for (;i > 0;i--)
1.176 daniel 862: buffer[nbchars++] = *cur++;
863: buffer[nbchars++] = ';';
1.135 daniel 864: }
1.176 daniel 865: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1.220 veillard 866: if (xmlParserDebugEntities)
867: fprintf(stderr, "String decoding PE Reference: %.30s\n", str);
1.135 daniel 868: ent = xmlParseStringPEReference(ctxt, &str);
869: if (ent != NULL) {
1.185 daniel 870: xmlChar *rep;
871:
872: ctxt->depth++;
873: rep = xmlStringDecodeEntities(ctxt, ent->content, what,
874: 0, 0, 0);
875: ctxt->depth--;
876: if (rep != NULL) {
877: current = rep;
1.222 veillard 878: while (*current != 0) { /* non input consuming loop */
1.185 daniel 879: buffer[nbchars++] = *current++;
880: if (nbchars >
881: buffer_size - XML_PARSER_BUFFER_SIZE) {
882: growBuffer(buffer);
883: }
1.135 daniel 884: }
1.185 daniel 885: xmlFree(rep);
1.135 daniel 886: }
887: }
888: } else {
1.176 daniel 889: COPY_BUF(l,buffer,nbchars,c);
890: str += l;
891: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 892: growBuffer(buffer);
893: }
894: }
1.176 daniel 895: c = CUR_SCHAR(str, l);
1.135 daniel 896: }
1.229 veillard 897: buffer[nbchars++] = 0;
898: return(buffer);
1.172 daniel 899: }
900:
1.229 veillard 901:
902: /************************************************************************
903: * *
1.123 daniel 904: * Commodity functions to handle xmlChars *
1.28 daniel 905: * *
906: ************************************************************************/
907:
1.50 daniel 908: /**
909: * xmlStrndup:
1.123 daniel 910: * @cur: the input xmlChar *
1.50 daniel 911: * @len: the len of @cur
912: *
1.123 daniel 913: * a strndup for array of xmlChar's
1.68 daniel 914: *
1.123 daniel 915: * Returns a new xmlChar * or NULL
1.1 veillard 916: */
1.123 daniel 917: xmlChar *
918: xmlStrndup(const xmlChar *cur, int len) {
1.135 daniel 919: xmlChar *ret;
920:
921: if ((cur == NULL) || (len < 0)) return(NULL);
1.204 veillard 922: ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1.1 veillard 923: if (ret == NULL) {
1.86 daniel 924: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 925: (len + 1) * (long)sizeof(xmlChar));
1.1 veillard 926: return(NULL);
927: }
1.123 daniel 928: memcpy(ret, cur, len * sizeof(xmlChar));
1.1 veillard 929: ret[len] = 0;
930: return(ret);
931: }
932:
1.50 daniel 933: /**
934: * xmlStrdup:
1.123 daniel 935: * @cur: the input xmlChar *
1.50 daniel 936: *
1.152 daniel 937: * a strdup for array of xmlChar's. Since they are supposed to be
938: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
939: * a termination mark of '0'.
1.68 daniel 940: *
1.123 daniel 941: * Returns a new xmlChar * or NULL
1.1 veillard 942: */
1.123 daniel 943: xmlChar *
944: xmlStrdup(const xmlChar *cur) {
945: const xmlChar *p = cur;
1.1 veillard 946:
1.135 daniel 947: if (cur == NULL) return(NULL);
1.222 veillard 948: while (*p != 0) p++; /* non input consuming */
1.1 veillard 949: return(xmlStrndup(cur, p - cur));
950: }
951:
1.50 daniel 952: /**
953: * xmlCharStrndup:
954: * @cur: the input char *
955: * @len: the len of @cur
956: *
1.123 daniel 957: * a strndup for char's to xmlChar's
1.68 daniel 958: *
1.123 daniel 959: * Returns a new xmlChar * or NULL
1.45 daniel 960: */
961:
1.123 daniel 962: xmlChar *
1.55 daniel 963: xmlCharStrndup(const char *cur, int len) {
1.45 daniel 964: int i;
1.135 daniel 965: xmlChar *ret;
966:
967: if ((cur == NULL) || (len < 0)) return(NULL);
1.204 veillard 968: ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1.45 daniel 969: if (ret == NULL) {
1.86 daniel 970: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 971: (len + 1) * (long)sizeof(xmlChar));
1.45 daniel 972: return(NULL);
973: }
974: for (i = 0;i < len;i++)
1.123 daniel 975: ret[i] = (xmlChar) cur[i];
1.45 daniel 976: ret[len] = 0;
977: return(ret);
978: }
979:
1.50 daniel 980: /**
981: * xmlCharStrdup:
982: * @cur: the input char *
983: * @len: the len of @cur
984: *
1.123 daniel 985: * a strdup for char's to xmlChar's
1.68 daniel 986: *
1.123 daniel 987: * Returns a new xmlChar * or NULL
1.45 daniel 988: */
989:
1.123 daniel 990: xmlChar *
1.55 daniel 991: xmlCharStrdup(const char *cur) {
1.45 daniel 992: const char *p = cur;
993:
1.135 daniel 994: if (cur == NULL) return(NULL);
1.222 veillard 995: while (*p != '\0') p++; /* non input consuming */
1.45 daniel 996: return(xmlCharStrndup(cur, p - cur));
997: }
998:
1.50 daniel 999: /**
1000: * xmlStrcmp:
1.123 daniel 1001: * @str1: the first xmlChar *
1002: * @str2: the second xmlChar *
1.50 daniel 1003: *
1.123 daniel 1004: * a strcmp for xmlChar's
1.68 daniel 1005: *
1006: * Returns the integer result of the comparison
1.14 veillard 1007: */
1008:
1.55 daniel 1009: int
1.123 daniel 1010: xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1.14 veillard 1011: register int tmp;
1012:
1.229 veillard 1013: if (str1 == str2) return(0);
1.135 daniel 1014: if (str1 == NULL) return(-1);
1015: if (str2 == NULL) return(1);
1.14 veillard 1016: do {
1.232 veillard 1017: tmp = *str1++ - *str2;
1.14 veillard 1018: if (tmp != 0) return(tmp);
1.232 veillard 1019: } while (*str2++ != 0);
1020: return 0;
1.14 veillard 1021: }
1022:
1.50 daniel 1023: /**
1.236 veillard 1024: * xmlStrEqual:
1025: * @str1: the first xmlChar *
1026: * @str2: the second xmlChar *
1027: *
1028: * Check if both string are equal of have same content
1029: * Should be a bit more readable and faster than xmlStrEqual()
1030: *
1031: * Returns 1 if they are equal, 0 if they are different
1032: */
1033:
1034: int
1035: xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1036: if (str1 == str2) return(1);
1037: if (str1 == NULL) return(0);
1038: if (str2 == NULL) return(0);
1039: do {
1040: if (*str1++ != *str2) return(0);
1041: } while (*str2++);
1042: return(1);
1043: }
1044:
1045: /**
1.50 daniel 1046: * xmlStrncmp:
1.123 daniel 1047: * @str1: the first xmlChar *
1048: * @str2: the second xmlChar *
1.50 daniel 1049: * @len: the max comparison length
1050: *
1.123 daniel 1051: * a strncmp for xmlChar's
1.68 daniel 1052: *
1053: * Returns the integer result of the comparison
1.14 veillard 1054: */
1055:
1.55 daniel 1056: int
1.123 daniel 1057: xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1.14 veillard 1058: register int tmp;
1059:
1060: if (len <= 0) return(0);
1.232 veillard 1061: if (str1 == str2) return(0);
1.135 daniel 1062: if (str1 == NULL) return(-1);
1063: if (str2 == NULL) return(1);
1.14 veillard 1064: do {
1.232 veillard 1065: tmp = *str1++ - *str2;
1066: if (tmp != 0 || --len == 0) return(tmp);
1067: } while (*str2++ != 0);
1068: return 0;
1069: }
1070:
1071: static xmlChar casemap[256] = {
1072: 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1073: 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1074: 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1075: 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1076: 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1077: 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1078: 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1079: 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1080: 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1081: 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1082: 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1083: 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1084: 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1085: 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1086: 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1087: 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1088: 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1089: 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1090: 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1091: 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1092: 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1093: 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1094: 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1095: 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1096: 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1097: 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1098: 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1099: 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1100: 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1101: 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1102: 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1103: 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1104: };
1105:
1106: /**
1107: * xmlStrcasecmp:
1108: * @str1: the first xmlChar *
1109: * @str2: the second xmlChar *
1110: *
1111: * a strcasecmp for xmlChar's
1112: *
1113: * Returns the integer result of the comparison
1114: */
1115:
1116: int
1117: xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1118: register int tmp;
1119:
1120: if (str1 == str2) return(0);
1121: if (str1 == NULL) return(-1);
1122: if (str2 == NULL) return(1);
1123: do {
1124: tmp = casemap[*str1++] - casemap[*str2];
1.14 veillard 1125: if (tmp != 0) return(tmp);
1.232 veillard 1126: } while (*str2++ != 0);
1127: return 0;
1128: }
1129:
1130: /**
1131: * xmlStrncasecmp:
1132: * @str1: the first xmlChar *
1133: * @str2: the second xmlChar *
1134: * @len: the max comparison length
1135: *
1136: * a strncasecmp for xmlChar's
1137: *
1138: * Returns the integer result of the comparison
1139: */
1140:
1141: int
1142: xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1143: register int tmp;
1144:
1145: if (len <= 0) return(0);
1146: if (str1 == str2) return(0);
1147: if (str1 == NULL) return(-1);
1148: if (str2 == NULL) return(1);
1149: do {
1150: tmp = casemap[*str1++] - casemap[*str2];
1151: if (tmp != 0 || --len == 0) return(tmp);
1152: } while (*str2++ != 0);
1153: return 0;
1.14 veillard 1154: }
1155:
1.50 daniel 1156: /**
1157: * xmlStrchr:
1.123 daniel 1158: * @str: the xmlChar * array
1159: * @val: the xmlChar to search
1.50 daniel 1160: *
1.123 daniel 1161: * a strchr for xmlChar's
1.68 daniel 1162: *
1.123 daniel 1163: * Returns the xmlChar * for the first occurence or NULL.
1.14 veillard 1164: */
1165:
1.123 daniel 1166: const xmlChar *
1167: xmlStrchr(const xmlChar *str, xmlChar val) {
1.135 daniel 1168: if (str == NULL) return(NULL);
1.222 veillard 1169: while (*str != 0) { /* non input consuming */
1.123 daniel 1170: if (*str == val) return((xmlChar *) str);
1.14 veillard 1171: str++;
1172: }
1173: return(NULL);
1.89 daniel 1174: }
1175:
1176: /**
1177: * xmlStrstr:
1.123 daniel 1178: * @str: the xmlChar * array (haystack)
1179: * @val: the xmlChar to search (needle)
1.89 daniel 1180: *
1.123 daniel 1181: * a strstr for xmlChar's
1.89 daniel 1182: *
1.123 daniel 1183: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 1184: */
1185:
1.123 daniel 1186: const xmlChar *
1187: xmlStrstr(const xmlChar *str, xmlChar *val) {
1.89 daniel 1188: int n;
1189:
1190: if (str == NULL) return(NULL);
1191: if (val == NULL) return(NULL);
1192: n = xmlStrlen(val);
1193:
1194: if (n == 0) return(str);
1.222 veillard 1195: while (*str != 0) { /* non input consuming */
1.89 daniel 1196: if (*str == *val) {
1.123 daniel 1197: if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1.89 daniel 1198: }
1.232 veillard 1199: str++;
1200: }
1201: return(NULL);
1202: }
1203:
1204: /**
1205: * xmlStrcasestr:
1206: * @str: the xmlChar * array (haystack)
1207: * @val: the xmlChar to search (needle)
1208: *
1209: * a case-ignoring strstr for xmlChar's
1210: *
1211: * Returns the xmlChar * for the first occurence or NULL.
1212: */
1213:
1214: const xmlChar *
1215: xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1216: int n;
1217:
1218: if (str == NULL) return(NULL);
1219: if (val == NULL) return(NULL);
1220: n = xmlStrlen(val);
1221:
1222: if (n == 0) return(str);
1223: while (*str != 0) { /* non input consuming */
1224: if (casemap[*str] == casemap[*val])
1225: if (!xmlStrncasecmp(str, val, n)) return(str);
1.89 daniel 1226: str++;
1227: }
1228: return(NULL);
1229: }
1230:
1231: /**
1232: * xmlStrsub:
1.123 daniel 1233: * @str: the xmlChar * array (haystack)
1.89 daniel 1234: * @start: the index of the first char (zero based)
1235: * @len: the length of the substring
1236: *
1237: * Extract a substring of a given string
1238: *
1.123 daniel 1239: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 1240: */
1241:
1.123 daniel 1242: xmlChar *
1243: xmlStrsub(const xmlChar *str, int start, int len) {
1.89 daniel 1244: int i;
1245:
1246: if (str == NULL) return(NULL);
1247: if (start < 0) return(NULL);
1.90 daniel 1248: if (len < 0) return(NULL);
1.89 daniel 1249:
1250: for (i = 0;i < start;i++) {
1251: if (*str == 0) return(NULL);
1252: str++;
1253: }
1254: if (*str == 0) return(NULL);
1255: return(xmlStrndup(str, len));
1.14 veillard 1256: }
1.28 daniel 1257:
1.50 daniel 1258: /**
1259: * xmlStrlen:
1.123 daniel 1260: * @str: the xmlChar * array
1.50 daniel 1261: *
1.127 daniel 1262: * length of a xmlChar's string
1.68 daniel 1263: *
1.123 daniel 1264: * Returns the number of xmlChar contained in the ARRAY.
1.45 daniel 1265: */
1266:
1.55 daniel 1267: int
1.123 daniel 1268: xmlStrlen(const xmlChar *str) {
1.45 daniel 1269: int len = 0;
1270:
1271: if (str == NULL) return(0);
1.222 veillard 1272: while (*str != 0) { /* non input consuming */
1.45 daniel 1273: str++;
1274: len++;
1275: }
1276: return(len);
1277: }
1278:
1.50 daniel 1279: /**
1280: * xmlStrncat:
1.123 daniel 1281: * @cur: the original xmlChar * array
1282: * @add: the xmlChar * array added
1.50 daniel 1283: * @len: the length of @add
1284: *
1.123 daniel 1285: * a strncat for array of xmlChar's
1.68 daniel 1286: *
1.123 daniel 1287: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 1288: */
1289:
1.123 daniel 1290: xmlChar *
1291: xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1.45 daniel 1292: int size;
1.123 daniel 1293: xmlChar *ret;
1.45 daniel 1294:
1295: if ((add == NULL) || (len == 0))
1296: return(cur);
1297: if (cur == NULL)
1298: return(xmlStrndup(add, len));
1299:
1300: size = xmlStrlen(cur);
1.204 veillard 1301: ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1.45 daniel 1302: if (ret == NULL) {
1.86 daniel 1303: fprintf(stderr, "xmlStrncat: realloc of %ld byte failed\n",
1.123 daniel 1304: (size + len + 1) * (long)sizeof(xmlChar));
1.45 daniel 1305: return(cur);
1306: }
1.123 daniel 1307: memcpy(&ret[size], add, len * sizeof(xmlChar));
1.45 daniel 1308: ret[size + len] = 0;
1309: return(ret);
1310: }
1311:
1.50 daniel 1312: /**
1313: * xmlStrcat:
1.123 daniel 1314: * @cur: the original xmlChar * array
1315: * @add: the xmlChar * array added
1.50 daniel 1316: *
1.152 daniel 1317: * a strcat for array of xmlChar's. Since they are supposed to be
1318: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1319: * a termination mark of '0'.
1.68 daniel 1320: *
1.123 daniel 1321: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 1322: */
1.123 daniel 1323: xmlChar *
1324: xmlStrcat(xmlChar *cur, const xmlChar *add) {
1325: const xmlChar *p = add;
1.45 daniel 1326:
1327: if (add == NULL) return(cur);
1328: if (cur == NULL)
1329: return(xmlStrdup(add));
1330:
1.222 veillard 1331: while (*p != 0) p++; /* non input consuming */
1.45 daniel 1332: return(xmlStrncat(cur, add, p - add));
1333: }
1334:
1335: /************************************************************************
1336: * *
1337: * Commodity functions, cleanup needed ? *
1338: * *
1339: ************************************************************************/
1340:
1.50 daniel 1341: /**
1342: * areBlanks:
1343: * @ctxt: an XML parser context
1.123 daniel 1344: * @str: a xmlChar *
1.50 daniel 1345: * @len: the size of @str
1346: *
1.45 daniel 1347: * Is this a sequence of blank chars that one can ignore ?
1.50 daniel 1348: *
1.68 daniel 1349: * Returns 1 if ignorable 0 otherwise.
1.45 daniel 1350: */
1351:
1.123 daniel 1352: static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1.104 daniel 1353: int i, ret;
1.45 daniel 1354: xmlNodePtr lastChild;
1355:
1.176 daniel 1356: /*
1357: * Check for xml:space value.
1358: */
1359: if (*(ctxt->space) == 1)
1360: return(0);
1361:
1362: /*
1363: * Check that the string is made of blanks
1364: */
1.45 daniel 1365: for (i = 0;i < len;i++)
1366: if (!(IS_BLANK(str[i]))) return(0);
1367:
1.176 daniel 1368: /*
1369: * Look if the element is mixed content in the Dtd if available
1370: */
1.104 daniel 1371: if (ctxt->myDoc != NULL) {
1372: ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1373: if (ret == 0) return(1);
1374: if (ret == 1) return(0);
1375: }
1.176 daniel 1376:
1.104 daniel 1377: /*
1.176 daniel 1378: * Otherwise, heuristic :-\
1.104 daniel 1379: */
1.179 daniel 1380: if (ctxt->keepBlanks)
1381: return(0);
1382: if (RAW != '<') return(0);
1383: if (ctxt->node == NULL) return(0);
1384: if ((ctxt->node->children == NULL) &&
1385: (RAW == '<') && (NXT(1) == '/')) return(0);
1386:
1.45 daniel 1387: lastChild = xmlGetLastChild(ctxt->node);
1388: if (lastChild == NULL) {
1389: if (ctxt->node->content != NULL) return(0);
1390: } else if (xmlNodeIsText(lastChild))
1391: return(0);
1.157 daniel 1392: else if ((ctxt->node->children != NULL) &&
1393: (xmlNodeIsText(ctxt->node->children)))
1.104 daniel 1394: return(0);
1.45 daniel 1395: return(1);
1396: }
1397:
/*
 * Forward declarations for recursive behaviour.
 */
1.77 daniel 1401: void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1402: void xmlParseReference(xmlParserCtxtPtr ctxt);
1.45 daniel 1403:
1.28 daniel 1404: /************************************************************************
1405: * *
1406: * Extra stuff for namespace support *
1407: * Relates to http://www.w3.org/TR/WD-xml-names *
1408: * *
1409: ************************************************************************/
1410:
1.50 daniel 1411: /**
1.72 daniel 1412: * xmlSplitQName:
1.162 daniel 1413: * @ctxt: an XML parser context
1.72 daniel 1414: * @name: an XML parser context
1.123 daniel 1415: * @prefix: a xmlChar **
1.72 daniel 1416: *
1.206 veillard 1417: * parse an UTF8 encoded XML qualified name string
1.72 daniel 1418: *
1419: * [NS 5] QName ::= (Prefix ':')? LocalPart
1420: *
1421: * [NS 6] Prefix ::= NCName
1422: *
1423: * [NS 7] LocalPart ::= NCName
1424: *
1.127 daniel 1425: * Returns the local part, and prefix is updated
1.72 daniel 1426: * to get the Prefix if any.
1427: */
1428:
1.123 daniel 1429: xmlChar *
1.162 daniel 1430: xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1431: xmlChar buf[XML_MAX_NAMELEN + 5];
1.222 veillard 1432: xmlChar *buffer = NULL;
1.162 daniel 1433: int len = 0;
1.222 veillard 1434: int max = XML_MAX_NAMELEN;
1.123 daniel 1435: xmlChar *ret = NULL;
1436: const xmlChar *cur = name;
1.206 veillard 1437: int c;
1.72 daniel 1438:
1439: *prefix = NULL;
1.113 daniel 1440:
1441: /* xml: prefix is not really a namespace */
1442: if ((cur[0] == 'x') && (cur[1] == 'm') &&
1443: (cur[2] == 'l') && (cur[3] == ':'))
1444: return(xmlStrdup(name));
1445:
1.162 daniel 1446: /* nasty but valid */
1447: if (cur[0] == ':')
1448: return(xmlStrdup(name));
1449:
1.206 veillard 1450: c = *cur++;
1.222 veillard 1451: while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1.206 veillard 1452: buf[len++] = c;
1453: c = *cur++;
1.162 daniel 1454: }
1.222 veillard 1455: if (len >= max) {
1456: /*
1457: * Okay someone managed to make a huge name, so he's ready to pay
1458: * for the processing speed.
1459: */
1460: max = len * 2;
1461:
1462: buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1463: if (buffer == NULL) {
1464: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1465: ctxt->sax->error(ctxt->userData,
1466: "xmlSplitQName: out of memory\n");
1467: return(NULL);
1468: }
1469: memcpy(buffer, buf, len);
1470: while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1471: if (len + 10 > max) {
1472: max *= 2;
1473: buffer = (xmlChar *) xmlRealloc(buffer,
1474: max * sizeof(xmlChar));
1475: if (buffer == NULL) {
1476: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1477: ctxt->sax->error(ctxt->userData,
1478: "xmlSplitQName: out of memory\n");
1479: return(NULL);
1480: }
1481: }
1482: buffer[len++] = c;
1483: c = *cur++;
1484: }
1485: buffer[len] = 0;
1486: }
1.72 daniel 1487:
1.222 veillard 1488: if (buffer == NULL)
1489: ret = xmlStrndup(buf, len);
1490: else {
1491: ret = buffer;
1492: buffer = NULL;
1493: max = XML_MAX_NAMELEN;
1494: }
1495:
1.72 daniel 1496:
1.162 daniel 1497: if (c == ':') {
1.206 veillard 1498: c = *cur++;
1499: if (c == 0) return(ret);
1.72 daniel 1500: *prefix = ret;
1.162 daniel 1501: len = 0;
1.72 daniel 1502:
1.222 veillard 1503: while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1.206 veillard 1504: buf[len++] = c;
1505: c = *cur++;
1.162 daniel 1506: }
1.222 veillard 1507: if (len >= max) {
1508: /*
1509: * Okay someone managed to make a huge name, so he's ready to pay
1510: * for the processing speed.
1511: */
1.229 veillard 1512: max = len * 2;
1513:
1514: buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1515: if (buffer == NULL) {
1.55 daniel 1516: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 1517: ctxt->sax->error(ctxt->userData,
1.229 veillard 1518: "xmlSplitQName: out of memory\n");
1519: return(NULL);
1520: }
1521: memcpy(buffer, buf, len);
1522: while (c != 0) { /* tested bigname2.xml */
1523: if (len + 10 > max) {
1524: max *= 2;
1525: buffer = (xmlChar *) xmlRealloc(buffer,
1526: max * sizeof(xmlChar));
1527: if (buffer == NULL) {
1528: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1529: ctxt->sax->error(ctxt->userData,
1530: "xmlSplitQName: out of memory\n");
1531: return(NULL);
1532: }
1533: }
1534: buffer[len++] = c;
1535: c = *cur++;
1.122 daniel 1536: }
1.229 veillard 1537: buffer[len] = 0;
1538: }
1539:
1540: if (buffer == NULL)
1541: ret = xmlStrndup(buf, len);
1542: else {
1543: ret = buffer;
1544: }
1.45 daniel 1545: }
1546:
1.229 veillard 1547: return(ret);
1.45 daniel 1548: }
1549:
1.28 daniel 1550: /************************************************************************
1551: * *
1552: * The parser itself *
1553: * Relates to http://www.w3.org/TR/REC-xml *
1554: * *
1555: ************************************************************************/
1.14 veillard 1556:
1.50 daniel 1557: /**
1558: * xmlParseName:
1559: * @ctxt: an XML parser context
1560: *
1561: * parse an XML name.
1.22 daniel 1562: *
1563: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1564: * CombiningChar | Extender
1565: *
1566: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1567: *
1568: * [6] Names ::= Name (S Name)*
1.68 daniel 1569: *
1570: * Returns the Name parsed or NULL
1.1 veillard 1571: */
1572:
1.123 daniel 1573: xmlChar *
1.55 daniel 1574: xmlParseName(xmlParserCtxtPtr ctxt) {
1.160 daniel 1575: xmlChar buf[XML_MAX_NAMELEN + 5];
1576: int len = 0, l;
1577: int c;
1.222 veillard 1578: int count = 0;
1.1 veillard 1579:
1.91 daniel 1580: GROW;
1.160 daniel 1581: c = CUR_CHAR(l);
1.190 daniel 1582: if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1583: (!IS_LETTER(c) && (c != '_') &&
1584: (c != ':'))) {
1.91 daniel 1585: return(NULL);
1586: }
1.40 daniel 1587:
1.222 veillard 1588: while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1.190 daniel 1589: ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1590: (c == '.') || (c == '-') ||
1591: (c == '_') || (c == ':') ||
1592: (IS_COMBINING(c)) ||
1593: (IS_EXTENDER(c)))) {
1.222 veillard 1594: if (count++ > 100) {
1595: count = 0;
1596: GROW;
1597: }
1.160 daniel 1598: COPY_BUF(l,buf,len,c);
1599: NEXTL(l);
1600: c = CUR_CHAR(l);
1.91 daniel 1601: if (len >= XML_MAX_NAMELEN) {
1.222 veillard 1602: /*
1603: * Okay someone managed to make a huge name, so he's ready to pay
1604: * for the processing speed.
1605: */
1606: xmlChar *buffer;
1607: int max = len * 2;
1608:
1609: buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1610: if (buffer == NULL) {
1611: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1612: ctxt->sax->error(ctxt->userData,
1613: "xmlParseName: out of memory\n");
1614: return(NULL);
1615: }
1616: memcpy(buffer, buf, len);
1617: while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1.160 daniel 1618: (c == '.') || (c == '-') ||
1619: (c == '_') || (c == ':') ||
1620: (IS_COMBINING(c)) ||
1621: (IS_EXTENDER(c))) {
1.222 veillard 1622: if (count++ > 100) {
1623: count = 0;
1624: GROW;
1625: }
1626: if (len + 10 > max) {
1627: max *= 2;
1628: buffer = (xmlChar *) xmlRealloc(buffer,
1629: max * sizeof(xmlChar));
1630: if (buffer == NULL) {
1631: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1632: ctxt->sax->error(ctxt->userData,
1633: "xmlParseName: out of memory\n");
1634: return(NULL);
1635: }
1636: }
1637: COPY_BUF(l,buffer,len,c);
1.160 daniel 1638: NEXTL(l);
1639: c = CUR_CHAR(l);
1.97 daniel 1640: }
1.222 veillard 1641: buffer[len] = 0;
1642: return(buffer);
1.91 daniel 1643: }
1644: }
1645: return(xmlStrndup(buf, len));
1.22 daniel 1646: }
1647:
1.50 daniel 1648: /**
1.135 daniel 1649: * xmlParseStringName:
1650: * @ctxt: an XML parser context
1.229 veillard 1651: * @str: a pointer to the string pointer (IN/OUT)
1.135 daniel 1652: *
1653: * parse an XML name.
1654: *
1655: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1656: * CombiningChar | Extender
1657: *
1658: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1659: *
1660: * [6] Names ::= Name (S Name)*
1661: *
1662: * Returns the Name parsed or NULL. The str pointer
1663: * is updated to the current location in the string.
1664: */
1665:
1666: xmlChar *
1667: xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1.176 daniel 1668: xmlChar buf[XML_MAX_NAMELEN + 5];
1669: const xmlChar *cur = *str;
1670: int len = 0, l;
1671: int c;
1.135 daniel 1672:
1.176 daniel 1673: c = CUR_SCHAR(cur, l);
1674: if (!IS_LETTER(c) && (c != '_') &&
1675: (c != ':')) {
1.135 daniel 1676: return(NULL);
1677: }
1678:
1.222 veillard 1679: while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1.176 daniel 1680: (c == '.') || (c == '-') ||
1681: (c == '_') || (c == ':') ||
1682: (IS_COMBINING(c)) ||
1683: (IS_EXTENDER(c))) {
1684: COPY_BUF(l,buf,len,c);
1685: cur += l;
1686: c = CUR_SCHAR(cur, l);
1.222 veillard 1687: if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1688: /*
1689: * Okay someone managed to make a huge name, so he's ready to pay
1690: * for the processing speed.
1691: */
1692: xmlChar *buffer;
1693: int max = len * 2;
1694:
1695: buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1696: if (buffer == NULL) {
1697: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1698: ctxt->sax->error(ctxt->userData,
1699: "xmlParseStringName: out of memory\n");
1700: return(NULL);
1701: }
1702: memcpy(buffer, buf, len);
1703: while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1.176 daniel 1704: (c == '.') || (c == '-') ||
1705: (c == '_') || (c == ':') ||
1706: (IS_COMBINING(c)) ||
1707: (IS_EXTENDER(c))) {
1.222 veillard 1708: if (len + 10 > max) {
1709: max *= 2;
1710: buffer = (xmlChar *) xmlRealloc(buffer,
1711: max * sizeof(xmlChar));
1712: if (buffer == NULL) {
1713: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1714: ctxt->sax->error(ctxt->userData,
1715: "xmlParseStringName: out of memory\n");
1716: return(NULL);
1717: }
1718: }
1719: COPY_BUF(l,buffer,len,c);
1.176 daniel 1720: cur += l;
1721: c = CUR_SCHAR(cur, l);
1722: }
1.222 veillard 1723: buffer[len] = 0;
1724: *str = cur;
1725: return(buffer);
1.176 daniel 1726: }
1.135 daniel 1727: }
1.176 daniel 1728: *str = cur;
1729: return(xmlStrndup(buf, len));
1.135 daniel 1730: }
1731:
1732: /**
1.50 daniel 1733: * xmlParseNmtoken:
1734: * @ctxt: an XML parser context
1735: *
1736: * parse an XML Nmtoken.
1.22 daniel 1737: *
1738: * [7] Nmtoken ::= (NameChar)+
1739: *
1740: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1.68 daniel 1741: *
1742: * Returns the Nmtoken parsed or NULL
1.22 daniel 1743: */
1744:
1.123 daniel 1745: xmlChar *
1.55 daniel 1746: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1.222 veillard 1747: xmlChar buf[XML_MAX_NAMELEN + 5];
1748: int len = 0, l;
1749: int c;
1750: int count = 0;
1.22 daniel 1751:
1.91 daniel 1752: GROW;
1.160 daniel 1753: c = CUR_CHAR(l);
1.222 veillard 1754:
1755: while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1.160 daniel 1756: (c == '.') || (c == '-') ||
1757: (c == '_') || (c == ':') ||
1758: (IS_COMBINING(c)) ||
1759: (IS_EXTENDER(c))) {
1.222 veillard 1760: if (count++ > 100) {
1761: count = 0;
1762: GROW;
1763: }
1.160 daniel 1764: COPY_BUF(l,buf,len,c);
1765: NEXTL(l);
1766: c = CUR_CHAR(l);
1.91 daniel 1767: if (len >= XML_MAX_NAMELEN) {
1.222 veillard 1768: /*
1769: * Okay someone managed to make a huge token, so he's ready to pay
1770: * for the processing speed.
1771: */
1772: xmlChar *buffer;
1773: int max = len * 2;
1774:
1775: buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1776: if (buffer == NULL) {
1777: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1778: ctxt->sax->error(ctxt->userData,
1779: "xmlParseNmtoken: out of memory\n");
1780: return(NULL);
1781: }
1782: memcpy(buffer, buf, len);
1783: while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1.160 daniel 1784: (c == '.') || (c == '-') ||
1785: (c == '_') || (c == ':') ||
1786: (IS_COMBINING(c)) ||
1787: (IS_EXTENDER(c))) {
1.222 veillard 1788: if (count++ > 100) {
1789: count = 0;
1790: GROW;
1791: }
1792: if (len + 10 > max) {
1793: max *= 2;
1794: buffer = (xmlChar *) xmlRealloc(buffer,
1795: max * sizeof(xmlChar));
1796: if (buffer == NULL) {
1797: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1798: ctxt->sax->error(ctxt->userData,
1799: "xmlParseName: out of memory\n");
1800: return(NULL);
1801: }
1802: }
1803: COPY_BUF(l,buffer,len,c);
1.160 daniel 1804: NEXTL(l);
1805: c = CUR_CHAR(l);
1806: }
1.222 veillard 1807: buffer[len] = 0;
1808: return(buffer);
1.91 daniel 1809: }
1810: }
1.168 daniel 1811: if (len == 0)
1812: return(NULL);
1.91 daniel 1813: return(xmlStrndup(buf, len));
1.1 veillard 1814: }
1815:
1.50 daniel 1816: /**
1817: * xmlParseEntityValue:
1818: * @ctxt: an XML parser context
1.78 daniel 1819: * @orig: if non-NULL store a copy of the original entity value
1.50 daniel 1820: *
1.229 veillard 1821: * parse a value for ENTITY declarations
1.24 daniel 1822: *
1823: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1824: * "'" ([^%&'] | PEReference | Reference)* "'"
1.68 daniel 1825: *
1.78 daniel 1826: * Returns the EntityValue parsed with reference substitued or NULL
1.24 daniel 1827: */
1828:
1.123 daniel 1829: xmlChar *
1830: xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1.135 daniel 1831: xmlChar *buf = NULL;
1832: int len = 0;
1.140 daniel 1833: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 1834: int c, l;
1.135 daniel 1835: xmlChar stop;
1.123 daniel 1836: xmlChar *ret = NULL;
1.176 daniel 1837: const xmlChar *cur = NULL;
1.98 daniel 1838: xmlParserInputPtr input;
1.24 daniel 1839:
1.152 daniel 1840: if (RAW == '"') stop = '"';
1841: else if (RAW == '\'') stop = '\'';
1.135 daniel 1842: else {
1843: ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
1844: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1845: ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
1846: ctxt->wellFormed = 0;
1.180 daniel 1847: ctxt->disableSAX = 1;
1.135 daniel 1848: return(NULL);
1849: }
1850: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
1851: if (buf == NULL) {
1852: fprintf(stderr, "malloc of %d byte failed\n", size);
1853: return(NULL);
1854: }
1.94 daniel 1855:
1.135 daniel 1856: /*
1857: * The content of the entity definition is copied in a buffer.
1858: */
1.94 daniel 1859:
1.135 daniel 1860: ctxt->instate = XML_PARSER_ENTITY_VALUE;
1861: input = ctxt->input;
1862: GROW;
1863: NEXT;
1.152 daniel 1864: c = CUR_CHAR(l);
1.135 daniel 1865: /*
1866: * NOTE: 4.4.5 Included in Literal
1867: * When a parameter entity reference appears in a literal entity
1868: * value, ... a single or double quote character in the replacement
1869: * text is always treated as a normal data character and will not
1870: * terminate the literal.
1871: * In practice it means we stop the loop only when back at parsing
1872: * the initial entity and the quote is found
1873: */
1.222 veillard 1874: while ((IS_CHAR(c)) && ((c != stop) || /* checked */
1875: (ctxt->input != input))) {
1.152 daniel 1876: if (len + 5 >= size) {
1.135 daniel 1877: size *= 2;
1.204 veillard 1878: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 1879: if (buf == NULL) {
1880: fprintf(stderr, "realloc of %d byte failed\n", size);
1881: return(NULL);
1.94 daniel 1882: }
1.79 daniel 1883: }
1.152 daniel 1884: COPY_BUF(l,buf,len,c);
1885: NEXTL(l);
1.98 daniel 1886: /*
1.135 daniel 1887: * Pop-up of finished entities.
1.98 daniel 1888: */
1.222 veillard 1889: while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
1.135 daniel 1890: xmlPopInput(ctxt);
1.152 daniel 1891:
1.221 veillard 1892: GROW;
1.152 daniel 1893: c = CUR_CHAR(l);
1.135 daniel 1894: if (c == 0) {
1.94 daniel 1895: GROW;
1.152 daniel 1896: c = CUR_CHAR(l);
1.79 daniel 1897: }
1.135 daniel 1898: }
1899: buf[len] = 0;
1900:
1901: /*
1.176 daniel 1902: * Raise problem w.r.t. '&' and '%' being used in non-entities
1903: * reference constructs. Note Charref will be handled in
1904: * xmlStringDecodeEntities()
1905: */
1906: cur = buf;
1.223 veillard 1907: while (*cur != 0) { /* non input consuming */
1.176 daniel 1908: if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
1909: xmlChar *name;
1910: xmlChar tmp = *cur;
1911:
1912: cur++;
1913: name = xmlParseStringName(ctxt, &cur);
1914: if ((name == NULL) || (*cur != ';')) {
1.230 veillard 1915: ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
1.176 daniel 1916: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1917: ctxt->sax->error(ctxt->userData,
1918: "EntityValue: '%c' forbidden except for entities references\n",
1919: tmp);
1920: ctxt->wellFormed = 0;
1.180 daniel 1921: ctxt->disableSAX = 1;
1.176 daniel 1922: }
1923: if ((ctxt->inSubset == 1) && (tmp == '%')) {
1.230 veillard 1924: ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
1.176 daniel 1925: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1926: ctxt->sax->error(ctxt->userData,
1927: "EntityValue: PEReferences forbidden in internal subset\n",
1928: tmp);
1929: ctxt->wellFormed = 0;
1.180 daniel 1930: ctxt->disableSAX = 1;
1.176 daniel 1931: }
1932: if (name != NULL)
1933: xmlFree(name);
1934: }
1935: cur++;
1936: }
1937:
1938: /*
1.135 daniel 1939: * Then PEReference entities are substituted.
1940: */
1941: if (c != stop) {
1942: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.55 daniel 1943: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 1944: ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
1.59 daniel 1945: ctxt->wellFormed = 0;
1.180 daniel 1946: ctxt->disableSAX = 1;
1.170 daniel 1947: xmlFree(buf);
1.135 daniel 1948: } else {
1949: NEXT;
1950: /*
1951: * NOTE: 4.4.7 Bypassed
1952: * When a general entity reference appears in the EntityValue in
1953: * an entity declaration, it is bypassed and left as is.
1.176 daniel 1954: * so XML_SUBSTITUTE_REF is not set here.
1.135 daniel 1955: */
1956: ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
1957: 0, 0, 0);
1958: if (orig != NULL)
1959: *orig = buf;
1960: else
1961: xmlFree(buf);
1.24 daniel 1962: }
1963:
1964: return(ret);
1965: }
1966:
1.50 daniel 1967: /**
1968: * xmlParseAttValue:
1969: * @ctxt: an XML parser context
1970: *
1971: * parse a value for an attribute
1.78 daniel 1972: * Note: the parser won't do substitution of entities here, this
1.113 daniel 1973: * will be handled later in xmlStringGetNodeList
1.29 daniel 1974: *
1975: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
1976: * "'" ([^<&'] | Reference)* "'"
1.68 daniel 1977: *
1.129 daniel 1978: * 3.3.3 Attribute-Value Normalization:
1979: * Before the value of an attribute is passed to the application or
1980: * checked for validity, the XML processor must normalize it as follows:
1981: * - a character reference is processed by appending the referenced
1982: * character to the attribute value
1983: * - an entity reference is processed by recursively processing the
1984: * replacement text of the entity
1985: * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
1986: * appending #x20 to the normalized value, except that only a single
1987: * #x20 is appended for a "#xD#xA" sequence that is part of an external
1988: * parsed entity or the literal entity value of an internal parsed entity
1989: * - other characters are processed by appending them to the normalized value
1.130 daniel 1990: * If the declared value is not CDATA, then the XML processor must further
1991: * process the normalized attribute value by discarding any leading and
1992: * trailing space (#x20) characters, and by replacing sequences of space
1993: * (#x20) characters by a single space (#x20) character.
1994: * All attributes for which no declaration has been read should be treated
1995: * by a non-validating parser as if declared CDATA.
1.129 daniel 1996: *
1997: * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
1.29 daniel 1998: */
1999:
1.123 daniel 2000: xmlChar *
1.55 daniel 2001: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.129 daniel 2002: xmlChar limit = 0;
1.198 daniel 2003: xmlChar *buf = NULL;
2004: int len = 0;
2005: int buf_size = 0;
2006: int c, l;
1.129 daniel 2007: xmlChar *current = NULL;
2008: xmlEntityPtr ent;
2009:
1.29 daniel 2010:
1.91 daniel 2011: SHRINK;
1.151 daniel 2012: if (NXT(0) == '"') {
1.96 daniel 2013: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.129 daniel 2014: limit = '"';
1.40 daniel 2015: NEXT;
1.151 daniel 2016: } else if (NXT(0) == '\'') {
1.129 daniel 2017: limit = '\'';
1.96 daniel 2018: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.40 daniel 2019: NEXT;
1.29 daniel 2020: } else {
1.123 daniel 2021: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
1.55 daniel 2022: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2023: ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
1.59 daniel 2024: ctxt->wellFormed = 0;
1.180 daniel 2025: ctxt->disableSAX = 1;
1.129 daniel 2026: return(NULL);
1.29 daniel 2027: }
2028:
1.129 daniel 2029: /*
2030: * allocate a translation buffer.
2031: */
1.198 daniel 2032: buf_size = XML_PARSER_BUFFER_SIZE;
2033: buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2034: if (buf == NULL) {
1.129 daniel 2035: perror("xmlParseAttValue: malloc failed");
2036: return(NULL);
2037: }
2038:
2039: /*
2040: * Ok loop until we reach one of the ending char or a size limit.
2041: */
1.198 daniel 2042: c = CUR_CHAR(l);
1.223 veillard 2043: while (((NXT(0) != limit) && /* checked */
2044: (c != '<')) || (ctxt->token != 0)) {
1.198 daniel 2045: if (c == 0) break;
1.205 veillard 2046: if (ctxt->token == '&') {
1.229 veillard 2047: /*
2048: * The reparsing will be done in xmlStringGetNodeList()
2049: * called by the attribute() function in SAX.c
2050: */
1.205 veillard 2051: static xmlChar buffer[6] = "&";
2052:
2053: if (len > buf_size - 10) {
2054: growBuffer(buf);
2055: }
2056: current = &buffer[0];
1.223 veillard 2057: while (*current != 0) { /* non input consuming */
1.205 veillard 2058: buf[len++] = *current++;
2059: }
2060: ctxt->token = 0;
2061: } else if ((c == '&') && (NXT(1) == '#')) {
1.129 daniel 2062: int val = xmlParseCharRef(ctxt);
1.229 veillard 2063: if (val == '&') {
2064: /*
2065: * The reparsing will be done in xmlStringGetNodeList()
2066: * called by the attribute() function in SAX.c
2067: */
2068: static xmlChar buffer[6] = "&";
2069:
2070: if (len > buf_size - 10) {
2071: growBuffer(buf);
2072: }
2073: current = &buffer[0];
2074: while (*current != 0) { /* non input consuming */
2075: buf[len++] = *current++;
2076: }
2077: } else {
2078: COPY_BUF(l,buf,len,val);
2079: NEXTL(l);
2080: }
1.198 daniel 2081: } else if (c == '&') {
1.129 daniel 2082: ent = xmlParseEntityRef(ctxt);
2083: if ((ent != NULL) &&
2084: (ctxt->replaceEntities != 0)) {
1.185 daniel 2085: xmlChar *rep;
2086:
1.186 daniel 2087: if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2088: rep = xmlStringDecodeEntities(ctxt, ent->content,
1.185 daniel 2089: XML_SUBSTITUTE_REF, 0, 0, 0);
1.186 daniel 2090: if (rep != NULL) {
2091: current = rep;
1.223 veillard 2092: while (*current != 0) { /* non input consuming */
1.198 daniel 2093: buf[len++] = *current++;
2094: if (len > buf_size - 10) {
2095: growBuffer(buf);
1.186 daniel 2096: }
1.185 daniel 2097: }
1.186 daniel 2098: xmlFree(rep);
1.129 daniel 2099: }
1.186 daniel 2100: } else {
2101: if (ent->content != NULL)
1.198 daniel 2102: buf[len++] = ent->content[0];
1.129 daniel 2103: }
2104: } else if (ent != NULL) {
2105: int i = xmlStrlen(ent->name);
2106: const xmlChar *cur = ent->name;
2107:
1.186 daniel 2108: /*
2109: * This may look absurd but is needed to detect
2110: * entities problems
2111: */
1.211 veillard 2112: if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2113: (ent->content != NULL)) {
1.186 daniel 2114: xmlChar *rep;
2115: rep = xmlStringDecodeEntities(ctxt, ent->content,
2116: XML_SUBSTITUTE_REF, 0, 0, 0);
2117: if (rep != NULL)
2118: xmlFree(rep);
2119: }
2120:
2121: /*
2122: * Just output the reference
2123: */
1.198 daniel 2124: buf[len++] = '&';
2125: if (len > buf_size - i - 10) {
2126: growBuffer(buf);
1.129 daniel 2127: }
2128: for (;i > 0;i--)
1.198 daniel 2129: buf[len++] = *cur++;
2130: buf[len++] = ';';
1.129 daniel 2131: }
2132: } else {
1.198 daniel 2133: if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2134: COPY_BUF(l,buf,len,0x20);
2135: if (len > buf_size - 10) {
2136: growBuffer(buf);
1.129 daniel 2137: }
2138: } else {
1.198 daniel 2139: COPY_BUF(l,buf,len,c);
2140: if (len > buf_size - 10) {
2141: growBuffer(buf);
1.129 daniel 2142: }
2143: }
1.198 daniel 2144: NEXTL(l);
1.129 daniel 2145: }
1.198 daniel 2146: GROW;
2147: c = CUR_CHAR(l);
1.129 daniel 2148: }
1.198 daniel 2149: buf[len++] = 0;
1.152 daniel 2150: if (RAW == '<') {
1.230 veillard 2151: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.129 daniel 2152: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2153: ctxt->sax->error(ctxt->userData,
2154: "Unescaped '<' not allowed in attributes values\n");
2155: ctxt->wellFormed = 0;
1.180 daniel 2156: ctxt->disableSAX = 1;
1.152 daniel 2157: } else if (RAW != limit) {
1.230 veillard 2158: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
1.129 daniel 2159: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2160: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2161: ctxt->wellFormed = 0;
1.180 daniel 2162: ctxt->disableSAX = 1;
1.129 daniel 2163: } else
2164: NEXT;
1.198 daniel 2165: return(buf);
1.29 daniel 2166: }
2167:
1.50 daniel 2168: /**
2169: * xmlParseSystemLiteral:
2170: * @ctxt: an XML parser context
2171: *
2172: * parse an XML Literal
1.21 daniel 2173: *
1.22 daniel 2174: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.68 daniel 2175: *
2176: * Returns the SystemLiteral parsed or NULL
1.21 daniel 2177: */
2178:
1.123 daniel 2179: xmlChar *
1.55 daniel 2180: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 2181: xmlChar *buf = NULL;
2182: int len = 0;
1.140 daniel 2183: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 2184: int cur, l;
1.135 daniel 2185: xmlChar stop;
1.168 daniel 2186: int state = ctxt->instate;
1.223 veillard 2187: int count = 0;
1.21 daniel 2188:
1.91 daniel 2189: SHRINK;
1.152 daniel 2190: if (RAW == '"') {
1.40 daniel 2191: NEXT;
1.135 daniel 2192: stop = '"';
1.152 daniel 2193: } else if (RAW == '\'') {
1.40 daniel 2194: NEXT;
1.135 daniel 2195: stop = '\'';
1.21 daniel 2196: } else {
1.230 veillard 2197: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.55 daniel 2198: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2199: ctxt->sax->error(ctxt->userData,
2200: "SystemLiteral \" or ' expected\n");
1.59 daniel 2201: ctxt->wellFormed = 0;
1.180 daniel 2202: ctxt->disableSAX = 1;
1.135 daniel 2203: return(NULL);
1.21 daniel 2204: }
2205:
1.135 daniel 2206: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2207: if (buf == NULL) {
2208: fprintf(stderr, "malloc of %d byte failed\n", size);
2209: return(NULL);
2210: }
1.168 daniel 2211: ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
1.152 daniel 2212: cur = CUR_CHAR(l);
1.223 veillard 2213: while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
1.152 daniel 2214: if (len + 5 >= size) {
1.135 daniel 2215: size *= 2;
1.204 veillard 2216: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 2217: if (buf == NULL) {
2218: fprintf(stderr, "realloc of %d byte failed\n", size);
1.204 veillard 2219: ctxt->instate = (xmlParserInputState) state;
1.135 daniel 2220: return(NULL);
2221: }
2222: }
1.223 veillard 2223: count++;
2224: if (count > 50) {
2225: GROW;
2226: count = 0;
2227: }
1.152 daniel 2228: COPY_BUF(l,buf,len,cur);
2229: NEXTL(l);
2230: cur = CUR_CHAR(l);
1.135 daniel 2231: if (cur == 0) {
2232: GROW;
2233: SHRINK;
1.152 daniel 2234: cur = CUR_CHAR(l);
1.135 daniel 2235: }
2236: }
2237: buf[len] = 0;
1.204 veillard 2238: ctxt->instate = (xmlParserInputState) state;
1.135 daniel 2239: if (!IS_CHAR(cur)) {
1.230 veillard 2240: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
1.135 daniel 2241: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2242: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2243: ctxt->wellFormed = 0;
1.180 daniel 2244: ctxt->disableSAX = 1;
1.135 daniel 2245: } else {
2246: NEXT;
2247: }
2248: return(buf);
1.21 daniel 2249: }
2250:
1.50 daniel 2251: /**
2252: * xmlParsePubidLiteral:
2253: * @ctxt: an XML parser context
1.21 daniel 2254: *
1.50 daniel 2255: * parse an XML public literal
1.68 daniel 2256: *
2257: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2258: *
2259: * Returns the PubidLiteral parsed or NULL.
1.21 daniel 2260: */
2261:
1.123 daniel 2262: xmlChar *
1.55 daniel 2263: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 2264: xmlChar *buf = NULL;
2265: int len = 0;
1.140 daniel 2266: int size = XML_PARSER_BUFFER_SIZE;
1.135 daniel 2267: xmlChar cur;
2268: xmlChar stop;
1.223 veillard 2269: int count = 0;
1.125 daniel 2270:
1.91 daniel 2271: SHRINK;
1.152 daniel 2272: if (RAW == '"') {
1.40 daniel 2273: NEXT;
1.135 daniel 2274: stop = '"';
1.152 daniel 2275: } else if (RAW == '\'') {
1.40 daniel 2276: NEXT;
1.135 daniel 2277: stop = '\'';
1.21 daniel 2278: } else {
1.230 veillard 2279: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.55 daniel 2280: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2281: ctxt->sax->error(ctxt->userData,
2282: "SystemLiteral \" or ' expected\n");
1.59 daniel 2283: ctxt->wellFormed = 0;
1.180 daniel 2284: ctxt->disableSAX = 1;
1.135 daniel 2285: return(NULL);
2286: }
2287: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2288: if (buf == NULL) {
2289: fprintf(stderr, "malloc of %d byte failed\n", size);
2290: return(NULL);
2291: }
2292: cur = CUR;
1.223 veillard 2293: while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
1.135 daniel 2294: if (len + 1 >= size) {
2295: size *= 2;
1.204 veillard 2296: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 2297: if (buf == NULL) {
2298: fprintf(stderr, "realloc of %d byte failed\n", size);
2299: return(NULL);
2300: }
2301: }
2302: buf[len++] = cur;
1.223 veillard 2303: count++;
2304: if (count > 50) {
2305: GROW;
2306: count = 0;
2307: }
1.135 daniel 2308: NEXT;
2309: cur = CUR;
2310: if (cur == 0) {
2311: GROW;
2312: SHRINK;
2313: cur = CUR;
2314: }
2315: }
2316: buf[len] = 0;
2317: if (cur != stop) {
1.230 veillard 2318: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
1.135 daniel 2319: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2320: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2321: ctxt->wellFormed = 0;
1.180 daniel 2322: ctxt->disableSAX = 1;
1.135 daniel 2323: } else {
2324: NEXT;
1.21 daniel 2325: }
1.135 daniel 2326: return(buf);
1.21 daniel 2327: }
2328:
1.50 daniel 2329: /**
2330: * xmlParseCharData:
2331: * @ctxt: an XML parser context
2332: * @cdata: int indicating whether we are within a CDATA section
2333: *
2334: * parse a CharData section.
2335: * if we are within a CDATA section ']]>' marks an end of section.
1.27 daniel 2336: *
1.151 daniel 2337: * The right angle bracket (>) may be represented using the string ">",
2338: * and must, for compatibility, be escaped using ">" or a character
2339: * reference when it appears in the string "]]>" in content, when that
2340: * string is not marking the end of a CDATA section.
2341: *
1.27 daniel 2342: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2343: */
2344:
1.55 daniel 2345: void
2346: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1.152 daniel 2347: xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
1.91 daniel 2348: int nbchar = 0;
1.152 daniel 2349: int cur, l;
1.223 veillard 2350: int count = 0;
1.27 daniel 2351:
1.91 daniel 2352: SHRINK;
1.223 veillard 2353: GROW;
1.152 daniel 2354: cur = CUR_CHAR(l);
1.223 veillard 2355: while (((cur != '<') || (ctxt->token == '<')) && /* checked */
1.190 daniel 2356: ((cur != '&') || (ctxt->token == '&')) &&
1.229 veillard 2357: (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
1.97 daniel 2358: if ((cur == ']') && (NXT(1) == ']') &&
1.59 daniel 2359: (NXT(2) == '>')) {
2360: if (cdata) break;
2361: else {
1.230 veillard 2362: ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
1.59 daniel 2363: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.151 daniel 2364: ctxt->sax->error(ctxt->userData,
1.59 daniel 2365: "Sequence ']]>' not allowed in content\n");
1.151 daniel 2366: /* Should this be relaxed ??? I see a "must here */
2367: ctxt->wellFormed = 0;
1.180 daniel 2368: ctxt->disableSAX = 1;
1.59 daniel 2369: }
2370: }
1.152 daniel 2371: COPY_BUF(l,buf,nbchar,cur);
2372: if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
1.91 daniel 2373: /*
2374: * Ok the segment is to be consumed as chars.
2375: */
1.171 daniel 2376: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 2377: if (areBlanks(ctxt, buf, nbchar)) {
2378: if (ctxt->sax->ignorableWhitespace != NULL)
2379: ctxt->sax->ignorableWhitespace(ctxt->userData,
2380: buf, nbchar);
2381: } else {
2382: if (ctxt->sax->characters != NULL)
2383: ctxt->sax->characters(ctxt->userData, buf, nbchar);
2384: }
2385: }
2386: nbchar = 0;
2387: }
1.223 veillard 2388: count++;
2389: if (count > 50) {
2390: GROW;
2391: count = 0;
2392: }
1.152 daniel 2393: NEXTL(l);
2394: cur = CUR_CHAR(l);
1.27 daniel 2395: }
1.91 daniel 2396: if (nbchar != 0) {
2397: /*
2398: * Ok the segment is to be consumed as chars.
2399: */
1.171 daniel 2400: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 2401: if (areBlanks(ctxt, buf, nbchar)) {
2402: if (ctxt->sax->ignorableWhitespace != NULL)
2403: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2404: } else {
2405: if (ctxt->sax->characters != NULL)
2406: ctxt->sax->characters(ctxt->userData, buf, nbchar);
2407: }
2408: }
1.45 daniel 2409: }
1.27 daniel 2410: }
2411:
1.50 daniel 2412: /**
2413: * xmlParseExternalID:
2414: * @ctxt: an XML parser context
1.123 daniel 2415: * @publicID: a xmlChar** receiving PubidLiteral
1.67 daniel 2416: * @strict: indicate whether we should restrict parsing to only
2417: * production [75], see NOTE below
1.50 daniel 2418: *
1.67 daniel 2419: * Parse an External ID or a Public ID
2420: *
2421: * NOTE: Productions [75] and [83] interract badly since [75] can generate
2422: * 'PUBLIC' S PubidLiteral S SystemLiteral
1.22 daniel 2423: *
2424: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2425: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1.67 daniel 2426: *
2427: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2428: *
1.68 daniel 2429: * Returns the function returns SystemLiteral and in the second
1.67 daniel 2430: * case publicID receives PubidLiteral, is strict is off
2431: * it is possible to return NULL and have publicID set.
1.22 daniel 2432: */
2433:
1.123 daniel 2434: xmlChar *
2435: xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2436: xmlChar *URI = NULL;
1.22 daniel 2437:
1.91 daniel 2438: SHRINK;
1.152 daniel 2439: if ((RAW == 'S') && (NXT(1) == 'Y') &&
1.40 daniel 2440: (NXT(2) == 'S') && (NXT(3) == 'T') &&
2441: (NXT(4) == 'E') && (NXT(5) == 'M')) {
2442: SKIP(6);
1.59 daniel 2443: if (!IS_BLANK(CUR)) {
1.230 veillard 2444: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 2445: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2446: ctxt->sax->error(ctxt->userData,
1.59 daniel 2447: "Space required after 'SYSTEM'\n");
2448: ctxt->wellFormed = 0;
1.180 daniel 2449: ctxt->disableSAX = 1;
1.59 daniel 2450: }
1.42 daniel 2451: SKIP_BLANKS;
1.39 daniel 2452: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 2453: if (URI == NULL) {
1.230 veillard 2454: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.55 daniel 2455: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2456: ctxt->sax->error(ctxt->userData,
1.39 daniel 2457: "xmlParseExternalID: SYSTEM, no URI\n");
1.59 daniel 2458: ctxt->wellFormed = 0;
1.180 daniel 2459: ctxt->disableSAX = 1;
1.59 daniel 2460: }
1.152 daniel 2461: } else if ((RAW == 'P') && (NXT(1) == 'U') &&
1.40 daniel 2462: (NXT(2) == 'B') && (NXT(3) == 'L') &&
2463: (NXT(4) == 'I') && (NXT(5) == 'C')) {
2464: SKIP(6);
1.59 daniel 2465: if (!IS_BLANK(CUR)) {
1.230 veillard 2466: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 2467: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2468: ctxt->sax->error(ctxt->userData,
1.59 daniel 2469: "Space required after 'PUBLIC'\n");
2470: ctxt->wellFormed = 0;
1.180 daniel 2471: ctxt->disableSAX = 1;
1.59 daniel 2472: }
1.42 daniel 2473: SKIP_BLANKS;
1.39 daniel 2474: *publicID = xmlParsePubidLiteral(ctxt);
1.59 daniel 2475: if (*publicID == NULL) {
1.230 veillard 2476: ctxt->errNo = XML_ERR_PUBID_REQUIRED;
1.55 daniel 2477: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2478: ctxt->sax->error(ctxt->userData,
1.39 daniel 2479: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.59 daniel 2480: ctxt->wellFormed = 0;
1.180 daniel 2481: ctxt->disableSAX = 1;
1.59 daniel 2482: }
1.67 daniel 2483: if (strict) {
2484: /*
2485: * We don't handle [83] so "S SystemLiteral" is required.
2486: */
2487: if (!IS_BLANK(CUR)) {
1.230 veillard 2488: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 2489: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2490: ctxt->sax->error(ctxt->userData,
1.67 daniel 2491: "Space required after the Public Identifier\n");
2492: ctxt->wellFormed = 0;
1.180 daniel 2493: ctxt->disableSAX = 1;
1.67 daniel 2494: }
2495: } else {
2496: /*
2497: * We handle [83] so we return immediately, if
2498: * "S SystemLiteral" is not detected. From a purely parsing
2499: * point of view that's a nice mess.
2500: */
1.135 daniel 2501: const xmlChar *ptr;
2502: GROW;
2503:
2504: ptr = CUR_PTR;
1.67 daniel 2505: if (!IS_BLANK(*ptr)) return(NULL);
2506:
1.223 veillard 2507: while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
1.173 daniel 2508: if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
1.59 daniel 2509: }
1.42 daniel 2510: SKIP_BLANKS;
1.39 daniel 2511: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 2512: if (URI == NULL) {
1.230 veillard 2513: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.55 daniel 2514: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2515: ctxt->sax->error(ctxt->userData,
1.39 daniel 2516: "xmlParseExternalID: PUBLIC, no URI\n");
1.59 daniel 2517: ctxt->wellFormed = 0;
1.180 daniel 2518: ctxt->disableSAX = 1;
1.59 daniel 2519: }
1.22 daniel 2520: }
1.39 daniel 2521: return(URI);
1.22 daniel 2522: }
2523:
1.50 daniel 2524: /**
2525: * xmlParseComment:
1.69 daniel 2526: * @ctxt: an XML parser context
1.50 daniel 2527: *
1.3 veillard 2528: * Skip an XML (SGML) comment <!-- .... -->
1.38 daniel 2529: * The spec says that "For compatibility, the string "--" (double-hyphen)
2530: * must not occur within comments. "
1.22 daniel 2531: *
2532: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 2533: */
1.72 daniel 2534: void
1.114 daniel 2535: xmlParseComment(xmlParserCtxtPtr ctxt) {
1.135 daniel 2536: xmlChar *buf = NULL;
1.195 daniel 2537: int len;
1.140 daniel 2538: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 2539: int q, ql;
2540: int r, rl;
2541: int cur, l;
1.140 daniel 2542: xmlParserInputState state;
1.187 daniel 2543: xmlParserInputPtr input = ctxt->input;
1.223 veillard 2544: int count = 0;
1.3 veillard 2545:
2546: /*
1.22 daniel 2547: * Check that there is a comment right here.
1.3 veillard 2548: */
1.152 daniel 2549: if ((RAW != '<') || (NXT(1) != '!') ||
1.72 daniel 2550: (NXT(2) != '-') || (NXT(3) != '-')) return;
1.3 veillard 2551:
1.140 daniel 2552: state = ctxt->instate;
1.97 daniel 2553: ctxt->instate = XML_PARSER_COMMENT;
1.91 daniel 2554: SHRINK;
1.40 daniel 2555: SKIP(4);
1.135 daniel 2556: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2557: if (buf == NULL) {
2558: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 2559: ctxt->instate = state;
1.135 daniel 2560: return;
2561: }
1.152 daniel 2562: q = CUR_CHAR(ql);
2563: NEXTL(ql);
2564: r = CUR_CHAR(rl);
2565: NEXTL(rl);
2566: cur = CUR_CHAR(l);
1.195 daniel 2567: len = 0;
1.223 veillard 2568: while (IS_CHAR(cur) && /* checked */
1.135 daniel 2569: ((cur != '>') ||
2570: (r != '-') || (q != '-'))) {
1.195 daniel 2571: if ((r == '-') && (q == '-') && (len > 1)) {
1.230 veillard 2572: ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
1.55 daniel 2573: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2574: ctxt->sax->error(ctxt->userData,
1.38 daniel 2575: "Comment must not contain '--' (double-hyphen)`\n");
1.59 daniel 2576: ctxt->wellFormed = 0;
1.180 daniel 2577: ctxt->disableSAX = 1;
1.59 daniel 2578: }
1.152 daniel 2579: if (len + 5 >= size) {
1.135 daniel 2580: size *= 2;
1.204 veillard 2581: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 2582: if (buf == NULL) {
2583: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 2584: ctxt->instate = state;
1.135 daniel 2585: return;
2586: }
2587: }
1.152 daniel 2588: COPY_BUF(ql,buf,len,q);
1.135 daniel 2589: q = r;
1.152 daniel 2590: ql = rl;
1.135 daniel 2591: r = cur;
1.152 daniel 2592: rl = l;
1.223 veillard 2593:
2594: count++;
2595: if (count > 50) {
2596: GROW;
2597: count = 0;
2598: }
1.152 daniel 2599: NEXTL(l);
2600: cur = CUR_CHAR(l);
1.135 daniel 2601: if (cur == 0) {
2602: SHRINK;
2603: GROW;
1.152 daniel 2604: cur = CUR_CHAR(l);
1.135 daniel 2605: }
1.3 veillard 2606: }
1.135 daniel 2607: buf[len] = 0;
2608: if (!IS_CHAR(cur)) {
1.230 veillard 2609: ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
1.55 daniel 2610: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2611: ctxt->sax->error(ctxt->userData,
1.135 daniel 2612: "Comment not terminated \n<!--%.50s\n", buf);
1.59 daniel 2613: ctxt->wellFormed = 0;
1.180 daniel 2614: ctxt->disableSAX = 1;
1.178 daniel 2615: xmlFree(buf);
1.3 veillard 2616: } else {
1.187 daniel 2617: if (input != ctxt->input) {
1.230 veillard 2618: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 2619: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2620: ctxt->sax->error(ctxt->userData,
2621: "Comment doesn't start and stop in the same entity\n");
2622: ctxt->wellFormed = 0;
2623: ctxt->disableSAX = 1;
2624: }
1.40 daniel 2625: NEXT;
1.171 daniel 2626: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2627: (!ctxt->disableSAX))
1.135 daniel 2628: ctxt->sax->comment(ctxt->userData, buf);
2629: xmlFree(buf);
1.3 veillard 2630: }
1.140 daniel 2631: ctxt->instate = state;
1.3 veillard 2632: }
2633:
1.50 daniel 2634: /**
2635: * xmlParsePITarget:
2636: * @ctxt: an XML parser context
2637: *
2638: * parse the name of a PI
1.22 daniel 2639: *
2640: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1.68 daniel 2641: *
2642: * Returns the PITarget name or NULL
1.22 daniel 2643: */
2644:
1.123 daniel 2645: xmlChar *
1.55 daniel 2646: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1.123 daniel 2647: xmlChar *name;
1.22 daniel 2648:
2649: name = xmlParseName(ctxt);
1.139 daniel 2650: if ((name != NULL) &&
1.22 daniel 2651: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 2652: ((name[1] == 'm') || (name[1] == 'M')) &&
2653: ((name[2] == 'l') || (name[2] == 'L'))) {
1.139 daniel 2654: int i;
1.177 daniel 2655: if ((name[0] == 'x') && (name[1] == 'm') &&
2656: (name[2] == 'l') && (name[3] == 0)) {
1.230 veillard 2657: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
1.151 daniel 2658: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2659: ctxt->sax->error(ctxt->userData,
2660: "XML declaration allowed only at the start of the document\n");
2661: ctxt->wellFormed = 0;
1.180 daniel 2662: ctxt->disableSAX = 1;
1.151 daniel 2663: return(name);
2664: } else if (name[3] == 0) {
1.230 veillard 2665: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
1.151 daniel 2666: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2667: ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2668: ctxt->wellFormed = 0;
1.180 daniel 2669: ctxt->disableSAX = 1;
1.151 daniel 2670: return(name);
2671: }
1.139 daniel 2672: for (i = 0;;i++) {
2673: if (xmlW3CPIs[i] == NULL) break;
1.236 veillard 2674: if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
1.139 daniel 2675: return(name);
2676: }
2677: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
1.230 veillard 2678: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
1.139 daniel 2679: ctxt->sax->warning(ctxt->userData,
1.122 daniel 2680: "xmlParsePItarget: invalid name prefix 'xml'\n");
2681: }
1.22 daniel 2682: }
2683: return(name);
2684: }
2685:
1.50 daniel 2686: /**
2687: * xmlParsePI:
2688: * @ctxt: an XML parser context
2689: *
2690: * parse an XML Processing Instruction.
1.22 daniel 2691: *
2692: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.68 daniel 2693: *
1.69 daniel 2694: * The processing is transferred to SAX once parsed.
1.3 veillard 2695: */
2696:
1.55 daniel 2697: void
2698: xmlParsePI(xmlParserCtxtPtr ctxt) {
1.135 daniel 2699: xmlChar *buf = NULL;
2700: int len = 0;
1.140 daniel 2701: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 2702: int cur, l;
1.123 daniel 2703: xmlChar *target;
1.140 daniel 2704: xmlParserInputState state;
1.223 veillard 2705: int count = 0;
1.22 daniel 2706:
1.152 daniel 2707: if ((RAW == '<') && (NXT(1) == '?')) {
1.187 daniel 2708: xmlParserInputPtr input = ctxt->input;
1.140 daniel 2709: state = ctxt->instate;
2710: ctxt->instate = XML_PARSER_PI;
1.3 veillard 2711: /*
2712: * this is a Processing Instruction.
2713: */
1.40 daniel 2714: SKIP(2);
1.91 daniel 2715: SHRINK;
1.3 veillard 2716:
2717: /*
1.22 daniel 2718: * Parse the target name and check for special support like
2719: * namespace.
1.3 veillard 2720: */
1.22 daniel 2721: target = xmlParsePITarget(ctxt);
2722: if (target != NULL) {
1.156 daniel 2723: if ((RAW == '?') && (NXT(1) == '>')) {
1.187 daniel 2724: if (input != ctxt->input) {
1.230 veillard 2725: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 2726: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2727: ctxt->sax->error(ctxt->userData,
2728: "PI declaration doesn't start and stop in the same entity\n");
2729: ctxt->wellFormed = 0;
2730: ctxt->disableSAX = 1;
2731: }
1.156 daniel 2732: SKIP(2);
2733:
2734: /*
2735: * SAX: PI detected.
2736: */
1.171 daniel 2737: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.156 daniel 2738: (ctxt->sax->processingInstruction != NULL))
2739: ctxt->sax->processingInstruction(ctxt->userData,
2740: target, NULL);
2741: ctxt->instate = state;
1.170 daniel 2742: xmlFree(target);
1.156 daniel 2743: return;
2744: }
1.135 daniel 2745: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2746: if (buf == NULL) {
2747: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 2748: ctxt->instate = state;
1.135 daniel 2749: return;
2750: }
2751: cur = CUR;
2752: if (!IS_BLANK(cur)) {
1.230 veillard 2753: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.114 daniel 2754: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2755: ctxt->sax->error(ctxt->userData,
2756: "xmlParsePI: PI %s space expected\n", target);
2757: ctxt->wellFormed = 0;
1.180 daniel 2758: ctxt->disableSAX = 1;
1.114 daniel 2759: }
2760: SKIP_BLANKS;
1.152 daniel 2761: cur = CUR_CHAR(l);
1.223 veillard 2762: while (IS_CHAR(cur) && /* checked */
1.135 daniel 2763: ((cur != '?') || (NXT(1) != '>'))) {
1.152 daniel 2764: if (len + 5 >= size) {
1.135 daniel 2765: size *= 2;
1.204 veillard 2766: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 2767: if (buf == NULL) {
2768: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 2769: ctxt->instate = state;
1.135 daniel 2770: return;
2771: }
1.223 veillard 2772: }
2773: count++;
2774: if (count > 50) {
2775: GROW;
2776: count = 0;
1.135 daniel 2777: }
1.152 daniel 2778: COPY_BUF(l,buf,len,cur);
2779: NEXTL(l);
2780: cur = CUR_CHAR(l);
1.135 daniel 2781: if (cur == 0) {
2782: SHRINK;
2783: GROW;
1.152 daniel 2784: cur = CUR_CHAR(l);
1.135 daniel 2785: }
2786: }
2787: buf[len] = 0;
1.152 daniel 2788: if (cur != '?') {
1.230 veillard 2789: ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
1.72 daniel 2790: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2791: ctxt->sax->error(ctxt->userData,
1.72 daniel 2792: "xmlParsePI: PI %s never end ...\n", target);
2793: ctxt->wellFormed = 0;
1.180 daniel 2794: ctxt->disableSAX = 1;
1.22 daniel 2795: } else {
1.187 daniel 2796: if (input != ctxt->input) {
1.230 veillard 2797: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 2798: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2799: ctxt->sax->error(ctxt->userData,
2800: "PI declaration doesn't start and stop in the same entity\n");
2801: ctxt->wellFormed = 0;
2802: ctxt->disableSAX = 1;
2803: }
1.72 daniel 2804: SKIP(2);
1.44 daniel 2805:
1.72 daniel 2806: /*
2807: * SAX: PI detected.
2808: */
1.171 daniel 2809: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.72 daniel 2810: (ctxt->sax->processingInstruction != NULL))
1.99 daniel 2811: ctxt->sax->processingInstruction(ctxt->userData,
1.135 daniel 2812: target, buf);
1.22 daniel 2813: }
1.135 daniel 2814: xmlFree(buf);
1.119 daniel 2815: xmlFree(target);
1.3 veillard 2816: } else {
1.230 veillard 2817: ctxt->errNo = XML_ERR_PI_NOT_STARTED;
1.55 daniel 2818: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.99 daniel 2819: ctxt->sax->error(ctxt->userData,
2820: "xmlParsePI : no target name\n");
1.59 daniel 2821: ctxt->wellFormed = 0;
1.180 daniel 2822: ctxt->disableSAX = 1;
1.22 daniel 2823: }
1.140 daniel 2824: ctxt->instate = state;
1.22 daniel 2825: }
2826: }
2827:
1.50 daniel 2828: /**
2829: * xmlParseNotationDecl:
2830: * @ctxt: an XML parser context
2831: *
2832: * parse a notation declaration
1.22 daniel 2833: *
2834: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
2835: *
2836: * Hence there is actually 3 choices:
2837: * 'PUBLIC' S PubidLiteral
2838: * 'PUBLIC' S PubidLiteral S SystemLiteral
2839: * and 'SYSTEM' S SystemLiteral
1.50 daniel 2840: *
1.67 daniel 2841: * See the NOTE on xmlParseExternalID().
1.22 daniel 2842: */
2843:
1.55 daniel 2844: void
2845: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 2846: xmlChar *name;
2847: xmlChar *Pubid;
2848: xmlChar *Systemid;
1.22 daniel 2849:
1.152 daniel 2850: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 2851: (NXT(2) == 'N') && (NXT(3) == 'O') &&
2852: (NXT(4) == 'T') && (NXT(5) == 'A') &&
2853: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1.67 daniel 2854: (NXT(8) == 'O') && (NXT(9) == 'N')) {
1.187 daniel 2855: xmlParserInputPtr input = ctxt->input;
1.91 daniel 2856: SHRINK;
1.40 daniel 2857: SKIP(10);
1.67 daniel 2858: if (!IS_BLANK(CUR)) {
1.230 veillard 2859: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 2860: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2861: ctxt->sax->error(ctxt->userData,
2862: "Space required after '<!NOTATION'\n");
1.67 daniel 2863: ctxt->wellFormed = 0;
1.180 daniel 2864: ctxt->disableSAX = 1;
1.67 daniel 2865: return;
2866: }
2867: SKIP_BLANKS;
1.22 daniel 2868:
2869: name = xmlParseName(ctxt);
2870: if (name == NULL) {
1.230 veillard 2871: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.55 daniel 2872: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2873: ctxt->sax->error(ctxt->userData,
2874: "NOTATION: Name expected here\n");
1.67 daniel 2875: ctxt->wellFormed = 0;
1.180 daniel 2876: ctxt->disableSAX = 1;
1.67 daniel 2877: return;
2878: }
2879: if (!IS_BLANK(CUR)) {
1.230 veillard 2880: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 2881: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2882: ctxt->sax->error(ctxt->userData,
1.67 daniel 2883: "Space required after the NOTATION name'\n");
1.59 daniel 2884: ctxt->wellFormed = 0;
1.180 daniel 2885: ctxt->disableSAX = 1;
1.22 daniel 2886: return;
2887: }
1.42 daniel 2888: SKIP_BLANKS;
1.67 daniel 2889:
1.22 daniel 2890: /*
1.67 daniel 2891: * Parse the IDs.
1.22 daniel 2892: */
1.160 daniel 2893: Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
1.67 daniel 2894: SKIP_BLANKS;
2895:
1.152 daniel 2896: if (RAW == '>') {
1.187 daniel 2897: if (input != ctxt->input) {
1.230 veillard 2898: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 2899: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2900: ctxt->sax->error(ctxt->userData,
2901: "Notation declaration doesn't start and stop in the same entity\n");
2902: ctxt->wellFormed = 0;
2903: ctxt->disableSAX = 1;
2904: }
1.40 daniel 2905: NEXT;
1.171 daniel 2906: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
2907: (ctxt->sax->notationDecl != NULL))
1.74 daniel 2908: ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
1.67 daniel 2909: } else {
1.230 veillard 2910: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.67 daniel 2911: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2912: ctxt->sax->error(ctxt->userData,
1.67 daniel 2913: "'>' required to close NOTATION declaration\n");
2914: ctxt->wellFormed = 0;
1.180 daniel 2915: ctxt->disableSAX = 1;
1.67 daniel 2916: }
1.119 daniel 2917: xmlFree(name);
2918: if (Systemid != NULL) xmlFree(Systemid);
2919: if (Pubid != NULL) xmlFree(Pubid);
1.22 daniel 2920: }
2921: }
2922:
1.50 daniel 2923: /**
2924: * xmlParseEntityDecl:
2925: * @ctxt: an XML parser context
2926: *
2927: * parse <!ENTITY declarations
1.22 daniel 2928: *
2929: * [70] EntityDecl ::= GEDecl | PEDecl
2930: *
2931: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
2932: *
2933: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
2934: *
2935: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
2936: *
2937: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 2938: *
2939: * [76] NDataDecl ::= S 'NDATA' S Name
1.99 daniel 2940: *
2941: * [ VC: Notation Declared ]
1.116 daniel 2942: * The Name must match the declared name of a notation.
1.22 daniel 2943: */
2944:
1.55 daniel 2945: void
2946: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 2947: xmlChar *name = NULL;
2948: xmlChar *value = NULL;
2949: xmlChar *URI = NULL, *literal = NULL;
2950: xmlChar *ndata = NULL;
1.39 daniel 2951: int isParameter = 0;
1.123 daniel 2952: xmlChar *orig = NULL;
1.22 daniel 2953:
1.94 daniel 2954: GROW;
1.152 daniel 2955: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 2956: (NXT(2) == 'E') && (NXT(3) == 'N') &&
2957: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1.59 daniel 2958: (NXT(6) == 'T') && (NXT(7) == 'Y')) {
1.187 daniel 2959: xmlParserInputPtr input = ctxt->input;
1.96 daniel 2960: ctxt->instate = XML_PARSER_ENTITY_DECL;
1.91 daniel 2961: SHRINK;
1.40 daniel 2962: SKIP(8);
1.59 daniel 2963: if (!IS_BLANK(CUR)) {
1.230 veillard 2964: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 2965: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2966: ctxt->sax->error(ctxt->userData,
2967: "Space required after '<!ENTITY'\n");
1.59 daniel 2968: ctxt->wellFormed = 0;
1.180 daniel 2969: ctxt->disableSAX = 1;
1.59 daniel 2970: }
2971: SKIP_BLANKS;
1.40 daniel 2972:
1.152 daniel 2973: if (RAW == '%') {
1.40 daniel 2974: NEXT;
1.59 daniel 2975: if (!IS_BLANK(CUR)) {
1.230 veillard 2976: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 2977: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2978: ctxt->sax->error(ctxt->userData,
2979: "Space required after '%'\n");
1.59 daniel 2980: ctxt->wellFormed = 0;
1.180 daniel 2981: ctxt->disableSAX = 1;
1.59 daniel 2982: }
1.42 daniel 2983: SKIP_BLANKS;
1.39 daniel 2984: isParameter = 1;
1.22 daniel 2985: }
2986:
2987: name = xmlParseName(ctxt);
1.24 daniel 2988: if (name == NULL) {
1.230 veillard 2989: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 2990: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2991: ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
1.59 daniel 2992: ctxt->wellFormed = 0;
1.180 daniel 2993: ctxt->disableSAX = 1;
1.24 daniel 2994: return;
2995: }
1.59 daniel 2996: if (!IS_BLANK(CUR)) {
1.230 veillard 2997: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 2998: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2999: ctxt->sax->error(ctxt->userData,
1.59 daniel 3000: "Space required after the entity name\n");
3001: ctxt->wellFormed = 0;
1.180 daniel 3002: ctxt->disableSAX = 1;
1.59 daniel 3003: }
1.42 daniel 3004: SKIP_BLANKS;
1.24 daniel 3005:
1.22 daniel 3006: /*
1.68 daniel 3007: * handle the various case of definitions...
1.22 daniel 3008: */
1.39 daniel 3009: if (isParameter) {
1.225 veillard 3010: if ((RAW == '"') || (RAW == '\'')) {
1.78 daniel 3011: value = xmlParseEntityValue(ctxt, &orig);
1.39 daniel 3012: if (value) {
1.171 daniel 3013: if ((ctxt->sax != NULL) &&
3014: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3015: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3016: XML_INTERNAL_PARAMETER_ENTITY,
3017: NULL, NULL, value);
3018: }
1.225 veillard 3019: } else {
1.67 daniel 3020: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 3021: if ((URI == NULL) && (literal == NULL)) {
1.230 veillard 3022: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
1.169 daniel 3023: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3024: ctxt->sax->error(ctxt->userData,
3025: "Entity value required\n");
3026: ctxt->wellFormed = 0;
1.180 daniel 3027: ctxt->disableSAX = 1;
1.169 daniel 3028: }
1.39 daniel 3029: if (URI) {
1.193 daniel 3030: xmlURIPtr uri;
3031:
3032: uri = xmlParseURI((const char *) URI);
3033: if (uri == NULL) {
1.230 veillard 3034: ctxt->errNo = XML_ERR_INVALID_URI;
1.193 daniel 3035: if ((ctxt->sax != NULL) &&
3036: (!ctxt->disableSAX) &&
3037: (ctxt->sax->error != NULL))
3038: ctxt->sax->error(ctxt->userData,
3039: "Invalid URI: %s\n", URI);
3040: ctxt->wellFormed = 0;
3041: } else {
3042: if (uri->fragment != NULL) {
1.230 veillard 3043: ctxt->errNo = XML_ERR_URI_FRAGMENT;
1.193 daniel 3044: if ((ctxt->sax != NULL) &&
3045: (!ctxt->disableSAX) &&
3046: (ctxt->sax->error != NULL))
3047: ctxt->sax->error(ctxt->userData,
3048: "Fragment not allowed: %s\n", URI);
3049: ctxt->wellFormed = 0;
3050: } else {
3051: if ((ctxt->sax != NULL) &&
3052: (!ctxt->disableSAX) &&
3053: (ctxt->sax->entityDecl != NULL))
3054: ctxt->sax->entityDecl(ctxt->userData, name,
3055: XML_EXTERNAL_PARAMETER_ENTITY,
3056: literal, URI, NULL);
3057: }
3058: xmlFreeURI(uri);
3059: }
1.39 daniel 3060: }
1.24 daniel 3061: }
3062: } else {
1.152 daniel 3063: if ((RAW == '"') || (RAW == '\'')) {
1.78 daniel 3064: value = xmlParseEntityValue(ctxt, &orig);
1.171 daniel 3065: if ((ctxt->sax != NULL) &&
3066: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3067: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3068: XML_INTERNAL_GENERAL_ENTITY,
3069: NULL, NULL, value);
3070: } else {
1.67 daniel 3071: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 3072: if ((URI == NULL) && (literal == NULL)) {
1.230 veillard 3073: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
1.169 daniel 3074: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3075: ctxt->sax->error(ctxt->userData,
3076: "Entity value required\n");
3077: ctxt->wellFormed = 0;
1.180 daniel 3078: ctxt->disableSAX = 1;
1.169 daniel 3079: }
1.193 daniel 3080: if (URI) {
3081: xmlURIPtr uri;
3082:
3083: uri = xmlParseURI((const char *)URI);
3084: if (uri == NULL) {
1.230 veillard 3085: ctxt->errNo = XML_ERR_INVALID_URI;
1.193 daniel 3086: if ((ctxt->sax != NULL) &&
3087: (!ctxt->disableSAX) &&
3088: (ctxt->sax->error != NULL))
3089: ctxt->sax->error(ctxt->userData,
3090: "Invalid URI: %s\n", URI);
3091: ctxt->wellFormed = 0;
3092: } else {
3093: if (uri->fragment != NULL) {
1.230 veillard 3094: ctxt->errNo = XML_ERR_URI_FRAGMENT;
1.193 daniel 3095: if ((ctxt->sax != NULL) &&
3096: (!ctxt->disableSAX) &&
3097: (ctxt->sax->error != NULL))
3098: ctxt->sax->error(ctxt->userData,
3099: "Fragment not allowed: %s\n", URI);
3100: ctxt->wellFormed = 0;
3101: }
3102: xmlFreeURI(uri);
3103: }
3104: }
1.152 daniel 3105: if ((RAW != '>') && (!IS_BLANK(CUR))) {
1.230 veillard 3106: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3107: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3108: ctxt->sax->error(ctxt->userData,
1.59 daniel 3109: "Space required before 'NDATA'\n");
3110: ctxt->wellFormed = 0;
1.180 daniel 3111: ctxt->disableSAX = 1;
1.59 daniel 3112: }
1.42 daniel 3113: SKIP_BLANKS;
1.152 daniel 3114: if ((RAW == 'N') && (NXT(1) == 'D') &&
1.40 daniel 3115: (NXT(2) == 'A') && (NXT(3) == 'T') &&
3116: (NXT(4) == 'A')) {
3117: SKIP(5);
1.59 daniel 3118: if (!IS_BLANK(CUR)) {
1.230 veillard 3119: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3120: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3121: ctxt->sax->error(ctxt->userData,
1.59 daniel 3122: "Space required after 'NDATA'\n");
3123: ctxt->wellFormed = 0;
1.180 daniel 3124: ctxt->disableSAX = 1;
1.59 daniel 3125: }
1.42 daniel 3126: SKIP_BLANKS;
1.24 daniel 3127: ndata = xmlParseName(ctxt);
1.171 daniel 3128: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.116 daniel 3129: (ctxt->sax->unparsedEntityDecl != NULL))
3130: ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
1.39 daniel 3131: literal, URI, ndata);
3132: } else {
1.171 daniel 3133: if ((ctxt->sax != NULL) &&
3134: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3135: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3136: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3137: literal, URI, NULL);
1.24 daniel 3138: }
3139: }
3140: }
1.42 daniel 3141: SKIP_BLANKS;
1.152 daniel 3142: if (RAW != '>') {
1.230 veillard 3143: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.55 daniel 3144: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3145: ctxt->sax->error(ctxt->userData,
1.31 daniel 3146: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.59 daniel 3147: ctxt->wellFormed = 0;
1.180 daniel 3148: ctxt->disableSAX = 1;
1.187 daniel 3149: } else {
3150: if (input != ctxt->input) {
1.230 veillard 3151: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 3152: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3153: ctxt->sax->error(ctxt->userData,
3154: "Entity declaration doesn't start and stop in the same entity\n");
3155: ctxt->wellFormed = 0;
3156: ctxt->disableSAX = 1;
3157: }
1.40 daniel 3158: NEXT;
1.187 daniel 3159: }
1.78 daniel 3160: if (orig != NULL) {
3161: /*
1.98 daniel 3162: * Ugly mechanism to save the raw entity value.
1.78 daniel 3163: */
3164: xmlEntityPtr cur = NULL;
3165:
1.98 daniel 3166: if (isParameter) {
3167: if ((ctxt->sax != NULL) &&
3168: (ctxt->sax->getParameterEntity != NULL))
1.120 daniel 3169: cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.98 daniel 3170: } else {
3171: if ((ctxt->sax != NULL) &&
3172: (ctxt->sax->getEntity != NULL))
1.120 daniel 3173: cur = ctxt->sax->getEntity(ctxt->userData, name);
1.98 daniel 3174: }
3175: if (cur != NULL) {
3176: if (cur->orig != NULL)
1.119 daniel 3177: xmlFree(orig);
1.98 daniel 3178: else
3179: cur->orig = orig;
3180: } else
1.119 daniel 3181: xmlFree(orig);
1.78 daniel 3182: }
1.119 daniel 3183: if (name != NULL) xmlFree(name);
3184: if (value != NULL) xmlFree(value);
3185: if (URI != NULL) xmlFree(URI);
3186: if (literal != NULL) xmlFree(literal);
3187: if (ndata != NULL) xmlFree(ndata);
1.22 daniel 3188: }
3189: }
3190:
1.50 daniel 3191: /**
1.59 daniel 3192: * xmlParseDefaultDecl:
3193: * @ctxt: an XML parser context
3194: * @value: Receive a possible fixed default value for the attribute
3195: *
3196: * Parse an attribute default declaration
3197: *
3198: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3199: *
1.99 daniel 3200: * [ VC: Required Attribute ]
1.117 daniel 3201: * if the default declaration is the keyword #REQUIRED, then the
3202: * attribute must be specified for all elements of the type in the
3203: * attribute-list declaration.
1.99 daniel 3204: *
3205: * [ VC: Attribute Default Legal ]
1.102 daniel 3206: * The declared default value must meet the lexical constraints of
3207: * the declared attribute type c.f. xmlValidateAttributeDecl()
1.99 daniel 3208: *
3209: * [ VC: Fixed Attribute Default ]
1.117 daniel 3210: * if an attribute has a default value declared with the #FIXED
3211: * keyword, instances of that attribute must match the default value.
1.99 daniel 3212: *
3213: * [ WFC: No < in Attribute Values ]
3214: * handled in xmlParseAttValue()
3215: *
1.59 daniel 3216: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3217: * or XML_ATTRIBUTE_FIXED.
3218: */
3219:
3220: int
1.123 daniel 3221: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
1.59 daniel 3222: int val;
1.123 daniel 3223: xmlChar *ret;
1.59 daniel 3224:
3225: *value = NULL;
1.152 daniel 3226: if ((RAW == '#') && (NXT(1) == 'R') &&
1.59 daniel 3227: (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3228: (NXT(4) == 'U') && (NXT(5) == 'I') &&
3229: (NXT(6) == 'R') && (NXT(7) == 'E') &&
3230: (NXT(8) == 'D')) {
3231: SKIP(9);
3232: return(XML_ATTRIBUTE_REQUIRED);
3233: }
1.152 daniel 3234: if ((RAW == '#') && (NXT(1) == 'I') &&
1.59 daniel 3235: (NXT(2) == 'M') && (NXT(3) == 'P') &&
3236: (NXT(4) == 'L') && (NXT(5) == 'I') &&
3237: (NXT(6) == 'E') && (NXT(7) == 'D')) {
3238: SKIP(8);
3239: return(XML_ATTRIBUTE_IMPLIED);
3240: }
3241: val = XML_ATTRIBUTE_NONE;
1.152 daniel 3242: if ((RAW == '#') && (NXT(1) == 'F') &&
1.59 daniel 3243: (NXT(2) == 'I') && (NXT(3) == 'X') &&
3244: (NXT(4) == 'E') && (NXT(5) == 'D')) {
3245: SKIP(6);
3246: val = XML_ATTRIBUTE_FIXED;
3247: if (!IS_BLANK(CUR)) {
1.230 veillard 3248: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3249: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3250: ctxt->sax->error(ctxt->userData,
3251: "Space required after '#FIXED'\n");
1.59 daniel 3252: ctxt->wellFormed = 0;
1.180 daniel 3253: ctxt->disableSAX = 1;
1.59 daniel 3254: }
3255: SKIP_BLANKS;
3256: }
3257: ret = xmlParseAttValue(ctxt);
1.96 daniel 3258: ctxt->instate = XML_PARSER_DTD;
1.59 daniel 3259: if (ret == NULL) {
3260: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3261: ctxt->sax->error(ctxt->userData,
1.59 daniel 3262: "Attribute default value declaration error\n");
3263: ctxt->wellFormed = 0;
1.180 daniel 3264: ctxt->disableSAX = 1;
1.59 daniel 3265: } else
3266: *value = ret;
3267: return(val);
3268: }
3269:
3270: /**
1.66 daniel 3271: * xmlParseNotationType:
3272: * @ctxt: an XML parser context
3273: *
3274: * parse an Notation attribute type.
3275: *
1.99 daniel 3276: * Note: the leading 'NOTATION' S part has already being parsed...
3277: *
1.66 daniel 3278: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3279: *
1.99 daniel 3280: * [ VC: Notation Attributes ]
1.117 daniel 3281: * Values of this type must match one of the notation names included
1.99 daniel 3282: * in the declaration; all notation names in the declaration must be declared.
1.66 daniel 3283: *
3284: * Returns: the notation attribute tree built while parsing
3285: */
3286:
3287: xmlEnumerationPtr
3288: xmlParseNotationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 3289: xmlChar *name;
1.66 daniel 3290: xmlEnumerationPtr ret = NULL, last = NULL, cur;
3291:
1.152 daniel 3292: if (RAW != '(') {
1.230 veillard 3293: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.66 daniel 3294: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3295: ctxt->sax->error(ctxt->userData,
3296: "'(' required to start 'NOTATION'\n");
1.66 daniel 3297: ctxt->wellFormed = 0;
1.180 daniel 3298: ctxt->disableSAX = 1;
1.66 daniel 3299: return(NULL);
3300: }
1.91 daniel 3301: SHRINK;
1.66 daniel 3302: do {
3303: NEXT;
3304: SKIP_BLANKS;
3305: name = xmlParseName(ctxt);
3306: if (name == NULL) {
1.230 veillard 3307: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.66 daniel 3308: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3309: ctxt->sax->error(ctxt->userData,
1.66 daniel 3310: "Name expected in NOTATION declaration\n");
3311: ctxt->wellFormed = 0;
1.180 daniel 3312: ctxt->disableSAX = 1;
1.66 daniel 3313: return(ret);
3314: }
3315: cur = xmlCreateEnumeration(name);
1.119 daniel 3316: xmlFree(name);
1.66 daniel 3317: if (cur == NULL) return(ret);
3318: if (last == NULL) ret = last = cur;
3319: else {
3320: last->next = cur;
3321: last = cur;
3322: }
3323: SKIP_BLANKS;
1.152 daniel 3324: } while (RAW == '|');
3325: if (RAW != ')') {
1.230 veillard 3326: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.66 daniel 3327: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3328: ctxt->sax->error(ctxt->userData,
1.66 daniel 3329: "')' required to finish NOTATION declaration\n");
3330: ctxt->wellFormed = 0;
1.180 daniel 3331: ctxt->disableSAX = 1;
1.170 daniel 3332: if ((last != NULL) && (last != ret))
3333: xmlFreeEnumeration(last);
1.66 daniel 3334: return(ret);
3335: }
3336: NEXT;
3337: return(ret);
3338: }
3339:
3340: /**
3341: * xmlParseEnumerationType:
3342: * @ctxt: an XML parser context
3343: *
3344: * parse an Enumeration attribute type.
3345: *
3346: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3347: *
1.99 daniel 3348: * [ VC: Enumeration ]
1.117 daniel 3349: * Values of this type must match one of the Nmtoken tokens in
1.99 daniel 3350: * the declaration
3351: *
1.66 daniel 3352: * Returns: the enumeration attribute tree built while parsing
3353: */
3354:
3355: xmlEnumerationPtr
3356: xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 3357: xmlChar *name;
1.66 daniel 3358: xmlEnumerationPtr ret = NULL, last = NULL, cur;
3359:
1.152 daniel 3360: if (RAW != '(') {
1.230 veillard 3361: ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
1.66 daniel 3362: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3363: ctxt->sax->error(ctxt->userData,
1.66 daniel 3364: "'(' required to start ATTLIST enumeration\n");
3365: ctxt->wellFormed = 0;
1.180 daniel 3366: ctxt->disableSAX = 1;
1.66 daniel 3367: return(NULL);
3368: }
1.91 daniel 3369: SHRINK;
1.66 daniel 3370: do {
3371: NEXT;
3372: SKIP_BLANKS;
3373: name = xmlParseNmtoken(ctxt);
3374: if (name == NULL) {
1.230 veillard 3375: ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
1.66 daniel 3376: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3377: ctxt->sax->error(ctxt->userData,
1.66 daniel 3378: "NmToken expected in ATTLIST enumeration\n");
3379: ctxt->wellFormed = 0;
1.180 daniel 3380: ctxt->disableSAX = 1;
1.66 daniel 3381: return(ret);
3382: }
3383: cur = xmlCreateEnumeration(name);
1.119 daniel 3384: xmlFree(name);
1.66 daniel 3385: if (cur == NULL) return(ret);
3386: if (last == NULL) ret = last = cur;
3387: else {
3388: last->next = cur;
3389: last = cur;
3390: }
3391: SKIP_BLANKS;
1.152 daniel 3392: } while (RAW == '|');
3393: if (RAW != ')') {
1.230 veillard 3394: ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
1.66 daniel 3395: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3396: ctxt->sax->error(ctxt->userData,
1.66 daniel 3397: "')' required to finish ATTLIST enumeration\n");
3398: ctxt->wellFormed = 0;
1.180 daniel 3399: ctxt->disableSAX = 1;
1.66 daniel 3400: return(ret);
3401: }
3402: NEXT;
3403: return(ret);
3404: }
3405:
3406: /**
1.50 daniel 3407: * xmlParseEnumeratedType:
3408: * @ctxt: an XML parser context
1.66 daniel 3409: * @tree: the enumeration tree built while parsing
1.50 daniel 3410: *
1.66 daniel 3411: * parse an Enumerated attribute type.
1.22 daniel 3412: *
3413: * [57] EnumeratedType ::= NotationType | Enumeration
3414: *
3415: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3416: *
1.50 daniel 3417: *
1.66 daniel 3418: * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
1.22 daniel 3419: */
3420:
1.66 daniel 3421: int
3422: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.152 daniel 3423: if ((RAW == 'N') && (NXT(1) == 'O') &&
1.66 daniel 3424: (NXT(2) == 'T') && (NXT(3) == 'A') &&
3425: (NXT(4) == 'T') && (NXT(5) == 'I') &&
3426: (NXT(6) == 'O') && (NXT(7) == 'N')) {
3427: SKIP(8);
3428: if (!IS_BLANK(CUR)) {
1.230 veillard 3429: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.66 daniel 3430: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3431: ctxt->sax->error(ctxt->userData,
3432: "Space required after 'NOTATION'\n");
1.66 daniel 3433: ctxt->wellFormed = 0;
1.180 daniel 3434: ctxt->disableSAX = 1;
1.66 daniel 3435: return(0);
3436: }
3437: SKIP_BLANKS;
3438: *tree = xmlParseNotationType(ctxt);
3439: if (*tree == NULL) return(0);
3440: return(XML_ATTRIBUTE_NOTATION);
3441: }
3442: *tree = xmlParseEnumerationType(ctxt);
3443: if (*tree == NULL) return(0);
3444: return(XML_ATTRIBUTE_ENUMERATION);
1.22 daniel 3445: }
3446:
1.50 daniel 3447: /**
3448: * xmlParseAttributeType:
3449: * @ctxt: an XML parser context
1.66 daniel 3450: * @tree: the enumeration tree built while parsing
1.50 daniel 3451: *
1.59 daniel 3452: * parse the Attribute list def for an element
1.22 daniel 3453: *
3454: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3455: *
3456: * [55] StringType ::= 'CDATA'
3457: *
3458: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3459: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1.50 daniel 3460: *
1.102 daniel 3461: * Validity constraints for attribute values syntax are checked in
3462: * xmlValidateAttributeValue()
3463: *
1.99 daniel 3464: * [ VC: ID ]
1.117 daniel 3465: * Values of type ID must match the Name production. A name must not
1.99 daniel 3466: * appear more than once in an XML document as a value of this type;
3467: * i.e., ID values must uniquely identify the elements which bear them.
3468: *
3469: * [ VC: One ID per Element Type ]
1.117 daniel 3470: * No element type may have more than one ID attribute specified.
1.99 daniel 3471: *
3472: * [ VC: ID Attribute Default ]
1.117 daniel 3473: * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
1.99 daniel 3474: *
3475: * [ VC: IDREF ]
1.102 daniel 3476: * Values of type IDREF must match the Name production, and values
1.140 daniel 3477: * of type IDREFS must match Names; each IDREF Name must match the value
1.117 daniel 3478: * of an ID attribute on some element in the XML document; i.e. IDREF
1.99 daniel 3479: * values must match the value of some ID attribute.
3480: *
3481: * [ VC: Entity Name ]
1.102 daniel 3482: * Values of type ENTITY must match the Name production, values
1.140 daniel 3483: * of type ENTITIES must match Names; each Entity Name must match the
1.117 daniel 3484: * name of an unparsed entity declared in the DTD.
1.99 daniel 3485: *
3486: * [ VC: Name Token ]
1.102 daniel 3487: * Values of type NMTOKEN must match the Nmtoken production; values
1.99 daniel 3488: * of type NMTOKENS must match Nmtokens.
3489: *
1.69 daniel 3490: * Returns the attribute type
1.22 daniel 3491: */
1.59 daniel 3492: int
1.66 daniel 3493: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.91 daniel 3494: SHRINK;
1.152 daniel 3495: if ((RAW == 'C') && (NXT(1) == 'D') &&
1.40 daniel 3496: (NXT(2) == 'A') && (NXT(3) == 'T') &&
3497: (NXT(4) == 'A')) {
3498: SKIP(5);
1.66 daniel 3499: return(XML_ATTRIBUTE_CDATA);
1.152 daniel 3500: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.40 daniel 3501: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.97 daniel 3502: (NXT(4) == 'F') && (NXT(5) == 'S')) {
3503: SKIP(6);
3504: return(XML_ATTRIBUTE_IDREFS);
1.152 daniel 3505: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.97 daniel 3506: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.40 daniel 3507: (NXT(4) == 'F')) {
3508: SKIP(5);
1.59 daniel 3509: return(XML_ATTRIBUTE_IDREF);
1.152 daniel 3510: } else if ((RAW == 'I') && (NXT(1) == 'D')) {
1.66 daniel 3511: SKIP(2);
3512: return(XML_ATTRIBUTE_ID);
1.152 daniel 3513: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 3514: (NXT(2) == 'T') && (NXT(3) == 'I') &&
3515: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3516: SKIP(6);
1.59 daniel 3517: return(XML_ATTRIBUTE_ENTITY);
1.152 daniel 3518: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 3519: (NXT(2) == 'T') && (NXT(3) == 'I') &&
3520: (NXT(4) == 'T') && (NXT(5) == 'I') &&
3521: (NXT(6) == 'E') && (NXT(7) == 'S')) {
3522: SKIP(8);
1.59 daniel 3523: return(XML_ATTRIBUTE_ENTITIES);
1.152 daniel 3524: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.40 daniel 3525: (NXT(2) == 'T') && (NXT(3) == 'O') &&
3526: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.66 daniel 3527: (NXT(6) == 'N') && (NXT(7) == 'S')) {
3528: SKIP(8);
3529: return(XML_ATTRIBUTE_NMTOKENS);
1.152 daniel 3530: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.66 daniel 3531: (NXT(2) == 'T') && (NXT(3) == 'O') &&
3532: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.40 daniel 3533: (NXT(6) == 'N')) {
3534: SKIP(7);
1.59 daniel 3535: return(XML_ATTRIBUTE_NMTOKEN);
1.22 daniel 3536: }
1.66 daniel 3537: return(xmlParseEnumeratedType(ctxt, tree));
1.22 daniel 3538: }
3539:
1.50 daniel 3540: /**
3541: * xmlParseAttributeListDecl:
3542: * @ctxt: an XML parser context
3543: *
3544: * : parse the Attribute list def for an element
1.22 daniel 3545: *
3546: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3547: *
3548: * [53] AttDef ::= S Name S AttType S DefaultDecl
1.50 daniel 3549: *
1.22 daniel 3550: */
1.55 daniel 3551: void
3552: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 3553: xmlChar *elemName;
3554: xmlChar *attrName;
1.103 daniel 3555: xmlEnumerationPtr tree;
1.22 daniel 3556:
1.152 daniel 3557: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 3558: (NXT(2) == 'A') && (NXT(3) == 'T') &&
3559: (NXT(4) == 'T') && (NXT(5) == 'L') &&
3560: (NXT(6) == 'I') && (NXT(7) == 'S') &&
1.59 daniel 3561: (NXT(8) == 'T')) {
1.187 daniel 3562: xmlParserInputPtr input = ctxt->input;
3563:
1.40 daniel 3564: SKIP(9);
1.59 daniel 3565: if (!IS_BLANK(CUR)) {
1.230 veillard 3566: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3567: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3568: ctxt->sax->error(ctxt->userData,
3569: "Space required after '<!ATTLIST'\n");
1.59 daniel 3570: ctxt->wellFormed = 0;
1.180 daniel 3571: ctxt->disableSAX = 1;
1.59 daniel 3572: }
1.42 daniel 3573: SKIP_BLANKS;
1.59 daniel 3574: elemName = xmlParseName(ctxt);
3575: if (elemName == NULL) {
1.230 veillard 3576: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 3577: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3578: ctxt->sax->error(ctxt->userData,
3579: "ATTLIST: no name for Element\n");
1.59 daniel 3580: ctxt->wellFormed = 0;
1.180 daniel 3581: ctxt->disableSAX = 1;
1.22 daniel 3582: return;
3583: }
1.42 daniel 3584: SKIP_BLANKS;
1.220 veillard 3585: GROW;
1.152 daniel 3586: while (RAW != '>') {
1.123 daniel 3587: const xmlChar *check = CUR_PTR;
1.59 daniel 3588: int type;
3589: int def;
1.123 daniel 3590: xmlChar *defaultValue = NULL;
1.59 daniel 3591:
1.220 veillard 3592: GROW;
1.103 daniel 3593: tree = NULL;
1.59 daniel 3594: attrName = xmlParseName(ctxt);
3595: if (attrName == NULL) {
1.230 veillard 3596: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 3597: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3598: ctxt->sax->error(ctxt->userData,
3599: "ATTLIST: no name for Attribute\n");
1.59 daniel 3600: ctxt->wellFormed = 0;
1.180 daniel 3601: ctxt->disableSAX = 1;
1.59 daniel 3602: break;
3603: }
1.97 daniel 3604: GROW;
1.59 daniel 3605: if (!IS_BLANK(CUR)) {
1.230 veillard 3606: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3607: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3608: ctxt->sax->error(ctxt->userData,
1.59 daniel 3609: "Space required after the attribute name\n");
3610: ctxt->wellFormed = 0;
1.180 daniel 3611: ctxt->disableSAX = 1;
1.170 daniel 3612: if (attrName != NULL)
3613: xmlFree(attrName);
3614: if (defaultValue != NULL)
3615: xmlFree(defaultValue);
1.59 daniel 3616: break;
3617: }
3618: SKIP_BLANKS;
3619:
1.66 daniel 3620: type = xmlParseAttributeType(ctxt, &tree);
1.170 daniel 3621: if (type <= 0) {
3622: if (attrName != NULL)
3623: xmlFree(attrName);
3624: if (defaultValue != NULL)
3625: xmlFree(defaultValue);
3626: break;
3627: }
1.22 daniel 3628:
1.97 daniel 3629: GROW;
1.59 daniel 3630: if (!IS_BLANK(CUR)) {
1.230 veillard 3631: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3632: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3633: ctxt->sax->error(ctxt->userData,
1.59 daniel 3634: "Space required after the attribute type\n");
3635: ctxt->wellFormed = 0;
1.180 daniel 3636: ctxt->disableSAX = 1;
1.170 daniel 3637: if (attrName != NULL)
3638: xmlFree(attrName);
3639: if (defaultValue != NULL)
3640: xmlFree(defaultValue);
3641: if (tree != NULL)
3642: xmlFreeEnumeration(tree);
1.59 daniel 3643: break;
3644: }
1.42 daniel 3645: SKIP_BLANKS;
1.59 daniel 3646:
3647: def = xmlParseDefaultDecl(ctxt, &defaultValue);
1.170 daniel 3648: if (def <= 0) {
3649: if (attrName != NULL)
3650: xmlFree(attrName);
3651: if (defaultValue != NULL)
3652: xmlFree(defaultValue);
3653: if (tree != NULL)
3654: xmlFreeEnumeration(tree);
3655: break;
3656: }
1.59 daniel 3657:
1.97 daniel 3658: GROW;
1.152 daniel 3659: if (RAW != '>') {
1.59 daniel 3660: if (!IS_BLANK(CUR)) {
1.230 veillard 3661: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3662: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3663: ctxt->sax->error(ctxt->userData,
1.59 daniel 3664: "Space required after the attribute default value\n");
3665: ctxt->wellFormed = 0;
1.180 daniel 3666: ctxt->disableSAX = 1;
1.170 daniel 3667: if (attrName != NULL)
3668: xmlFree(attrName);
3669: if (defaultValue != NULL)
3670: xmlFree(defaultValue);
3671: if (tree != NULL)
3672: xmlFreeEnumeration(tree);
1.59 daniel 3673: break;
3674: }
3675: SKIP_BLANKS;
3676: }
1.40 daniel 3677: if (check == CUR_PTR) {
1.230 veillard 3678: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 3679: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3680: ctxt->sax->error(ctxt->userData,
1.59 daniel 3681: "xmlParseAttributeListDecl: detected internal error\n");
1.170 daniel 3682: if (attrName != NULL)
3683: xmlFree(attrName);
3684: if (defaultValue != NULL)
3685: xmlFree(defaultValue);
3686: if (tree != NULL)
3687: xmlFreeEnumeration(tree);
1.22 daniel 3688: break;
3689: }
1.171 daniel 3690: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3691: (ctxt->sax->attributeDecl != NULL))
1.74 daniel 3692: ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
1.66 daniel 3693: type, def, defaultValue, tree);
1.59 daniel 3694: if (attrName != NULL)
1.119 daniel 3695: xmlFree(attrName);
1.59 daniel 3696: if (defaultValue != NULL)
1.119 daniel 3697: xmlFree(defaultValue);
1.97 daniel 3698: GROW;
1.22 daniel 3699: }
1.187 daniel 3700: if (RAW == '>') {
3701: if (input != ctxt->input) {
1.230 veillard 3702: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 3703: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3704: ctxt->sax->error(ctxt->userData,
3705: "Attribute list declaration doesn't start and stop in the same entity\n");
3706: ctxt->wellFormed = 0;
3707: ctxt->disableSAX = 1;
3708: }
1.40 daniel 3709: NEXT;
1.187 daniel 3710: }
1.22 daniel 3711:
1.119 daniel 3712: xmlFree(elemName);
1.22 daniel 3713: }
3714: }
3715:
1.50 daniel 3716: /**
1.61 daniel 3717: * xmlParseElementMixedContentDecl:
3718: * @ctxt: an XML parser context
3719: *
3720: * parse the declaration for a Mixed Element content
3721: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3722: *
3723: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
3724: * '(' S? '#PCDATA' S? ')'
3725: *
1.99 daniel 3726: * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
3727: *
3728: * [ VC: No Duplicate Types ]
1.117 daniel 3729: * The same name must not appear more than once in a single
3730: * mixed-content declaration.
1.99 daniel 3731: *
1.61 daniel 3732: * returns: the list of the xmlElementContentPtr describing the element choices
3733: */
3734: xmlElementContentPtr
1.62 daniel 3735: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
1.64 daniel 3736: xmlElementContentPtr ret = NULL, cur = NULL, n;
1.123 daniel 3737: xmlChar *elem = NULL;
1.61 daniel 3738:
1.97 daniel 3739: GROW;
1.152 daniel 3740: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 3741: (NXT(2) == 'C') && (NXT(3) == 'D') &&
3742: (NXT(4) == 'A') && (NXT(5) == 'T') &&
3743: (NXT(6) == 'A')) {
3744: SKIP(7);
3745: SKIP_BLANKS;
1.91 daniel 3746: SHRINK;
1.152 daniel 3747: if (RAW == ')') {
1.187 daniel 3748: ctxt->entity = ctxt->input;
1.63 daniel 3749: NEXT;
3750: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
1.152 daniel 3751: if (RAW == '*') {
1.136 daniel 3752: ret->ocur = XML_ELEMENT_CONTENT_MULT;
3753: NEXT;
3754: }
1.63 daniel 3755: return(ret);
3756: }
1.152 daniel 3757: if ((RAW == '(') || (RAW == '|')) {
1.61 daniel 3758: ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3759: if (ret == NULL) return(NULL);
1.99 daniel 3760: }
1.152 daniel 3761: while (RAW == '|') {
1.64 daniel 3762: NEXT;
1.61 daniel 3763: if (elem == NULL) {
3764: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3765: if (ret == NULL) return(NULL);
3766: ret->c1 = cur;
1.64 daniel 3767: cur = ret;
1.61 daniel 3768: } else {
1.64 daniel 3769: n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3770: if (n == NULL) return(NULL);
3771: n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
3772: cur->c2 = n;
3773: cur = n;
1.119 daniel 3774: xmlFree(elem);
1.61 daniel 3775: }
3776: SKIP_BLANKS;
3777: elem = xmlParseName(ctxt);
3778: if (elem == NULL) {
1.230 veillard 3779: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.61 daniel 3780: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3781: ctxt->sax->error(ctxt->userData,
1.61 daniel 3782: "xmlParseElementMixedContentDecl : Name expected\n");
3783: ctxt->wellFormed = 0;
1.180 daniel 3784: ctxt->disableSAX = 1;
1.61 daniel 3785: xmlFreeElementContent(cur);
3786: return(NULL);
3787: }
3788: SKIP_BLANKS;
1.97 daniel 3789: GROW;
1.61 daniel 3790: }
1.152 daniel 3791: if ((RAW == ')') && (NXT(1) == '*')) {
1.66 daniel 3792: if (elem != NULL) {
1.61 daniel 3793: cur->c2 = xmlNewElementContent(elem,
3794: XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 3795: xmlFree(elem);
1.66 daniel 3796: }
1.65 daniel 3797: ret->ocur = XML_ELEMENT_CONTENT_MULT;
1.187 daniel 3798: ctxt->entity = ctxt->input;
1.64 daniel 3799: SKIP(2);
1.61 daniel 3800: } else {
1.119 daniel 3801: if (elem != NULL) xmlFree(elem);
1.230 veillard 3802: xmlFreeElementContent(ret);
3803: ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
1.61 daniel 3804: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3805: ctxt->sax->error(ctxt->userData,
1.63 daniel 3806: "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
1.61 daniel 3807: ctxt->wellFormed = 0;
1.180 daniel 3808: ctxt->disableSAX = 1;
1.61 daniel 3809: return(NULL);
3810: }
3811:
3812: } else {
1.230 veillard 3813: ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
1.61 daniel 3814: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3815: ctxt->sax->error(ctxt->userData,
1.61 daniel 3816: "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
3817: ctxt->wellFormed = 0;
1.180 daniel 3818: ctxt->disableSAX = 1;
1.61 daniel 3819: }
3820: return(ret);
3821: }
3822:
3823: /**
3824: * xmlParseElementChildrenContentDecl:
1.50 daniel 3825: * @ctxt: an XML parser context
3826: *
1.61 daniel 3827: * parse the declaration for a Mixed Element content
3828: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
1.22 daniel 3829: *
1.61 daniel 3830: *
1.22 daniel 3831: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
3832: *
3833: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
3834: *
3835: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
3836: *
3837: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
3838: *
1.99 daniel 3839: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
3840: * TODO Parameter-entity replacement text must be properly nested
3841: * with parenthetized groups. That is to say, if either of the
3842: * opening or closing parentheses in a choice, seq, or Mixed
3843: * construct is contained in the replacement text for a parameter
3844: * entity, both must be contained in the same replacement text. For
3845: * interoperability, if a parameter-entity reference appears in a
3846: * choice, seq, or Mixed construct, its replacement text should not
3847: * be empty, and neither the first nor last non-blank character of
3848: * the replacement text should be a connector (| or ,).
3849: *
1.62 daniel 3850: * returns: the tree of xmlElementContentPtr describing the element
1.61 daniel 3851: * hierarchy.
3852: */
3853: xmlElementContentPtr
1.62 daniel 3854: xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
1.63 daniel 3855: xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
1.123 daniel 3856: xmlChar *elem;
3857: xmlChar type = 0;
1.62 daniel 3858:
3859: SKIP_BLANKS;
1.94 daniel 3860: GROW;
1.152 daniel 3861: if (RAW == '(') {
1.63 daniel 3862: /* Recurse on first child */
1.62 daniel 3863: NEXT;
3864: SKIP_BLANKS;
3865: cur = ret = xmlParseElementChildrenContentDecl(ctxt);
3866: SKIP_BLANKS;
1.101 daniel 3867: GROW;
1.62 daniel 3868: } else {
3869: elem = xmlParseName(ctxt);
3870: if (elem == NULL) {
1.230 veillard 3871: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 3872: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3873: ctxt->sax->error(ctxt->userData,
1.62 daniel 3874: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
3875: ctxt->wellFormed = 0;
1.180 daniel 3876: ctxt->disableSAX = 1;
1.62 daniel 3877: return(NULL);
3878: }
3879: cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.101 daniel 3880: GROW;
1.152 daniel 3881: if (RAW == '?') {
1.104 daniel 3882: cur->ocur = XML_ELEMENT_CONTENT_OPT;
1.62 daniel 3883: NEXT;
1.152 daniel 3884: } else if (RAW == '*') {
1.104 daniel 3885: cur->ocur = XML_ELEMENT_CONTENT_MULT;
1.62 daniel 3886: NEXT;
1.152 daniel 3887: } else if (RAW == '+') {
1.104 daniel 3888: cur->ocur = XML_ELEMENT_CONTENT_PLUS;
1.62 daniel 3889: NEXT;
3890: } else {
1.104 daniel 3891: cur->ocur = XML_ELEMENT_CONTENT_ONCE;
1.62 daniel 3892: }
1.119 daniel 3893: xmlFree(elem);
1.101 daniel 3894: GROW;
1.62 daniel 3895: }
3896: SKIP_BLANKS;
1.91 daniel 3897: SHRINK;
1.152 daniel 3898: while (RAW != ')') {
1.63 daniel 3899: /*
3900: * Each loop we parse one separator and one element.
3901: */
1.152 daniel 3902: if (RAW == ',') {
1.62 daniel 3903: if (type == 0) type = CUR;
3904:
3905: /*
3906: * Detect "Name | Name , Name" error
3907: */
3908: else if (type != CUR) {
1.230 veillard 3909: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 3910: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3911: ctxt->sax->error(ctxt->userData,
1.62 daniel 3912: "xmlParseElementChildrenContentDecl : '%c' expected\n",
3913: type);
3914: ctxt->wellFormed = 0;
1.180 daniel 3915: ctxt->disableSAX = 1;
1.170 daniel 3916: if ((op != NULL) && (op != ret))
3917: xmlFreeElementContent(op);
1.211 veillard 3918: if ((last != NULL) && (last != ret) &&
3919: (last != ret->c1) && (last != ret->c2))
1.170 daniel 3920: xmlFreeElementContent(last);
3921: if (ret != NULL)
3922: xmlFreeElementContent(ret);
1.62 daniel 3923: return(NULL);
3924: }
1.64 daniel 3925: NEXT;
1.62 daniel 3926:
1.63 daniel 3927: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
3928: if (op == NULL) {
3929: xmlFreeElementContent(ret);
3930: return(NULL);
3931: }
3932: if (last == NULL) {
3933: op->c1 = ret;
1.65 daniel 3934: ret = cur = op;
1.63 daniel 3935: } else {
3936: cur->c2 = op;
3937: op->c1 = last;
3938: cur =op;
1.65 daniel 3939: last = NULL;
1.63 daniel 3940: }
1.152 daniel 3941: } else if (RAW == '|') {
1.62 daniel 3942: if (type == 0) type = CUR;
3943:
3944: /*
1.63 daniel 3945: * Detect "Name , Name | Name" error
1.62 daniel 3946: */
3947: else if (type != CUR) {
1.230 veillard 3948: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 3949: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3950: ctxt->sax->error(ctxt->userData,
1.62 daniel 3951: "xmlParseElementChildrenContentDecl : '%c' expected\n",
3952: type);
3953: ctxt->wellFormed = 0;
1.180 daniel 3954: ctxt->disableSAX = 1;
1.211 veillard 3955: if ((op != NULL) && (op != ret) && (op != last))
1.170 daniel 3956: xmlFreeElementContent(op);
1.211 veillard 3957: if ((last != NULL) && (last != ret) &&
3958: (last != ret->c1) && (last != ret->c2))
1.170 daniel 3959: xmlFreeElementContent(last);
3960: if (ret != NULL)
3961: xmlFreeElementContent(ret);
1.62 daniel 3962: return(NULL);
3963: }
1.64 daniel 3964: NEXT;
1.62 daniel 3965:
1.63 daniel 3966: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3967: if (op == NULL) {
1.170 daniel 3968: if ((op != NULL) && (op != ret))
3969: xmlFreeElementContent(op);
1.211 veillard 3970: if ((last != NULL) && (last != ret) &&
3971: (last != ret->c1) && (last != ret->c2))
1.170 daniel 3972: xmlFreeElementContent(last);
3973: if (ret != NULL)
3974: xmlFreeElementContent(ret);
1.63 daniel 3975: return(NULL);
3976: }
3977: if (last == NULL) {
3978: op->c1 = ret;
1.65 daniel 3979: ret = cur = op;
1.63 daniel 3980: } else {
3981: cur->c2 = op;
3982: op->c1 = last;
3983: cur =op;
1.65 daniel 3984: last = NULL;
1.63 daniel 3985: }
1.62 daniel 3986: } else {
1.230 veillard 3987: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
1.62 daniel 3988: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3989: ctxt->sax->error(ctxt->userData,
1.62 daniel 3990: "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
3991: ctxt->wellFormed = 0;
1.180 daniel 3992: ctxt->disableSAX = 1;
1.170 daniel 3993: if ((op != NULL) && (op != ret))
3994: xmlFreeElementContent(op);
1.211 veillard 3995: if ((last != NULL) && (last != ret) &&
3996: (last != ret->c1) && (last != ret->c2))
1.170 daniel 3997: xmlFreeElementContent(last);
3998: if (ret != NULL)
3999: xmlFreeElementContent(ret);
1.62 daniel 4000: return(NULL);
4001: }
1.101 daniel 4002: GROW;
1.62 daniel 4003: SKIP_BLANKS;
1.101 daniel 4004: GROW;
1.152 daniel 4005: if (RAW == '(') {
1.63 daniel 4006: /* Recurse on second child */
1.62 daniel 4007: NEXT;
4008: SKIP_BLANKS;
1.65 daniel 4009: last = xmlParseElementChildrenContentDecl(ctxt);
1.62 daniel 4010: SKIP_BLANKS;
4011: } else {
4012: elem = xmlParseName(ctxt);
4013: if (elem == NULL) {
1.230 veillard 4014: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 4015: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4016: ctxt->sax->error(ctxt->userData,
1.122 daniel 4017: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.62 daniel 4018: ctxt->wellFormed = 0;
1.180 daniel 4019: ctxt->disableSAX = 1;
1.170 daniel 4020: if ((op != NULL) && (op != ret))
4021: xmlFreeElementContent(op);
1.211 veillard 4022: if ((last != NULL) && (last != ret) &&
4023: (last != ret->c1) && (last != ret->c2))
1.170 daniel 4024: xmlFreeElementContent(last);
4025: if (ret != NULL)
4026: xmlFreeElementContent(ret);
1.62 daniel 4027: return(NULL);
4028: }
1.65 daniel 4029: last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 4030: xmlFree(elem);
1.152 daniel 4031: if (RAW == '?') {
1.105 daniel 4032: last->ocur = XML_ELEMENT_CONTENT_OPT;
4033: NEXT;
1.152 daniel 4034: } else if (RAW == '*') {
1.105 daniel 4035: last->ocur = XML_ELEMENT_CONTENT_MULT;
4036: NEXT;
1.152 daniel 4037: } else if (RAW == '+') {
1.105 daniel 4038: last->ocur = XML_ELEMENT_CONTENT_PLUS;
4039: NEXT;
4040: } else {
4041: last->ocur = XML_ELEMENT_CONTENT_ONCE;
4042: }
1.63 daniel 4043: }
4044: SKIP_BLANKS;
1.97 daniel 4045: GROW;
1.64 daniel 4046: }
1.65 daniel 4047: if ((cur != NULL) && (last != NULL)) {
4048: cur->c2 = last;
1.62 daniel 4049: }
1.187 daniel 4050: ctxt->entity = ctxt->input;
1.62 daniel 4051: NEXT;
1.152 daniel 4052: if (RAW == '?') {
1.62 daniel 4053: ret->ocur = XML_ELEMENT_CONTENT_OPT;
4054: NEXT;
1.152 daniel 4055: } else if (RAW == '*') {
1.62 daniel 4056: ret->ocur = XML_ELEMENT_CONTENT_MULT;
4057: NEXT;
1.152 daniel 4058: } else if (RAW == '+') {
1.62 daniel 4059: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
4060: NEXT;
4061: }
4062: return(ret);
1.61 daniel 4063: }
4064:
4065: /**
4066: * xmlParseElementContentDecl:
4067: * @ctxt: an XML parser context
4068: * @name: the name of the element being defined.
4069: * @result: the Element Content pointer will be stored here if any
1.22 daniel 4070: *
1.61 daniel 4071: * parse the declaration for an Element content either Mixed or Children,
4072: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4073: *
4074: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1.50 daniel 4075: *
1.61 daniel 4076: * returns: the type of element content XML_ELEMENT_TYPE_xxx
1.22 daniel 4077: */
4078:
1.61 daniel 4079: int
1.123 daniel 4080: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
1.61 daniel 4081: xmlElementContentPtr *result) {
4082:
4083: xmlElementContentPtr tree = NULL;
1.187 daniel 4084: xmlParserInputPtr input = ctxt->input;
1.61 daniel 4085: int res;
4086:
4087: *result = NULL;
4088:
1.152 daniel 4089: if (RAW != '(') {
1.230 veillard 4090: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.61 daniel 4091: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4092: ctxt->sax->error(ctxt->userData,
1.61 daniel 4093: "xmlParseElementContentDecl : '(' expected\n");
4094: ctxt->wellFormed = 0;
1.180 daniel 4095: ctxt->disableSAX = 1;
1.61 daniel 4096: return(-1);
4097: }
4098: NEXT;
1.97 daniel 4099: GROW;
1.61 daniel 4100: SKIP_BLANKS;
1.152 daniel 4101: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 4102: (NXT(2) == 'C') && (NXT(3) == 'D') &&
4103: (NXT(4) == 'A') && (NXT(5) == 'T') &&
4104: (NXT(6) == 'A')) {
1.62 daniel 4105: tree = xmlParseElementMixedContentDecl(ctxt);
1.61 daniel 4106: res = XML_ELEMENT_TYPE_MIXED;
4107: } else {
1.62 daniel 4108: tree = xmlParseElementChildrenContentDecl(ctxt);
1.61 daniel 4109: res = XML_ELEMENT_TYPE_ELEMENT;
4110: }
1.187 daniel 4111: if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
1.230 veillard 4112: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 4113: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4114: ctxt->sax->error(ctxt->userData,
4115: "Element content declaration doesn't start and stop in the same entity\n");
4116: ctxt->wellFormed = 0;
4117: ctxt->disableSAX = 1;
4118: }
1.61 daniel 4119: SKIP_BLANKS;
1.63 daniel 4120: *result = tree;
1.61 daniel 4121: return(res);
1.22 daniel 4122: }
4123:
1.50 daniel 4124: /**
4125: * xmlParseElementDecl:
4126: * @ctxt: an XML parser context
4127: *
4128: * parse an Element declaration.
1.22 daniel 4129: *
4130: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4131: *
1.99 daniel 4132: * [ VC: Unique Element Type Declaration ]
1.117 daniel 4133: * No element type may be declared more than once
1.69 daniel 4134: *
4135: * Returns the type of the element, or -1 in case of error
1.22 daniel 4136: */
1.59 daniel 4137: int
1.55 daniel 4138: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4139: xmlChar *name;
1.59 daniel 4140: int ret = -1;
1.61 daniel 4141: xmlElementContentPtr content = NULL;
1.22 daniel 4142:
1.97 daniel 4143: GROW;
1.152 daniel 4144: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4145: (NXT(2) == 'E') && (NXT(3) == 'L') &&
4146: (NXT(4) == 'E') && (NXT(5) == 'M') &&
4147: (NXT(6) == 'E') && (NXT(7) == 'N') &&
1.59 daniel 4148: (NXT(8) == 'T')) {
1.187 daniel 4149: xmlParserInputPtr input = ctxt->input;
4150:
1.40 daniel 4151: SKIP(9);
1.59 daniel 4152: if (!IS_BLANK(CUR)) {
1.230 veillard 4153: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4154: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4155: ctxt->sax->error(ctxt->userData,
1.59 daniel 4156: "Space required after 'ELEMENT'\n");
4157: ctxt->wellFormed = 0;
1.180 daniel 4158: ctxt->disableSAX = 1;
1.59 daniel 4159: }
1.42 daniel 4160: SKIP_BLANKS;
1.22 daniel 4161: name = xmlParseName(ctxt);
4162: if (name == NULL) {
1.230 veillard 4163: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 4164: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4165: ctxt->sax->error(ctxt->userData,
1.59 daniel 4166: "xmlParseElementDecl: no name for Element\n");
4167: ctxt->wellFormed = 0;
1.180 daniel 4168: ctxt->disableSAX = 1;
1.59 daniel 4169: return(-1);
4170: }
4171: if (!IS_BLANK(CUR)) {
1.230 veillard 4172: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4173: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4174: ctxt->sax->error(ctxt->userData,
1.59 daniel 4175: "Space required after the element name\n");
4176: ctxt->wellFormed = 0;
1.180 daniel 4177: ctxt->disableSAX = 1;
1.22 daniel 4178: }
1.42 daniel 4179: SKIP_BLANKS;
1.152 daniel 4180: if ((RAW == 'E') && (NXT(1) == 'M') &&
1.40 daniel 4181: (NXT(2) == 'P') && (NXT(3) == 'T') &&
4182: (NXT(4) == 'Y')) {
4183: SKIP(5);
1.22 daniel 4184: /*
4185: * Element must always be empty.
4186: */
1.59 daniel 4187: ret = XML_ELEMENT_TYPE_EMPTY;
1.152 daniel 4188: } else if ((RAW == 'A') && (NXT(1) == 'N') &&
1.40 daniel 4189: (NXT(2) == 'Y')) {
4190: SKIP(3);
1.22 daniel 4191: /*
4192: * Element is a generic container.
4193: */
1.59 daniel 4194: ret = XML_ELEMENT_TYPE_ANY;
1.152 daniel 4195: } else if (RAW == '(') {
1.61 daniel 4196: ret = xmlParseElementContentDecl(ctxt, name, &content);
1.22 daniel 4197: } else {
1.98 daniel 4198: /*
4199: * [ WFC: PEs in Internal Subset ] error handling.
4200: */
1.152 daniel 4201: if ((RAW == '%') && (ctxt->external == 0) &&
1.98 daniel 4202: (ctxt->inputNr == 1)) {
1.230 veillard 4203: ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
1.98 daniel 4204: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4205: ctxt->sax->error(ctxt->userData,
4206: "PEReference: forbidden within markup decl in internal subset\n");
4207: } else {
1.230 veillard 4208: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.98 daniel 4209: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4210: ctxt->sax->error(ctxt->userData,
4211: "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4212: }
1.61 daniel 4213: ctxt->wellFormed = 0;
1.180 daniel 4214: ctxt->disableSAX = 1;
1.119 daniel 4215: if (name != NULL) xmlFree(name);
1.61 daniel 4216: return(-1);
1.22 daniel 4217: }
1.142 daniel 4218:
4219: SKIP_BLANKS;
4220: /*
4221: * Pop-up of finished entities.
4222: */
1.152 daniel 4223: while ((RAW == 0) && (ctxt->inputNr > 1))
1.142 daniel 4224: xmlPopInput(ctxt);
1.42 daniel 4225: SKIP_BLANKS;
1.142 daniel 4226:
1.152 daniel 4227: if (RAW != '>') {
1.230 veillard 4228: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.55 daniel 4229: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4230: ctxt->sax->error(ctxt->userData,
1.31 daniel 4231: "xmlParseElementDecl: expected '>' at the end\n");
1.59 daniel 4232: ctxt->wellFormed = 0;
1.180 daniel 4233: ctxt->disableSAX = 1;
1.61 daniel 4234: } else {
1.187 daniel 4235: if (input != ctxt->input) {
1.230 veillard 4236: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 4237: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4238: ctxt->sax->error(ctxt->userData,
4239: "Element declaration doesn't start and stop in the same entity\n");
4240: ctxt->wellFormed = 0;
4241: ctxt->disableSAX = 1;
4242: }
4243:
1.40 daniel 4244: NEXT;
1.171 daniel 4245: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4246: (ctxt->sax->elementDecl != NULL))
1.76 daniel 4247: ctxt->sax->elementDecl(ctxt->userData, name, ret,
4248: content);
1.61 daniel 4249: }
1.84 daniel 4250: if (content != NULL) {
4251: xmlFreeElementContent(content);
4252: }
1.61 daniel 4253: if (name != NULL) {
1.119 daniel 4254: xmlFree(name);
1.61 daniel 4255: }
1.22 daniel 4256: }
1.59 daniel 4257: return(ret);
1.22 daniel 4258: }
4259:
1.50 daniel 4260: /**
4261: * xmlParseMarkupDecl:
4262: * @ctxt: an XML parser context
4263: *
4264: * parse Markup declarations
1.22 daniel 4265: *
4266: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4267: * NotationDecl | PI | Comment
4268: *
1.98 daniel 4269: * [ VC: Proper Declaration/PE Nesting ]
1.229 veillard 4270: * Parameter-entity replacement text must be properly nested with
1.98 daniel 4271: * markup declarations. That is to say, if either the first character
4272: * or the last character of a markup declaration (markupdecl above) is
4273: * contained in the replacement text for a parameter-entity reference,
4274: * both must be contained in the same replacement text.
4275: *
4276: * [ WFC: PEs in Internal Subset ]
4277: * In the internal DTD subset, parameter-entity references can occur
4278: * only where markup declarations can occur, not within markup declarations.
4279: * (This does not apply to references that occur in external parameter
4280: * entities or to the external subset.)
1.22 daniel 4281: */
1.55 daniel 4282: void
4283: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1.94 daniel 4284: GROW;
1.22 daniel 4285: xmlParseElementDecl(ctxt);
4286: xmlParseAttributeListDecl(ctxt);
4287: xmlParseEntityDecl(ctxt);
4288: xmlParseNotationDecl(ctxt);
4289: xmlParsePI(ctxt);
1.114 daniel 4290: xmlParseComment(ctxt);
1.98 daniel 4291: /*
4292: * This is only for internal subset. On external entities,
4293: * the replacement is done before parsing stage
4294: */
4295: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4296: xmlParsePEReference(ctxt);
1.97 daniel 4297: ctxt->instate = XML_PARSER_DTD;
1.22 daniel 4298: }
4299:
1.50 daniel 4300: /**
1.76 daniel 4301: * xmlParseTextDecl:
4302: * @ctxt: an XML parser context
4303: *
4304: * parse an XML declaration header for external entities
4305: *
4306: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
1.176 daniel 4307: *
4308: * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
1.76 daniel 4309: */
4310:
1.172 daniel 4311: void
1.76 daniel 4312: xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4313: xmlChar *version;
1.76 daniel 4314:
4315: /*
4316: * We know that '<?xml' is here.
4317: */
1.193 daniel 4318: if ((RAW == '<') && (NXT(1) == '?') &&
4319: (NXT(2) == 'x') && (NXT(3) == 'm') &&
4320: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4321: SKIP(5);
4322: } else {
1.230 veillard 4323: ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
1.193 daniel 4324: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4325: ctxt->sax->error(ctxt->userData,
4326: "Text declaration '<?xml' required\n");
4327: ctxt->wellFormed = 0;
4328: ctxt->disableSAX = 1;
4329:
4330: return;
4331: }
1.76 daniel 4332:
4333: if (!IS_BLANK(CUR)) {
1.230 veillard 4334: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 4335: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4336: ctxt->sax->error(ctxt->userData,
4337: "Space needed after '<?xml'\n");
1.76 daniel 4338: ctxt->wellFormed = 0;
1.180 daniel 4339: ctxt->disableSAX = 1;
1.76 daniel 4340: }
4341: SKIP_BLANKS;
4342:
4343: /*
4344: * We may have the VersionInfo here.
4345: */
4346: version = xmlParseVersionInfo(ctxt);
4347: if (version == NULL)
4348: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.172 daniel 4349: ctxt->input->version = version;
1.76 daniel 4350:
4351: /*
4352: * We must have the encoding declaration
4353: */
4354: if (!IS_BLANK(CUR)) {
1.230 veillard 4355: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 4356: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4357: ctxt->sax->error(ctxt->userData, "Space needed here\n");
1.76 daniel 4358: ctxt->wellFormed = 0;
1.180 daniel 4359: ctxt->disableSAX = 1;
1.76 daniel 4360: }
1.195 daniel 4361: xmlParseEncodingDecl(ctxt);
1.193 daniel 4362: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4363: /*
4364: * The XML REC instructs us to stop parsing right here
4365: */
4366: return;
4367: }
1.76 daniel 4368:
4369: SKIP_BLANKS;
1.152 daniel 4370: if ((RAW == '?') && (NXT(1) == '>')) {
1.76 daniel 4371: SKIP(2);
1.152 daniel 4372: } else if (RAW == '>') {
1.76 daniel 4373: /* Deprecated old WD ... */
1.230 veillard 4374: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 4375: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4376: ctxt->sax->error(ctxt->userData,
4377: "XML declaration must end-up with '?>'\n");
1.76 daniel 4378: ctxt->wellFormed = 0;
1.180 daniel 4379: ctxt->disableSAX = 1;
1.76 daniel 4380: NEXT;
4381: } else {
1.230 veillard 4382: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 4383: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4384: ctxt->sax->error(ctxt->userData,
4385: "parsing XML declaration: '?>' expected\n");
1.76 daniel 4386: ctxt->wellFormed = 0;
1.180 daniel 4387: ctxt->disableSAX = 1;
1.76 daniel 4388: MOVETO_ENDTAG(CUR_PTR);
4389: NEXT;
4390: }
4391: }
4392:
4393: /*
4394: * xmlParseConditionalSections
4395: * @ctxt: an XML parser context
4396: *
4397: * [61] conditionalSect ::= includeSect | ignoreSect
4398: * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4399: * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4400: * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4401: * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4402: */
4403:
4404: void
4405: xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
1.165 daniel 4406: SKIP(3);
4407: SKIP_BLANKS;
1.168 daniel 4408: if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4409: (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4410: (NXT(6) == 'E')) {
1.165 daniel 4411: SKIP(7);
1.168 daniel 4412: SKIP_BLANKS;
4413: if (RAW != '[') {
1.230 veillard 4414: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
1.168 daniel 4415: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4416: ctxt->sax->error(ctxt->userData,
4417: "XML conditional section '[' expected\n");
4418: ctxt->wellFormed = 0;
1.180 daniel 4419: ctxt->disableSAX = 1;
1.168 daniel 4420: } else {
4421: NEXT;
4422: }
1.220 veillard 4423: if (xmlParserDebugEntities) {
4424: if ((ctxt->input != NULL) && (ctxt->input->filename))
4425: fprintf(stderr, "%s(%d): ", ctxt->input->filename,
4426: ctxt->input->line);
4427: fprintf(stderr, "Entering INCLUDE Conditional Section\n");
4428: }
4429:
1.165 daniel 4430: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4431: (NXT(2) != '>'))) {
4432: const xmlChar *check = CUR_PTR;
4433: int cons = ctxt->input->consumed;
4434: int tok = ctxt->token;
4435:
4436: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4437: xmlParseConditionalSections(ctxt);
4438: } else if (IS_BLANK(CUR)) {
4439: NEXT;
4440: } else if (RAW == '%') {
4441: xmlParsePEReference(ctxt);
4442: } else
4443: xmlParseMarkupDecl(ctxt);
4444:
4445: /*
4446: * Pop-up of finished entities.
4447: */
4448: while ((RAW == 0) && (ctxt->inputNr > 1))
4449: xmlPopInput(ctxt);
4450:
4451: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4452: (tok == ctxt->token)) {
1.230 veillard 4453: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.165 daniel 4454: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4455: ctxt->sax->error(ctxt->userData,
4456: "Content error in the external subset\n");
4457: ctxt->wellFormed = 0;
1.180 daniel 4458: ctxt->disableSAX = 1;
1.165 daniel 4459: break;
4460: }
4461: }
1.220 veillard 4462: if (xmlParserDebugEntities) {
4463: if ((ctxt->input != NULL) && (ctxt->input->filename))
4464: fprintf(stderr, "%s(%d): ", ctxt->input->filename,
4465: ctxt->input->line);
4466: fprintf(stderr, "Leaving INCLUDE Conditional Section\n");
4467: }
4468:
1.168 daniel 4469: } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4470: (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
1.171 daniel 4471: int state;
4472:
1.168 daniel 4473: SKIP(6);
4474: SKIP_BLANKS;
4475: if (RAW != '[') {
1.230 veillard 4476: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
1.168 daniel 4477: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4478: ctxt->sax->error(ctxt->userData,
4479: "XML conditional section '[' expected\n");
4480: ctxt->wellFormed = 0;
1.180 daniel 4481: ctxt->disableSAX = 1;
1.168 daniel 4482: } else {
4483: NEXT;
4484: }
1.220 veillard 4485: if (xmlParserDebugEntities) {
4486: if ((ctxt->input != NULL) && (ctxt->input->filename))
4487: fprintf(stderr, "%s(%d): ", ctxt->input->filename,
4488: ctxt->input->line);
4489: fprintf(stderr, "Entering IGNORE Conditional Section\n");
4490: }
1.171 daniel 4491:
1.143 daniel 4492: /*
1.171 daniel 4493: * Parse up to the end of the conditionnal section
4494: * But disable SAX event generating DTD building in the meantime
1.143 daniel 4495: */
1.171 daniel 4496: state = ctxt->disableSAX;
1.220 veillard 4497: ctxt->disableSAX = 1;
1.165 daniel 4498: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4499: (NXT(2) != '>'))) {
1.171 daniel 4500: const xmlChar *check = CUR_PTR;
4501: int cons = ctxt->input->consumed;
4502: int tok = ctxt->token;
4503:
4504: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4505: xmlParseConditionalSections(ctxt);
4506: } else if (IS_BLANK(CUR)) {
4507: NEXT;
4508: } else if (RAW == '%') {
4509: xmlParsePEReference(ctxt);
4510: } else
4511: xmlParseMarkupDecl(ctxt);
4512:
1.165 daniel 4513: /*
4514: * Pop-up of finished entities.
4515: */
4516: while ((RAW == 0) && (ctxt->inputNr > 1))
4517: xmlPopInput(ctxt);
1.143 daniel 4518:
1.171 daniel 4519: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4520: (tok == ctxt->token)) {
1.230 veillard 4521: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.171 daniel 4522: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4523: ctxt->sax->error(ctxt->userData,
4524: "Content error in the external subset\n");
4525: ctxt->wellFormed = 0;
1.180 daniel 4526: ctxt->disableSAX = 1;
1.171 daniel 4527: break;
4528: }
1.165 daniel 4529: }
1.171 daniel 4530: ctxt->disableSAX = state;
1.220 veillard 4531: if (xmlParserDebugEntities) {
4532: if ((ctxt->input != NULL) && (ctxt->input->filename))
4533: fprintf(stderr, "%s(%d): ", ctxt->input->filename,
4534: ctxt->input->line);
4535: fprintf(stderr, "Leaving IGNORE Conditional Section\n");
4536: }
4537:
1.168 daniel 4538: } else {
1.230 veillard 4539: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
1.168 daniel 4540: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4541: ctxt->sax->error(ctxt->userData,
4542: "XML conditional section INCLUDE or IGNORE keyword expected\n");
4543: ctxt->wellFormed = 0;
1.180 daniel 4544: ctxt->disableSAX = 1;
1.143 daniel 4545: }
4546:
1.152 daniel 4547: if (RAW == 0)
1.143 daniel 4548: SHRINK;
4549:
1.152 daniel 4550: if (RAW == 0) {
1.230 veillard 4551: ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
1.76 daniel 4552: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4553: ctxt->sax->error(ctxt->userData,
4554: "XML conditional section not closed\n");
4555: ctxt->wellFormed = 0;
1.180 daniel 4556: ctxt->disableSAX = 1;
1.143 daniel 4557: } else {
4558: SKIP(3);
1.76 daniel 4559: }
4560: }
4561:
4562: /**
1.124 daniel 4563: * xmlParseExternalSubset:
1.76 daniel 4564: * @ctxt: an XML parser context
1.124 daniel 4565: * @ExternalID: the external identifier
4566: * @SystemID: the system identifier (or URL)
1.76 daniel 4567: *
4568: * parse Markup declarations from an external subset
4569: *
4570: * [30] extSubset ::= textDecl? extSubsetDecl
4571: *
4572: * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
4573: */
4574: void
1.123 daniel 4575: xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
4576: const xmlChar *SystemID) {
1.132 daniel 4577: GROW;
1.152 daniel 4578: if ((RAW == '<') && (NXT(1) == '?') &&
1.76 daniel 4579: (NXT(2) == 'x') && (NXT(3) == 'm') &&
4580: (NXT(4) == 'l')) {
1.172 daniel 4581: xmlParseTextDecl(ctxt);
1.193 daniel 4582: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4583: /*
4584: * The XML REC instructs us to stop parsing right here
4585: */
4586: ctxt->instate = XML_PARSER_EOF;
4587: return;
4588: }
1.76 daniel 4589: }
1.79 daniel 4590: if (ctxt->myDoc == NULL) {
1.116 daniel 4591: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
1.79 daniel 4592: }
4593: if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4594: xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4595:
1.96 daniel 4596: ctxt->instate = XML_PARSER_DTD;
1.101 daniel 4597: ctxt->external = 1;
1.152 daniel 4598: while (((RAW == '<') && (NXT(1) == '?')) ||
4599: ((RAW == '<') && (NXT(1) == '!')) ||
1.164 daniel 4600: IS_BLANK(CUR)) {
1.123 daniel 4601: const xmlChar *check = CUR_PTR;
1.115 daniel 4602: int cons = ctxt->input->consumed;
1.164 daniel 4603: int tok = ctxt->token;
1.115 daniel 4604:
1.221 veillard 4605: GROW;
1.152 daniel 4606: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
1.76 daniel 4607: xmlParseConditionalSections(ctxt);
4608: } else if (IS_BLANK(CUR)) {
4609: NEXT;
1.152 daniel 4610: } else if (RAW == '%') {
1.76 daniel 4611: xmlParsePEReference(ctxt);
4612: } else
4613: xmlParseMarkupDecl(ctxt);
1.77 daniel 4614:
4615: /*
4616: * Pop-up of finished entities.
4617: */
1.166 daniel 4618: while ((RAW == 0) && (ctxt->inputNr > 1))
1.77 daniel 4619: xmlPopInput(ctxt);
4620:
1.164 daniel 4621: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4622: (tok == ctxt->token)) {
1.230 veillard 4623: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.115 daniel 4624: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4625: ctxt->sax->error(ctxt->userData,
4626: "Content error in the external subset\n");
4627: ctxt->wellFormed = 0;
1.180 daniel 4628: ctxt->disableSAX = 1;
1.115 daniel 4629: break;
4630: }
1.76 daniel 4631: }
4632:
1.152 daniel 4633: if (RAW != 0) {
1.230 veillard 4634: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.76 daniel 4635: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4636: ctxt->sax->error(ctxt->userData,
4637: "Extra content at the end of the document\n");
4638: ctxt->wellFormed = 0;
1.180 daniel 4639: ctxt->disableSAX = 1;
1.76 daniel 4640: }
4641:
4642: }
4643:
4644: /**
1.77 daniel 4645: * xmlParseReference:
4646: * @ctxt: an XML parser context
4647: *
4648: * parse and handle entity references in content, depending on the SAX
4649: * interface, this may end-up in a call to character() if this is a
1.79 daniel 4650: * CharRef, a predefined entity, if there is no reference() callback.
4651: * or if the parser was asked to switch to that mode.
1.77 daniel 4652: *
4653: * [67] Reference ::= EntityRef | CharRef
4654: */
4655: void
4656: xmlParseReference(xmlParserCtxtPtr ctxt) {
4657: xmlEntityPtr ent;
1.123 daniel 4658: xmlChar *val;
1.152 daniel 4659: if (RAW != '&') return;
1.77 daniel 4660:
4661: if (NXT(1) == '#') {
1.152 daniel 4662: int i = 0;
1.153 daniel 4663: xmlChar out[10];
4664: int hex = NXT(2);
1.77 daniel 4665: int val = xmlParseCharRef(ctxt);
1.152 daniel 4666:
1.198 daniel 4667: if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
1.153 daniel 4668: /*
4669: * So we are using non-UTF-8 buffers
4670: * Check that the char fit on 8bits, if not
4671: * generate a CharRef.
4672: */
4673: if (val <= 0xFF) {
4674: out[0] = val;
4675: out[1] = 0;
1.171 daniel 4676: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4677: (!ctxt->disableSAX))
1.153 daniel 4678: ctxt->sax->characters(ctxt->userData, out, 1);
4679: } else {
4680: if ((hex == 'x') || (hex == 'X'))
4681: sprintf((char *)out, "#x%X", val);
4682: else
4683: sprintf((char *)out, "#%d", val);
1.171 daniel 4684: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4685: (!ctxt->disableSAX))
1.153 daniel 4686: ctxt->sax->reference(ctxt->userData, out);
4687: }
4688: } else {
4689: /*
4690: * Just encode the value in UTF-8
4691: */
4692: COPY_BUF(0 ,out, i, val);
4693: out[i] = 0;
1.171 daniel 4694: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4695: (!ctxt->disableSAX))
1.153 daniel 4696: ctxt->sax->characters(ctxt->userData, out, i);
4697: }
1.77 daniel 4698: } else {
4699: ent = xmlParseEntityRef(ctxt);
4700: if (ent == NULL) return;
4701: if ((ent->name != NULL) &&
1.159 daniel 4702: (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
1.180 daniel 4703: xmlNodePtr list = NULL;
4704: int ret;
4705:
4706:
4707: /*
4708: * The first reference to the entity trigger a parsing phase
4709: * where the ent->children is filled with the result from
4710: * the parsing.
4711: */
4712: if (ent->children == NULL) {
4713: xmlChar *value;
4714: value = ent->content;
4715:
4716: /*
4717: * Check that this entity is well formed
4718: */
4719: if ((value != NULL) &&
4720: (value[1] == 0) && (value[0] == '<') &&
1.236 veillard 4721: (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
1.180 daniel 4722: /*
1.222 veillard 4723: * DONE: get definite answer on this !!!
1.180 daniel 4724: * Lots of entity decls are used to declare a single
4725: * char
4726: * <!ENTITY lt "<">
4727: * Which seems to be valid since
4728: * 2.4: The ampersand character (&) and the left angle
4729: * bracket (<) may appear in their literal form only
4730: * when used ... They are also legal within the literal
4731: * entity value of an internal entity declaration;i
4732: * see "4.3.2 Well-Formed Parsed Entities".
4733: * IMHO 2.4 and 4.3.2 are directly in contradiction.
4734: * Looking at the OASIS test suite and James Clark
4735: * tests, this is broken. However the XML REC uses
4736: * it. Is the XML REC not well-formed ????
4737: * This is a hack to avoid this problem
1.222 veillard 4738: *
4739: * ANSWER: since lt gt amp .. are already defined,
4740: * this is a redefinition and hence the fact that the
4741: * contentis not well balanced is not a Wf error, this
4742: * is lousy but acceptable.
1.180 daniel 4743: */
4744: list = xmlNewDocText(ctxt->myDoc, value);
4745: if (list != NULL) {
4746: if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4747: (ent->children == NULL)) {
4748: ent->children = list;
4749: ent->last = list;
4750: list->parent = (xmlNodePtr) ent;
4751: } else {
4752: xmlFreeNodeList(list);
4753: }
4754: } else if (list != NULL) {
4755: xmlFreeNodeList(list);
4756: }
1.181 daniel 4757: } else {
1.180 daniel 4758: /*
4759: * 4.3.2: An internal general parsed entity is well-formed
4760: * if its replacement text matches the production labeled
4761: * content.
4762: */
1.185 daniel 4763: if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
4764: ctxt->depth++;
1.180 daniel 4765: ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
1.185 daniel 4766: ctxt->sax, NULL, ctxt->depth,
4767: value, &list);
4768: ctxt->depth--;
4769: } else if (ent->etype ==
4770: XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
4771: ctxt->depth++;
1.180 daniel 4772: ret = xmlParseExternalEntity(ctxt->myDoc,
1.185 daniel 4773: ctxt->sax, NULL, ctxt->depth,
1.228 veillard 4774: ent->URI, ent->ExternalID, &list);
1.185 daniel 4775: ctxt->depth--;
4776: } else {
1.180 daniel 4777: ret = -1;
4778: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4779: ctxt->sax->error(ctxt->userData,
4780: "Internal: invalid entity type\n");
4781: }
1.185 daniel 4782: if (ret == XML_ERR_ENTITY_LOOP) {
1.230 veillard 4783: ctxt->errNo = XML_ERR_ENTITY_LOOP;
1.185 daniel 4784: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4785: ctxt->sax->error(ctxt->userData,
4786: "Detected entity reference loop\n");
4787: ctxt->wellFormed = 0;
4788: ctxt->disableSAX = 1;
4789: } else if ((ret == 0) && (list != NULL)) {
1.180 daniel 4790: if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4791: (ent->children == NULL)) {
4792: ent->children = list;
4793: while (list != NULL) {
4794: list->parent = (xmlNodePtr) ent;
4795: if (list->next == NULL)
4796: ent->last = list;
4797: list = list->next;
4798: }
4799: } else {
4800: xmlFreeNodeList(list);
4801: }
4802: } else if (ret > 0) {
1.230 veillard 4803: ctxt->errNo = ret;
1.180 daniel 4804: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4805: ctxt->sax->error(ctxt->userData,
4806: "Entity value required\n");
4807: ctxt->wellFormed = 0;
4808: ctxt->disableSAX = 1;
4809: } else if (list != NULL) {
4810: xmlFreeNodeList(list);
4811: }
4812: }
4813: }
1.113 daniel 4814: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
1.171 daniel 4815: (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
1.113 daniel 4816: /*
4817: * Create a node.
4818: */
4819: ctxt->sax->reference(ctxt->userData, ent->name);
4820: return;
4821: } else if (ctxt->replaceEntities) {
1.222 veillard 4822: if ((ctxt->node != NULL) && (ent->children != NULL)) {
4823: /*
4824: * Seems we are generating the DOM content, do
4825: * a simple tree copy
4826: */
4827: xmlNodePtr new;
4828: new = xmlCopyNodeList(ent->children);
4829:
4830: xmlAddChildList(ctxt->node, new);
4831: /*
4832: * This is to avoid a nasty side effect, see
4833: * characters() in SAX.c
4834: */
4835: ctxt->nodemem = 0;
4836: ctxt->nodelen = 0;
4837: return;
4838: } else {
4839: /*
4840: * Probably running in SAX mode
4841: */
4842: xmlParserInputPtr input;
1.79 daniel 4843:
1.222 veillard 4844: input = xmlNewEntityInputStream(ctxt, ent);
4845: xmlPushInput(ctxt, input);
4846: if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
4847: (RAW == '<') && (NXT(1) == '?') &&
4848: (NXT(2) == 'x') && (NXT(3) == 'm') &&
4849: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4850: xmlParseTextDecl(ctxt);
4851: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4852: /*
4853: * The XML REC instructs us to stop parsing right here
4854: */
4855: ctxt->instate = XML_PARSER_EOF;
4856: return;
4857: }
4858: if (input->standalone == 1) {
1.230 veillard 4859: ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
1.222 veillard 4860: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4861: ctxt->sax->error(ctxt->userData,
4862: "external parsed entities cannot be standalone\n");
4863: ctxt->wellFormed = 0;
4864: ctxt->disableSAX = 1;
4865: }
1.167 daniel 4866: }
1.222 veillard 4867: return;
1.167 daniel 4868: }
1.113 daniel 4869: }
1.222 veillard 4870: } else {
4871: val = ent->content;
4872: if (val == NULL) return;
4873: /*
4874: * inline the entity.
4875: */
4876: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4877: (!ctxt->disableSAX))
4878: ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
1.77 daniel 4879: }
4880: }
1.24 daniel 4881: }
4882:
1.50 daniel 4883: /**
4884: * xmlParseEntityRef:
4885: * @ctxt: an XML parser context
4886: *
4887: * parse ENTITY references declarations
1.24 daniel 4888: *
4889: * [68] EntityRef ::= '&' Name ';'
1.68 daniel 4890: *
1.98 daniel 4891: * [ WFC: Entity Declared ]
4892: * In a document without any DTD, a document with only an internal DTD
4893: * subset which contains no parameter entity references, or a document
4894: * with "standalone='yes'", the Name given in the entity reference
4895: * must match that in an entity declaration, except that well-formed
4896: * documents need not declare any of the following entities: amp, lt,
4897: * gt, apos, quot. The declaration of a parameter entity must precede
4898: * any reference to it. Similarly, the declaration of a general entity
4899: * must precede any reference to it which appears in a default value in an
4900: * attribute-list declaration. Note that if entities are declared in the
4901: * external subset or in external parameter entities, a non-validating
4902: * processor is not obligated to read and process their declarations;
4903: * for such documents, the rule that an entity must be declared is a
4904: * well-formedness constraint only if standalone='yes'.
4905: *
4906: * [ WFC: Parsed Entity ]
4907: * An entity reference must not contain the name of an unparsed entity
4908: *
1.77 daniel 4909: * Returns the xmlEntityPtr if found, or NULL otherwise.
1.24 daniel 4910: */
1.77 daniel 4911: xmlEntityPtr
1.55 daniel 4912: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1.123 daniel 4913: xmlChar *name;
1.72 daniel 4914: xmlEntityPtr ent = NULL;
1.24 daniel 4915:
1.91 daniel 4916: GROW;
1.111 daniel 4917:
1.152 daniel 4918: if (RAW == '&') {
1.40 daniel 4919: NEXT;
1.24 daniel 4920: name = xmlParseName(ctxt);
4921: if (name == NULL) {
1.230 veillard 4922: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 4923: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 4924: ctxt->sax->error(ctxt->userData,
4925: "xmlParseEntityRef: no name\n");
1.59 daniel 4926: ctxt->wellFormed = 0;
1.180 daniel 4927: ctxt->disableSAX = 1;
1.24 daniel 4928: } else {
1.152 daniel 4929: if (RAW == ';') {
1.40 daniel 4930: NEXT;
1.24 daniel 4931: /*
1.77 daniel 4932: * Ask first SAX for entity resolution, otherwise try the
4933: * predefined set.
4934: */
4935: if (ctxt->sax != NULL) {
4936: if (ctxt->sax->getEntity != NULL)
4937: ent = ctxt->sax->getEntity(ctxt->userData, name);
4938: if (ent == NULL)
4939: ent = xmlGetPredefinedEntity(name);
4940: }
4941: /*
1.98 daniel 4942: * [ WFC: Entity Declared ]
4943: * In a document without any DTD, a document with only an
4944: * internal DTD subset which contains no parameter entity
4945: * references, or a document with "standalone='yes'", the
4946: * Name given in the entity reference must match that in an
4947: * entity declaration, except that well-formed documents
4948: * need not declare any of the following entities: amp, lt,
4949: * gt, apos, quot.
4950: * The declaration of a parameter entity must precede any
4951: * reference to it.
4952: * Similarly, the declaration of a general entity must
4953: * precede any reference to it which appears in a default
4954: * value in an attribute-list declaration. Note that if
4955: * entities are declared in the external subset or in
4956: * external parameter entities, a non-validating processor
4957: * is not obligated to read and process their declarations;
4958: * for such documents, the rule that an entity must be
4959: * declared is a well-formedness constraint only if
4960: * standalone='yes'.
1.59 daniel 4961: */
1.77 daniel 4962: if (ent == NULL) {
1.98 daniel 4963: if ((ctxt->standalone == 1) ||
4964: ((ctxt->hasExternalSubset == 0) &&
4965: (ctxt->hasPErefs == 0))) {
1.230 veillard 4966: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.98 daniel 4967: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 4968: ctxt->sax->error(ctxt->userData,
4969: "Entity '%s' not defined\n", name);
4970: ctxt->wellFormed = 0;
1.180 daniel 4971: ctxt->disableSAX = 1;
1.77 daniel 4972: } else {
1.230 veillard 4973: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
1.98 daniel 4974: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
4975: ctxt->sax->warning(ctxt->userData,
4976: "Entity '%s' not defined\n", name);
1.59 daniel 4977: }
1.77 daniel 4978: }
1.59 daniel 4979:
4980: /*
1.98 daniel 4981: * [ WFC: Parsed Entity ]
4982: * An entity reference must not contain the name of an
4983: * unparsed entity
4984: */
1.159 daniel 4985: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.230 veillard 4986: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 4987: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4988: ctxt->sax->error(ctxt->userData,
4989: "Entity reference to unparsed entity %s\n", name);
4990: ctxt->wellFormed = 0;
1.180 daniel 4991: ctxt->disableSAX = 1;
1.98 daniel 4992: }
4993:
4994: /*
4995: * [ WFC: No External Entity References ]
4996: * Attribute values cannot contain direct or indirect
4997: * entity references to external entities.
4998: */
4999: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 5000: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.230 veillard 5001: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
1.98 daniel 5002: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5003: ctxt->sax->error(ctxt->userData,
5004: "Attribute references external entity '%s'\n", name);
5005: ctxt->wellFormed = 0;
1.180 daniel 5006: ctxt->disableSAX = 1;
1.98 daniel 5007: }
5008: /*
5009: * [ WFC: No < in Attribute Values ]
5010: * The replacement text of any entity referred to directly or
5011: * indirectly in an attribute value (other than "<") must
5012: * not contain a <.
1.59 daniel 5013: */
1.98 daniel 5014: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.116 daniel 5015: (ent != NULL) &&
1.236 veillard 5016: (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
1.98 daniel 5017: (ent->content != NULL) &&
5018: (xmlStrchr(ent->content, '<'))) {
1.230 veillard 5019: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.98 daniel 5020: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5021: ctxt->sax->error(ctxt->userData,
5022: "'<' in entity '%s' is not allowed in attributes values\n", name);
5023: ctxt->wellFormed = 0;
1.180 daniel 5024: ctxt->disableSAX = 1;
1.98 daniel 5025: }
5026:
5027: /*
5028: * Internal check, no parameter entities here ...
5029: */
5030: else {
1.159 daniel 5031: switch (ent->etype) {
1.59 daniel 5032: case XML_INTERNAL_PARAMETER_ENTITY:
5033: case XML_EXTERNAL_PARAMETER_ENTITY:
1.230 veillard 5034: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
1.59 daniel 5035: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5036: ctxt->sax->error(ctxt->userData,
1.59 daniel 5037: "Attempt to reference the parameter entity '%s'\n", name);
5038: ctxt->wellFormed = 0;
1.180 daniel 5039: ctxt->disableSAX = 1;
5040: break;
5041: default:
1.59 daniel 5042: break;
5043: }
5044: }
5045:
5046: /*
1.98 daniel 5047: * [ WFC: No Recursion ]
1.229 veillard 5048: * A parsed entity must not contain a recursive reference
1.117 daniel 5049: * to itself, either directly or indirectly.
1.229 veillard 5050: * Done somewhere else
1.59 daniel 5051: */
1.77 daniel 5052:
1.24 daniel 5053: } else {
1.230 veillard 5054: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.55 daniel 5055: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5056: ctxt->sax->error(ctxt->userData,
1.59 daniel 5057: "xmlParseEntityRef: expecting ';'\n");
5058: ctxt->wellFormed = 0;
1.180 daniel 5059: ctxt->disableSAX = 1;
1.24 daniel 5060: }
1.119 daniel 5061: xmlFree(name);
1.24 daniel 5062: }
5063: }
1.77 daniel 5064: return(ent);
1.24 daniel 5065: }
1.229 veillard 5066:
1.135 daniel 5067: /**
5068: * xmlParseStringEntityRef:
5069: * @ctxt: an XML parser context
5070: * @str: a pointer to an index in the string
5071: *
5072: * parse ENTITY references declarations, but this version parses it from
5073: * a string value.
5074: *
5075: * [68] EntityRef ::= '&' Name ';'
5076: *
5077: * [ WFC: Entity Declared ]
5078: * In a document without any DTD, a document with only an internal DTD
5079: * subset which contains no parameter entity references, or a document
5080: * with "standalone='yes'", the Name given in the entity reference
5081: * must match that in an entity declaration, except that well-formed
5082: * documents need not declare any of the following entities: amp, lt,
5083: * gt, apos, quot. The declaration of a parameter entity must precede
5084: * any reference to it. Similarly, the declaration of a general entity
5085: * must precede any reference to it which appears in a default value in an
5086: * attribute-list declaration. Note that if entities are declared in the
5087: * external subset or in external parameter entities, a non-validating
5088: * processor is not obligated to read and process their declarations;
5089: * for such documents, the rule that an entity must be declared is a
5090: * well-formedness constraint only if standalone='yes'.
5091: *
5092: * [ WFC: Parsed Entity ]
5093: * An entity reference must not contain the name of an unparsed entity
5094: *
5095: * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5096: * is updated to the current location in the string.
5097: */
5098: xmlEntityPtr
5099: xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5100: xmlChar *name;
5101: const xmlChar *ptr;
5102: xmlChar cur;
5103: xmlEntityPtr ent = NULL;
5104:
1.156 daniel 5105: if ((str == NULL) || (*str == NULL))
5106: return(NULL);
1.135 daniel 5107: ptr = *str;
5108: cur = *ptr;
5109: if (cur == '&') {
5110: ptr++;
5111: cur = *ptr;
5112: name = xmlParseStringName(ctxt, &ptr);
5113: if (name == NULL) {
1.230 veillard 5114: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.135 daniel 5115: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5116: ctxt->sax->error(ctxt->userData,
5117: "xmlParseEntityRef: no name\n");
5118: ctxt->wellFormed = 0;
1.180 daniel 5119: ctxt->disableSAX = 1;
1.135 daniel 5120: } else {
1.185 daniel 5121: if (*ptr == ';') {
5122: ptr++;
1.135 daniel 5123: /*
5124: * Ask first SAX for entity resolution, otherwise try the
5125: * predefined set.
5126: */
5127: if (ctxt->sax != NULL) {
5128: if (ctxt->sax->getEntity != NULL)
5129: ent = ctxt->sax->getEntity(ctxt->userData, name);
5130: if (ent == NULL)
5131: ent = xmlGetPredefinedEntity(name);
5132: }
5133: /*
5134: * [ WFC: Entity Declared ]
5135: * In a document without any DTD, a document with only an
5136: * internal DTD subset which contains no parameter entity
5137: * references, or a document with "standalone='yes'", the
5138: * Name given in the entity reference must match that in an
5139: * entity declaration, except that well-formed documents
5140: * need not declare any of the following entities: amp, lt,
5141: * gt, apos, quot.
5142: * The declaration of a parameter entity must precede any
5143: * reference to it.
5144: * Similarly, the declaration of a general entity must
5145: * precede any reference to it which appears in a default
5146: * value in an attribute-list declaration. Note that if
5147: * entities are declared in the external subset or in
5148: * external parameter entities, a non-validating processor
5149: * is not obligated to read and process their declarations;
5150: * for such documents, the rule that an entity must be
5151: * declared is a well-formedness constraint only if
5152: * standalone='yes'.
5153: */
5154: if (ent == NULL) {
5155: if ((ctxt->standalone == 1) ||
5156: ((ctxt->hasExternalSubset == 0) &&
5157: (ctxt->hasPErefs == 0))) {
1.230 veillard 5158: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.135 daniel 5159: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5160: ctxt->sax->error(ctxt->userData,
5161: "Entity '%s' not defined\n", name);
5162: ctxt->wellFormed = 0;
1.180 daniel 5163: ctxt->disableSAX = 1;
1.135 daniel 5164: } else {
1.230 veillard 5165: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
1.135 daniel 5166: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5167: ctxt->sax->warning(ctxt->userData,
5168: "Entity '%s' not defined\n", name);
5169: }
5170: }
5171:
5172: /*
5173: * [ WFC: Parsed Entity ]
5174: * An entity reference must not contain the name of an
5175: * unparsed entity
5176: */
1.159 daniel 5177: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.230 veillard 5178: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.135 daniel 5179: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5180: ctxt->sax->error(ctxt->userData,
5181: "Entity reference to unparsed entity %s\n", name);
5182: ctxt->wellFormed = 0;
1.180 daniel 5183: ctxt->disableSAX = 1;
1.135 daniel 5184: }
5185:
5186: /*
5187: * [ WFC: No External Entity References ]
5188: * Attribute values cannot contain direct or indirect
5189: * entity references to external entities.
5190: */
5191: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 5192: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.230 veillard 5193: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
1.135 daniel 5194: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5195: ctxt->sax->error(ctxt->userData,
5196: "Attribute references external entity '%s'\n", name);
5197: ctxt->wellFormed = 0;
1.180 daniel 5198: ctxt->disableSAX = 1;
1.135 daniel 5199: }
5200: /*
5201: * [ WFC: No < in Attribute Values ]
5202: * The replacement text of any entity referred to directly or
5203: * indirectly in an attribute value (other than "<") must
5204: * not contain a <.
5205: */
5206: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5207: (ent != NULL) &&
1.236 veillard 5208: (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
1.135 daniel 5209: (ent->content != NULL) &&
5210: (xmlStrchr(ent->content, '<'))) {
1.230 veillard 5211: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.135 daniel 5212: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5213: ctxt->sax->error(ctxt->userData,
5214: "'<' in entity '%s' is not allowed in attributes values\n", name);
5215: ctxt->wellFormed = 0;
1.180 daniel 5216: ctxt->disableSAX = 1;
1.135 daniel 5217: }
5218:
5219: /*
5220: * Internal check, no parameter entities here ...
5221: */
5222: else {
1.159 daniel 5223: switch (ent->etype) {
1.135 daniel 5224: case XML_INTERNAL_PARAMETER_ENTITY:
5225: case XML_EXTERNAL_PARAMETER_ENTITY:
1.230 veillard 5226: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
1.135 daniel 5227: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5228: ctxt->sax->error(ctxt->userData,
5229: "Attempt to reference the parameter entity '%s'\n", name);
5230: ctxt->wellFormed = 0;
1.180 daniel 5231: ctxt->disableSAX = 1;
5232: break;
5233: default:
1.135 daniel 5234: break;
5235: }
5236: }
5237:
5238: /*
5239: * [ WFC: No Recursion ]
1.229 veillard 5240: * A parsed entity must not contain a recursive reference
1.135 daniel 5241: * to itself, either directly or indirectly.
1.229 veillard 5242: * Done somewhwere else
1.135 daniel 5243: */
5244:
5245: } else {
1.230 veillard 5246: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.135 daniel 5247: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5248: ctxt->sax->error(ctxt->userData,
5249: "xmlParseEntityRef: expecting ';'\n");
5250: ctxt->wellFormed = 0;
1.180 daniel 5251: ctxt->disableSAX = 1;
1.135 daniel 5252: }
5253: xmlFree(name);
5254: }
5255: }
1.185 daniel 5256: *str = ptr;
1.135 daniel 5257: return(ent);
5258: }
1.24 daniel 5259:
1.50 daniel 5260: /**
5261: * xmlParsePEReference:
5262: * @ctxt: an XML parser context
5263: *
5264: * parse PEReference declarations
1.77 daniel 5265: * The entity content is handled directly by pushing it's content as
5266: * a new input stream.
1.22 daniel 5267: *
5268: * [69] PEReference ::= '%' Name ';'
1.68 daniel 5269: *
1.98 daniel 5270: * [ WFC: No Recursion ]
1.229 veillard 5271: * A parsed entity must not contain a recursive
1.98 daniel 5272: * reference to itself, either directly or indirectly.
5273: *
5274: * [ WFC: Entity Declared ]
5275: * In a document without any DTD, a document with only an internal DTD
5276: * subset which contains no parameter entity references, or a document
5277: * with "standalone='yes'", ... ... The declaration of a parameter
5278: * entity must precede any reference to it...
5279: *
5280: * [ VC: Entity Declared ]
5281: * In a document with an external subset or external parameter entities
5282: * with "standalone='no'", ... ... The declaration of a parameter entity
5283: * must precede any reference to it...
5284: *
5285: * [ WFC: In DTD ]
5286: * Parameter-entity references may only appear in the DTD.
5287: * NOTE: misleading but this is handled.
1.22 daniel 5288: */
1.77 daniel 5289: void
1.55 daniel 5290: xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 5291: xmlChar *name;
1.72 daniel 5292: xmlEntityPtr entity = NULL;
1.50 daniel 5293: xmlParserInputPtr input;
1.22 daniel 5294:
1.152 daniel 5295: if (RAW == '%') {
1.40 daniel 5296: NEXT;
1.22 daniel 5297: name = xmlParseName(ctxt);
5298: if (name == NULL) {
1.230 veillard 5299: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 5300: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5301: ctxt->sax->error(ctxt->userData,
5302: "xmlParsePEReference: no name\n");
1.59 daniel 5303: ctxt->wellFormed = 0;
1.180 daniel 5304: ctxt->disableSAX = 1;
1.22 daniel 5305: } else {
1.152 daniel 5306: if (RAW == ';') {
1.40 daniel 5307: NEXT;
1.98 daniel 5308: if ((ctxt->sax != NULL) &&
5309: (ctxt->sax->getParameterEntity != NULL))
5310: entity = ctxt->sax->getParameterEntity(ctxt->userData,
5311: name);
1.45 daniel 5312: if (entity == NULL) {
1.98 daniel 5313: /*
5314: * [ WFC: Entity Declared ]
5315: * In a document without any DTD, a document with only an
5316: * internal DTD subset which contains no parameter entity
5317: * references, or a document with "standalone='yes'", ...
5318: * ... The declaration of a parameter entity must precede
5319: * any reference to it...
5320: */
5321: if ((ctxt->standalone == 1) ||
5322: ((ctxt->hasExternalSubset == 0) &&
5323: (ctxt->hasPErefs == 0))) {
1.230 veillard 5324: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.220 veillard 5325: if ((!ctxt->disableSAX) &&
5326: (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 5327: ctxt->sax->error(ctxt->userData,
5328: "PEReference: %%%s; not found\n", name);
5329: ctxt->wellFormed = 0;
1.180 daniel 5330: ctxt->disableSAX = 1;
1.98 daniel 5331: } else {
5332: /*
5333: * [ VC: Entity Declared ]
5334: * In a document with an external subset or external
5335: * parameter entities with "standalone='no'", ...
5336: * ... The declaration of a parameter entity must precede
5337: * any reference to it...
5338: */
1.220 veillard 5339: if ((!ctxt->disableSAX) &&
5340: (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1.98 daniel 5341: ctxt->sax->warning(ctxt->userData,
5342: "PEReference: %%%s; not found\n", name);
5343: ctxt->valid = 0;
5344: }
1.50 daniel 5345: } else {
1.98 daniel 5346: /*
5347: * Internal checking in case the entity quest barfed
5348: */
1.159 daniel 5349: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5350: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.98 daniel 5351: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5352: ctxt->sax->warning(ctxt->userData,
5353: "Internal: %%%s; is not a parameter entity\n", name);
5354: } else {
1.164 daniel 5355: /*
5356: * TODO !!!
5357: * handle the extra spaces added before and after
5358: * c.f. http://www.w3.org/TR/REC-xml#as-PE
5359: */
1.98 daniel 5360: input = xmlNewEntityInputStream(ctxt, entity);
5361: xmlPushInput(ctxt, input);
1.164 daniel 5362: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5363: (RAW == '<') && (NXT(1) == '?') &&
5364: (NXT(2) == 'x') && (NXT(3) == 'm') &&
5365: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 5366: xmlParseTextDecl(ctxt);
1.193 daniel 5367: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5368: /*
5369: * The XML REC instructs us to stop parsing
5370: * right here
5371: */
5372: ctxt->instate = XML_PARSER_EOF;
5373: xmlFree(name);
5374: return;
5375: }
1.164 daniel 5376: }
5377: if (ctxt->token == 0)
5378: ctxt->token = ' ';
1.98 daniel 5379: }
1.45 daniel 5380: }
1.98 daniel 5381: ctxt->hasPErefs = 1;
1.22 daniel 5382: } else {
1.230 veillard 5383: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.55 daniel 5384: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5385: ctxt->sax->error(ctxt->userData,
1.59 daniel 5386: "xmlParsePEReference: expecting ';'\n");
5387: ctxt->wellFormed = 0;
1.180 daniel 5388: ctxt->disableSAX = 1;
1.22 daniel 5389: }
1.119 daniel 5390: xmlFree(name);
1.3 veillard 5391: }
5392: }
5393: }
5394:
1.50 daniel 5395: /**
1.135 daniel 5396: * xmlParseStringPEReference:
5397: * @ctxt: an XML parser context
5398: * @str: a pointer to an index in the string
5399: *
5400: * parse PEReference declarations
5401: *
5402: * [69] PEReference ::= '%' Name ';'
5403: *
5404: * [ WFC: No Recursion ]
1.229 veillard 5405: * A parsed entity must not contain a recursive
1.135 daniel 5406: * reference to itself, either directly or indirectly.
5407: *
5408: * [ WFC: Entity Declared ]
5409: * In a document without any DTD, a document with only an internal DTD
5410: * subset which contains no parameter entity references, or a document
5411: * with "standalone='yes'", ... ... The declaration of a parameter
5412: * entity must precede any reference to it...
5413: *
5414: * [ VC: Entity Declared ]
5415: * In a document with an external subset or external parameter entities
5416: * with "standalone='no'", ... ... The declaration of a parameter entity
5417: * must precede any reference to it...
5418: *
5419: * [ WFC: In DTD ]
5420: * Parameter-entity references may only appear in the DTD.
5421: * NOTE: misleading but this is handled.
5422: *
5423: * Returns the string of the entity content.
5424: * str is updated to the current value of the index
5425: */
5426: xmlEntityPtr
5427: xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5428: const xmlChar *ptr;
5429: xmlChar cur;
5430: xmlChar *name;
5431: xmlEntityPtr entity = NULL;
5432:
5433: if ((str == NULL) || (*str == NULL)) return(NULL);
5434: ptr = *str;
5435: cur = *ptr;
5436: if (cur == '%') {
5437: ptr++;
5438: cur = *ptr;
5439: name = xmlParseStringName(ctxt, &ptr);
5440: if (name == NULL) {
1.230 veillard 5441: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.135 daniel 5442: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5443: ctxt->sax->error(ctxt->userData,
5444: "xmlParseStringPEReference: no name\n");
5445: ctxt->wellFormed = 0;
1.180 daniel 5446: ctxt->disableSAX = 1;
1.135 daniel 5447: } else {
5448: cur = *ptr;
5449: if (cur == ';') {
5450: ptr++;
5451: cur = *ptr;
5452: if ((ctxt->sax != NULL) &&
5453: (ctxt->sax->getParameterEntity != NULL))
5454: entity = ctxt->sax->getParameterEntity(ctxt->userData,
5455: name);
5456: if (entity == NULL) {
5457: /*
5458: * [ WFC: Entity Declared ]
5459: * In a document without any DTD, a document with only an
5460: * internal DTD subset which contains no parameter entity
5461: * references, or a document with "standalone='yes'", ...
5462: * ... The declaration of a parameter entity must precede
5463: * any reference to it...
5464: */
5465: if ((ctxt->standalone == 1) ||
5466: ((ctxt->hasExternalSubset == 0) &&
5467: (ctxt->hasPErefs == 0))) {
1.230 veillard 5468: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.135 daniel 5469: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5470: ctxt->sax->error(ctxt->userData,
5471: "PEReference: %%%s; not found\n", name);
5472: ctxt->wellFormed = 0;
1.180 daniel 5473: ctxt->disableSAX = 1;
1.135 daniel 5474: } else {
5475: /*
5476: * [ VC: Entity Declared ]
5477: * In a document with an external subset or external
5478: * parameter entities with "standalone='no'", ...
5479: * ... The declaration of a parameter entity must
5480: * precede any reference to it...
5481: */
5482: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5483: ctxt->sax->warning(ctxt->userData,
5484: "PEReference: %%%s; not found\n", name);
5485: ctxt->valid = 0;
5486: }
5487: } else {
5488: /*
5489: * Internal checking in case the entity quest barfed
5490: */
1.159 daniel 5491: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5492: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.135 daniel 5493: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5494: ctxt->sax->warning(ctxt->userData,
5495: "Internal: %%%s; is not a parameter entity\n", name);
5496: }
5497: }
5498: ctxt->hasPErefs = 1;
5499: } else {
1.230 veillard 5500: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.135 daniel 5501: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5502: ctxt->sax->error(ctxt->userData,
5503: "xmlParseStringPEReference: expecting ';'\n");
5504: ctxt->wellFormed = 0;
1.180 daniel 5505: ctxt->disableSAX = 1;
1.135 daniel 5506: }
5507: xmlFree(name);
5508: }
5509: }
5510: *str = ptr;
5511: return(entity);
5512: }
5513:
5514: /**
1.181 daniel 5515: * xmlParseDocTypeDecl:
1.50 daniel 5516: * @ctxt: an XML parser context
5517: *
5518: * parse a DOCTYPE declaration
1.21 daniel 5519: *
1.22 daniel 5520: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
5521: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.98 daniel 5522: *
5523: * [ VC: Root Element Type ]
1.99 daniel 5524: * The Name in the document type declaration must match the element
1.98 daniel 5525: * type of the root element.
1.21 daniel 5526: */
5527:
1.55 daniel 5528: void
5529: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.165 daniel 5530: xmlChar *name = NULL;
1.123 daniel 5531: xmlChar *ExternalID = NULL;
5532: xmlChar *URI = NULL;
1.21 daniel 5533:
5534: /*
5535: * We know that '<!DOCTYPE' has been detected.
5536: */
1.40 daniel 5537: SKIP(9);
1.21 daniel 5538:
1.42 daniel 5539: SKIP_BLANKS;
1.21 daniel 5540:
5541: /*
5542: * Parse the DOCTYPE name.
5543: */
5544: name = xmlParseName(ctxt);
5545: if (name == NULL) {
1.230 veillard 5546: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 5547: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5548: ctxt->sax->error(ctxt->userData,
5549: "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.59 daniel 5550: ctxt->wellFormed = 0;
1.180 daniel 5551: ctxt->disableSAX = 1;
1.21 daniel 5552: }
1.165 daniel 5553: ctxt->intSubName = name;
1.21 daniel 5554:
1.42 daniel 5555: SKIP_BLANKS;
1.21 daniel 5556:
5557: /*
1.22 daniel 5558: * Check for SystemID and ExternalID
5559: */
1.67 daniel 5560: URI = xmlParseExternalID(ctxt, &ExternalID, 1);
1.98 daniel 5561:
5562: if ((URI != NULL) || (ExternalID != NULL)) {
5563: ctxt->hasExternalSubset = 1;
5564: }
1.165 daniel 5565: ctxt->extSubURI = URI;
5566: ctxt->extSubSystem = ExternalID;
1.98 daniel 5567:
1.42 daniel 5568: SKIP_BLANKS;
1.36 daniel 5569:
1.76 daniel 5570: /*
1.165 daniel 5571: * Create and update the internal subset.
1.76 daniel 5572: */
1.171 daniel 5573: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
5574: (!ctxt->disableSAX))
1.74 daniel 5575: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
1.22 daniel 5576:
5577: /*
1.140 daniel 5578: * Is there any internal subset declarations ?
5579: * they are handled separately in xmlParseInternalSubset()
5580: */
1.152 daniel 5581: if (RAW == '[')
1.140 daniel 5582: return;
5583:
5584: /*
5585: * We should be at the end of the DOCTYPE declaration.
5586: */
1.152 daniel 5587: if (RAW != '>') {
1.230 veillard 5588: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
1.140 daniel 5589: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5590: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5591: ctxt->wellFormed = 0;
1.180 daniel 5592: ctxt->disableSAX = 1;
1.140 daniel 5593: }
5594: NEXT;
5595: }
5596:
5597: /**
1.181 daniel 5598: * xmlParseInternalsubset:
1.140 daniel 5599: * @ctxt: an XML parser context
5600: *
5601: * parse the internal subset declaration
5602: *
5603: * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5604: */
5605:
5606: void
5607: xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
5608: /*
1.22 daniel 5609: * Is there any DTD definition ?
5610: */
1.152 daniel 5611: if (RAW == '[') {
1.96 daniel 5612: ctxt->instate = XML_PARSER_DTD;
1.40 daniel 5613: NEXT;
1.22 daniel 5614: /*
5615: * Parse the succession of Markup declarations and
5616: * PEReferences.
5617: * Subsequence (markupdecl | PEReference | S)*
5618: */
1.152 daniel 5619: while (RAW != ']') {
1.123 daniel 5620: const xmlChar *check = CUR_PTR;
1.115 daniel 5621: int cons = ctxt->input->consumed;
1.22 daniel 5622:
1.42 daniel 5623: SKIP_BLANKS;
1.22 daniel 5624: xmlParseMarkupDecl(ctxt);
1.50 daniel 5625: xmlParsePEReference(ctxt);
1.22 daniel 5626:
1.115 daniel 5627: /*
5628: * Pop-up of finished entities.
5629: */
1.152 daniel 5630: while ((RAW == 0) && (ctxt->inputNr > 1))
1.115 daniel 5631: xmlPopInput(ctxt);
5632:
1.118 daniel 5633: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
1.230 veillard 5634: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 5635: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5636: ctxt->sax->error(ctxt->userData,
1.140 daniel 5637: "xmlParseInternalSubset: error detected in Markup declaration\n");
1.59 daniel 5638: ctxt->wellFormed = 0;
1.180 daniel 5639: ctxt->disableSAX = 1;
1.22 daniel 5640: break;
5641: }
5642: }
1.209 veillard 5643: if (RAW == ']') {
5644: NEXT;
5645: SKIP_BLANKS;
5646: }
1.22 daniel 5647: }
5648:
5649: /*
5650: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 5651: */
1.152 daniel 5652: if (RAW != '>') {
1.230 veillard 5653: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
1.55 daniel 5654: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5655: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
1.59 daniel 5656: ctxt->wellFormed = 0;
1.180 daniel 5657: ctxt->disableSAX = 1;
1.21 daniel 5658: }
1.40 daniel 5659: NEXT;
1.21 daniel 5660: }
5661:
1.50 daniel 5662: /**
5663: * xmlParseAttribute:
5664: * @ctxt: an XML parser context
1.123 daniel 5665: * @value: a xmlChar ** used to store the value of the attribute
1.50 daniel 5666: *
5667: * parse an attribute
1.3 veillard 5668: *
1.22 daniel 5669: * [41] Attribute ::= Name Eq AttValue
5670: *
1.98 daniel 5671: * [ WFC: No External Entity References ]
5672: * Attribute values cannot contain direct or indirect entity references
5673: * to external entities.
5674: *
5675: * [ WFC: No < in Attribute Values ]
5676: * The replacement text of any entity referred to directly or indirectly in
5677: * an attribute value (other than "<") must not contain a <.
5678: *
5679: * [ VC: Attribute Value Type ]
1.117 daniel 5680: * The attribute must have been declared; the value must be of the type
1.99 daniel 5681: * declared for it.
1.98 daniel 5682: *
1.22 daniel 5683: * [25] Eq ::= S? '=' S?
5684: *
1.29 daniel 5685: * With namespace:
5686: *
5687: * [NS 11] Attribute ::= QName Eq AttValue
1.43 daniel 5688: *
5689: * Also the case QName == xmlns:??? is handled independently as a namespace
5690: * definition.
1.69 daniel 5691: *
1.72 daniel 5692: * Returns the attribute name, and the value in *value.
1.3 veillard 5693: */
5694:
1.123 daniel 5695: xmlChar *
5696: xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
5697: xmlChar *name, *val;
1.3 veillard 5698:
1.72 daniel 5699: *value = NULL;
5700: name = xmlParseName(ctxt);
1.22 daniel 5701: if (name == NULL) {
1.230 veillard 5702: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 5703: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5704: ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
1.59 daniel 5705: ctxt->wellFormed = 0;
1.180 daniel 5706: ctxt->disableSAX = 1;
1.52 daniel 5707: return(NULL);
1.3 veillard 5708: }
5709:
5710: /*
1.29 daniel 5711: * read the value
1.3 veillard 5712: */
1.42 daniel 5713: SKIP_BLANKS;
1.152 daniel 5714: if (RAW == '=') {
1.40 daniel 5715: NEXT;
1.42 daniel 5716: SKIP_BLANKS;
1.72 daniel 5717: val = xmlParseAttValue(ctxt);
1.96 daniel 5718: ctxt->instate = XML_PARSER_CONTENT;
1.29 daniel 5719: } else {
1.230 veillard 5720: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
1.55 daniel 5721: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5722: ctxt->sax->error(ctxt->userData,
1.59 daniel 5723: "Specification mandate value for attribute %s\n", name);
5724: ctxt->wellFormed = 0;
1.180 daniel 5725: ctxt->disableSAX = 1;
1.170 daniel 5726: xmlFree(name);
1.52 daniel 5727: return(NULL);
1.43 daniel 5728: }
5729:
1.172 daniel 5730: /*
5731: * Check that xml:lang conforms to the specification
1.222 veillard 5732: * No more registered as an error, just generate a warning now
5733: * since this was deprecated in XML second edition
1.172 daniel 5734: */
1.236 veillard 5735: if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
1.172 daniel 5736: if (!xmlCheckLanguageID(val)) {
1.222 veillard 5737: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5738: ctxt->sax->warning(ctxt->userData,
5739: "Malformed value for xml:lang : %s\n", val);
1.172 daniel 5740: }
5741: }
5742:
1.176 daniel 5743: /*
5744: * Check that xml:space conforms to the specification
5745: */
1.236 veillard 5746: if (xmlStrEqual(name, BAD_CAST "xml:space")) {
5747: if (xmlStrEqual(val, BAD_CAST "default"))
1.176 daniel 5748: *(ctxt->space) = 0;
1.236 veillard 5749: else if (xmlStrEqual(val, BAD_CAST "preserve"))
1.176 daniel 5750: *(ctxt->space) = 1;
5751: else {
1.230 veillard 5752: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
1.176 daniel 5753: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5754: ctxt->sax->error(ctxt->userData,
5755: "Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
5756: val);
5757: ctxt->wellFormed = 0;
1.180 daniel 5758: ctxt->disableSAX = 1;
1.176 daniel 5759: }
5760: }
5761:
1.72 daniel 5762: *value = val;
5763: return(name);
1.3 veillard 5764: }
5765:
1.50 daniel 5766: /**
5767: * xmlParseStartTag:
5768: * @ctxt: an XML parser context
5769: *
5770: * parse a start of tag either for rule element or
5771: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 5772: *
5773: * [40] STag ::= '<' Name (S Attribute)* S? '>'
5774: *
1.98 daniel 5775: * [ WFC: Unique Att Spec ]
5776: * No attribute name may appear more than once in the same start-tag or
5777: * empty-element tag.
5778: *
1.29 daniel 5779: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
5780: *
1.98 daniel 5781: * [ WFC: Unique Att Spec ]
5782: * No attribute name may appear more than once in the same start-tag or
5783: * empty-element tag.
5784: *
1.29 daniel 5785: * With namespace:
5786: *
5787: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
5788: *
5789: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.83 daniel 5790: *
1.192 daniel 5791: * Returns the element name parsed
1.2 veillard 5792: */
5793:
1.123 daniel 5794: xmlChar *
1.69 daniel 5795: xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 5796: xmlChar *name;
5797: xmlChar *attname;
5798: xmlChar *attvalue;
5799: const xmlChar **atts = NULL;
1.72 daniel 5800: int nbatts = 0;
5801: int maxatts = 0;
5802: int i;
1.2 veillard 5803:
1.152 daniel 5804: if (RAW != '<') return(NULL);
1.40 daniel 5805: NEXT;
1.3 veillard 5806:
1.72 daniel 5807: name = xmlParseName(ctxt);
1.59 daniel 5808: if (name == NULL) {
1.230 veillard 5809: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5810: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5811: ctxt->sax->error(ctxt->userData,
1.59 daniel 5812: "xmlParseStartTag: invalid element name\n");
5813: ctxt->wellFormed = 0;
1.180 daniel 5814: ctxt->disableSAX = 1;
1.83 daniel 5815: return(NULL);
1.50 daniel 5816: }
5817:
5818: /*
1.3 veillard 5819: * Now parse the attributes, it ends up with the ending
5820: *
5821: * (S Attribute)* S?
5822: */
1.42 daniel 5823: SKIP_BLANKS;
1.91 daniel 5824: GROW;
1.168 daniel 5825:
1.153 daniel 5826: while ((IS_CHAR(RAW)) &&
1.152 daniel 5827: (RAW != '>') &&
5828: ((RAW != '/') || (NXT(1) != '>'))) {
1.123 daniel 5829: const xmlChar *q = CUR_PTR;
1.91 daniel 5830: int cons = ctxt->input->consumed;
1.29 daniel 5831:
1.72 daniel 5832: attname = xmlParseAttribute(ctxt, &attvalue);
5833: if ((attname != NULL) && (attvalue != NULL)) {
5834: /*
1.98 daniel 5835: * [ WFC: Unique Att Spec ]
5836: * No attribute name may appear more than once in the same
5837: * start-tag or empty-element tag.
1.72 daniel 5838: */
5839: for (i = 0; i < nbatts;i += 2) {
1.236 veillard 5840: if (xmlStrEqual(atts[i], attname)) {
1.230 veillard 5841: ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
1.72 daniel 5842: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 5843: ctxt->sax->error(ctxt->userData,
5844: "Attribute %s redefined\n",
5845: attname);
1.72 daniel 5846: ctxt->wellFormed = 0;
1.180 daniel 5847: ctxt->disableSAX = 1;
1.119 daniel 5848: xmlFree(attname);
5849: xmlFree(attvalue);
1.98 daniel 5850: goto failed;
1.72 daniel 5851: }
5852: }
5853:
5854: /*
5855: * Add the pair to atts
5856: */
5857: if (atts == NULL) {
5858: maxatts = 10;
1.123 daniel 5859: atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
1.72 daniel 5860: if (atts == NULL) {
1.86 daniel 5861: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 5862: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 5863: return(NULL);
1.72 daniel 5864: }
1.127 daniel 5865: } else if (nbatts + 4 > maxatts) {
1.72 daniel 5866: maxatts *= 2;
1.233 veillard 5867: atts = (const xmlChar **) xmlRealloc((void *) atts,
5868: maxatts * sizeof(xmlChar *));
1.72 daniel 5869: if (atts == NULL) {
1.86 daniel 5870: fprintf(stderr, "realloc of %ld byte failed\n",
1.123 daniel 5871: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 5872: return(NULL);
1.72 daniel 5873: }
5874: }
5875: atts[nbatts++] = attname;
5876: atts[nbatts++] = attvalue;
5877: atts[nbatts] = NULL;
5878: atts[nbatts + 1] = NULL;
1.176 daniel 5879: } else {
5880: if (attname != NULL)
5881: xmlFree(attname);
5882: if (attvalue != NULL)
5883: xmlFree(attvalue);
1.72 daniel 5884: }
5885:
1.116 daniel 5886: failed:
1.168 daniel 5887:
5888: if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
5889: break;
5890: if (!IS_BLANK(RAW)) {
1.230 veillard 5891: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.168 daniel 5892: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5893: ctxt->sax->error(ctxt->userData,
5894: "attributes construct error\n");
5895: ctxt->wellFormed = 0;
1.180 daniel 5896: ctxt->disableSAX = 1;
1.168 daniel 5897: }
1.42 daniel 5898: SKIP_BLANKS;
1.91 daniel 5899: if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
1.230 veillard 5900: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 5901: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5902: ctxt->sax->error(ctxt->userData,
1.31 daniel 5903: "xmlParseStartTag: problem parsing attributes\n");
1.59 daniel 5904: ctxt->wellFormed = 0;
1.180 daniel 5905: ctxt->disableSAX = 1;
1.29 daniel 5906: break;
1.3 veillard 5907: }
1.91 daniel 5908: GROW;
1.3 veillard 5909: }
5910:
1.43 daniel 5911: /*
1.72 daniel 5912: * SAX: Start of Element !
1.43 daniel 5913: */
1.171 daniel 5914: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
5915: (!ctxt->disableSAX))
1.74 daniel 5916: ctxt->sax->startElement(ctxt->userData, name, atts);
1.43 daniel 5917:
1.72 daniel 5918: if (atts != NULL) {
1.123 daniel 5919: for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
1.233 veillard 5920: xmlFree((void *) atts);
1.72 daniel 5921: }
1.83 daniel 5922: return(name);
1.3 veillard 5923: }
5924:
1.50 daniel 5925: /**
5926: * xmlParseEndTag:
5927: * @ctxt: an XML parser context
5928: *
5929: * parse an end of tag
1.27 daniel 5930: *
5931: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 5932: *
5933: * With namespace
5934: *
1.72 daniel 5935: * [NS 9] ETag ::= '</' QName S? '>'
1.7 veillard 5936: */
5937:
1.55 daniel 5938: void
1.140 daniel 5939: xmlParseEndTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 5940: xmlChar *name;
1.140 daniel 5941: xmlChar *oldname;
1.7 veillard 5942:
1.91 daniel 5943: GROW;
1.152 daniel 5944: if ((RAW != '<') || (NXT(1) != '/')) {
1.230 veillard 5945: ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
1.55 daniel 5946: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5947: ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
1.59 daniel 5948: ctxt->wellFormed = 0;
1.180 daniel 5949: ctxt->disableSAX = 1;
1.27 daniel 5950: return;
5951: }
1.40 daniel 5952: SKIP(2);
1.7 veillard 5953:
1.72 daniel 5954: name = xmlParseName(ctxt);
1.7 veillard 5955:
5956: /*
5957: * We should definitely be at the ending "S? '>'" part
5958: */
1.91 daniel 5959: GROW;
1.42 daniel 5960: SKIP_BLANKS;
1.153 daniel 5961: if ((!IS_CHAR(RAW)) || (RAW != '>')) {
1.230 veillard 5962: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.55 daniel 5963: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5964: ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
1.59 daniel 5965: ctxt->wellFormed = 0;
1.180 daniel 5966: ctxt->disableSAX = 1;
1.7 veillard 5967: } else
1.40 daniel 5968: NEXT;
1.7 veillard 5969:
1.72 daniel 5970: /*
1.98 daniel 5971: * [ WFC: Element Type Match ]
5972: * The Name in an element's end-tag must match the element type in the
5973: * start-tag.
5974: *
1.83 daniel 5975: */
1.147 daniel 5976: if ((name == NULL) || (ctxt->name == NULL) ||
1.236 veillard 5977: (!xmlStrEqual(name, ctxt->name))) {
1.230 veillard 5978: ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
1.147 daniel 5979: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
5980: if ((name != NULL) && (ctxt->name != NULL)) {
5981: ctxt->sax->error(ctxt->userData,
5982: "Opening and ending tag mismatch: %s and %s\n",
5983: ctxt->name, name);
5984: } else if (ctxt->name != NULL) {
5985: ctxt->sax->error(ctxt->userData,
5986: "Ending tag eror for: %s\n", ctxt->name);
5987: } else {
5988: ctxt->sax->error(ctxt->userData,
5989: "Ending tag error: internal error ???\n");
5990: }
1.122 daniel 5991:
1.147 daniel 5992: }
1.83 daniel 5993: ctxt->wellFormed = 0;
1.180 daniel 5994: ctxt->disableSAX = 1;
1.83 daniel 5995: }
5996:
5997: /*
1.72 daniel 5998: * SAX: End of Tag
5999: */
1.171 daniel 6000: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6001: (!ctxt->disableSAX))
1.74 daniel 6002: ctxt->sax->endElement(ctxt->userData, name);
1.72 daniel 6003:
6004: if (name != NULL)
1.119 daniel 6005: xmlFree(name);
1.140 daniel 6006: oldname = namePop(ctxt);
1.176 daniel 6007: spacePop(ctxt);
1.140 daniel 6008: if (oldname != NULL) {
6009: #ifdef DEBUG_STACK
6010: fprintf(stderr,"Close: popped %s\n", oldname);
6011: #endif
6012: xmlFree(oldname);
6013: }
1.7 veillard 6014: return;
6015: }
6016:
1.50 daniel 6017: /**
6018: * xmlParseCDSect:
6019: * @ctxt: an XML parser context
6020: *
6021: * Parse escaped pure raw content.
1.29 daniel 6022: *
6023: * [18] CDSect ::= CDStart CData CDEnd
6024: *
6025: * [19] CDStart ::= '<![CDATA['
6026: *
6027: * [20] Data ::= (Char* - (Char* ']]>' Char*))
6028: *
6029: * [21] CDEnd ::= ']]>'
1.3 veillard 6030: */
1.55 daniel 6031: void
6032: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.135 daniel 6033: xmlChar *buf = NULL;
6034: int len = 0;
1.140 daniel 6035: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 6036: int r, rl;
6037: int s, sl;
6038: int cur, l;
1.234 veillard 6039: int count = 0;
1.3 veillard 6040:
1.106 daniel 6041: if ((NXT(0) == '<') && (NXT(1) == '!') &&
1.40 daniel 6042: (NXT(2) == '[') && (NXT(3) == 'C') &&
6043: (NXT(4) == 'D') && (NXT(5) == 'A') &&
6044: (NXT(6) == 'T') && (NXT(7) == 'A') &&
6045: (NXT(8) == '[')) {
6046: SKIP(9);
1.29 daniel 6047: } else
1.45 daniel 6048: return;
1.109 daniel 6049:
6050: ctxt->instate = XML_PARSER_CDATA_SECTION;
1.152 daniel 6051: r = CUR_CHAR(rl);
6052: if (!IS_CHAR(r)) {
1.230 veillard 6053: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.55 daniel 6054: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6055: ctxt->sax->error(ctxt->userData,
1.135 daniel 6056: "CData section not finished\n");
1.59 daniel 6057: ctxt->wellFormed = 0;
1.180 daniel 6058: ctxt->disableSAX = 1;
1.109 daniel 6059: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 6060: return;
1.3 veillard 6061: }
1.152 daniel 6062: NEXTL(rl);
6063: s = CUR_CHAR(sl);
6064: if (!IS_CHAR(s)) {
1.230 veillard 6065: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.55 daniel 6066: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6067: ctxt->sax->error(ctxt->userData,
1.135 daniel 6068: "CData section not finished\n");
1.59 daniel 6069: ctxt->wellFormed = 0;
1.180 daniel 6070: ctxt->disableSAX = 1;
1.109 daniel 6071: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 6072: return;
1.3 veillard 6073: }
1.152 daniel 6074: NEXTL(sl);
6075: cur = CUR_CHAR(l);
1.135 daniel 6076: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6077: if (buf == NULL) {
6078: fprintf(stderr, "malloc of %d byte failed\n", size);
6079: return;
6080: }
1.108 veillard 6081: while (IS_CHAR(cur) &&
1.110 daniel 6082: ((r != ']') || (s != ']') || (cur != '>'))) {
1.152 daniel 6083: if (len + 5 >= size) {
1.135 daniel 6084: size *= 2;
1.204 veillard 6085: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 6086: if (buf == NULL) {
6087: fprintf(stderr, "realloc of %d byte failed\n", size);
6088: return;
6089: }
6090: }
1.152 daniel 6091: COPY_BUF(rl,buf,len,r);
1.110 daniel 6092: r = s;
1.152 daniel 6093: rl = sl;
1.110 daniel 6094: s = cur;
1.152 daniel 6095: sl = l;
1.234 veillard 6096: count++;
6097: if (count > 50) {
6098: GROW;
6099: count = 0;
6100: }
1.152 daniel 6101: NEXTL(l);
6102: cur = CUR_CHAR(l);
1.3 veillard 6103: }
1.135 daniel 6104: buf[len] = 0;
1.109 daniel 6105: ctxt->instate = XML_PARSER_CONTENT;
1.152 daniel 6106: if (cur != '>') {
1.230 veillard 6107: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.55 daniel 6108: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6109: ctxt->sax->error(ctxt->userData,
1.135 daniel 6110: "CData section not finished\n%.50s\n", buf);
1.59 daniel 6111: ctxt->wellFormed = 0;
1.180 daniel 6112: ctxt->disableSAX = 1;
1.135 daniel 6113: xmlFree(buf);
1.45 daniel 6114: return;
1.3 veillard 6115: }
1.152 daniel 6116: NEXTL(l);
1.16 daniel 6117:
1.45 daniel 6118: /*
1.135 daniel 6119: * Ok the buffer is to be consumed as cdata.
1.45 daniel 6120: */
1.171 daniel 6121: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.107 daniel 6122: if (ctxt->sax->cdataBlock != NULL)
1.135 daniel 6123: ctxt->sax->cdataBlock(ctxt->userData, buf, len);
1.45 daniel 6124: }
1.135 daniel 6125: xmlFree(buf);
1.2 veillard 6126: }
6127:
1.50 daniel 6128: /**
6129: * xmlParseContent:
6130: * @ctxt: an XML parser context
6131: *
6132: * Parse a content:
1.2 veillard 6133: *
1.27 daniel 6134: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 6135: */
6136:
1.55 daniel 6137: void
6138: xmlParseContent(xmlParserCtxtPtr ctxt) {
1.97 daniel 6139: GROW;
1.176 daniel 6140: while (((RAW != 0) || (ctxt->token != 0)) &&
6141: ((RAW != '<') || (NXT(1) != '/'))) {
1.123 daniel 6142: const xmlChar *test = CUR_PTR;
1.91 daniel 6143: int cons = ctxt->input->consumed;
1.123 daniel 6144: xmlChar tok = ctxt->token;
1.27 daniel 6145:
6146: /*
1.152 daniel 6147: * Handle possible processed charrefs.
6148: */
6149: if (ctxt->token != 0) {
6150: xmlParseCharData(ctxt, 0);
6151: }
6152: /*
1.27 daniel 6153: * First case : a Processing Instruction.
6154: */
1.152 daniel 6155: else if ((RAW == '<') && (NXT(1) == '?')) {
1.27 daniel 6156: xmlParsePI(ctxt);
6157: }
1.72 daniel 6158:
1.27 daniel 6159: /*
6160: * Second case : a CDSection
6161: */
1.152 daniel 6162: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 6163: (NXT(2) == '[') && (NXT(3) == 'C') &&
6164: (NXT(4) == 'D') && (NXT(5) == 'A') &&
6165: (NXT(6) == 'T') && (NXT(7) == 'A') &&
6166: (NXT(8) == '[')) {
1.45 daniel 6167: xmlParseCDSect(ctxt);
1.27 daniel 6168: }
1.72 daniel 6169:
1.27 daniel 6170: /*
6171: * Third case : a comment
6172: */
1.152 daniel 6173: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 6174: (NXT(2) == '-') && (NXT(3) == '-')) {
1.114 daniel 6175: xmlParseComment(ctxt);
1.97 daniel 6176: ctxt->instate = XML_PARSER_CONTENT;
1.27 daniel 6177: }
1.72 daniel 6178:
1.27 daniel 6179: /*
6180: * Fourth case : a sub-element.
6181: */
1.152 daniel 6182: else if (RAW == '<') {
1.72 daniel 6183: xmlParseElement(ctxt);
1.45 daniel 6184: }
1.72 daniel 6185:
1.45 daniel 6186: /*
1.50 daniel 6187: * Fifth case : a reference. If if has not been resolved,
6188: * parsing returns it's Name, create the node
1.45 daniel 6189: */
1.97 daniel 6190:
1.152 daniel 6191: else if (RAW == '&') {
1.77 daniel 6192: xmlParseReference(ctxt);
1.27 daniel 6193: }
1.72 daniel 6194:
1.27 daniel 6195: /*
6196: * Last case, text. Note that References are handled directly.
6197: */
6198: else {
1.45 daniel 6199: xmlParseCharData(ctxt, 0);
1.3 veillard 6200: }
1.14 veillard 6201:
1.91 daniel 6202: GROW;
1.14 veillard 6203: /*
1.45 daniel 6204: * Pop-up of finished entities.
1.14 veillard 6205: */
1.152 daniel 6206: while ((RAW == 0) && (ctxt->inputNr > 1))
1.69 daniel 6207: xmlPopInput(ctxt);
1.135 daniel 6208: SHRINK;
1.45 daniel 6209:
1.113 daniel 6210: if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6211: (tok == ctxt->token)) {
1.230 veillard 6212: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 6213: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6214: ctxt->sax->error(ctxt->userData,
1.59 daniel 6215: "detected an error in element content\n");
6216: ctxt->wellFormed = 0;
1.180 daniel 6217: ctxt->disableSAX = 1;
1.224 veillard 6218: ctxt->instate = XML_PARSER_EOF;
1.29 daniel 6219: break;
6220: }
1.3 veillard 6221: }
1.2 veillard 6222: }
6223:
1.50 daniel 6224: /**
6225: * xmlParseElement:
6226: * @ctxt: an XML parser context
6227: *
6228: * parse an XML element, this is highly recursive
1.26 daniel 6229: *
6230: * [39] element ::= EmptyElemTag | STag content ETag
6231: *
1.98 daniel 6232: * [ WFC: Element Type Match ]
6233: * The Name in an element's end-tag must match the element type in the
6234: * start-tag.
6235: *
6236: * [ VC: Element Valid ]
1.117 daniel 6237: * An element is valid if there is a declaration matching elementdecl
1.99 daniel 6238: * where the Name matches the element type and one of the following holds:
6239: * - The declaration matches EMPTY and the element has no content.
6240: * - The declaration matches children and the sequence of child elements
6241: * belongs to the language generated by the regular expression in the
6242: * content model, with optional white space (characters matching the
6243: * nonterminal S) between each pair of child elements.
6244: * - The declaration matches Mixed and the content consists of character
6245: * data and child elements whose types match names in the content model.
6246: * - The declaration matches ANY, and the types of any child elements have
6247: * been declared.
1.2 veillard 6248: */
1.26 daniel 6249:
1.72 daniel 6250: void
1.69 daniel 6251: xmlParseElement(xmlParserCtxtPtr ctxt) {
1.123 daniel 6252: const xmlChar *openTag = CUR_PTR;
6253: xmlChar *name;
1.140 daniel 6254: xmlChar *oldname;
1.32 daniel 6255: xmlParserNodeInfo node_info;
1.118 daniel 6256: xmlNodePtr ret;
1.2 veillard 6257:
1.32 daniel 6258: /* Capture start position */
1.118 daniel 6259: if (ctxt->record_info) {
6260: node_info.begin_pos = ctxt->input->consumed +
6261: (CUR_PTR - ctxt->input->base);
6262: node_info.begin_line = ctxt->input->line;
6263: }
1.32 daniel 6264:
1.176 daniel 6265: if (ctxt->spaceNr == 0)
6266: spacePush(ctxt, -1);
6267: else
6268: spacePush(ctxt, *ctxt->space);
6269:
1.83 daniel 6270: name = xmlParseStartTag(ctxt);
6271: if (name == NULL) {
1.176 daniel 6272: spacePop(ctxt);
1.83 daniel 6273: return;
6274: }
1.140 daniel 6275: namePush(ctxt, name);
1.118 daniel 6276: ret = ctxt->node;
1.2 veillard 6277:
6278: /*
1.99 daniel 6279: * [ VC: Root Element Type ]
6280: * The Name in the document type declaration must match the element
6281: * type of the root element.
6282: */
1.105 daniel 6283: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 6284: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.102 daniel 6285: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
1.99 daniel 6286:
6287: /*
1.2 veillard 6288: * Check for an Empty Element.
6289: */
1.152 daniel 6290: if ((RAW == '/') && (NXT(1) == '>')) {
1.40 daniel 6291: SKIP(2);
1.171 daniel 6292: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6293: (!ctxt->disableSAX))
1.83 daniel 6294: ctxt->sax->endElement(ctxt->userData, name);
1.140 daniel 6295: oldname = namePop(ctxt);
1.176 daniel 6296: spacePop(ctxt);
1.140 daniel 6297: if (oldname != NULL) {
6298: #ifdef DEBUG_STACK
6299: fprintf(stderr,"Close: popped %s\n", oldname);
6300: #endif
6301: xmlFree(oldname);
1.211 veillard 6302: }
6303: if ( ret != NULL && ctxt->record_info ) {
6304: node_info.end_pos = ctxt->input->consumed +
6305: (CUR_PTR - ctxt->input->base);
6306: node_info.end_line = ctxt->input->line;
6307: node_info.node = ret;
6308: xmlParserAddNodeInfo(ctxt, &node_info);
1.140 daniel 6309: }
1.72 daniel 6310: return;
1.2 veillard 6311: }
1.152 daniel 6312: if (RAW == '>') {
1.91 daniel 6313: NEXT;
6314: } else {
1.230 veillard 6315: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.55 daniel 6316: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6317: ctxt->sax->error(ctxt->userData,
6318: "Couldn't find end of Start Tag\n%.30s\n",
1.57 daniel 6319: openTag);
1.59 daniel 6320: ctxt->wellFormed = 0;
1.180 daniel 6321: ctxt->disableSAX = 1;
1.45 daniel 6322:
6323: /*
6324: * end of parsing of this node.
6325: */
6326: nodePop(ctxt);
1.140 daniel 6327: oldname = namePop(ctxt);
1.176 daniel 6328: spacePop(ctxt);
1.140 daniel 6329: if (oldname != NULL) {
6330: #ifdef DEBUG_STACK
6331: fprintf(stderr,"Close: popped %s\n", oldname);
6332: #endif
6333: xmlFree(oldname);
6334: }
1.118 daniel 6335:
6336: /*
6337: * Capture end position and add node
6338: */
6339: if ( ret != NULL && ctxt->record_info ) {
6340: node_info.end_pos = ctxt->input->consumed +
6341: (CUR_PTR - ctxt->input->base);
6342: node_info.end_line = ctxt->input->line;
6343: node_info.node = ret;
6344: xmlParserAddNodeInfo(ctxt, &node_info);
6345: }
1.72 daniel 6346: return;
1.2 veillard 6347: }
6348:
6349: /*
6350: * Parse the content of the element:
6351: */
1.45 daniel 6352: xmlParseContent(ctxt);
1.153 daniel 6353: if (!IS_CHAR(RAW)) {
1.230 veillard 6354: ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
1.55 daniel 6355: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6356: ctxt->sax->error(ctxt->userData,
1.57 daniel 6357: "Premature end of data in tag %.30s\n", openTag);
1.59 daniel 6358: ctxt->wellFormed = 0;
1.180 daniel 6359: ctxt->disableSAX = 1;
1.45 daniel 6360:
6361: /*
6362: * end of parsing of this node.
6363: */
6364: nodePop(ctxt);
1.140 daniel 6365: oldname = namePop(ctxt);
1.176 daniel 6366: spacePop(ctxt);
1.140 daniel 6367: if (oldname != NULL) {
6368: #ifdef DEBUG_STACK
6369: fprintf(stderr,"Close: popped %s\n", oldname);
6370: #endif
6371: xmlFree(oldname);
6372: }
1.72 daniel 6373: return;
1.2 veillard 6374: }
6375:
6376: /*
1.27 daniel 6377: * parse the end of tag: '</' should be here.
1.2 veillard 6378: */
1.140 daniel 6379: xmlParseEndTag(ctxt);
1.118 daniel 6380:
6381: /*
6382: * Capture end position and add node
6383: */
6384: if ( ret != NULL && ctxt->record_info ) {
6385: node_info.end_pos = ctxt->input->consumed +
6386: (CUR_PTR - ctxt->input->base);
6387: node_info.end_line = ctxt->input->line;
6388: node_info.node = ret;
6389: xmlParserAddNodeInfo(ctxt, &node_info);
6390: }
1.2 veillard 6391: }
6392:
1.50 daniel 6393: /**
6394: * xmlParseVersionNum:
6395: * @ctxt: an XML parser context
6396: *
6397: * parse the XML version value.
1.29 daniel 6398: *
6399: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
1.68 daniel 6400: *
6401: * Returns the string giving the XML version number, or NULL
1.29 daniel 6402: */
1.123 daniel 6403: xmlChar *
1.55 daniel 6404: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.135 daniel 6405: xmlChar *buf = NULL;
6406: int len = 0;
6407: int size = 10;
6408: xmlChar cur;
1.29 daniel 6409:
1.135 daniel 6410: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6411: if (buf == NULL) {
6412: fprintf(stderr, "malloc of %d byte failed\n", size);
6413: return(NULL);
6414: }
6415: cur = CUR;
1.152 daniel 6416: while (((cur >= 'a') && (cur <= 'z')) ||
6417: ((cur >= 'A') && (cur <= 'Z')) ||
6418: ((cur >= '0') && (cur <= '9')) ||
6419: (cur == '_') || (cur == '.') ||
6420: (cur == ':') || (cur == '-')) {
1.135 daniel 6421: if (len + 1 >= size) {
6422: size *= 2;
1.204 veillard 6423: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 6424: if (buf == NULL) {
6425: fprintf(stderr, "realloc of %d byte failed\n", size);
6426: return(NULL);
6427: }
6428: }
6429: buf[len++] = cur;
6430: NEXT;
6431: cur=CUR;
6432: }
6433: buf[len] = 0;
6434: return(buf);
1.29 daniel 6435: }
6436:
1.50 daniel 6437: /**
6438: * xmlParseVersionInfo:
6439: * @ctxt: an XML parser context
6440: *
6441: * parse the XML version.
1.29 daniel 6442: *
6443: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6444: *
6445: * [25] Eq ::= S? '=' S?
1.50 daniel 6446: *
1.68 daniel 6447: * Returns the version string, e.g. "1.0"
1.29 daniel 6448: */
6449:
1.123 daniel 6450: xmlChar *
1.55 daniel 6451: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
1.123 daniel 6452: xmlChar *version = NULL;
6453: const xmlChar *q;
1.29 daniel 6454:
1.152 daniel 6455: if ((RAW == 'v') && (NXT(1) == 'e') &&
1.40 daniel 6456: (NXT(2) == 'r') && (NXT(3) == 's') &&
6457: (NXT(4) == 'i') && (NXT(5) == 'o') &&
6458: (NXT(6) == 'n')) {
6459: SKIP(7);
1.42 daniel 6460: SKIP_BLANKS;
1.152 daniel 6461: if (RAW != '=') {
1.230 veillard 6462: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.55 daniel 6463: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6464: ctxt->sax->error(ctxt->userData,
6465: "xmlParseVersionInfo : expected '='\n");
1.59 daniel 6466: ctxt->wellFormed = 0;
1.180 daniel 6467: ctxt->disableSAX = 1;
1.31 daniel 6468: return(NULL);
6469: }
1.40 daniel 6470: NEXT;
1.42 daniel 6471: SKIP_BLANKS;
1.152 daniel 6472: if (RAW == '"') {
1.40 daniel 6473: NEXT;
6474: q = CUR_PTR;
1.29 daniel 6475: version = xmlParseVersionNum(ctxt);
1.152 daniel 6476: if (RAW != '"') {
1.230 veillard 6477: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 6478: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6479: ctxt->sax->error(ctxt->userData,
6480: "String not closed\n%.50s\n", q);
1.59 daniel 6481: ctxt->wellFormed = 0;
1.180 daniel 6482: ctxt->disableSAX = 1;
1.55 daniel 6483: } else
1.40 daniel 6484: NEXT;
1.152 daniel 6485: } else if (RAW == '\''){
1.40 daniel 6486: NEXT;
6487: q = CUR_PTR;
1.29 daniel 6488: version = xmlParseVersionNum(ctxt);
1.152 daniel 6489: if (RAW != '\'') {
1.230 veillard 6490: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 6491: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6492: ctxt->sax->error(ctxt->userData,
6493: "String not closed\n%.50s\n", q);
1.59 daniel 6494: ctxt->wellFormed = 0;
1.180 daniel 6495: ctxt->disableSAX = 1;
1.55 daniel 6496: } else
1.40 daniel 6497: NEXT;
1.31 daniel 6498: } else {
1.230 veillard 6499: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.55 daniel 6500: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6501: ctxt->sax->error(ctxt->userData,
1.59 daniel 6502: "xmlParseVersionInfo : expected ' or \"\n");
1.122 daniel 6503: ctxt->wellFormed = 0;
1.180 daniel 6504: ctxt->disableSAX = 1;
1.29 daniel 6505: }
6506: }
6507: return(version);
6508: }
6509:
1.50 daniel 6510: /**
6511: * xmlParseEncName:
6512: * @ctxt: an XML parser context
6513: *
6514: * parse the XML encoding name
1.29 daniel 6515: *
6516: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1.50 daniel 6517: *
1.68 daniel 6518: * Returns the encoding name value or NULL
1.29 daniel 6519: */
1.123 daniel 6520: xmlChar *
1.55 daniel 6521: xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.135 daniel 6522: xmlChar *buf = NULL;
6523: int len = 0;
6524: int size = 10;
6525: xmlChar cur;
1.29 daniel 6526:
1.135 daniel 6527: cur = CUR;
6528: if (((cur >= 'a') && (cur <= 'z')) ||
6529: ((cur >= 'A') && (cur <= 'Z'))) {
6530: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6531: if (buf == NULL) {
6532: fprintf(stderr, "malloc of %d byte failed\n", size);
6533: return(NULL);
6534: }
6535:
6536: buf[len++] = cur;
1.40 daniel 6537: NEXT;
1.135 daniel 6538: cur = CUR;
1.152 daniel 6539: while (((cur >= 'a') && (cur <= 'z')) ||
6540: ((cur >= 'A') && (cur <= 'Z')) ||
6541: ((cur >= '0') && (cur <= '9')) ||
6542: (cur == '.') || (cur == '_') ||
6543: (cur == '-')) {
1.135 daniel 6544: if (len + 1 >= size) {
6545: size *= 2;
1.204 veillard 6546: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 6547: if (buf == NULL) {
6548: fprintf(stderr, "realloc of %d byte failed\n", size);
6549: return(NULL);
6550: }
6551: }
6552: buf[len++] = cur;
6553: NEXT;
6554: cur = CUR;
6555: if (cur == 0) {
6556: SHRINK;
6557: GROW;
6558: cur = CUR;
6559: }
6560: }
6561: buf[len] = 0;
1.29 daniel 6562: } else {
1.230 veillard 6563: ctxt->errNo = XML_ERR_ENCODING_NAME;
1.55 daniel 6564: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6565: ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
1.59 daniel 6566: ctxt->wellFormed = 0;
1.180 daniel 6567: ctxt->disableSAX = 1;
1.29 daniel 6568: }
1.135 daniel 6569: return(buf);
1.29 daniel 6570: }
6571:
1.50 daniel 6572: /**
6573: * xmlParseEncodingDecl:
6574: * @ctxt: an XML parser context
6575: *
6576: * parse the XML encoding declaration
1.29 daniel 6577: *
6578: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
1.50 daniel 6579: *
1.229 veillard 6580: * this setups the conversion filters.
1.50 daniel 6581: *
1.68 daniel 6582: * Returns the encoding value or NULL
1.29 daniel 6583: */
6584:
1.123 daniel 6585: xmlChar *
1.55 daniel 6586: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 6587: xmlChar *encoding = NULL;
6588: const xmlChar *q;
1.29 daniel 6589:
1.42 daniel 6590: SKIP_BLANKS;
1.152 daniel 6591: if ((RAW == 'e') && (NXT(1) == 'n') &&
1.40 daniel 6592: (NXT(2) == 'c') && (NXT(3) == 'o') &&
6593: (NXT(4) == 'd') && (NXT(5) == 'i') &&
6594: (NXT(6) == 'n') && (NXT(7) == 'g')) {
6595: SKIP(8);
1.42 daniel 6596: SKIP_BLANKS;
1.152 daniel 6597: if (RAW != '=') {
1.230 veillard 6598: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.55 daniel 6599: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6600: ctxt->sax->error(ctxt->userData,
6601: "xmlParseEncodingDecl : expected '='\n");
1.59 daniel 6602: ctxt->wellFormed = 0;
1.180 daniel 6603: ctxt->disableSAX = 1;
1.31 daniel 6604: return(NULL);
6605: }
1.40 daniel 6606: NEXT;
1.42 daniel 6607: SKIP_BLANKS;
1.152 daniel 6608: if (RAW == '"') {
1.40 daniel 6609: NEXT;
6610: q = CUR_PTR;
1.29 daniel 6611: encoding = xmlParseEncName(ctxt);
1.152 daniel 6612: if (RAW != '"') {
1.230 veillard 6613: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 6614: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6615: ctxt->sax->error(ctxt->userData,
6616: "String not closed\n%.50s\n", q);
1.59 daniel 6617: ctxt->wellFormed = 0;
1.180 daniel 6618: ctxt->disableSAX = 1;
1.55 daniel 6619: } else
1.40 daniel 6620: NEXT;
1.152 daniel 6621: } else if (RAW == '\''){
1.40 daniel 6622: NEXT;
6623: q = CUR_PTR;
1.29 daniel 6624: encoding = xmlParseEncName(ctxt);
1.152 daniel 6625: if (RAW != '\'') {
1.230 veillard 6626: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 6627: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6628: ctxt->sax->error(ctxt->userData,
6629: "String not closed\n%.50s\n", q);
1.59 daniel 6630: ctxt->wellFormed = 0;
1.180 daniel 6631: ctxt->disableSAX = 1;
1.55 daniel 6632: } else
1.40 daniel 6633: NEXT;
1.152 daniel 6634: } else if (RAW == '"'){
1.230 veillard 6635: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.55 daniel 6636: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6637: ctxt->sax->error(ctxt->userData,
1.59 daniel 6638: "xmlParseEncodingDecl : expected ' or \"\n");
6639: ctxt->wellFormed = 0;
1.180 daniel 6640: ctxt->disableSAX = 1;
1.29 daniel 6641: }
1.193 daniel 6642: if (encoding != NULL) {
6643: xmlCharEncoding enc;
6644: xmlCharEncodingHandlerPtr handler;
6645:
1.195 daniel 6646: if (ctxt->input->encoding != NULL)
6647: xmlFree((xmlChar *) ctxt->input->encoding);
6648: ctxt->input->encoding = encoding;
6649:
1.193 daniel 6650: enc = xmlParseCharEncoding((const char *) encoding);
6651: /*
6652: * registered set of known encodings
6653: */
6654: if (enc != XML_CHAR_ENCODING_ERROR) {
6655: xmlSwitchEncoding(ctxt, enc);
6656: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6657: xmlFree(encoding);
6658: return(NULL);
6659: }
6660: } else {
6661: /*
6662: * fallback for unknown encodings
6663: */
6664: handler = xmlFindCharEncodingHandler((const char *) encoding);
6665: if (handler != NULL) {
6666: xmlSwitchToEncoding(ctxt, handler);
6667: } else {
6668: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.208 veillard 6669: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6670: ctxt->sax->error(ctxt->userData,
6671: "Unsupported encoding %s\n", encoding);
1.193 daniel 6672: return(NULL);
6673: }
6674: }
6675: }
1.29 daniel 6676: }
6677: return(encoding);
6678: }
6679:
1.50 daniel 6680: /**
6681: * xmlParseSDDecl:
6682: * @ctxt: an XML parser context
6683: *
6684: * parse the XML standalone declaration
1.29 daniel 6685: *
6686: * [32] SDDecl ::= S 'standalone' Eq
6687: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
1.99 daniel 6688: *
6689: * [ VC: Standalone Document Declaration ]
6690: * TODO The standalone document declaration must have the value "no"
6691: * if any external markup declarations contain declarations of:
6692: * - attributes with default values, if elements to which these
6693: * attributes apply appear in the document without specifications
6694: * of values for these attributes, or
6695: * - entities (other than amp, lt, gt, apos, quot), if references
6696: * to those entities appear in the document, or
6697: * - attributes with values subject to normalization, where the
6698: * attribute appears in the document with a value which will change
6699: * as a result of normalization, or
6700: * - element types with element content, if white space occurs directly
6701: * within any instance of those types.
1.68 daniel 6702: *
6703: * Returns 1 if standalone, 0 otherwise
1.29 daniel 6704: */
6705:
1.55 daniel 6706: int
6707: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 6708: int standalone = -1;
6709:
1.42 daniel 6710: SKIP_BLANKS;
1.152 daniel 6711: if ((RAW == 's') && (NXT(1) == 't') &&
1.40 daniel 6712: (NXT(2) == 'a') && (NXT(3) == 'n') &&
6713: (NXT(4) == 'd') && (NXT(5) == 'a') &&
6714: (NXT(6) == 'l') && (NXT(7) == 'o') &&
6715: (NXT(8) == 'n') && (NXT(9) == 'e')) {
6716: SKIP(10);
1.81 daniel 6717: SKIP_BLANKS;
1.152 daniel 6718: if (RAW != '=') {
1.230 veillard 6719: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.55 daniel 6720: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6721: ctxt->sax->error(ctxt->userData,
1.59 daniel 6722: "XML standalone declaration : expected '='\n");
6723: ctxt->wellFormed = 0;
1.180 daniel 6724: ctxt->disableSAX = 1;
1.32 daniel 6725: return(standalone);
6726: }
1.40 daniel 6727: NEXT;
1.42 daniel 6728: SKIP_BLANKS;
1.152 daniel 6729: if (RAW == '\''){
1.40 daniel 6730: NEXT;
1.152 daniel 6731: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 6732: standalone = 0;
1.40 daniel 6733: SKIP(2);
1.152 daniel 6734: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 6735: (NXT(2) == 's')) {
1.29 daniel 6736: standalone = 1;
1.40 daniel 6737: SKIP(3);
1.29 daniel 6738: } else {
1.230 veillard 6739: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.55 daniel 6740: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6741: ctxt->sax->error(ctxt->userData,
6742: "standalone accepts only 'yes' or 'no'\n");
1.59 daniel 6743: ctxt->wellFormed = 0;
1.180 daniel 6744: ctxt->disableSAX = 1;
1.29 daniel 6745: }
1.152 daniel 6746: if (RAW != '\'') {
1.230 veillard 6747: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 6748: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6749: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 6750: ctxt->wellFormed = 0;
1.180 daniel 6751: ctxt->disableSAX = 1;
1.55 daniel 6752: } else
1.40 daniel 6753: NEXT;
1.152 daniel 6754: } else if (RAW == '"'){
1.40 daniel 6755: NEXT;
1.152 daniel 6756: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 6757: standalone = 0;
1.40 daniel 6758: SKIP(2);
1.152 daniel 6759: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 6760: (NXT(2) == 's')) {
1.29 daniel 6761: standalone = 1;
1.40 daniel 6762: SKIP(3);
1.29 daniel 6763: } else {
1.230 veillard 6764: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.55 daniel 6765: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6766: ctxt->sax->error(ctxt->userData,
1.59 daniel 6767: "standalone accepts only 'yes' or 'no'\n");
6768: ctxt->wellFormed = 0;
1.180 daniel 6769: ctxt->disableSAX = 1;
1.29 daniel 6770: }
1.152 daniel 6771: if (RAW != '"') {
1.230 veillard 6772: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 6773: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6774: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 6775: ctxt->wellFormed = 0;
1.180 daniel 6776: ctxt->disableSAX = 1;
1.55 daniel 6777: } else
1.40 daniel 6778: NEXT;
1.37 daniel 6779: } else {
1.230 veillard 6780: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.55 daniel 6781: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6782: ctxt->sax->error(ctxt->userData,
6783: "Standalone value not found\n");
1.59 daniel 6784: ctxt->wellFormed = 0;
1.180 daniel 6785: ctxt->disableSAX = 1;
1.37 daniel 6786: }
1.29 daniel 6787: }
6788: return(standalone);
6789: }
6790:
1.50 daniel 6791: /**
6792: * xmlParseXMLDecl:
6793: * @ctxt: an XML parser context
6794: *
6795: * parse an XML declaration header
1.29 daniel 6796: *
6797: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 6798: */
6799:
1.55 daniel 6800: void
6801: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 6802: xmlChar *version;
1.1 veillard 6803:
6804: /*
1.19 daniel 6805: * We know that '<?xml' is here.
1.1 veillard 6806: */
1.40 daniel 6807: SKIP(5);
1.1 veillard 6808:
1.153 daniel 6809: if (!IS_BLANK(RAW)) {
1.230 veillard 6810: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 6811: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6812: ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
1.59 daniel 6813: ctxt->wellFormed = 0;
1.180 daniel 6814: ctxt->disableSAX = 1;
1.59 daniel 6815: }
1.42 daniel 6816: SKIP_BLANKS;
1.1 veillard 6817:
6818: /*
1.29 daniel 6819: * We should have the VersionInfo here.
1.1 veillard 6820: */
1.29 daniel 6821: version = xmlParseVersionInfo(ctxt);
6822: if (version == NULL)
1.45 daniel 6823: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.72 daniel 6824: ctxt->version = xmlStrdup(version);
1.119 daniel 6825: xmlFree(version);
1.29 daniel 6826:
6827: /*
6828: * We may have the encoding declaration
6829: */
1.153 daniel 6830: if (!IS_BLANK(RAW)) {
1.152 daniel 6831: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 6832: SKIP(2);
6833: return;
6834: }
1.230 veillard 6835: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 6836: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6837: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 6838: ctxt->wellFormed = 0;
1.180 daniel 6839: ctxt->disableSAX = 1;
1.59 daniel 6840: }
1.195 daniel 6841: xmlParseEncodingDecl(ctxt);
1.193 daniel 6842: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6843: /*
6844: * The XML REC instructs us to stop parsing right here
6845: */
6846: return;
6847: }
1.1 veillard 6848:
6849: /*
1.29 daniel 6850: * We may have the standalone status.
1.1 veillard 6851: */
1.164 daniel 6852: if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
1.152 daniel 6853: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 6854: SKIP(2);
6855: return;
6856: }
1.230 veillard 6857: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 6858: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6859: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 6860: ctxt->wellFormed = 0;
1.180 daniel 6861: ctxt->disableSAX = 1;
1.59 daniel 6862: }
6863: SKIP_BLANKS;
1.167 daniel 6864: ctxt->input->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 6865:
1.42 daniel 6866: SKIP_BLANKS;
1.152 daniel 6867: if ((RAW == '?') && (NXT(1) == '>')) {
1.40 daniel 6868: SKIP(2);
1.152 daniel 6869: } else if (RAW == '>') {
1.31 daniel 6870: /* Deprecated old WD ... */
1.230 veillard 6871: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.55 daniel 6872: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6873: ctxt->sax->error(ctxt->userData,
6874: "XML declaration must end-up with '?>'\n");
1.59 daniel 6875: ctxt->wellFormed = 0;
1.180 daniel 6876: ctxt->disableSAX = 1;
1.40 daniel 6877: NEXT;
1.29 daniel 6878: } else {
1.230 veillard 6879: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.55 daniel 6880: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6881: ctxt->sax->error(ctxt->userData,
6882: "parsing XML declaration: '?>' expected\n");
1.59 daniel 6883: ctxt->wellFormed = 0;
1.180 daniel 6884: ctxt->disableSAX = 1;
1.40 daniel 6885: MOVETO_ENDTAG(CUR_PTR);
6886: NEXT;
1.29 daniel 6887: }
1.1 veillard 6888: }
6889:
1.50 daniel 6890: /**
6891: * xmlParseMisc:
6892: * @ctxt: an XML parser context
6893: *
6894: * parse an XML Misc* optionnal field.
1.21 daniel 6895: *
1.22 daniel 6896: * [27] Misc ::= Comment | PI | S
1.1 veillard 6897: */
6898:
1.55 daniel 6899: void
6900: xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.152 daniel 6901: while (((RAW == '<') && (NXT(1) == '?')) ||
6902: ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 6903: (NXT(2) == '-') && (NXT(3) == '-')) ||
6904: IS_BLANK(CUR)) {
1.152 daniel 6905: if ((RAW == '<') && (NXT(1) == '?')) {
1.16 daniel 6906: xmlParsePI(ctxt);
1.40 daniel 6907: } else if (IS_BLANK(CUR)) {
6908: NEXT;
1.1 veillard 6909: } else
1.114 daniel 6910: xmlParseComment(ctxt);
1.1 veillard 6911: }
6912: }
6913:
1.50 daniel 6914: /**
1.181 daniel 6915: * xmlParseDocument:
1.50 daniel 6916: * @ctxt: an XML parser context
6917: *
6918: * parse an XML document (and build a tree if using the standard SAX
6919: * interface).
1.21 daniel 6920: *
1.22 daniel 6921: * [1] document ::= prolog element Misc*
1.29 daniel 6922: *
6923: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.50 daniel 6924: *
1.68 daniel 6925: * Returns 0, -1 in case of error. the parser context is augmented
1.50 daniel 6926: * as a result of the parsing.
1.1 veillard 6927: */
6928:
1.55 daniel 6929: int
6930: xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.156 daniel 6931: xmlChar start[4];
6932: xmlCharEncoding enc;
6933:
1.235 veillard 6934: xmlInitParser();
1.45 daniel 6935:
1.91 daniel 6936: GROW;
6937:
1.14 veillard 6938: /*
1.44 daniel 6939: * SAX: beginning of the document processing.
6940: */
1.72 daniel 6941: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
1.74 daniel 6942: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
1.44 daniel 6943:
1.156 daniel 6944: /*
6945: * Get the 4 first bytes and decode the charset
6946: * if enc != XML_CHAR_ENCODING_NONE
6947: * plug some encoding conversion routines.
6948: */
6949: start[0] = RAW;
6950: start[1] = NXT(1);
6951: start[2] = NXT(2);
6952: start[3] = NXT(3);
6953: enc = xmlDetectCharEncoding(start, 4);
6954: if (enc != XML_CHAR_ENCODING_NONE) {
6955: xmlSwitchEncoding(ctxt, enc);
6956: }
6957:
1.1 veillard 6958:
1.59 daniel 6959: if (CUR == 0) {
1.230 veillard 6960: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 6961: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6962: ctxt->sax->error(ctxt->userData, "Document is empty\n");
1.59 daniel 6963: ctxt->wellFormed = 0;
1.180 daniel 6964: ctxt->disableSAX = 1;
1.59 daniel 6965: }
1.1 veillard 6966:
6967: /*
6968: * Check for the XMLDecl in the Prolog.
6969: */
1.91 daniel 6970: GROW;
1.152 daniel 6971: if ((RAW == '<') && (NXT(1) == '?') &&
1.40 daniel 6972: (NXT(2) == 'x') && (NXT(3) == 'm') &&
1.142 daniel 6973: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.196 daniel 6974:
6975: /*
6976: * Note that we will switch encoding on the fly.
6977: */
1.19 daniel 6978: xmlParseXMLDecl(ctxt);
1.193 daniel 6979: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6980: /*
6981: * The XML REC instructs us to stop parsing right here
6982: */
6983: return(-1);
6984: }
1.167 daniel 6985: ctxt->standalone = ctxt->input->standalone;
1.42 daniel 6986: SKIP_BLANKS;
1.1 veillard 6987: } else {
1.72 daniel 6988: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.1 veillard 6989: }
1.171 daniel 6990: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
1.74 daniel 6991: ctxt->sax->startDocument(ctxt->userData);
1.1 veillard 6992:
6993: /*
6994: * The Misc part of the Prolog
6995: */
1.91 daniel 6996: GROW;
1.16 daniel 6997: xmlParseMisc(ctxt);
1.1 veillard 6998:
6999: /*
1.29 daniel 7000: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 7001: * (doctypedecl Misc*)?
7002: */
1.91 daniel 7003: GROW;
1.152 daniel 7004: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7005: (NXT(2) == 'D') && (NXT(3) == 'O') &&
7006: (NXT(4) == 'C') && (NXT(5) == 'T') &&
7007: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7008: (NXT(8) == 'E')) {
1.165 daniel 7009:
1.166 daniel 7010: ctxt->inSubset = 1;
1.22 daniel 7011: xmlParseDocTypeDecl(ctxt);
1.152 daniel 7012: if (RAW == '[') {
1.140 daniel 7013: ctxt->instate = XML_PARSER_DTD;
7014: xmlParseInternalSubset(ctxt);
7015: }
1.165 daniel 7016:
7017: /*
7018: * Create and update the external subset.
7019: */
1.166 daniel 7020: ctxt->inSubset = 2;
1.171 daniel 7021: if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7022: (!ctxt->disableSAX))
1.165 daniel 7023: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7024: ctxt->extSubSystem, ctxt->extSubURI);
1.166 daniel 7025: ctxt->inSubset = 0;
1.165 daniel 7026:
7027:
1.96 daniel 7028: ctxt->instate = XML_PARSER_PROLOG;
1.22 daniel 7029: xmlParseMisc(ctxt);
1.21 daniel 7030: }
7031:
7032: /*
7033: * Time to start parsing the tree itself
1.1 veillard 7034: */
1.91 daniel 7035: GROW;
1.152 daniel 7036: if (RAW != '<') {
1.230 veillard 7037: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 7038: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7039: ctxt->sax->error(ctxt->userData,
1.151 daniel 7040: "Start tag expected, '<' not found\n");
1.59 daniel 7041: ctxt->wellFormed = 0;
1.180 daniel 7042: ctxt->disableSAX = 1;
1.140 daniel 7043: ctxt->instate = XML_PARSER_EOF;
7044: } else {
7045: ctxt->instate = XML_PARSER_CONTENT;
7046: xmlParseElement(ctxt);
7047: ctxt->instate = XML_PARSER_EPILOG;
7048:
7049:
7050: /*
7051: * The Misc part at the end
7052: */
7053: xmlParseMisc(ctxt);
7054:
1.152 daniel 7055: if (RAW != 0) {
1.230 veillard 7056: ctxt->errNo = XML_ERR_DOCUMENT_END;
1.140 daniel 7057: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7058: ctxt->sax->error(ctxt->userData,
7059: "Extra content at the end of the document\n");
7060: ctxt->wellFormed = 0;
1.180 daniel 7061: ctxt->disableSAX = 1;
1.140 daniel 7062: }
7063: ctxt->instate = XML_PARSER_EOF;
1.59 daniel 7064: }
7065:
1.44 daniel 7066: /*
7067: * SAX: end of the document processing.
7068: */
1.171 daniel 7069: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7070: (!ctxt->disableSAX))
1.74 daniel 7071: ctxt->sax->endDocument(ctxt->userData);
1.151 daniel 7072:
1.59 daniel 7073: if (! ctxt->wellFormed) return(-1);
1.16 daniel 7074: return(0);
7075: }
7076:
1.229 veillard 7077: /**
7078: * xmlParseExtParsedEnt:
7079: * @ctxt: an XML parser context
7080: *
7081: * parse a general parsed entity
7082: * An external general parsed entity is well-formed if it matches the
7083: * production labeled extParsedEnt.
7084: *
7085: * [78] extParsedEnt ::= TextDecl? content
7086: *
 * Validity checking is switched off (ctxt->validate = 0) and ctxt->depth
 * is reset to 0 before the content is parsed.
 *
7087: * Returns 0 if the entity is well formed, -1 in case of error (including
7088: * an unsupported encoding). The parser context is augmented as a result of the parsing.
7089: */
7090:
7091: int
7092: xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7093: xmlChar start[4];
7094: xmlCharEncoding enc;
7095:
/* NOTE(review): ctxt is dereferenced below without a NULL check;
 * presumably callers always pass a valid context - confirm. */
7096: xmlDefaultSAXHandlerInit();
7097:
7098: GROW;
7099:
7100: /*
7101: * SAX: beginning of the document processing.
7102: */
7103: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7104: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7105:
7106: /*
7107: * Get the first 4 bytes and detect the character encoding;
7108: * if enc != XML_CHAR_ENCODING_NONE
7109: * plug in the matching encoding conversion routines.
7110: */
7111: start[0] = RAW;
7112: start[1] = NXT(1);
7113: start[2] = NXT(2);
7114: start[3] = NXT(3);
7115: enc = xmlDetectCharEncoding(start, 4);
7116: if (enc != XML_CHAR_ENCODING_NONE) {
7117: xmlSwitchEncoding(ctxt, enc);
7118: }
7119:
7120:
/* An empty input is reported as an error and SAX callbacks are disabled,
 * but there is no early return: processing continues below. */
7121: if (CUR == 0) {
1.230 veillard 7122: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.229 veillard 7123: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7124: ctxt->sax->error(ctxt->userData, "Document is empty\n");
7125: ctxt->wellFormed = 0;
7126: ctxt->disableSAX = 1;
7127: }
7128:
7129: /*
7130: * Check for a leading "<?xml" declaration; when there is none the
 * version defaults to XML_DEFAULT_VERSION.
7131: */
7132: GROW;
7133: if ((RAW == '<') && (NXT(1) == '?') &&
7134: (NXT(2) == 'x') && (NXT(3) == 'm') &&
7135: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7136:
7137: /*
7138: * Note that we will switch encoding on the fly.
7139: */
7140: xmlParseXMLDecl(ctxt);
7141: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7142: /*
7143: * The XML REC instructs us to stop parsing right here
7144: */
7145: return(-1);
7146: }
7147: SKIP_BLANKS;
7148: } else {
7149: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7150: }
7151: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7152: ctxt->sax->startDocument(ctxt->userData);
7153:
7154: /*
7155: * Doing validity checking on a chunk doesn't make sense
7156: */
7157: ctxt->instate = XML_PARSER_CONTENT;
7158: ctxt->validate = 0;
7159: ctxt->depth = 0;
7160:
7161: xmlParseContent(ctxt);
7162:
/* After the content, anything left in the input is an error: a pending
 * "</" means an unbalanced chunk, any other byte is extra content. */
7163: if ((RAW == '<') && (NXT(1) == '/')) {
1.230 veillard 7164: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.229 veillard 7165: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7166: ctxt->sax->error(ctxt->userData,
7167: "chunk is not well balanced\n");
7168: ctxt->wellFormed = 0;
7169: ctxt->disableSAX = 1;
7170: } else if (RAW != 0) {
1.230 veillard 7171: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
1.229 veillard 7172: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7173: ctxt->sax->error(ctxt->userData,
7174: "extra content at the end of well balanced chunk\n");
7175: ctxt->wellFormed = 0;
7176: ctxt->disableSAX = 1;
7177: }
7178:
7179: /*
7180: * SAX: end of the document processing.
7181: */
7182: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7183: (!ctxt->disableSAX))
7184: ctxt->sax->endDocument(ctxt->userData);
7185:
7186: if (! ctxt->wellFormed) return(-1);
7187: return(0);
7188: }
7189:
1.98 daniel 7190: /************************************************************************
7191: * *
1.128 daniel 7192: * Progressive parsing interfaces *
7193: * *
7194: ************************************************************************/
7195:
7196: /**
7197: * xmlParseLookupSequence:
7198: * @ctxt: an XML parser context
7199: * @first: the first char to lookup
1.140 daniel 7200: * @next: the next char to lookup or zero
7201: * @third: the next char to lookup or zero
1.128 daniel 7202: *
1.140 daniel 7203: * Try to find if a sequence (first, next, third) or just (first next) or
7204: * (first) is available in the input stream.
7205: * This function has the side effect of updating ctxt->checkIndex: it is
7206: * set to the position where the scan stopped when the sequence is not found
7207: * (so that bytes are not rescanned on the next call) and reset to 0 when it is found.
 * Since it changes the state of the parser, do not use it liberally.
1.128 daniel 7208: *
1.140 daniel 7209: * Returns the offset of the sequence relative to the current parsing point
7210: * (in->cur) if the full sequence is available, -1 otherwise.
1.128 daniel 7211: */
7212: int
1.140 daniel 7213: xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7214: xmlChar next, xmlChar third) {
7215: int base, len;
7216: xmlParserInputPtr in;
7217: const xmlChar *buf;
7218:
7219: in = ctxt->input;
7220: if (in == NULL) return(-1);
7221: base = in->cur - in->base;
7222: if (base < 0) return(-1);
/* Resume from the index saved by a previous unsuccessful lookup. */
7223: if (ctxt->checkIndex > base)
7224: base = ctxt->checkIndex;
/* Without an input buffer the data is scanned from in->base over in->length
 * bytes; otherwise the content of the buffer is scanned. */
7225: if (in->buf == NULL) {
7226: buf = in->base;
7227: len = in->length;
7228: } else {
7229: buf = in->buf->buffer->content;
7230: len = in->buf->buffer->use;
7231: }
7232: /* take into account the sequence length, so that buf[base + 1] and
 * buf[base + 2] below stay within the first len bytes */
7233: if (third) len -= 2;
7234: else if (next) len --;
7235: for (;base < len;base++) {
7236: if (buf[base] == first) {
7237: if (third != 0) {
7238: if ((buf[base + 1] != next) ||
7239: (buf[base + 2] != third)) continue;
7240: } else if (next != 0) {
7241: if (buf[base + 1] != next) continue;
7242: }
/* Found: forget the saved scan position. */
7243: ctxt->checkIndex = 0;
7244: #ifdef DEBUG_PUSH
7245: if (next == 0)
7246: fprintf(stderr, "PP: lookup '%c' found at %d\n",
7247: first, base);
7248: else if (third == 0)
7249: fprintf(stderr, "PP: lookup '%c%c' found at %d\n",
7250: first, next, base);
7251: else
7252: fprintf(stderr, "PP: lookup '%c%c%c' found at %d\n",
7253: first, next, third, base);
7254: #endif
7255: return(base - (in->cur - in->base));
7256: }
7257: }
/* Not found: remember where the scan stopped for the next call. */
7258: ctxt->checkIndex = base;
7259: #ifdef DEBUG_PUSH
7260: if (next == 0)
7261: fprintf(stderr, "PP: lookup '%c' failed\n", first);
7262: else if (third == 0)
7263: fprintf(stderr, "PP: lookup '%c%c' failed\n", first, next);
7264: else
7265: fprintf(stderr, "PP: lookup '%c%c%c' failed\n", first, next, third);
7266: #endif
7267: return(-1);
1.128 daniel 7268: }
7269:
7270: /**
1.143 daniel 7271: * xmlParseTryOrFinish:
1.128 daniel 7272: * @ctxt: an XML parser context
1.143 daniel 7273: * @terminate: last chunk indicator
1.128 daniel 7274: *
7275: * Try to progress on parsing
 * The function loops over a state machine driven by ctxt->instate. When
 * @terminate is zero, most constructs are only parsed once
 * xmlParseLookupSequence() has found their terminating sequence in the
 * input; otherwise the function returns and waits for more data.
7276: *
7277: * Returns zero if no parsing was possible
 * NOTE(review): ret is only modified in the XML_PARSER_START state, so
 * zero is also returned when other states made progress - confirm that
 * callers do not rely on the value.
7278: */
7279: int
1.143 daniel 7280: xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
1.128 daniel 7281: int ret = 0;
1.140 daniel 7282: int avail;
7283: xmlChar cur, next;
7284:
7285: #ifdef DEBUG_PUSH
7286: switch (ctxt->instate) {
7287: case XML_PARSER_EOF:
7288: fprintf(stderr, "PP: try EOF\n"); break;
7289: case XML_PARSER_START:
7290: fprintf(stderr, "PP: try START\n"); break;
7291: case XML_PARSER_MISC:
7292: fprintf(stderr, "PP: try MISC\n");break;
7293: case XML_PARSER_COMMENT:
7294: fprintf(stderr, "PP: try COMMENT\n");break;
7295: case XML_PARSER_PROLOG:
7296: fprintf(stderr, "PP: try PROLOG\n");break;
7297: case XML_PARSER_START_TAG:
7298: fprintf(stderr, "PP: try START_TAG\n");break;
7299: case XML_PARSER_CONTENT:
7300: fprintf(stderr, "PP: try CONTENT\n");break;
7301: case XML_PARSER_CDATA_SECTION:
7302: fprintf(stderr, "PP: try CDATA_SECTION\n");break;
7303: case XML_PARSER_END_TAG:
7304: fprintf(stderr, "PP: try END_TAG\n");break;
7305: case XML_PARSER_ENTITY_DECL:
7306: fprintf(stderr, "PP: try ENTITY_DECL\n");break;
7307: case XML_PARSER_ENTITY_VALUE:
7308: fprintf(stderr, "PP: try ENTITY_VALUE\n");break;
7309: case XML_PARSER_ATTRIBUTE_VALUE:
7310: fprintf(stderr, "PP: try ATTRIBUTE_VALUE\n");break;
7311: case XML_PARSER_DTD:
7312: fprintf(stderr, "PP: try DTD\n");break;
7313: case XML_PARSER_EPILOG:
7314: fprintf(stderr, "PP: try EPILOG\n");break;
7315: case XML_PARSER_PI:
7316: fprintf(stderr, "PP: try PI\n");break;
7317: }
7318: #endif
1.128 daniel 7319:
7320: while (1) {
1.140 daniel 7321: /*
7322: * Pop-up of finished entities.
7323: */
1.152 daniel 7324: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 7325: xmlPopInput(ctxt);
7326:
/* avail is the number of bytes between the current position and the end
 * of the data held by the current input. */
1.184 daniel 7327: if (ctxt->input ==NULL) break;
7328: if (ctxt->input->buf == NULL)
7329: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7330: else
1.184 daniel 7331: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7332: if (avail < 1)
7333: goto done;
1.128 daniel 7334: switch (ctxt->instate) {
7335: case XML_PARSER_EOF:
1.140 daniel 7336: /*
7337: * Document parsing is done !
7338: */
7339: goto done;
7340: case XML_PARSER_START:
7341: /*
7342: * Very first chars read from the document flow.
7343: */
1.184 daniel 7344: cur = ctxt->input->cur[0];
1.140 daniel 7345: if (IS_BLANK(cur)) {
7346: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7347: ctxt->sax->setDocumentLocator(ctxt->userData,
7348: &xmlDefaultSAXLocator);
1.230 veillard 7349: ctxt->errNo = XML_ERR_DOCUMENT_START;
1.140 daniel 7350: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7351: ctxt->sax->error(ctxt->userData,
7352: "Extra spaces at the beginning of the document are not allowed\n");
7353: ctxt->wellFormed = 0;
1.180 daniel 7354: ctxt->disableSAX = 1;
1.140 daniel 7355: SKIP_BLANKS;
7356: ret++;
/* The blanks were skipped, so avail has to be recomputed. */
1.184 daniel 7357: if (ctxt->input->buf == NULL)
7358: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7359: else
1.184 daniel 7360: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7361: }
7362: if (avail < 2)
7363: goto done;
7364:
1.184 daniel 7365: cur = ctxt->input->cur[0];
7366: next = ctxt->input->cur[1];
1.140 daniel 7367: if (cur == 0) {
7368: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7369: ctxt->sax->setDocumentLocator(ctxt->userData,
7370: &xmlDefaultSAXLocator);
1.230 veillard 7371: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.140 daniel 7372: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7373: ctxt->sax->error(ctxt->userData, "Document is empty\n");
7374: ctxt->wellFormed = 0;
1.180 daniel 7375: ctxt->disableSAX = 1;
1.140 daniel 7376: ctxt->instate = XML_PARSER_EOF;
7377: #ifdef DEBUG_PUSH
7378: fprintf(stderr, "PP: entering EOF\n");
7379: #endif
/* NOTE(review): unlike the other endDocument calls in this function,
 * this one is not guarded by !ctxt->disableSAX - confirm it is intended. */
7380: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7381: ctxt->sax->endDocument(ctxt->userData);
7382: goto done;
7383: }
7384: if ((cur == '<') && (next == '?')) {
7385: /* PI or XML decl */
/* NOTE(review): cur[5] is read below although only avail >= 5 is
 * checked; presumably the buffer is zero terminated - confirm. */
7386: if (avail < 5) return(ret);
1.143 daniel 7387: if ((!terminate) &&
7388: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 7389: return(ret);
7390: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7391: ctxt->sax->setDocumentLocator(ctxt->userData,
7392: &xmlDefaultSAXLocator);
1.184 daniel 7393: if ((ctxt->input->cur[2] == 'x') &&
7394: (ctxt->input->cur[3] == 'm') &&
7395: (ctxt->input->cur[4] == 'l') &&
7396: (IS_BLANK(ctxt->input->cur[5]))) {
1.140 daniel 7397: ret += 5;
7398: #ifdef DEBUG_PUSH
7399: fprintf(stderr, "PP: Parsing XML Decl\n");
7400: #endif
7401: xmlParseXMLDecl(ctxt);
1.193 daniel 7402: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7403: /*
7404: * The XML REC instructs us to stop parsing right
7405: * here
7406: */
7407: ctxt->instate = XML_PARSER_EOF;
7408: return(0);
7409: }
1.167 daniel 7410: ctxt->standalone = ctxt->input->standalone;
1.164 daniel 7411: if ((ctxt->encoding == NULL) &&
7412: (ctxt->input->encoding != NULL))
7413: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1.171 daniel 7414: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7415: (!ctxt->disableSAX))
1.140 daniel 7416: ctxt->sax->startDocument(ctxt->userData);
7417: ctxt->instate = XML_PARSER_MISC;
7418: #ifdef DEBUG_PUSH
7419: fprintf(stderr, "PP: entering MISC\n");
7420: #endif
7421: } else {
/* "<?" not followed by "xml" and a blank: no XML declaration, use the
 * default version; the PI itself is parsed in the MISC state. */
7422: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 daniel 7423: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7424: (!ctxt->disableSAX))
1.140 daniel 7425: ctxt->sax->startDocument(ctxt->userData);
7426: ctxt->instate = XML_PARSER_MISC;
7427: #ifdef DEBUG_PUSH
7428: fprintf(stderr, "PP: entering MISC\n");
7429: #endif
7430: }
7431: } else {
7432: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7433: ctxt->sax->setDocumentLocator(ctxt->userData,
7434: &xmlDefaultSAXLocator);
7435: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 daniel 7436: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7437: (!ctxt->disableSAX))
1.140 daniel 7438: ctxt->sax->startDocument(ctxt->userData);
7439: ctxt->instate = XML_PARSER_MISC;
7440: #ifdef DEBUG_PUSH
7441: fprintf(stderr, "PP: entering MISC\n");
7442: #endif
7443: }
7444: break;
/* MISC: PIs, comments and the DOCTYPE declaration found before the
 * root element; anything else switches to START_TAG. */
7445: case XML_PARSER_MISC:
7446: SKIP_BLANKS;
1.184 daniel 7447: if (ctxt->input->buf == NULL)
7448: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7449: else
1.184 daniel 7450: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7451: if (avail < 2)
7452: goto done;
1.184 daniel 7453: cur = ctxt->input->cur[0];
7454: next = ctxt->input->cur[1];
1.140 daniel 7455: if ((cur == '<') && (next == '?')) {
1.143 daniel 7456: if ((!terminate) &&
7457: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 7458: goto done;
7459: #ifdef DEBUG_PUSH
7460: fprintf(stderr, "PP: Parsing PI\n");
7461: #endif
7462: xmlParsePI(ctxt);
7463: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 7464: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 7465: if ((!terminate) &&
7466: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 7467: goto done;
7468: #ifdef DEBUG_PUSH
7469: fprintf(stderr, "PP: Parsing Comment\n");
7470: #endif
7471: xmlParseComment(ctxt);
7472: ctxt->instate = XML_PARSER_MISC;
7473: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 7474: (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
7475: (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
7476: (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
7477: (ctxt->input->cur[8] == 'E')) {
1.143 daniel 7478: if ((!terminate) &&
7479: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 7480: goto done;
7481: #ifdef DEBUG_PUSH
7482: fprintf(stderr, "PP: Parsing internal subset\n");
7483: #endif
1.166 daniel 7484: ctxt->inSubset = 1;
1.140 daniel 7485: xmlParseDocTypeDecl(ctxt);
/* A '[' announces an internal subset, which is handled by the DTD
 * state; otherwise the external subset callback is issued right away. */
1.152 daniel 7486: if (RAW == '[') {
1.140 daniel 7487: ctxt->instate = XML_PARSER_DTD;
7488: #ifdef DEBUG_PUSH
7489: fprintf(stderr, "PP: entering DTD\n");
7490: #endif
7491: } else {
1.166 daniel 7492: /*
7493: * Create and update the external subset.
7494: */
7495: ctxt->inSubset = 2;
1.171 daniel 7496: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 7497: (ctxt->sax->externalSubset != NULL))
7498: ctxt->sax->externalSubset(ctxt->userData,
7499: ctxt->intSubName, ctxt->extSubSystem,
7500: ctxt->extSubURI);
7501: ctxt->inSubset = 0;
1.140 daniel 7502: ctxt->instate = XML_PARSER_PROLOG;
7503: #ifdef DEBUG_PUSH
7504: fprintf(stderr, "PP: entering PROLOG\n");
7505: #endif
7506: }
7507: } else if ((cur == '<') && (next == '!') &&
7508: (avail < 9)) {
/* "<!" seen but fewer than 9 bytes available: wait for more data
 * before deciding between a comment and a DOCTYPE. */
7509: goto done;
7510: } else {
7511: ctxt->instate = XML_PARSER_START_TAG;
7512: #ifdef DEBUG_PUSH
7513: fprintf(stderr, "PP: entering START_TAG\n");
7514: #endif
7515: }
7516: break;
/* PROLOG: PIs and comments after the DOCTYPE declaration; anything
 * else switches to START_TAG. */
1.128 daniel 7517: case XML_PARSER_PROLOG:
1.140 daniel 7518: SKIP_BLANKS;
1.184 daniel 7519: if (ctxt->input->buf == NULL)
7520: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7521: else
1.184 daniel 7522: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7523: if (avail < 2)
7524: goto done;
1.184 daniel 7525: cur = ctxt->input->cur[0];
7526: next = ctxt->input->cur[1];
1.140 daniel 7527: if ((cur == '<') && (next == '?')) {
1.143 daniel 7528: if ((!terminate) &&
7529: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 7530: goto done;
7531: #ifdef DEBUG_PUSH
7532: fprintf(stderr, "PP: Parsing PI\n");
7533: #endif
7534: xmlParsePI(ctxt);
7535: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 7536: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 7537: if ((!terminate) &&
7538: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 7539: goto done;
7540: #ifdef DEBUG_PUSH
7541: fprintf(stderr, "PP: Parsing Comment\n");
7542: #endif
7543: xmlParseComment(ctxt);
7544: ctxt->instate = XML_PARSER_PROLOG;
7545: } else if ((cur == '<') && (next == '!') &&
7546: (avail < 4)) {
7547: goto done;
7548: } else {
7549: ctxt->instate = XML_PARSER_START_TAG;
7550: #ifdef DEBUG_PUSH
7551: fprintf(stderr, "PP: entering START_TAG\n");
7552: #endif
7553: }
7554: break;
/* EPILOG: only PIs and comments are accepted after the root element;
 * anything else is reported as extra content. */
7555: case XML_PARSER_EPILOG:
7556: SKIP_BLANKS;
1.184 daniel 7557: if (ctxt->input->buf == NULL)
7558: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7559: else
1.184 daniel 7560: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7561: if (avail < 2)
7562: goto done;
1.184 daniel 7563: cur = ctxt->input->cur[0];
7564: next = ctxt->input->cur[1];
1.140 daniel 7565: if ((cur == '<') && (next == '?')) {
1.143 daniel 7566: if ((!terminate) &&
7567: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 7568: goto done;
7569: #ifdef DEBUG_PUSH
7570: fprintf(stderr, "PP: Parsing PI\n");
7571: #endif
7572: xmlParsePI(ctxt);
7573: ctxt->instate = XML_PARSER_EPILOG;
7574: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 7575: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 7576: if ((!terminate) &&
7577: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 7578: goto done;
7579: #ifdef DEBUG_PUSH
7580: fprintf(stderr, "PP: Parsing Comment\n");
7581: #endif
7582: xmlParseComment(ctxt);
7583: ctxt->instate = XML_PARSER_EPILOG;
7584: } else if ((cur == '<') && (next == '!') &&
7585: (avail < 4)) {
7586: goto done;
7587: } else {
1.230 veillard 7588: ctxt->errNo = XML_ERR_DOCUMENT_END;
1.140 daniel 7589: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7590: ctxt->sax->error(ctxt->userData,
7591: "Extra content at the end of the document\n");
7592: ctxt->wellFormed = 0;
1.180 daniel 7593: ctxt->disableSAX = 1;
1.140 daniel 7594: ctxt->instate = XML_PARSER_EOF;
7595: #ifdef DEBUG_PUSH
7596: fprintf(stderr, "PP: entering EOF\n");
7597: #endif
1.171 daniel 7598: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7599: (!ctxt->disableSAX))
1.140 daniel 7600: ctxt->sax->endDocument(ctxt->userData);
7601: goto done;
7602: }
7603: break;
7604: case XML_PARSER_START_TAG: {
7605: xmlChar *name, *oldname;
7606:
1.184 daniel 7607: if ((avail < 2) && (ctxt->inputNr == 1))
1.140 daniel 7608: goto done;
1.184 daniel 7609: cur = ctxt->input->cur[0];
1.140 daniel 7610: if (cur != '<') {
1.230 veillard 7611: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.140 daniel 7612: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7613: ctxt->sax->error(ctxt->userData,
7614: "Start tag expect, '<' not found\n");
7615: ctxt->wellFormed = 0;
1.180 daniel 7616: ctxt->disableSAX = 1;
1.140 daniel 7617: ctxt->instate = XML_PARSER_EOF;
7618: #ifdef DEBUG_PUSH
7619: fprintf(stderr, "PP: entering EOF\n");
7620: #endif
1.171 daniel 7621: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7622: (!ctxt->disableSAX))
1.140 daniel 7623: ctxt->sax->endDocument(ctxt->userData);
7624: goto done;
7625: }
1.143 daniel 7626: if ((!terminate) &&
7627: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 7628: goto done;
/* Push a new entry on the space stack: -1 when the stack is empty,
 * otherwise a copy of the current top value. */
1.176 daniel 7629: if (ctxt->spaceNr == 0)
7630: spacePush(ctxt, -1);
7631: else
7632: spacePush(ctxt, *ctxt->space);
1.140 daniel 7633: name = xmlParseStartTag(ctxt);
7634: if (name == NULL) {
1.176 daniel 7635: spacePop(ctxt);
1.140 daniel 7636: ctxt->instate = XML_PARSER_EOF;
7637: #ifdef DEBUG_PUSH
7638: fprintf(stderr, "PP: entering EOF\n");
7639: #endif
1.171 daniel 7640: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7641: (!ctxt->disableSAX))
1.140 daniel 7642: ctxt->sax->endDocument(ctxt->userData);
7643: goto done;
7644: }
/* The name stack receives its own copy; name itself is freed below. */
7645: namePush(ctxt, xmlStrdup(name));
7646:
7647: /*
7648: * [ VC: Root Element Type ]
7649: * The Name in the document type declaration must match
7650: * the element type of the root element.
7651: */
7652: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 7653: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.140 daniel 7654: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7655:
7656: /*
7657: * Check for an Empty Element.
7658: */
1.152 daniel 7659: if ((RAW == '/') && (NXT(1) == '>')) {
1.140 daniel 7660: SKIP(2);
1.171 daniel 7661: if ((ctxt->sax != NULL) &&
7662: (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
1.140 daniel 7663: ctxt->sax->endElement(ctxt->userData, name);
7664: xmlFree(name);
7665: oldname = namePop(ctxt);
1.176 daniel 7666: spacePop(ctxt);
1.140 daniel 7667: if (oldname != NULL) {
7668: #ifdef DEBUG_STACK
7669: fprintf(stderr,"Close: popped %s\n", oldname);
7670: #endif
7671: xmlFree(oldname);
7672: }
/* An empty name stack means the root element was just closed. */
7673: if (ctxt->name == NULL) {
7674: ctxt->instate = XML_PARSER_EPILOG;
7675: #ifdef DEBUG_PUSH
7676: fprintf(stderr, "PP: entering EPILOG\n");
7677: #endif
7678: } else {
7679: ctxt->instate = XML_PARSER_CONTENT;
7680: #ifdef DEBUG_PUSH
7681: fprintf(stderr, "PP: entering CONTENT\n");
7682: #endif
7683: }
7684: break;
7685: }
1.152 daniel 7686: if (RAW == '>') {
1.140 daniel 7687: NEXT;
7688: } else {
1.230 veillard 7689: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.140 daniel 7690: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7691: ctxt->sax->error(ctxt->userData,
7692: "Couldn't find end of Start Tag %s\n",
7693: name);
7694: ctxt->wellFormed = 0;
1.180 daniel 7695: ctxt->disableSAX = 1;
1.140 daniel 7696:
7697: /*
7698: * end of parsing of this node.
7699: */
7700: nodePop(ctxt);
7701: oldname = namePop(ctxt);
1.176 daniel 7702: spacePop(ctxt);
1.140 daniel 7703: if (oldname != NULL) {
7704: #ifdef DEBUG_STACK
7705: fprintf(stderr,"Close: popped %s\n", oldname);
7706: #endif
7707: xmlFree(oldname);
7708: }
7709: }
7710: xmlFree(name);
7711: ctxt->instate = XML_PARSER_CONTENT;
7712: #ifdef DEBUG_PUSH
7713: fprintf(stderr, "PP: entering CONTENT\n");
7714: #endif
7715: break;
7716: }
1.224 veillard 7717: case XML_PARSER_CONTENT: {
7718: const xmlChar *test;
7719: int cons;
7720: xmlChar tok;
7721:
1.140 daniel 7722: /*
7723: * Handle preparsed entities and charRef
7724: */
7725: if (ctxt->token != 0) {
/* This local array shadows the outer cur inside this block only. */
7726: xmlChar cur[2] = { 0 , 0 } ;
7727:
7728: cur[0] = (xmlChar) ctxt->token;
1.171 daniel 7729: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
7730: (ctxt->sax->characters != NULL))
1.140 daniel 7731: ctxt->sax->characters(ctxt->userData, cur, 1);
7732: ctxt->token = 0;
7733: }
1.184 daniel 7734: if ((avail < 2) && (ctxt->inputNr == 1))
1.140 daniel 7735: goto done;
1.184 daniel 7736: cur = ctxt->input->cur[0];
7737: next = ctxt->input->cur[1];
1.224 veillard 7738:
/* Snapshot the position, so that the check at the end of this state
 * can detect that nothing was consumed. */
7739: test = CUR_PTR;
7740: cons = ctxt->input->consumed;
7741: tok = ctxt->token;
1.140 daniel 7742: if ((cur == '<') && (next == '?')) {
1.143 daniel 7743: if ((!terminate) &&
7744: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 7745: goto done;
7746: #ifdef DEBUG_PUSH
7747: fprintf(stderr, "PP: Parsing PI\n");
7748: #endif
7749: xmlParsePI(ctxt);
7750: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 7751: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 7752: if ((!terminate) &&
7753: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 7754: goto done;
7755: #ifdef DEBUG_PUSH
7756: fprintf(stderr, "PP: Parsing Comment\n");
7757: #endif
7758: xmlParseComment(ctxt);
7759: ctxt->instate = XML_PARSER_CONTENT;
1.184 daniel 7760: } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
7761: (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
7762: (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
7763: (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
7764: (ctxt->input->cur[8] == '[')) {
/* Skip the 9 characters of "<![CDATA[". */
1.140 daniel 7765: SKIP(9);
7766: ctxt->instate = XML_PARSER_CDATA_SECTION;
7767: #ifdef DEBUG_PUSH
7768: fprintf(stderr, "PP: entering CDATA_SECTION\n");
7769: #endif
7770: break;
7771: } else if ((cur == '<') && (next == '!') &&
7772: (avail < 9)) {
7773: goto done;
7774: } else if ((cur == '<') && (next == '/')) {
7775: ctxt->instate = XML_PARSER_END_TAG;
7776: #ifdef DEBUG_PUSH
7777: fprintf(stderr, "PP: entering END_TAG\n");
7778: #endif
7779: break;
7780: } else if (cur == '<') {
7781: ctxt->instate = XML_PARSER_START_TAG;
7782: #ifdef DEBUG_PUSH
7783: fprintf(stderr, "PP: entering START_TAG\n");
7784: #endif
7785: break;
7786: } else if (cur == '&') {
1.143 daniel 7787: if ((!terminate) &&
7788: (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
1.140 daniel 7789: goto done;
7790: #ifdef DEBUG_PUSH
7791: fprintf(stderr, "PP: Parsing Reference\n");
7792: #endif
7793: xmlParseReference(ctxt);
7794: } else {
1.156 daniel 7795: /* TODO Avoid the extra copy, handle directly !!! */
1.140 daniel 7796: /*
1.181 daniel 7797: * Goal of the following test is:
1.140 daniel 7798: * - minimize calls to the SAX 'character' callback
7799: * when they are mergeable
7800: * - handle a problem for isBlank when we only parse
7801: * a sequence of blank chars and the next one is
7802: * not available to check against '<' presence.
7803: * - tries to homogenize the differences in SAX
7804: * callbacks between the push and pull versions
7805: * of the parser.
7806: */
7807: if ((ctxt->inputNr == 1) &&
7808: (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
1.143 daniel 7809: if ((!terminate) &&
7810: (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
1.140 daniel 7811: goto done;
7812: }
7813: ctxt->checkIndex = 0;
7814: #ifdef DEBUG_PUSH
7815: fprintf(stderr, "PP: Parsing char data\n");
7816: #endif
7817: xmlParseCharData(ctxt, 0);
7818: }
7819: /*
7820: * Pop-up of finished entities.
7821: */
1.152 daniel 7822: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 7823: xmlPopInput(ctxt);
/* Neither the consumed count, the current pointer nor the token
 * changed: report an error and stop instead of looping forever. */
1.224 veillard 7824: if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
7825: (tok == ctxt->token)) {
1.230 veillard 7826: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.224 veillard 7827: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7828: ctxt->sax->error(ctxt->userData,
7829: "detected an error in element content\n");
7830: ctxt->wellFormed = 0;
7831: ctxt->disableSAX = 1;
7832: ctxt->instate = XML_PARSER_EOF;
7833: break;
7834: }
1.140 daniel 7835: break;
1.224 veillard 7836: }
1.140 daniel 7837: case XML_PARSER_CDATA_SECTION: {
7838: /*
7839: * In push mode the SAX cdataBlock callback may be issued several
7840: * times for one section; the receiver has to merge contiguous callbacks.
7841: */
7842: int base;
7843:
/* Note: this lookup is done whatever the value of terminate. */
7844: base = xmlParseLookupSequence(ctxt, ']', ']', '>');
7845: if (base < 0) {
/* End not found yet: when enough data is pending, deliver a block of
 * XML_PARSER_BIG_BUFFER_SIZE bytes and keep the rest for later. */
7846: if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
1.171 daniel 7847: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.140 daniel 7848: if (ctxt->sax->cdataBlock != NULL)
1.184 daniel 7849: ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
1.140 daniel 7850: XML_PARSER_BIG_BUFFER_SIZE);
7851: }
7852: SKIP(XML_PARSER_BIG_BUFFER_SIZE);
7853: ctxt->checkIndex = 0;
7854: }
7855: goto done;
7856: } else {
1.171 daniel 7857: if ((ctxt->sax != NULL) && (base > 0) &&
7858: (!ctxt->disableSAX)) {
1.140 daniel 7859: if (ctxt->sax->cdataBlock != NULL)
7860: ctxt->sax->cdataBlock(ctxt->userData,
1.184 daniel 7861: ctxt->input->cur, base);
1.140 daniel 7862: }
/* Skip the remaining data plus the 3 characters of "]]>". */
7863: SKIP(base + 3);
7864: ctxt->checkIndex = 0;
7865: ctxt->instate = XML_PARSER_CONTENT;
7866: #ifdef DEBUG_PUSH
7867: fprintf(stderr, "PP: entering CONTENT\n");
7868: #endif
7869: }
7870: break;
7871: }
1.141 daniel 7872: case XML_PARSER_END_TAG:
1.140 daniel 7873: if (avail < 2)
7874: goto done;
1.143 daniel 7875: if ((!terminate) &&
7876: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 7877: goto done;
7878: xmlParseEndTag(ctxt);
7879: if (ctxt->name == NULL) {
7880: ctxt->instate = XML_PARSER_EPILOG;
7881: #ifdef DEBUG_PUSH
7882: fprintf(stderr, "PP: entering EPILOG\n");
7883: #endif
7884: } else {
7885: ctxt->instate = XML_PARSER_CONTENT;
7886: #ifdef DEBUG_PUSH
7887: fprintf(stderr, "PP: entering CONTENT\n");
7888: #endif
7889: }
7890: break;
7891: case XML_PARSER_DTD: {
7892: /*
7893: * Sorry but progressive parsing of the internal subset
7894: * is not expected to be supported. We first check that
7895: * the full content of the internal subset is available and
7896: * the parsing is launched only at that point.
7897: * Internal subset ends up with "']' S? '>'" in an unescaped
7898: * section and not in a ']]>' sequence which are conditional
7899: * sections (whoever argued to keep that crap in XML deserve
7900: * a place in hell !).
 *
 * NOTE(review): ctxt->input->buf is dereferenced below without the
 * NULL test used by the other states of this function - confirm that
 * this state is only reached with a buffered input.
7901: */
7902: int base, i;
7903: xmlChar *buf;
7904: xmlChar quote = 0;
7905:
1.184 daniel 7906: base = ctxt->input->cur - ctxt->input->base;
1.140 daniel 7907: if (base < 0) return(0);
7908: if (ctxt->checkIndex > base)
7909: base = ctxt->checkIndex;
1.184 daniel 7910: buf = ctxt->input->buf->buffer->content;
/* Scan for the closing ']', ignoring anything inside a quoted string. */
1.202 daniel 7911: for (;(unsigned int) base < ctxt->input->buf->buffer->use;
7912: base++) {
1.140 daniel 7913: if (quote != 0) {
7914: if (buf[base] == quote)
7915: quote = 0;
7916: continue;
7917: }
7918: if (buf[base] == '"') {
7919: quote = '"';
7920: continue;
7921: }
7922: if (buf[base] == '\'') {
7923: quote = '\'';
7924: continue;
7925: }
7926: if (buf[base] == ']') {
1.202 daniel 7927: if ((unsigned int) base +1 >=
7928: ctxt->input->buf->buffer->use)
1.140 daniel 7929: break;
7930: if (buf[base + 1] == ']') {
7931: /* conditional crap, skip both ']' ! */
7932: base++;
7933: continue;
7934: }
/* NOTE(review): this accepts any later '>' in the buffer, not only
 * one separated from the ']' by blanks as the comment above
 * describes - confirm this is acceptable. */
1.202 daniel 7935: for (i = 0;
7936: (unsigned int) base + i < ctxt->input->buf->buffer->use;
7937: i++) {
1.140 daniel 7938: if (buf[base + i] == '>')
7939: goto found_end_int_subset;
7940: }
7941: break;
7942: }
7943: }
7944: /*
7945: * We didn't find the end of the Internal subset
 * The scan position is only saved when not stopped inside a quoted
 * string, since the quote state is not kept between calls.
7946: */
7947: if (quote == 0)
7948: ctxt->checkIndex = base;
7949: #ifdef DEBUG_PUSH
/* NOTE(review): next does not appear to be assigned on every path
 * reaching this state - confirm before relying on this test. */
7950: if (next == 0)
7951: fprintf(stderr, "PP: lookup of int subset end filed\n");
7952: #endif
7953: goto done;
7954:
7955: found_end_int_subset:
7956: xmlParseInternalSubset(ctxt);
1.166 daniel 7957: ctxt->inSubset = 2;
1.171 daniel 7958: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 7959: (ctxt->sax->externalSubset != NULL))
7960: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7961: ctxt->extSubSystem, ctxt->extSubURI);
7962: ctxt->inSubset = 0;
1.140 daniel 7963: ctxt->instate = XML_PARSER_PROLOG;
7964: ctxt->checkIndex = 0;
7965: #ifdef DEBUG_PUSH
7966: fprintf(stderr, "PP: entering PROLOG\n");
7967: #endif
7968: break;
7969: }
/* The remaining states are reported on stderr as internal errors and
 * the parser falls back to another state. */
7970: case XML_PARSER_COMMENT:
7971: fprintf(stderr, "PP: internal error, state == COMMENT\n");
7972: ctxt->instate = XML_PARSER_CONTENT;
7973: #ifdef DEBUG_PUSH
7974: fprintf(stderr, "PP: entering CONTENT\n");
7975: #endif
7976: break;
7977: case XML_PARSER_PI:
7978: fprintf(stderr, "PP: internal error, state == PI\n");
7979: ctxt->instate = XML_PARSER_CONTENT;
7980: #ifdef DEBUG_PUSH
7981: fprintf(stderr, "PP: entering CONTENT\n");
7982: #endif
7983: break;
1.128 daniel 7984: case XML_PARSER_ENTITY_DECL:
1.140 daniel 7985: fprintf(stderr, "PP: internal error, state == ENTITY_DECL\n");
7986: ctxt->instate = XML_PARSER_DTD;
7987: #ifdef DEBUG_PUSH
7988: fprintf(stderr, "PP: entering DTD\n");
7989: #endif
7990: break;
1.128 daniel 7991: case XML_PARSER_ENTITY_VALUE:
1.140 daniel 7992: fprintf(stderr, "PP: internal error, state == ENTITY_VALUE\n");
/* NOTE(review): the state set here is CONTENT while the debug message
 * below announces DTD - one of the two is presumably wrong. */
7993: ctxt->instate = XML_PARSER_CONTENT;
7994: #ifdef DEBUG_PUSH
7995: fprintf(stderr, "PP: entering DTD\n");
7996: #endif
7997: break;
1.128 daniel 7998: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 7999: fprintf(stderr, "PP: internal error, state == ATTRIBUTE_VALUE\n");
1.168 daniel 8000: ctxt->instate = XML_PARSER_START_TAG;
8001: #ifdef DEBUG_PUSH
8002: fprintf(stderr, "PP: entering START_TAG\n");
8003: #endif
8004: break;
8005: case XML_PARSER_SYSTEM_LITERAL:
8006: fprintf(stderr, "PP: internal error, state == SYSTEM_LITERAL\n");
1.140 daniel 8007: ctxt->instate = XML_PARSER_START_TAG;
8008: #ifdef DEBUG_PUSH
8009: fprintf(stderr, "PP: entering START_TAG\n");
8010: #endif
8011: break;
1.128 daniel 8012: }
8013: }
1.140 daniel 8014: done:
8015: #ifdef DEBUG_PUSH
8016: fprintf(stderr, "PP: done %d\n", ret);
8017: #endif
1.128 daniel 8018: return(ret);
8019: }
8020:
8021: /**
1.143 daniel 8022: * xmlParseTry:
8023: * @ctxt: an XML parser context
8024: *
8025: * Try to progress on parsing
8026: *
8027: * Returns zero if no parsing was possible
8028: */
8029: int
8030: xmlParseTry(xmlParserCtxtPtr ctxt) {
8031: return(xmlParseTryOrFinish(ctxt, 0));
8032: }
8033:
8034: /**
1.128 daniel 8035: * xmlParseChunk:
8036: * @ctxt: an XML parser context
8037: * @chunk: an char array
8038: * @size: the size in byte of the chunk
8039: * @terminate: last chunk indicator
8040: *
8041: * Parse a Chunk of memory
8042: *
8043: * Returns zero if no error, the xmlParserErrors otherwise.
8044: */
1.140 daniel 8045: int
1.128 daniel 8046: xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8047: int terminate) {
1.132 daniel 8048: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
1.140 daniel 8049: (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8050: int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8051: int cur = ctxt->input->cur - ctxt->input->base;
8052:
1.132 daniel 8053: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
1.140 daniel 8054: ctxt->input->base = ctxt->input->buf->buffer->content + base;
8055: ctxt->input->cur = ctxt->input->base + cur;
8056: #ifdef DEBUG_PUSH
8057: fprintf(stderr, "PP: pushed %d\n", size);
8058: #endif
8059:
1.150 daniel 8060: if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8061: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 8062: } else if (ctxt->instate != XML_PARSER_EOF)
1.143 daniel 8063: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 8064: if (terminate) {
1.151 daniel 8065: /*
8066: * Check for termination
8067: */
1.140 daniel 8068: if ((ctxt->instate != XML_PARSER_EOF) &&
8069: (ctxt->instate != XML_PARSER_EPILOG)) {
1.230 veillard 8070: ctxt->errNo = XML_ERR_DOCUMENT_END;
1.140 daniel 8071: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8072: ctxt->sax->error(ctxt->userData,
8073: "Extra content at the end of the document\n");
8074: ctxt->wellFormed = 0;
1.180 daniel 8075: ctxt->disableSAX = 1;
1.140 daniel 8076: }
8077: if (ctxt->instate != XML_PARSER_EOF) {
1.171 daniel 8078: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8079: (!ctxt->disableSAX))
1.140 daniel 8080: ctxt->sax->endDocument(ctxt->userData);
8081: }
8082: ctxt->instate = XML_PARSER_EOF;
1.128 daniel 8083: }
8084: return((xmlParserErrors) ctxt->errNo);
8085: }
8086:
8087: /************************************************************************
8088: * *
1.98 daniel 8089: * I/O front end functions to the parser *
8090: * *
8091: ************************************************************************/
1.201 daniel 8092:
8093: /**
1.229 veillard 8094: * xmlStopParser:
1.201 daniel 8095: * @ctxt: an XML parser context
8096: *
8097: * Blocks further parser processing
8098: */
8099: void
8100: xmlStopParser(xmlParserCtxtPtr ctxt) {
8101: ctxt->instate = XML_PARSER_EOF;
8102: if (ctxt->input != NULL)
8103: ctxt->input->cur = BAD_CAST"";
8104: }
1.98 daniel 8105:
1.50 daniel 8106: /**
1.181 daniel 8107: * xmlCreatePushParserCtxt:
1.140 daniel 8108: * @sax: a SAX handler
8109: * @user_data: The user data returned on SAX callbacks
8110: * @chunk: a pointer to an array of chars
8111: * @size: number of chars in the array
8112: * @filename: an optional file name or URI
8113: *
8114: * Create a parser context for using the XML parser in push mode
8115: * To allow content encoding detection, @size should be >= 4
8116: * The value of @filename is used for fetching external entities
8117: * and error/warning reports.
8118: *
8119: * Returns the new parser context or NULL
8120: */
8121: xmlParserCtxtPtr
8122: xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8123: const char *chunk, int size, const char *filename) {
8124: xmlParserCtxtPtr ctxt;
8125: xmlParserInputPtr inputStream;
8126: xmlParserInputBufferPtr buf;
8127: xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8128:
8129: /*
1.156 daniel 8130: * plug some encoding conversion routines
1.140 daniel 8131: */
8132: if ((chunk != NULL) && (size >= 4))
1.156 daniel 8133: enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
1.140 daniel 8134:
8135: buf = xmlAllocParserInputBuffer(enc);
8136: if (buf == NULL) return(NULL);
8137:
8138: ctxt = xmlNewParserCtxt();
8139: if (ctxt == NULL) {
8140: xmlFree(buf);
8141: return(NULL);
8142: }
8143: if (sax != NULL) {
8144: if (ctxt->sax != &xmlDefaultSAXHandler)
8145: xmlFree(ctxt->sax);
8146: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8147: if (ctxt->sax == NULL) {
8148: xmlFree(buf);
8149: xmlFree(ctxt);
8150: return(NULL);
8151: }
8152: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8153: if (user_data != NULL)
8154: ctxt->userData = user_data;
8155: }
8156: if (filename == NULL) {
8157: ctxt->directory = NULL;
8158: } else {
8159: ctxt->directory = xmlParserGetDirectory(filename);
8160: }
8161:
8162: inputStream = xmlNewInputStream(ctxt);
8163: if (inputStream == NULL) {
8164: xmlFreeParserCtxt(ctxt);
8165: return(NULL);
8166: }
8167:
8168: if (filename == NULL)
8169: inputStream->filename = NULL;
8170: else
8171: inputStream->filename = xmlMemStrdup(filename);
8172: inputStream->buf = buf;
8173: inputStream->base = inputStream->buf->buffer->content;
8174: inputStream->cur = inputStream->buf->buffer->content;
1.156 daniel 8175: if (enc != XML_CHAR_ENCODING_NONE) {
8176: xmlSwitchEncoding(ctxt, enc);
8177: }
1.140 daniel 8178:
8179: inputPush(ctxt, inputStream);
8180:
8181: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8182: (ctxt->input->buf != NULL)) {
8183: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8184: #ifdef DEBUG_PUSH
8185: fprintf(stderr, "PP: pushed %d\n", size);
8186: #endif
8187: }
1.190 daniel 8188:
8189: return(ctxt);
8190: }
8191:
8192: /**
8193: * xmlCreateIOParserCtxt:
8194: * @sax: a SAX handler
8195: * @user_data: The user data returned on SAX callbacks
8196: * @ioread: an I/O read function
8197: * @ioclose: an I/O close function
8198: * @ioctx: an I/O handler
8199: * @enc: the charset encoding if known
8200: *
8201: * Create a parser context for using the XML parser with an existing
8202: * I/O stream
8203: *
8204: * Returns the new parser context or NULL
8205: */
8206: xmlParserCtxtPtr
8207: xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8208: xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8209: void *ioctx, xmlCharEncoding enc) {
8210: xmlParserCtxtPtr ctxt;
8211: xmlParserInputPtr inputStream;
8212: xmlParserInputBufferPtr buf;
8213:
8214: buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8215: if (buf == NULL) return(NULL);
8216:
8217: ctxt = xmlNewParserCtxt();
8218: if (ctxt == NULL) {
8219: xmlFree(buf);
8220: return(NULL);
8221: }
8222: if (sax != NULL) {
8223: if (ctxt->sax != &xmlDefaultSAXHandler)
8224: xmlFree(ctxt->sax);
8225: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8226: if (ctxt->sax == NULL) {
8227: xmlFree(buf);
8228: xmlFree(ctxt);
8229: return(NULL);
8230: }
8231: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8232: if (user_data != NULL)
8233: ctxt->userData = user_data;
8234: }
8235:
1.229 veillard 8236: inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8237: if (inputStream == NULL) {
8238: xmlFreeParserCtxt(ctxt);
8239: return(NULL);
1.74 daniel 8240: }
1.229 veillard 8241: inputPush(ctxt, inputStream);
1.69 daniel 8242:
1.229 veillard 8243: return(ctxt);
1.1 veillard 8244: }
8245:
1.229 veillard 8246: /************************************************************************
8247: * *
8248: * Front ends when parsing a Dtd *
8249: * *
8250: ************************************************************************/
1.76 daniel 8251:
8252: /**
1.181 daniel 8253: * xmlSAXParseDTD:
1.76 daniel 8254: * @sax: the SAX handler block
8255: * @ExternalID: a NAME* containing the External ID of the DTD
8256: * @SystemID: a NAME* containing the URL to the DTD
8257: *
8258: * Load and parse an external subset.
8259: *
8260: * Returns the resulting xmlDtdPtr or NULL in case of error.
8261: */
8262:
8263: xmlDtdPtr
1.123 daniel 8264: xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8265: const xmlChar *SystemID) {
1.76 daniel 8266: xmlDtdPtr ret = NULL;
8267: xmlParserCtxtPtr ctxt;
1.83 daniel 8268: xmlParserInputPtr input = NULL;
1.76 daniel 8269: xmlCharEncoding enc;
8270:
8271: if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
8272:
1.97 daniel 8273: ctxt = xmlNewParserCtxt();
1.76 daniel 8274: if (ctxt == NULL) {
8275: return(NULL);
8276: }
8277:
8278: /*
8279: * Set-up the SAX context
8280: */
8281: if (sax != NULL) {
1.93 veillard 8282: if (ctxt->sax != NULL)
1.119 daniel 8283: xmlFree(ctxt->sax);
1.76 daniel 8284: ctxt->sax = sax;
8285: ctxt->userData = NULL;
8286: }
8287:
8288: /*
8289: * Ask the Entity resolver to load the damn thing
8290: */
8291:
8292: if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
8293: input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
8294: if (input == NULL) {
1.86 daniel 8295: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 8296: xmlFreeParserCtxt(ctxt);
8297: return(NULL);
8298: }
8299:
8300: /*
1.156 daniel 8301: * plug some encoding conversion routines here.
1.76 daniel 8302: */
8303: xmlPushInput(ctxt, input);
1.156 daniel 8304: enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
1.76 daniel 8305: xmlSwitchEncoding(ctxt, enc);
8306:
1.95 veillard 8307: if (input->filename == NULL)
1.156 daniel 8308: input->filename = (char *) xmlStrdup(SystemID);
1.76 daniel 8309: input->line = 1;
8310: input->col = 1;
8311: input->base = ctxt->input->cur;
8312: input->cur = ctxt->input->cur;
8313: input->free = NULL;
8314:
8315: /*
8316: * let's parse that entity knowing it's an external subset.
8317: */
1.191 daniel 8318: ctxt->inSubset = 2;
8319: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8320: ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8321: ExternalID, SystemID);
1.79 daniel 8322: xmlParseExternalSubset(ctxt, ExternalID, SystemID);
1.76 daniel 8323:
8324: if (ctxt->myDoc != NULL) {
8325: if (ctxt->wellFormed) {
1.191 daniel 8326: ret = ctxt->myDoc->extSubset;
8327: ctxt->myDoc->extSubset = NULL;
1.76 daniel 8328: } else {
8329: ret = NULL;
8330: }
8331: xmlFreeDoc(ctxt->myDoc);
8332: ctxt->myDoc = NULL;
8333: }
1.86 daniel 8334: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 8335: xmlFreeParserCtxt(ctxt);
8336:
8337: return(ret);
8338: }
8339:
8340: /**
1.181 daniel 8341: * xmlParseDTD:
1.76 daniel 8342: * @ExternalID: a NAME* containing the External ID of the DTD
8343: * @SystemID: a NAME* containing the URL to the DTD
8344: *
8345: * Load and parse an external subset.
8346: *
8347: * Returns the resulting xmlDtdPtr or NULL in case of error.
8348: */
8349:
8350: xmlDtdPtr
1.123 daniel 8351: xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
1.76 daniel 8352: return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
1.59 daniel 8353: }
8354:
1.229 veillard 8355: /************************************************************************
8356: * *
8357: * Front ends when parsing an Entity *
8358: * *
8359: ************************************************************************/
8360:
1.59 daniel 8361: /**
1.181 daniel 8362: * xmlSAXParseBalancedChunk:
1.144 daniel 8363: * @ctx: an XML parser context (possibly NULL)
8364: * @sax: the SAX handler bloc (possibly NULL)
8365: * @user_data: The user data returned on SAX callbacks (possibly NULL)
8366: * @input: a parser input stream
8367: * @enc: the encoding
8368: *
8369: * Parse a well-balanced chunk of an XML document
8370: * The user has to provide SAX callback block whose routines will be
8371: * called by the parser
8372: * The allowed sequence for the Well Balanced Chunk is the one defined by
8373: * the content production in the XML grammar:
8374: *
8375: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8376: *
1.176 daniel 8377: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
1.144 daniel 8378: * the error code otherwise
8379: */
8380:
8381: int
8382: xmlSAXParseBalancedChunk(xmlParserCtxtPtr ctx, xmlSAXHandlerPtr sax,
8383: void *user_data, xmlParserInputPtr input,
8384: xmlCharEncoding enc) {
8385: xmlParserCtxtPtr ctxt;
8386: int ret;
8387:
8388: if (input == NULL) return(-1);
8389:
8390: if (ctx != NULL)
8391: ctxt = ctx;
8392: else {
8393: ctxt = xmlNewParserCtxt();
8394: if (ctxt == NULL)
8395: return(-1);
8396: if (sax == NULL)
8397: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8398: }
8399:
8400: /*
8401: * Set-up the SAX context
8402: */
8403: if (sax != NULL) {
8404: if (ctxt->sax != NULL)
8405: xmlFree(ctxt->sax);
8406: ctxt->sax = sax;
8407: ctxt->userData = user_data;
8408: }
8409:
8410: /*
8411: * plug some encoding conversion routines here.
8412: */
8413: xmlPushInput(ctxt, input);
8414: if (enc != XML_CHAR_ENCODING_NONE)
8415: xmlSwitchEncoding(ctxt, enc);
8416:
8417: /*
8418: * let's parse that entity knowing it's an external subset.
8419: */
8420: xmlParseContent(ctxt);
8421: ret = ctxt->errNo;
8422:
8423: if (ctx == NULL) {
8424: if (sax != NULL)
8425: ctxt->sax = NULL;
8426: else
8427: xmlFreeDoc(ctxt->myDoc);
8428: xmlFreeParserCtxt(ctxt);
8429: }
8430: return(ret);
8431: }
8432:
8433: /**
1.213 veillard 8434: * xmlParseCtxtExternalEntity:
8435: * @ctx: the existing parsing context
8436: * @URL: the URL for the entity to load
8437: * @ID: the System ID for the entity to load
8438: * @list: the return value for the set of parsed nodes
8439: *
8440: * Parse an external general entity within an existing parsing context
8441: * An external general parsed entity is well-formed if it matches the
8442: * production labeled extParsedEnt.
8443: *
8444: * [78] extParsedEnt ::= TextDecl? content
8445: *
8446: * Returns 0 if the entity is well formed, -1 in case of args problem and
8447: * the parser error code otherwise
8448: */
8449:
8450: int
8451: xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
8452: const xmlChar *ID, xmlNodePtr *list) {
8453: xmlParserCtxtPtr ctxt;
8454: xmlDocPtr newDoc;
8455: xmlSAXHandlerPtr oldsax = NULL;
8456: int ret = 0;
8457:
8458: if (ctx->depth > 40) {
8459: return(XML_ERR_ENTITY_LOOP);
8460: }
8461:
8462: if (list != NULL)
8463: *list = NULL;
8464: if ((URL == NULL) && (ID == NULL))
8465: return(-1);
8466: if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
8467: return(-1);
8468:
8469:
1.228 veillard 8470: ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
1.213 veillard 8471: if (ctxt == NULL) return(-1);
8472: ctxt->userData = ctxt;
8473: oldsax = ctxt->sax;
8474: ctxt->sax = ctx->sax;
8475: newDoc = xmlNewDoc(BAD_CAST "1.0");
8476: if (newDoc == NULL) {
8477: xmlFreeParserCtxt(ctxt);
8478: return(-1);
8479: }
8480: if (ctx->myDoc != NULL) {
8481: newDoc->intSubset = ctx->myDoc->intSubset;
8482: newDoc->extSubset = ctx->myDoc->extSubset;
8483: }
8484: if (ctx->myDoc->URL != NULL) {
8485: newDoc->URL = xmlStrdup(ctx->myDoc->URL);
8486: }
8487: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8488: if (newDoc->children == NULL) {
8489: ctxt->sax = oldsax;
8490: xmlFreeParserCtxt(ctxt);
8491: newDoc->intSubset = NULL;
8492: newDoc->extSubset = NULL;
8493: xmlFreeDoc(newDoc);
8494: return(-1);
8495: }
8496: nodePush(ctxt, newDoc->children);
8497: if (ctx->myDoc == NULL) {
8498: ctxt->myDoc = newDoc;
8499: } else {
8500: ctxt->myDoc = ctx->myDoc;
8501: newDoc->children->doc = ctx->myDoc;
8502: }
8503:
8504: /*
8505: * Parse a possible text declaration first
8506: */
8507: GROW;
8508: if ((RAW == '<') && (NXT(1) == '?') &&
8509: (NXT(2) == 'x') && (NXT(3) == 'm') &&
8510: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8511: xmlParseTextDecl(ctxt);
8512: }
8513:
8514: /*
8515: * Doing validity checking on chunk doesn't make sense
8516: */
8517: ctxt->instate = XML_PARSER_CONTENT;
8518: ctxt->validate = ctx->validate;
8519: ctxt->depth = ctx->depth + 1;
8520: ctxt->replaceEntities = ctx->replaceEntities;
8521: if (ctxt->validate) {
8522: ctxt->vctxt.error = ctx->vctxt.error;
8523: ctxt->vctxt.warning = ctx->vctxt.warning;
8524: /* Allocate the Node stack */
8525: ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
1.228 veillard 8526: if (ctxt->vctxt.nodeTab == NULL) {
8527: fprintf(stderr, "xmlParseCtxtExternalEntity: out of memory\n");
8528: ctxt->validate = 0;
8529: ctxt->vctxt.error = NULL;
8530: ctxt->vctxt.warning = NULL;
8531: } else {
8532: ctxt->vctxt.nodeNr = 0;
8533: ctxt->vctxt.nodeMax = 4;
8534: ctxt->vctxt.node = NULL;
8535: }
1.213 veillard 8536: } else {
8537: ctxt->vctxt.error = NULL;
8538: ctxt->vctxt.warning = NULL;
8539: }
8540:
8541: xmlParseContent(ctxt);
8542:
8543: if ((RAW == '<') && (NXT(1) == '/')) {
1.230 veillard 8544: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.213 veillard 8545: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8546: ctxt->sax->error(ctxt->userData,
8547: "chunk is not well balanced\n");
8548: ctxt->wellFormed = 0;
8549: ctxt->disableSAX = 1;
8550: } else if (RAW != 0) {
1.230 veillard 8551: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
1.213 veillard 8552: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8553: ctxt->sax->error(ctxt->userData,
8554: "extra content at the end of well balanced chunk\n");
8555: ctxt->wellFormed = 0;
8556: ctxt->disableSAX = 1;
8557: }
8558: if (ctxt->node != newDoc->children) {
1.230 veillard 8559: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.213 veillard 8560: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8561: ctxt->sax->error(ctxt->userData,
8562: "chunk is not well balanced\n");
8563: ctxt->wellFormed = 0;
8564: ctxt->disableSAX = 1;
8565: }
8566:
8567: if (!ctxt->wellFormed) {
8568: if (ctxt->errNo == 0)
8569: ret = 1;
8570: else
8571: ret = ctxt->errNo;
8572: } else {
8573: if (list != NULL) {
8574: xmlNodePtr cur;
8575:
8576: /*
8577: * Return the newly created nodeset after unlinking it from
8578: * they pseudo parent.
8579: */
8580: cur = newDoc->children->children;
8581: *list = cur;
8582: while (cur != NULL) {
8583: cur->parent = NULL;
8584: cur = cur->next;
8585: }
8586: newDoc->children->children = NULL;
8587: }
8588: ret = 0;
8589: }
8590: ctxt->sax = oldsax;
8591: xmlFreeParserCtxt(ctxt);
8592: newDoc->intSubset = NULL;
8593: newDoc->extSubset = NULL;
8594: xmlFreeDoc(newDoc);
8595:
8596: return(ret);
8597: }
8598:
8599: /**
1.181 daniel 8600: * xmlParseExternalEntity:
8601: * @doc: the document the chunk pertains to
8602: * @sax: the SAX handler bloc (possibly NULL)
8603: * @user_data: The user data returned on SAX callbacks (possibly NULL)
1.185 daniel 8604: * @depth: Used for loop detection, use 0
1.181 daniel 8605: * @URL: the URL for the entity to load
8606: * @ID: the System ID for the entity to load
8607: * @list: the return value for the set of parsed nodes
8608: *
8609: * Parse an external general entity
8610: * An external general parsed entity is well-formed if it matches the
8611: * production labeled extParsedEnt.
8612: *
8613: * [78] extParsedEnt ::= TextDecl? content
8614: *
8615: * Returns 0 if the entity is well formed, -1 in case of args problem and
8616: * the parser error code otherwise
8617: */
8618:
8619: int
8620: xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
1.185 daniel 8621: int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
1.181 daniel 8622: xmlParserCtxtPtr ctxt;
8623: xmlDocPtr newDoc;
8624: xmlSAXHandlerPtr oldsax = NULL;
8625: int ret = 0;
8626:
1.185 daniel 8627: if (depth > 40) {
8628: return(XML_ERR_ENTITY_LOOP);
8629: }
8630:
8631:
1.181 daniel 8632:
8633: if (list != NULL)
8634: *list = NULL;
8635: if ((URL == NULL) && (ID == NULL))
1.213 veillard 8636: return(-1);
8637: if (doc == NULL) /* @@ relax but check for dereferences */
1.181 daniel 8638: return(-1);
8639:
8640:
1.228 veillard 8641: ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
1.181 daniel 8642: if (ctxt == NULL) return(-1);
8643: ctxt->userData = ctxt;
8644: if (sax != NULL) {
8645: oldsax = ctxt->sax;
8646: ctxt->sax = sax;
8647: if (user_data != NULL)
8648: ctxt->userData = user_data;
8649: }
8650: newDoc = xmlNewDoc(BAD_CAST "1.0");
8651: if (newDoc == NULL) {
8652: xmlFreeParserCtxt(ctxt);
8653: return(-1);
8654: }
8655: if (doc != NULL) {
8656: newDoc->intSubset = doc->intSubset;
8657: newDoc->extSubset = doc->extSubset;
8658: }
8659: if (doc->URL != NULL) {
8660: newDoc->URL = xmlStrdup(doc->URL);
8661: }
8662: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8663: if (newDoc->children == NULL) {
8664: if (sax != NULL)
8665: ctxt->sax = oldsax;
8666: xmlFreeParserCtxt(ctxt);
8667: newDoc->intSubset = NULL;
8668: newDoc->extSubset = NULL;
8669: xmlFreeDoc(newDoc);
8670: return(-1);
8671: }
8672: nodePush(ctxt, newDoc->children);
8673: if (doc == NULL) {
8674: ctxt->myDoc = newDoc;
8675: } else {
8676: ctxt->myDoc = doc;
8677: newDoc->children->doc = doc;
8678: }
8679:
8680: /*
8681: * Parse a possible text declaration first
8682: */
8683: GROW;
8684: if ((RAW == '<') && (NXT(1) == '?') &&
8685: (NXT(2) == 'x') && (NXT(3) == 'm') &&
8686: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8687: xmlParseTextDecl(ctxt);
8688: }
8689:
8690: /*
8691: * Doing validity checking on chunk doesn't make sense
8692: */
8693: ctxt->instate = XML_PARSER_CONTENT;
8694: ctxt->validate = 0;
1.185 daniel 8695: ctxt->depth = depth;
1.181 daniel 8696:
8697: xmlParseContent(ctxt);
8698:
8699: if ((RAW == '<') && (NXT(1) == '/')) {
1.230 veillard 8700: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.181 daniel 8701: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8702: ctxt->sax->error(ctxt->userData,
8703: "chunk is not well balanced\n");
8704: ctxt->wellFormed = 0;
8705: ctxt->disableSAX = 1;
8706: } else if (RAW != 0) {
1.230 veillard 8707: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
1.181 daniel 8708: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8709: ctxt->sax->error(ctxt->userData,
8710: "extra content at the end of well balanced chunk\n");
8711: ctxt->wellFormed = 0;
8712: ctxt->disableSAX = 1;
8713: }
8714: if (ctxt->node != newDoc->children) {
1.230 veillard 8715: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.181 daniel 8716: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8717: ctxt->sax->error(ctxt->userData,
8718: "chunk is not well balanced\n");
8719: ctxt->wellFormed = 0;
8720: ctxt->disableSAX = 1;
8721: }
8722:
8723: if (!ctxt->wellFormed) {
8724: if (ctxt->errNo == 0)
8725: ret = 1;
8726: else
8727: ret = ctxt->errNo;
8728: } else {
8729: if (list != NULL) {
8730: xmlNodePtr cur;
8731:
8732: /*
8733: * Return the newly created nodeset after unlinking it from
8734: * they pseudo parent.
8735: */
8736: cur = newDoc->children->children;
8737: *list = cur;
8738: while (cur != NULL) {
8739: cur->parent = NULL;
8740: cur = cur->next;
8741: }
8742: newDoc->children->children = NULL;
8743: }
8744: ret = 0;
8745: }
8746: if (sax != NULL)
8747: ctxt->sax = oldsax;
8748: xmlFreeParserCtxt(ctxt);
8749: newDoc->intSubset = NULL;
8750: newDoc->extSubset = NULL;
8751: xmlFreeDoc(newDoc);
8752:
8753: return(ret);
8754: }
8755:
8756: /**
8757: * xmlParseBalancedChunk:
1.176 daniel 8758: * @doc: the document the chunk pertains to
8759: * @sax: the SAX handler bloc (possibly NULL)
8760: * @user_data: The user data returned on SAX callbacks (possibly NULL)
1.185 daniel 8761: * @depth: Used for loop detection, use 0
1.176 daniel 8762: * @string: the input string in UTF8 or ISO-Latin (zero terminated)
8763: * @list: the return value for the set of parsed nodes
8764: *
8765: * Parse a well-balanced chunk of an XML document
8766: * called by the parser
8767: * The allowed sequence for the Well Balanced Chunk is the one defined by
8768: * the content production in the XML grammar:
1.144 daniel 8769: *
1.175 daniel 8770: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8771: *
1.176 daniel 8772: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
8773: * the parser error code otherwise
1.144 daniel 8774: */
8775:
1.175 daniel 8776: int
8777: xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
1.185 daniel 8778: void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
1.176 daniel 8779: xmlParserCtxtPtr ctxt;
1.175 daniel 8780: xmlDocPtr newDoc;
1.181 daniel 8781: xmlSAXHandlerPtr oldsax = NULL;
1.175 daniel 8782: int size;
1.176 daniel 8783: int ret = 0;
1.175 daniel 8784:
1.185 daniel 8785: if (depth > 40) {
8786: return(XML_ERR_ENTITY_LOOP);
8787: }
8788:
1.175 daniel 8789:
1.176 daniel 8790: if (list != NULL)
8791: *list = NULL;
8792: if (string == NULL)
8793: return(-1);
8794:
8795: size = xmlStrlen(string);
8796:
1.183 daniel 8797: ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
1.176 daniel 8798: if (ctxt == NULL) return(-1);
8799: ctxt->userData = ctxt;
1.175 daniel 8800: if (sax != NULL) {
1.176 daniel 8801: oldsax = ctxt->sax;
8802: ctxt->sax = sax;
8803: if (user_data != NULL)
8804: ctxt->userData = user_data;
1.175 daniel 8805: }
8806: newDoc = xmlNewDoc(BAD_CAST "1.0");
1.176 daniel 8807: if (newDoc == NULL) {
8808: xmlFreeParserCtxt(ctxt);
8809: return(-1);
8810: }
1.175 daniel 8811: if (doc != NULL) {
8812: newDoc->intSubset = doc->intSubset;
8813: newDoc->extSubset = doc->extSubset;
8814: }
1.176 daniel 8815: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8816: if (newDoc->children == NULL) {
8817: if (sax != NULL)
8818: ctxt->sax = oldsax;
8819: xmlFreeParserCtxt(ctxt);
8820: newDoc->intSubset = NULL;
8821: newDoc->extSubset = NULL;
8822: xmlFreeDoc(newDoc);
8823: return(-1);
8824: }
8825: nodePush(ctxt, newDoc->children);
8826: if (doc == NULL) {
8827: ctxt->myDoc = newDoc;
8828: } else {
8829: ctxt->myDoc = doc;
8830: newDoc->children->doc = doc;
8831: }
8832: ctxt->instate = XML_PARSER_CONTENT;
1.185 daniel 8833: ctxt->depth = depth;
1.176 daniel 8834:
8835: /*
8836: * Doing validity checking on chunk doesn't make sense
8837: */
8838: ctxt->validate = 0;
8839:
1.175 daniel 8840: xmlParseContent(ctxt);
1.176 daniel 8841:
8842: if ((RAW == '<') && (NXT(1) == '/')) {
1.230 veillard 8843: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.176 daniel 8844: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8845: ctxt->sax->error(ctxt->userData,
8846: "chunk is not well balanced\n");
8847: ctxt->wellFormed = 0;
1.180 daniel 8848: ctxt->disableSAX = 1;
1.176 daniel 8849: } else if (RAW != 0) {
1.230 veillard 8850: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
1.176 daniel 8851: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8852: ctxt->sax->error(ctxt->userData,
8853: "extra content at the end of well balanced chunk\n");
8854: ctxt->wellFormed = 0;
1.180 daniel 8855: ctxt->disableSAX = 1;
1.176 daniel 8856: }
8857: if (ctxt->node != newDoc->children) {
1.230 veillard 8858: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.176 daniel 8859: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8860: ctxt->sax->error(ctxt->userData,
8861: "chunk is not well balanced\n");
8862: ctxt->wellFormed = 0;
1.180 daniel 8863: ctxt->disableSAX = 1;
1.176 daniel 8864: }
1.175 daniel 8865:
1.176 daniel 8866: if (!ctxt->wellFormed) {
8867: if (ctxt->errNo == 0)
8868: ret = 1;
8869: else
8870: ret = ctxt->errNo;
8871: } else {
8872: if (list != NULL) {
8873: xmlNodePtr cur;
1.175 daniel 8874:
1.176 daniel 8875: /*
8876: * Return the newly created nodeset after unlinking it from
8877: * they pseudo parent.
8878: */
8879: cur = newDoc->children->children;
8880: *list = cur;
8881: while (cur != NULL) {
8882: cur->parent = NULL;
8883: cur = cur->next;
8884: }
8885: newDoc->children->children = NULL;
8886: }
8887: ret = 0;
1.175 daniel 8888: }
1.176 daniel 8889: if (sax != NULL)
8890: ctxt->sax = oldsax;
1.175 daniel 8891: xmlFreeParserCtxt(ctxt);
8892: newDoc->intSubset = NULL;
8893: newDoc->extSubset = NULL;
1.176 daniel 8894: xmlFreeDoc(newDoc);
1.175 daniel 8895:
1.176 daniel 8896: return(ret);
1.144 daniel 8897: }
8898:
8899: /**
1.229 veillard 8900: * xmlSAXParseEntity:
8901: * @sax: the SAX handler block
8902: * @filename: the filename
8903: *
8904: * parse an XML external entity out of context and build a tree.
8905: * It use the given SAX function block to handle the parsing callback.
8906: * If sax is NULL, fallback to the default DOM tree building routines.
8907: *
8908: * [78] extParsedEnt ::= TextDecl? content
8909: *
8910: * This correspond to a "Well Balanced" chunk
1.144 daniel 8911: *
1.229 veillard 8912: * Returns the resulting document tree
1.144 daniel 8913: */
8914:
1.229 veillard 8915: xmlDocPtr
8916: xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
8917: xmlDocPtr ret;
8918: xmlParserCtxtPtr ctxt;
8919: char *directory = NULL;
8920:
8921: ctxt = xmlCreateFileParserCtxt(filename);
8922: if (ctxt == NULL) {
8923: return(NULL);
8924: }
8925: if (sax != NULL) {
8926: if (ctxt->sax != NULL)
8927: xmlFree(ctxt->sax);
8928: ctxt->sax = sax;
8929: ctxt->userData = NULL;
8930: }
8931:
8932: if ((ctxt->directory == NULL) && (directory == NULL))
8933: directory = xmlParserGetDirectory(filename);
8934:
8935: xmlParseExtParsedEnt(ctxt);
8936:
8937: if (ctxt->wellFormed)
8938: ret = ctxt->myDoc;
8939: else {
8940: ret = NULL;
8941: xmlFreeDoc(ctxt->myDoc);
8942: ctxt->myDoc = NULL;
8943: }
8944: if (sax != NULL)
8945: ctxt->sax = NULL;
8946: xmlFreeParserCtxt(ctxt);
8947:
8948: return(ret);
1.144 daniel 8949: }
8950:
8951: /**
1.229 veillard 8952: * xmlParseEntity:
8953: * @filename: the filename
8954: *
8955: * parse an XML external entity out of context and build a tree.
8956: *
8957: * [78] extParsedEnt ::= TextDecl? content
8958: *
8959: * This correspond to a "Well Balanced" chunk
1.59 daniel 8960: *
1.68 daniel 8961: * Returns the resulting document tree
1.59 daniel 8962: */
8963:
1.69 daniel 8964: xmlDocPtr
1.229 veillard 8965: xmlParseEntity(const char *filename) {
8966: return(xmlSAXParseEntity(NULL, filename));
1.55 daniel 8967: }
8968:
8969: /**
1.181 daniel 8970: * xmlCreateEntityParserCtxt:
8971: * @URL: the entity URL
8972: * @ID: the entity PUBLIC ID
8973: * @base: a posible base for the target URI
8974: *
8975: * Create a parser context for an external entity
8976: * Automatic support for ZLIB/Compress compressed document is provided
8977: * by default if found at compile-time.
8978: *
8979: * Returns the new parser context or NULL
8980: */
8981: xmlParserCtxtPtr
8982: xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
8983: const xmlChar *base) {
8984: xmlParserCtxtPtr ctxt;
8985: xmlParserInputPtr inputStream;
8986: char *directory = NULL;
1.210 veillard 8987: xmlChar *uri;
8988:
1.181 daniel 8989: ctxt = xmlNewParserCtxt();
8990: if (ctxt == NULL) {
8991: return(NULL);
8992: }
8993:
1.210 veillard 8994: uri = xmlBuildURI(URL, base);
8995:
8996: if (uri == NULL) {
8997: inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
8998: if (inputStream == NULL) {
8999: xmlFreeParserCtxt(ctxt);
9000: return(NULL);
9001: }
9002:
9003: inputPush(ctxt, inputStream);
9004:
9005: if ((ctxt->directory == NULL) && (directory == NULL))
9006: directory = xmlParserGetDirectory((char *)URL);
9007: if ((ctxt->directory == NULL) && (directory != NULL))
9008: ctxt->directory = directory;
9009: } else {
9010: inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9011: if (inputStream == NULL) {
9012: xmlFreeParserCtxt(ctxt);
9013: return(NULL);
9014: }
1.181 daniel 9015:
1.210 veillard 9016: inputPush(ctxt, inputStream);
1.181 daniel 9017:
1.210 veillard 9018: if ((ctxt->directory == NULL) && (directory == NULL))
9019: directory = xmlParserGetDirectory((char *)uri);
9020: if ((ctxt->directory == NULL) && (directory != NULL))
9021: ctxt->directory = directory;
9022: xmlFree(uri);
9023: }
1.181 daniel 9024:
9025: return(ctxt);
9026: }
9027:
1.229 veillard 9028: /************************************************************************
9029: * *
9030: * Front ends when parsing from a file *
9031: * *
9032: ************************************************************************/
9033:
1.181 daniel 9034: /**
9035: * xmlCreateFileParserCtxt:
1.50 daniel 9036: * @filename: the filename
9037: *
1.69 daniel 9038: * Create a parser context for a file content.
9039: * Automatic support for ZLIB/Compress compressed document is provided
9040: * by default if found at compile-time.
1.50 daniel 9041: *
1.69 daniel 9042: * Returns the new parser context or NULL
1.9 httpng 9043: */
1.69 daniel 9044: xmlParserCtxtPtr
9045: xmlCreateFileParserCtxt(const char *filename)
9046: {
9047: xmlParserCtxtPtr ctxt;
1.40 daniel 9048: xmlParserInputPtr inputStream;
1.91 daniel 9049: xmlParserInputBufferPtr buf;
1.111 daniel 9050: char *directory = NULL;
1.9 httpng 9051:
1.91 daniel 9052: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1.228 veillard 9053: if (buf == NULL) {
9054: return(NULL);
9055: }
1.9 httpng 9056:
1.97 daniel 9057: ctxt = xmlNewParserCtxt();
1.16 daniel 9058: if (ctxt == NULL) {
1.228 veillard 9059: if (xmlDefaultSAXHandler.error != NULL) {
9060: xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9061: }
1.16 daniel 9062: return(NULL);
9063: }
1.97 daniel 9064:
1.96 daniel 9065: inputStream = xmlNewInputStream(ctxt);
1.40 daniel 9066: if (inputStream == NULL) {
1.97 daniel 9067: xmlFreeParserCtxt(ctxt);
1.40 daniel 9068: return(NULL);
9069: }
9070:
1.119 daniel 9071: inputStream->filename = xmlMemStrdup(filename);
1.91 daniel 9072: inputStream->buf = buf;
9073: inputStream->base = inputStream->buf->buffer->content;
9074: inputStream->cur = inputStream->buf->buffer->content;
1.16 daniel 9075:
1.40 daniel 9076: inputPush(ctxt, inputStream);
1.110 daniel 9077: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 9078: directory = xmlParserGetDirectory(filename);
9079: if ((ctxt->directory == NULL) && (directory != NULL))
1.110 daniel 9080: ctxt->directory = directory;
1.106 daniel 9081:
1.69 daniel 9082: return(ctxt);
9083: }
9084:
9085: /**
1.181 daniel 9086: * xmlSAXParseFile:
1.69 daniel 9087: * @sax: the SAX handler block
9088: * @filename: the filename
9089: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9090: * documents
9091: *
9092: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9093: * compressed document is provided by default if found at compile-time.
9094: * It use the given SAX function block to handle the parsing callback.
9095: * If sax is NULL, fallback to the default DOM tree building routines.
9096: *
9097: * Returns the resulting document tree
9098: */
9099:
1.79 daniel 9100: xmlDocPtr
9101: xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
1.69 daniel 9102: int recovery) {
9103: xmlDocPtr ret;
1.229 veillard 9104: xmlParserCtxtPtr ctxt;
9105: char *directory = NULL;
9106:
9107: ctxt = xmlCreateFileParserCtxt(filename);
9108: if (ctxt == NULL) {
9109: return(NULL);
9110: }
9111: if (sax != NULL) {
9112: if (ctxt->sax != NULL)
9113: xmlFree(ctxt->sax);
9114: ctxt->sax = sax;
9115: ctxt->userData = NULL;
9116: }
9117:
9118: if ((ctxt->directory == NULL) && (directory == NULL))
9119: directory = xmlParserGetDirectory(filename);
9120: if ((ctxt->directory == NULL) && (directory != NULL))
9121: ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9122:
9123: xmlParseDocument(ctxt);
9124:
9125: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9126: else {
9127: ret = NULL;
9128: xmlFreeDoc(ctxt->myDoc);
9129: ctxt->myDoc = NULL;
9130: }
9131: if (sax != NULL)
9132: ctxt->sax = NULL;
9133: xmlFreeParserCtxt(ctxt);
9134:
9135: return(ret);
9136: }
9137:
9138: /**
9139: * xmlRecoverDoc:
9140: * @cur: a pointer to an array of xmlChar
9141: *
9142: * parse an XML in-memory document and build a tree.
9143: * In the case the document is not Well Formed, a tree is built anyway
9144: *
9145: * Returns the resulting document tree
9146: */
9147:
9148: xmlDocPtr
9149: xmlRecoverDoc(xmlChar *cur) {
9150: return(xmlSAXParseDoc(NULL, cur, 1));
9151: }
9152:
9153: /**
9154: * xmlParseFile:
9155: * @filename: the filename
9156: *
9157: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9158: * compressed document is provided by default if found at compile-time.
9159: *
9160: * Returns the resulting document tree
9161: */
9162:
9163: xmlDocPtr
9164: xmlParseFile(const char *filename) {
9165: return(xmlSAXParseFile(NULL, filename, 0));
9166: }
9167:
9168: /**
9169: * xmlRecoverFile:
9170: * @filename: the filename
9171: *
9172: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9173: * compressed document is provided by default if found at compile-time.
9174: * In the case the document is not Well Formed, a tree is built anyway
9175: *
9176: * Returns the resulting document tree
9177: */
9178:
9179: xmlDocPtr
9180: xmlRecoverFile(const char *filename) {
9181: return(xmlSAXParseFile(NULL, filename, 1));
9182: }
9183:
9184:
9185: /**
9186: * xmlSetupParserForBuffer:
9187: * @ctxt: an XML parser context
9188: * @buffer: a xmlChar * buffer
9189: * @filename: a file name
9190: *
9191: * Setup the parser context to parse a new buffer; Clears any prior
9192: * contents from the parser context. The buffer parameter must not be
9193: * NULL, but the filename parameter can be
9194: */
9195: void
9196: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9197: const char* filename)
9198: {
9199: xmlParserInputPtr input;
9200:
9201: input = xmlNewInputStream(ctxt);
9202: if (input == NULL) {
9203: perror("malloc");
9204: xmlFree(ctxt);
9205: return;
9206: }
9207:
9208: xmlClearParserCtxt(ctxt);
9209: if (filename != NULL)
9210: input->filename = xmlMemStrdup(filename);
9211: input->base = buffer;
9212: input->cur = buffer;
9213: inputPush(ctxt, input);
9214: }
9215:
9216: /**
9217: * xmlSAXUserParseFile:
9218: * @sax: a SAX handler
9219: * @user_data: The user data returned on SAX callbacks
9220: * @filename: a file name
9221: *
9222: * parse an XML file and call the given SAX handler routines.
9223: * Automatic support for ZLIB/Compress compressed document is provided
9224: *
9225: * Returns 0 in case of success or a error number otherwise
9226: */
9227: int
9228: xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9229: const char *filename) {
9230: int ret = 0;
9231: xmlParserCtxtPtr ctxt;
9232:
9233: ctxt = xmlCreateFileParserCtxt(filename);
9234: if (ctxt == NULL) return -1;
9235: if (ctxt->sax != &xmlDefaultSAXHandler)
9236: xmlFree(ctxt->sax);
9237: ctxt->sax = sax;
9238: if (user_data != NULL)
9239: ctxt->userData = user_data;
9240:
1.16 daniel 9241: xmlParseDocument(ctxt);
1.229 veillard 9242:
9243: if (ctxt->wellFormed)
9244: ret = 0;
1.59 daniel 9245: else {
1.229 veillard 9246: if (ctxt->errNo != 0)
9247: ret = ctxt->errNo;
9248: else
9249: ret = -1;
1.59 daniel 9250: }
1.86 daniel 9251: if (sax != NULL)
1.229 veillard 9252: ctxt->sax = NULL;
1.69 daniel 9253: xmlFreeParserCtxt(ctxt);
1.20 daniel 9254:
1.229 veillard 9255: return ret;
1.20 daniel 9256: }
9257:
1.229 veillard 9258: /************************************************************************
9259: * *
9260: * Front ends when parsing from memory *
9261: * *
9262: ************************************************************************/
1.32 daniel 9263:
1.50 daniel 9264: /**
1.181 daniel 9265: * xmlCreateMemoryParserCtxt:
1.229 veillard 9266: * @buffer: a pointer to a char array
9267: * @size: the size of the array
1.50 daniel 9268: *
1.69 daniel 9269: * Create a parser context for an XML in-memory document.
1.50 daniel 9270: *
1.69 daniel 9271: * Returns the new parser context or NULL
1.20 daniel 9272: */
1.69 daniel 9273: xmlParserCtxtPtr
9274: xmlCreateMemoryParserCtxt(char *buffer, int size) {
1.20 daniel 9275: xmlParserCtxtPtr ctxt;
1.40 daniel 9276: xmlParserInputPtr input;
1.209 veillard 9277: xmlParserInputBufferPtr buf;
1.40 daniel 9278:
1.229 veillard 9279: if (buffer == NULL)
9280: return(NULL);
9281: if (size <= 0)
1.181 daniel 9282: return(NULL);
1.40 daniel 9283:
1.97 daniel 9284: ctxt = xmlNewParserCtxt();
1.181 daniel 9285: if (ctxt == NULL)
1.20 daniel 9286: return(NULL);
1.97 daniel 9287:
1.209 veillard 9288: buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
9289: if (buf == NULL) return(NULL);
9290:
1.96 daniel 9291: input = xmlNewInputStream(ctxt);
1.40 daniel 9292: if (input == NULL) {
1.97 daniel 9293: xmlFreeParserCtxt(ctxt);
1.40 daniel 9294: return(NULL);
9295: }
1.20 daniel 9296:
1.40 daniel 9297: input->filename = NULL;
1.209 veillard 9298: input->buf = buf;
9299: input->base = input->buf->buffer->content;
9300: input->cur = input->buf->buffer->content;
1.20 daniel 9301:
1.40 daniel 9302: inputPush(ctxt, input);
1.69 daniel 9303: return(ctxt);
9304: }
9305:
9306: /**
1.181 daniel 9307: * xmlSAXParseMemory:
1.69 daniel 9308: * @sax: the SAX handler block
9309: * @buffer: an pointer to a char array
1.127 daniel 9310: * @size: the size of the array
9311: * @recovery: work in recovery mode, i.e. tries to read not Well Formed
1.69 daniel 9312: * documents
9313: *
9314: * parse an XML in-memory block and use the given SAX function block
9315: * to handle the parsing callback. If sax is NULL, fallback to the default
9316: * DOM tree building routines.
9317: *
9318: * Returns the resulting document tree
9319: */
9320: xmlDocPtr
9321: xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
9322: xmlDocPtr ret;
9323: xmlParserCtxtPtr ctxt;
9324:
9325: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9326: if (ctxt == NULL) return(NULL);
1.74 daniel 9327: if (sax != NULL) {
9328: ctxt->sax = sax;
9329: ctxt->userData = NULL;
9330: }
1.20 daniel 9331:
9332: xmlParseDocument(ctxt);
1.40 daniel 9333:
1.72 daniel 9334: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 9335: else {
9336: ret = NULL;
1.72 daniel 9337: xmlFreeDoc(ctxt->myDoc);
9338: ctxt->myDoc = NULL;
1.59 daniel 9339: }
1.86 daniel 9340: if (sax != NULL)
9341: ctxt->sax = NULL;
1.69 daniel 9342: xmlFreeParserCtxt(ctxt);
1.16 daniel 9343:
1.9 httpng 9344: return(ret);
1.17 daniel 9345: }
9346:
1.55 daniel 9347: /**
1.181 daniel 9348: * xmlParseMemory:
1.68 daniel 9349: * @buffer: an pointer to a char array
1.55 daniel 9350: * @size: the size of the array
9351: *
9352: * parse an XML in-memory block and build a tree.
9353: *
1.68 daniel 9354: * Returns the resulting document tree
1.55 daniel 9355: */
9356:
9357: xmlDocPtr xmlParseMemory(char *buffer, int size) {
1.59 daniel 9358: return(xmlSAXParseMemory(NULL, buffer, size, 0));
9359: }
9360:
9361: /**
1.181 daniel 9362: * xmlRecoverMemory:
1.68 daniel 9363: * @buffer: an pointer to a char array
1.59 daniel 9364: * @size: the size of the array
9365: *
9366: * parse an XML in-memory block and build a tree.
9367: * In the case the document is not Well Formed, a tree is built anyway
9368: *
1.68 daniel 9369: * Returns the resulting document tree
1.59 daniel 9370: */
9371:
9372: xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
9373: return(xmlSAXParseMemory(NULL, buffer, size, 1));
1.17 daniel 9374: }
9375:
1.123 daniel 9376: /**
9377: * xmlSAXUserParseMemory:
9378: * @sax: a SAX handler
9379: * @user_data: The user data returned on SAX callbacks
9380: * @buffer: an in-memory XML document input
1.127 daniel 9381: * @size: the length of the XML document in bytes
1.123 daniel 9382: *
9383: * A better SAX parsing routine.
9384: * parse an XML in-memory buffer and call the given SAX handler routines.
9385: *
9386: * Returns 0 in case of success or a error number otherwise
9387: */
9388: int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
9389: char *buffer, int size) {
9390: int ret = 0;
9391: xmlParserCtxtPtr ctxt;
1.218 veillard 9392: xmlSAXHandlerPtr oldsax = NULL;
1.123 daniel 9393:
9394: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9395: if (ctxt == NULL) return -1;
1.216 veillard 9396: if (sax != NULL) {
9397: oldsax = ctxt->sax;
9398: ctxt->sax = sax;
9399: }
1.123 daniel 9400: ctxt->userData = user_data;
9401:
9402: xmlParseDocument(ctxt);
9403:
9404: if (ctxt->wellFormed)
9405: ret = 0;
9406: else {
9407: if (ctxt->errNo != 0)
9408: ret = ctxt->errNo;
9409: else
9410: ret = -1;
9411: }
1.216 veillard 9412: if (sax != NULL) {
9413: ctxt->sax = oldsax;
9414: }
1.123 daniel 9415: xmlFreeParserCtxt(ctxt);
9416:
9417: return ret;
9418: }
9419:
1.132 daniel 9420: /**
1.229 veillard 9421: * xmlCreateDocParserCtxt:
9422: * @cur: a pointer to an array of xmlChar
9423: *
9424: * Creates a parser context for an XML in-memory document.
1.132 daniel 9425: *
1.229 veillard 9426: * Returns the new parser context or NULL
1.132 daniel 9427: */
1.229 veillard 9428: xmlParserCtxtPtr
9429: xmlCreateDocParserCtxt(xmlChar *cur) {
9430: int len;
1.132 daniel 9431:
1.229 veillard 9432: if (cur == NULL)
9433: return(NULL);
9434: len = xmlStrlen(cur);
9435: return(xmlCreateMemoryParserCtxt((char *)cur, len));
1.132 daniel 9436: }
1.98 daniel 9437:
1.50 daniel 9438: /**
1.229 veillard 9439: * xmlSAXParseDoc:
9440: * @sax: the SAX handler block
9441: * @cur: a pointer to an array of xmlChar
9442: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9443: * documents
1.50 daniel 9444: *
1.229 veillard 9445: * parse an XML in-memory document and build a tree.
9446: * It use the given SAX function block to handle the parsing callback.
9447: * If sax is NULL, fallback to the default DOM tree building routines.
1.50 daniel 9448: *
1.229 veillard 9449: * Returns the resulting document tree
1.32 daniel 9450: */
9451:
1.229 veillard 9452: xmlDocPtr
9453: xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
9454: xmlDocPtr ret;
9455: xmlParserCtxtPtr ctxt;
9456:
9457: if (cur == NULL) return(NULL);
1.32 daniel 9458:
9459:
1.229 veillard 9460: ctxt = xmlCreateDocParserCtxt(cur);
9461: if (ctxt == NULL) return(NULL);
9462: if (sax != NULL) {
9463: ctxt->sax = sax;
9464: ctxt->userData = NULL;
9465: }
1.32 daniel 9466:
1.229 veillard 9467: xmlParseDocument(ctxt);
9468: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9469: else {
9470: ret = NULL;
9471: xmlFreeDoc(ctxt->myDoc);
9472: ctxt->myDoc = NULL;
9473: }
9474: if (sax != NULL)
9475: ctxt->sax = NULL;
9476: xmlFreeParserCtxt(ctxt);
9477:
9478: return(ret);
1.32 daniel 9479: }
9480:
1.50 daniel 9481: /**
1.229 veillard 9482: * xmlParseDoc:
9483: * @cur: a pointer to an array of xmlChar
1.50 daniel 9484: *
1.229 veillard 9485: * parse an XML in-memory document and build a tree.
1.50 daniel 9486: *
1.229 veillard 9487: * Returns the resulting document tree
1.32 daniel 9488: */
9489:
1.229 veillard 9490: xmlDocPtr
9491: xmlParseDoc(xmlChar *cur) {
9492: return(xmlSAXParseDoc(NULL, cur, 0));
9493: }
1.32 daniel 9494:
9495:
1.229 veillard 9496: /************************************************************************
9497: * *
9498: * Miscellaneous *
9499: * *
9500: ************************************************************************/
1.32 daniel 9501:
1.237 ! veillard 9502: #ifdef LIBXML_XPATH_ENABLED
! 9503: #include <libxml/xpath.h>
! 9504: #endif
! 9505:
/* Set once xmlInitParser() has run; reset by xmlCleanupParser(). */
static int xmlParserInitialized = 0;

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * Repeated calls are no-ops until xmlCleanupParser() resets the flag.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 */

void
xmlInitParser(void)
{
    if (!xmlParserInitialized) {
        xmlInitCharEncodingHandlers();
        xmlInitializePredefinedEntities();
        xmlDefaultSAXHandlerInit();
        xmlRegisterDefaultInputCallbacks();
        xmlRegisterDefaultOutputCallbacks();
#ifdef LIBXML_HTML_ENABLED
        htmlInitAutoClose();
        htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
        xmlXPathInit();
#endif
        xmlParserInitialized = 1;
    }
}
9534:
1.50 daniel 9535: /**
1.229 veillard 9536: * xmlCleanupParser:
1.50 daniel 9537: *
1.229 veillard 9538: * Cleanup function for the XML parser. It tries to reclaim all
9539: * parsing related global memory allocated for the parser processing.
9540: * It doesn't deallocate any document related memory. Calling this
9541: * function should not prevent reusing the parser.
1.32 daniel 9542: */
1.229 veillard 9543:
1.55 daniel 9544: void
1.229 veillard 9545: xmlCleanupParser(void) {
1.235 veillard 9546: xmlParserInitialized = 0;
1.229 veillard 9547: xmlCleanupCharEncodingHandlers();
9548: xmlCleanupPredefinedEntities();
1.32 daniel 9549: }
1.220 veillard 9550:
9551: /**
9552: * xmlPedanticParserDefault:
9553: * @val: int 0 or 1
9554: *
9555: * Set and return the previous value for enabling pedantic warnings.
9556: *
9557: * Returns the last value for 0 for no substitution, 1 for substitution.
9558: */
9559:
9560: int
9561: xmlPedanticParserDefault(int val) {
9562: int old = xmlPedanticParserDefaultValue;
9563:
9564: xmlPedanticParserDefaultValue = val;
9565: return(old);
9566: }
1.98 daniel 9567:
9568: /**
1.181 daniel 9569: * xmlSubstituteEntitiesDefault:
1.98 daniel 9570: * @val: int 0 or 1
9571: *
9572: * Set and return the previous value for default entity support.
9573: * Initially the parser always keep entity references instead of substituting
9574: * entity values in the output. This function has to be used to change the
9575: * default parser behaviour
9576: * SAX::subtituteEntities() has to be used for changing that on a file by
9577: * file basis.
9578: *
9579: * Returns the last value for 0 for no substitution, 1 for substitution.
9580: */
9581:
9582: int
9583: xmlSubstituteEntitiesDefault(int val) {
9584: int old = xmlSubstituteEntitiesDefaultValue;
9585:
9586: xmlSubstituteEntitiesDefaultValue = val;
1.180 daniel 9587: return(old);
9588: }
9589:
9590: /**
9591: * xmlKeepBlanksDefault:
9592: * @val: int 0 or 1
9593: *
9594: * Set and return the previous value for default blanks text nodes support.
9595: * The 1.x version of the parser used an heuristic to try to detect
9596: * ignorable white spaces. As a result the SAX callback was generating
9597: * ignorableWhitespace() callbacks instead of characters() one, and when
9598: * using the DOM output text nodes containing those blanks were not generated.
9599: * The 2.x and later version will switch to the XML standard way and
9600: * ignorableWhitespace() are only generated when running the parser in
9601: * validating mode and when the current element doesn't allow CDATA or
9602: * mixed content.
9603: * This function is provided as a way to force the standard behaviour
9604: * on 1.X libs and to switch back to the old mode for compatibility when
9605: * running 1.X client code on 2.X . Upgrade of 1.X code should be done
9606: * by using xmlIsBlankNode() commodity function to detect the "empty"
9607: * nodes generated.
9608: * This value also affect autogeneration of indentation when saving code
9609: * if blanks sections are kept, indentation is not generated.
9610: *
9611: * Returns the last value for 0 for no substitution, 1 for substitution.
9612: */
9613:
9614: int
9615: xmlKeepBlanksDefault(int val) {
9616: int old = xmlKeepBlanksDefaultValue;
9617:
9618: xmlKeepBlanksDefaultValue = val;
9619: xmlIndentTreeOutput = !val;
1.98 daniel 9620: return(old);
9621: }
1.77 daniel 9622:
Webmaster