Annotation of XML/parser.c, revision 1.243
1.1 veillard 1: /*
1.229 veillard 2: * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3: * implemented on top of the SAX interfaces
1.15 veillard 4: *
1.222 veillard 5: * References:
6: * The XML specification:
7: * http://www.w3.org/TR/REC-xml
8: * Original 1.0 version:
9: * http://www.w3.org/TR/1998/REC-xml-19980210
10: * XML second edition working draft
11: * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12: *
1.229 veillard 13: * Okay this is a big file, the parser core is around 7000 lines, then it
14: * is followed by the progressive parser top routines, then the various
15: * high level APIs to call the parser and a few miscellaneous functions.
16: * A number of helper functions and deprecated ones have been moved to
17: * parserInternals.c to reduce this file size.
18: * As much as possible the functions are associated with their relative
19: * production in the XML specification. A few productions defining the
20: * different ranges of character are actually implemented either in
21: * parserInternals.h or parserInternals.c
22: * The DOM tree build is realized from the default SAX callbacks in
23: * the module SAX.c.
24: * The routines doing the validation checks are in valid.c and called either
25: * from the SAX callbacks or as standalone functions using a preparsed
26: * document.
27: *
1.15 veillard 28: * See Copyright for the status of this software.
29: *
1.60 daniel 30: * Daniel.Veillard@w3.org
1.1 veillard 31: */
32:
1.26 daniel 33: #ifdef WIN32
1.138 daniel 34: #include "win32config.h"
1.226 veillard 35: #define XML_DIR_SEP '\\'
1.26 daniel 36: #else
1.121 daniel 37: #include "config.h"
1.226 veillard 38: #define XML_DIR_SEP '/'
1.26 daniel 39: #endif
1.121 daniel 40:
1.1 veillard 41: #include <stdio.h>
1.238 veillard 42: #include <stdlib.h>
1.204 veillard 43: #include <string.h>
1.238 veillard 44: #include <libxml/xmlmemory.h>
45: #include <libxml/tree.h>
46: #include <libxml/parser.h>
47: #include <libxml/parserInternals.h>
48: #include <libxml/valid.h>
49: #include <libxml/entities.h>
50: #include <libxml/xmlerror.h>
51: #include <libxml/encoding.h>
52: #include <libxml/xmlIO.h>
53: #include <libxml/uri.h>
54:
1.121 daniel 55: #ifdef HAVE_CTYPE_H
1.1 veillard 56: #include <ctype.h>
1.121 daniel 57: #endif
58: #ifdef HAVE_STDLIB_H
1.50 daniel 59: #include <stdlib.h>
1.121 daniel 60: #endif
61: #ifdef HAVE_SYS_STAT_H
1.9 httpng 62: #include <sys/stat.h>
1.121 daniel 63: #endif
1.9 httpng 64: #ifdef HAVE_FCNTL_H
65: #include <fcntl.h>
66: #endif
1.10 httpng 67: #ifdef HAVE_UNISTD_H
68: #include <unistd.h>
69: #endif
1.20 daniel 70: #ifdef HAVE_ZLIB_H
71: #include <zlib.h>
72: #endif
1.1 veillard 73:
74:
/*
 * Sizes used for the parser's working buffers: the "big" one is the
 * initial size of the translation buffer allocated by
 * xmlStringDecodeEntities(), the small one is the safety margin kept
 * free at the end of that buffer before it gets grown.
 */
#define XML_PARSER_BIG_BUFFER_SIZE 1000
#define XML_PARSER_BUFFER_SIZE 100

/*
 * Various global defaults for parsing.
 * Only the initial values are set here; xmlParserDebugEntities is tested
 * by the input push/pop and entity handling routines to emit traces.
 */
int xmlGetWarningsDefaultValue        = 1;
int xmlParserDebugEntities            = 0;
int xmlSubstituteEntitiesDefaultValue = 0;
int xmlDoValidityCheckingDefaultValue = 0;
int xmlPedanticParserDefaultValue     = 0;
int xmlKeepBlanksDefaultValue         = 1;
1.86 daniel 87:
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by W3C specs.
 */
const char *xmlW3CPIs[] = { "xml-stylesheet", NULL };
1.91 daniel 96:
1.229 veillard 97: /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
1.151 daniel 98: void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
99: xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
100: const xmlChar **str);
1.91 daniel 101:
102:
1.45 daniel 103: /************************************************************************
104: * *
105: * Parser stacks related functions and macros *
106: * *
107: ************************************************************************/
1.79 daniel 108:
1.135 daniel 109: xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
110: const xmlChar ** str);
1.79 daniel 111:
1.1 veillard 112: /*
1.40 daniel 113: * Generic function for accessing stacks in the Parser Context
1.1 veillard 114: */
115:
1.140 daniel 116: #define PUSH_AND_POP(scope, type, name) \
117: scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
1.31 daniel 118: if (ctxt->name##Nr >= ctxt->name##Max) { \
119: ctxt->name##Max *= 2; \
1.204 veillard 120: ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
1.40 daniel 121: ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
122: if (ctxt->name##Tab == NULL) { \
1.241 veillard 123: xmlGenericError(xmlGenericErrorContext, \
124: "realloc failed !\n"); \
1.145 daniel 125: return(0); \
1.31 daniel 126: } \
127: } \
1.40 daniel 128: ctxt->name##Tab[ctxt->name##Nr] = value; \
129: ctxt->name = value; \
130: return(ctxt->name##Nr++); \
1.31 daniel 131: } \
1.140 daniel 132: scope type name##Pop(xmlParserCtxtPtr ctxt) { \
1.69 daniel 133: type ret; \
1.40 daniel 134: if (ctxt->name##Nr <= 0) return(0); \
135: ctxt->name##Nr--; \
1.50 daniel 136: if (ctxt->name##Nr > 0) \
137: ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
138: else \
139: ctxt->name = NULL; \
1.69 daniel 140: ret = ctxt->name##Tab[ctxt->name##Nr]; \
141: ctxt->name##Tab[ctxt->name##Nr] = 0; \
142: return(ret); \
1.31 daniel 143: } \
144:
1.229 veillard 145: /*
146: * Those macros actually generate the functions
147: */
1.140 daniel 148: PUSH_AND_POP(extern, xmlParserInputPtr, input)
149: PUSH_AND_POP(extern, xmlNodePtr, node)
150: PUSH_AND_POP(extern, xmlChar*, name)
1.40 daniel 151:
1.176 daniel 152: int spacePush(xmlParserCtxtPtr ctxt, int val) {
153: if (ctxt->spaceNr >= ctxt->spaceMax) {
154: ctxt->spaceMax *= 2;
1.204 veillard 155: ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1.176 daniel 156: ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
157: if (ctxt->spaceTab == NULL) {
1.241 veillard 158: xmlGenericError(xmlGenericErrorContext,
159: "realloc failed !\n");
1.176 daniel 160: return(0);
161: }
162: }
163: ctxt->spaceTab[ctxt->spaceNr] = val;
164: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
165: return(ctxt->spaceNr++);
166: }
167:
168: int spacePop(xmlParserCtxtPtr ctxt) {
169: int ret;
170: if (ctxt->spaceNr <= 0) return(0);
171: ctxt->spaceNr--;
172: if (ctxt->spaceNr > 0)
173: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
174: else
175: ctxt->space = NULL;
176: ret = ctxt->spaceTab[ctxt->spaceNr];
177: ctxt->spaceTab[ctxt->spaceNr] = -1;
178: return(ret);
179: }
180:
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. They all expect a variable named ctxt (the parser
 * context) to be in scope.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     the pending ctxt->token if there is one, otherwise the current
 *           xmlChar of the input buffer.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done: it evaluates to -1 while
 *           a token is pending.
 *   NXT(n)  the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser. The argument is evaluated twice.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXTL(l) Skip l xmlChars in the input buffer, updating the line and
 *           column counters and clearing any pending token.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR(s, l) same but operate on a string instead of the context
 *   COPY_BUF(l,b,i,v) copy the current unicode char v to the target
 *           buffer b at index i, increment the index
 *   GROW, SHRINK handling of input buffers
 *
 * All multi-statement macros are wrapped in do { } while (0) so that they
 * behave as a single statement, and macro arguments are parenthesized.
 */

#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    /* DEPR if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); */\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

#define SHRINK do {							\
    xmlParserInputShrink(ctxt->input);					\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

#define GROW do {							\
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->token = 0; ctxt->input->cur += (l);				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    /* DEPR if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); */\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * Wrapped in do/while so that "if (x) COPY_BUF(...); else ..." cannot
 * capture the macro's own else branch.
 */
#define COPY_BUF(l,b,i,v) do {						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyChar((l),&(b)[(i)],(v));				\
  } while (0)
1.151 daniel 262:
263: /**
1.229 veillard 264: * xmlSkipBlankChars:
1.151 daniel 265: * @ctxt: the XML parser context
266: *
1.229 veillard 267: * skip all blanks character found at that point in the input streams.
268: * It pops up finished entities in the process if allowable at that point.
269: *
270: * Returns the number of space chars skipped
1.151 daniel 271: */
1.55 daniel 272:
1.229 veillard 273: int
274: xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
275: int cur, res = 0;
1.201 daniel 276:
1.176 daniel 277: /*
1.229 veillard 278: * It's Okay to use CUR/NEXT here since all the blanks are on
279: * the ASCII range.
280: */
281: do {
282: cur = CUR;
283: while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
284: NEXT;
285: cur = CUR;
286: res++;
1.151 daniel 287: }
1.229 veillard 288: while ((cur == 0) && (ctxt->inputNr > 1) &&
289: (ctxt->instate != XML_PARSER_COMMENT)) {
1.168 daniel 290: xmlPopInput(ctxt);
1.229 veillard 291: cur = CUR;
292: }
1.222 veillard 293: /*
294: * Need to handle support of entities branching here
295: */
1.155 daniel 296: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1.229 veillard 297: /* DEPR if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); */
1.222 veillard 298: } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1.155 daniel 299: return(res);
1.152 daniel 300: }
301:
1.97 daniel 302: /************************************************************************
303: * *
1.229 veillard 304: * Commodity functions to handle entities *
1.97 daniel 305: * *
306: ************************************************************************/
1.40 daniel 307:
1.50 daniel 308: /**
309: * xmlPopInput:
310: * @ctxt: an XML parser context
311: *
1.40 daniel 312: * xmlPopInput: the current input pointed by ctxt->input came to an end
313: * pop it and return the next char.
1.45 daniel 314: *
1.123 daniel 315: * Returns the current xmlChar in the parser context
1.40 daniel 316: */
1.123 daniel 317: xmlChar
1.55 daniel 318: xmlPopInput(xmlParserCtxtPtr ctxt) {
1.40 daniel 319: if (ctxt->inputNr == 1) return(0); /* End of main Input */
1.220 veillard 320: if (xmlParserDebugEntities)
1.241 veillard 321: xmlGenericError(xmlGenericErrorContext,
322: "Popping input %d\n", ctxt->inputNr);
1.69 daniel 323: xmlFreeInputStream(inputPop(ctxt));
1.97 daniel 324: if ((*ctxt->input->cur == 0) &&
325: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
326: return(xmlPopInput(ctxt));
1.40 daniel 327: return(CUR);
328: }
329:
1.50 daniel 330: /**
1.229 veillard 331: * xmlPushInput:
1.174 daniel 332: * @ctxt: an XML parser context
1.229 veillard 333: * @input: an XML parser input fragment (entity, XML fragment ...).
1.174 daniel 334: *
1.229 veillard 335: * xmlPushInput: switch to a new input stream which is stacked on top
336: * of the previous one(s).
1.174 daniel 337: */
1.229 veillard 338: void
339: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
340: if (input == NULL) return;
1.174 daniel 341:
1.229 veillard 342: if (xmlParserDebugEntities) {
343: if ((ctxt->input != NULL) && (ctxt->input->filename))
1.241 veillard 344: xmlGenericError(xmlGenericErrorContext,
345: "%s(%d): ", ctxt->input->filename,
1.229 veillard 346: ctxt->input->line);
1.241 veillard 347: xmlGenericError(xmlGenericErrorContext,
348: "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1.229 veillard 349: }
350: inputPush(ctxt, input);
351: GROW;
1.174 daniel 352: }
1.97 daniel 353:
354: /**
355: * xmlParseCharRef:
356: * @ctxt: an XML parser context
357: *
358: * parse Reference declarations
359: *
360: * [66] CharRef ::= '&#' [0-9]+ ';' |
361: * '&#x' [0-9a-fA-F]+ ';'
362: *
1.98 daniel 363: * [ WFC: Legal Character ]
364: * Characters referred to using character references must match the
365: * production for Char.
366: *
1.135 daniel 367: * Returns the value parsed (as an int), 0 in case of error
1.77 daniel 368: */
1.97 daniel 369: int
370: xmlParseCharRef(xmlParserCtxtPtr ctxt) {
371: int val = 0;
1.222 veillard 372: int count = 0;
1.97 daniel 373:
1.111 daniel 374: if (ctxt->token != 0) {
375: val = ctxt->token;
376: ctxt->token = 0;
377: return(val);
378: }
1.222 veillard 379: /*
380: * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
381: */
1.152 daniel 382: if ((RAW == '&') && (NXT(1) == '#') &&
1.97 daniel 383: (NXT(2) == 'x')) {
384: SKIP(3);
1.222 veillard 385: GROW;
386: while (RAW != ';') { /* loop blocked by count */
387: if ((RAW >= '0') && (RAW <= '9') && (count < 20))
1.97 daniel 388: val = val * 16 + (CUR - '0');
1.222 veillard 389: else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1.97 daniel 390: val = val * 16 + (CUR - 'a') + 10;
1.222 veillard 391: else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1.97 daniel 392: val = val * 16 + (CUR - 'A') + 10;
393: else {
1.123 daniel 394: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1.97 daniel 395: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
396: ctxt->sax->error(ctxt->userData,
397: "xmlParseCharRef: invalid hexadecimal value\n");
398: ctxt->wellFormed = 0;
1.180 daniel 399: ctxt->disableSAX = 1;
1.97 daniel 400: val = 0;
401: break;
402: }
403: NEXT;
1.222 veillard 404: count++;
1.97 daniel 405: }
1.164 daniel 406: if (RAW == ';') {
407: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
408: ctxt->nbChars ++;
409: ctxt->input->cur++;
410: }
1.152 daniel 411: } else if ((RAW == '&') && (NXT(1) == '#')) {
1.97 daniel 412: SKIP(2);
1.222 veillard 413: GROW;
414: while (RAW != ';') { /* loop blocked by count */
415: if ((RAW >= '0') && (RAW <= '9') && (count < 20))
1.97 daniel 416: val = val * 10 + (CUR - '0');
417: else {
1.123 daniel 418: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1.97 daniel 419: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
420: ctxt->sax->error(ctxt->userData,
421: "xmlParseCharRef: invalid decimal value\n");
422: ctxt->wellFormed = 0;
1.180 daniel 423: ctxt->disableSAX = 1;
1.97 daniel 424: val = 0;
425: break;
426: }
427: NEXT;
1.222 veillard 428: count++;
1.97 daniel 429: }
1.164 daniel 430: if (RAW == ';') {
431: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
432: ctxt->nbChars ++;
433: ctxt->input->cur++;
434: }
1.97 daniel 435: } else {
1.123 daniel 436: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1.97 daniel 437: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.229 veillard 438: ctxt->sax->error(ctxt->userData,
439: "xmlParseCharRef: invalid value\n");
1.97 daniel 440: ctxt->wellFormed = 0;
1.180 daniel 441: ctxt->disableSAX = 1;
1.97 daniel 442: }
1.229 veillard 443:
444: /*
445: * [ WFC: Legal Character ]
446: * Characters referred to using character references must match the
447: * production for Char.
448: */
449: if (IS_CHAR(val)) {
450: return(val);
451: } else {
452: ctxt->errNo = XML_ERR_INVALID_CHAR;
1.97 daniel 453: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.229 veillard 454: ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
455: val);
1.97 daniel 456: ctxt->wellFormed = 0;
1.180 daniel 457: ctxt->disableSAX = 1;
1.97 daniel 458: }
1.229 veillard 459: return(0);
460: }
461:
462: /**
463: * xmlParseStringCharRef:
464: * @ctxt: an XML parser context
465: * @str: a pointer to an index in the string
466: *
467: * parse Reference declarations, variant parsing from a string rather
468: * than an an input flow.
469: *
470: * [66] CharRef ::= '&#' [0-9]+ ';' |
471: * '&#x' [0-9a-fA-F]+ ';'
472: *
473: * [ WFC: Legal Character ]
474: * Characters referred to using character references must match the
475: * production for Char.
476: *
477: * Returns the value parsed (as an int), 0 in case of error, str will be
478: * updated to the current value of the index
479: */
480: int
481: xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
482: const xmlChar *ptr;
483: xmlChar cur;
484: int val = 0;
1.98 daniel 485:
1.229 veillard 486: if ((str == NULL) || (*str == NULL)) return(0);
487: ptr = *str;
488: cur = *ptr;
489: if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
490: ptr += 3;
491: cur = *ptr;
492: while (cur != ';') { /* Non input consuming loop */
493: if ((cur >= '0') && (cur <= '9'))
494: val = val * 16 + (cur - '0');
495: else if ((cur >= 'a') && (cur <= 'f'))
496: val = val * 16 + (cur - 'a') + 10;
497: else if ((cur >= 'A') && (cur <= 'F'))
498: val = val * 16 + (cur - 'A') + 10;
499: else {
500: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
501: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
502: ctxt->sax->error(ctxt->userData,
503: "xmlParseStringCharRef: invalid hexadecimal value\n");
504: ctxt->wellFormed = 0;
505: ctxt->disableSAX = 1;
506: val = 0;
507: break;
508: }
509: ptr++;
510: cur = *ptr;
511: }
512: if (cur == ';')
513: ptr++;
514: } else if ((cur == '&') && (ptr[1] == '#')){
515: ptr += 2;
516: cur = *ptr;
517: while (cur != ';') { /* Non input consuming loops */
518: if ((cur >= '0') && (cur <= '9'))
519: val = val * 10 + (cur - '0');
520: else {
521: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
522: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
523: ctxt->sax->error(ctxt->userData,
524: "xmlParseStringCharRef: invalid decimal value\n");
525: ctxt->wellFormed = 0;
526: ctxt->disableSAX = 1;
527: val = 0;
528: break;
529: }
530: ptr++;
531: cur = *ptr;
532: }
533: if (cur == ';')
534: ptr++;
535: } else {
536: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1.97 daniel 537: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.229 veillard 538: ctxt->sax->error(ctxt->userData,
539: "xmlParseCharRef: invalid value\n");
1.97 daniel 540: ctxt->wellFormed = 0;
1.180 daniel 541: ctxt->disableSAX = 1;
1.229 veillard 542: return(0);
1.97 daniel 543: }
1.229 veillard 544: *str = ptr;
1.98 daniel 545:
546: /*
1.229 veillard 547: * [ WFC: Legal Character ]
548: * Characters referred to using character references must match the
549: * production for Char.
1.98 daniel 550: */
1.229 veillard 551: if (IS_CHAR(val)) {
552: return(val);
553: } else {
554: ctxt->errNo = XML_ERR_INVALID_CHAR;
1.98 daniel 555: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.229 veillard 556: ctxt->sax->error(ctxt->userData,
557: "CharRef: invalid xmlChar value %d\n", val);
1.98 daniel 558: ctxt->wellFormed = 0;
1.180 daniel 559: ctxt->disableSAX = 1;
1.98 daniel 560: }
1.229 veillard 561: return(0);
1.96 daniel 562: }
563:
564: /**
565: * xmlParserHandlePEReference:
566: * @ctxt: the parser context
567: *
568: * [69] PEReference ::= '%' Name ';'
569: *
1.98 daniel 570: * [ WFC: No Recursion ]
1.229 veillard 571: * A parsed entity must not contain a recursive
1.98 daniel 572: * reference to itself, either directly or indirectly.
573: *
574: * [ WFC: Entity Declared ]
575: * In a document without any DTD, a document with only an internal DTD
576: * subset which contains no parameter entity references, or a document
577: * with "standalone='yes'", ... ... The declaration of a parameter
578: * entity must precede any reference to it...
579: *
580: * [ VC: Entity Declared ]
581: * In a document with an external subset or external parameter entities
582: * with "standalone='no'", ... ... The declaration of a parameter entity
583: * must precede any reference to it...
584: *
585: * [ WFC: In DTD ]
586: * Parameter-entity references may only appear in the DTD.
587: * NOTE: misleading but this is handled.
588: *
589: * A PEReference may have been detected in the current input stream
1.96 daniel 590: * the handling is done accordingly to
591: * http://www.w3.org/TR/REC-xml#entproc
592: * i.e.
593: * - Included in literal in entity values
594: * - Included as Paraemeter Entity reference within DTDs
595: */
596: void
597: xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 598: xmlChar *name;
1.96 daniel 599: xmlEntityPtr entity = NULL;
600: xmlParserInputPtr input;
601:
1.126 daniel 602: if (ctxt->token != 0) {
603: return;
604: }
1.152 daniel 605: if (RAW != '%') return;
1.96 daniel 606: switch(ctxt->instate) {
1.109 daniel 607: case XML_PARSER_CDATA_SECTION:
608: return;
1.97 daniel 609: case XML_PARSER_COMMENT:
610: return;
1.140 daniel 611: case XML_PARSER_START_TAG:
612: return;
613: case XML_PARSER_END_TAG:
614: return;
1.96 daniel 615: case XML_PARSER_EOF:
1.123 daniel 616: ctxt->errNo = XML_ERR_PEREF_AT_EOF;
1.96 daniel 617: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
618: ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
619: ctxt->wellFormed = 0;
1.180 daniel 620: ctxt->disableSAX = 1;
1.96 daniel 621: return;
622: case XML_PARSER_PROLOG:
1.140 daniel 623: case XML_PARSER_START:
624: case XML_PARSER_MISC:
1.123 daniel 625: ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
1.96 daniel 626: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
627: ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
628: ctxt->wellFormed = 0;
1.180 daniel 629: ctxt->disableSAX = 1;
1.96 daniel 630: return;
1.97 daniel 631: case XML_PARSER_ENTITY_DECL:
1.96 daniel 632: case XML_PARSER_CONTENT:
633: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 634: case XML_PARSER_PI:
1.168 daniel 635: case XML_PARSER_SYSTEM_LITERAL:
1.96 daniel 636: /* we just ignore it there */
637: return;
638: case XML_PARSER_EPILOG:
1.123 daniel 639: ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
1.96 daniel 640: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.97 daniel 641: ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
1.96 daniel 642: ctxt->wellFormed = 0;
1.180 daniel 643: ctxt->disableSAX = 1;
1.96 daniel 644: return;
1.97 daniel 645: case XML_PARSER_ENTITY_VALUE:
646: /*
647: * NOTE: in the case of entity values, we don't do the
1.127 daniel 648: * substitution here since we need the literal
1.97 daniel 649: * entity value to be able to save the internal
650: * subset of the document.
1.222 veillard 651: * This will be handled by xmlStringDecodeEntities
1.97 daniel 652: */
653: return;
1.96 daniel 654: case XML_PARSER_DTD:
1.98 daniel 655: /*
656: * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
657: * In the internal DTD subset, parameter-entity references
658: * can occur only where markup declarations can occur, not
659: * within markup declarations.
660: * In that case this is handled in xmlParseMarkupDecl
661: */
662: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
663: return;
1.96 daniel 664: }
665:
666: NEXT;
667: name = xmlParseName(ctxt);
1.220 veillard 668: if (xmlParserDebugEntities)
1.241 veillard 669: xmlGenericError(xmlGenericErrorContext,
670: "PE Reference: %s\n", name);
1.96 daniel 671: if (name == NULL) {
1.123 daniel 672: ctxt->errNo = XML_ERR_PEREF_NO_NAME;
1.96 daniel 673: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
674: ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
675: ctxt->wellFormed = 0;
1.180 daniel 676: ctxt->disableSAX = 1;
1.96 daniel 677: } else {
1.152 daniel 678: if (RAW == ';') {
1.96 daniel 679: NEXT;
1.98 daniel 680: if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
681: entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.96 daniel 682: if (entity == NULL) {
1.98 daniel 683:
684: /*
685: * [ WFC: Entity Declared ]
686: * In a document without any DTD, a document with only an
687: * internal DTD subset which contains no parameter entity
688: * references, or a document with "standalone='yes'", ...
689: * ... The declaration of a parameter entity must precede
690: * any reference to it...
691: */
692: if ((ctxt->standalone == 1) ||
693: ((ctxt->hasExternalSubset == 0) &&
694: (ctxt->hasPErefs == 0))) {
695: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
696: ctxt->sax->error(ctxt->userData,
697: "PEReference: %%%s; not found\n", name);
698: ctxt->wellFormed = 0;
1.180 daniel 699: ctxt->disableSAX = 1;
1.98 daniel 700: } else {
701: /*
702: * [ VC: Entity Declared ]
703: * In a document with an external subset or external
704: * parameter entities with "standalone='no'", ...
705: * ... The declaration of a parameter entity must precede
706: * any reference to it...
707: */
1.220 veillard 708: if ((!ctxt->disableSAX) &&
709: (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
1.212 veillard 710: ctxt->vctxt.error(ctxt->vctxt.userData,
711: "PEReference: %%%s; not found\n", name);
1.220 veillard 712: } else if ((!ctxt->disableSAX) &&
713: (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1.98 daniel 714: ctxt->sax->warning(ctxt->userData,
715: "PEReference: %%%s; not found\n", name);
716: ctxt->valid = 0;
717: }
1.96 daniel 718: } else {
1.159 daniel 719: if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
720: (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
1.96 daniel 721: /*
1.229 veillard 722: * handle the extra spaces added before and after
1.96 daniel 723: * c.f. http://www.w3.org/TR/REC-xml#as-PE
1.229 veillard 724: * this is done independantly.
1.96 daniel 725: */
726: input = xmlNewEntityInputStream(ctxt, entity);
727: xmlPushInput(ctxt, input);
1.164 daniel 728: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
729: (RAW == '<') && (NXT(1) == '?') &&
730: (NXT(2) == 'x') && (NXT(3) == 'm') &&
731: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 732: xmlParseTextDecl(ctxt);
1.164 daniel 733: }
734: if (ctxt->token == 0)
735: ctxt->token = ' ';
1.96 daniel 736: } else {
737: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
738: ctxt->sax->error(ctxt->userData,
739: "xmlHandlePEReference: %s is not a parameter entity\n",
740: name);
741: ctxt->wellFormed = 0;
1.180 daniel 742: ctxt->disableSAX = 1;
1.96 daniel 743: }
744: }
745: } else {
1.123 daniel 746: ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
1.96 daniel 747: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
748: ctxt->sax->error(ctxt->userData,
749: "xmlHandlePEReference: expecting ';'\n");
750: ctxt->wellFormed = 0;
1.180 daniel 751: ctxt->disableSAX = 1;
1.96 daniel 752: }
1.119 daniel 753: xmlFree(name);
1.97 daniel 754: }
755: }
756:
/*
 * Macro used to grow the current buffer.
 *
 * It expects an xmlChar * variable named "buffer" together with its
 * capacity in "buffer##_size", doubles the capacity and reallocates.
 * It must be expanded inside a function returning a pointer: if the
 * reallocation fails the error is reported with perror(), the old buffer
 * is released (it would otherwise be leaked, since the enclosing function
 * is left right away) and the enclosing function returns NULL.
 */
#define growBuffer(buffer) {						\
    xmlChar *growBuffer_tmp;						\
    buffer##_size *= 2;							\
    growBuffer_tmp = (xmlChar *)					\
    		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (growBuffer_tmp == NULL) {					\
	perror("realloc failed");					\
	xmlFree(buffer);						\
	return(NULL);							\
    }									\
    buffer = growBuffer_tmp;						\
}
1.77 daniel 769:
770: /**
1.135 daniel 771: * xmlStringDecodeEntities:
772: * @ctxt: the parser context
773: * @str: the input string
774: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
775: * @end: an end marker xmlChar, 0 if none
776: * @end2: an end marker xmlChar, 0 if none
777: * @end3: an end marker xmlChar, 0 if none
778: *
1.222 veillard 779: * Takes a entity string content and process to do the adequate subtitutions.
780: *
1.135 daniel 781: * [67] Reference ::= EntityRef | CharRef
782: *
783: * [69] PEReference ::= '%' Name ';'
784: *
785: * Returns A newly allocated string with the substitution done. The caller
786: * must deallocate it !
787: */
788: xmlChar *
789: xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
790: xmlChar end, xmlChar end2, xmlChar end3) {
791: xmlChar *buffer = NULL;
792: int buffer_size = 0;
793:
794: xmlChar *current = NULL;
795: xmlEntityPtr ent;
1.176 daniel 796: int c,l;
797: int nbchars = 0;
1.135 daniel 798:
1.211 veillard 799: if (str == NULL)
800: return(NULL);
801:
1.185 daniel 802: if (ctxt->depth > 40) {
1.230 veillard 803: ctxt->errNo = XML_ERR_ENTITY_LOOP;
1.185 daniel 804: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
805: ctxt->sax->error(ctxt->userData,
806: "Detected entity reference loop\n");
807: ctxt->wellFormed = 0;
808: ctxt->disableSAX = 1;
809: return(NULL);
810: }
811:
1.135 daniel 812: /*
813: * allocate a translation buffer.
814: */
1.140 daniel 815: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.135 daniel 816: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
817: if (buffer == NULL) {
818: perror("xmlDecodeEntities: malloc failed");
819: return(NULL);
820: }
821:
822: /*
823: * Ok loop until we reach one of the ending char or a size limit.
1.222 veillard 824: * we are operating on already parsed values.
1.135 daniel 825: */
1.176 daniel 826: c = CUR_SCHAR(str, l);
1.222 veillard 827: while ((c != 0) && (c != end) && /* non input consuming loop */
828: (c != end2) && (c != end3)) {
1.135 daniel 829:
1.176 daniel 830: if (c == 0) break;
831: if ((c == '&') && (str[1] == '#')) {
1.135 daniel 832: int val = xmlParseStringCharRef(ctxt, &str);
1.176 daniel 833: if (val != 0) {
834: COPY_BUF(0,buffer,nbchars,val);
835: }
836: } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1.220 veillard 837: if (xmlParserDebugEntities)
1.241 veillard 838: xmlGenericError(xmlGenericErrorContext,
839: "String decoding Entity Reference: %.30s\n",
1.220 veillard 840: str);
1.135 daniel 841: ent = xmlParseStringEntityRef(ctxt, &str);
1.222 veillard 842: if ((ent != NULL) &&
843: (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1.219 veillard 844: if (ent->content != NULL) {
845: COPY_BUF(0,buffer,nbchars,ent->content[0]);
846: } else {
847: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
848: ctxt->sax->error(ctxt->userData,
849: "internal error entity has no content\n");
850: }
851: } else if ((ent != NULL) && (ent->content != NULL)) {
1.185 daniel 852: xmlChar *rep;
853:
854: ctxt->depth++;
855: rep = xmlStringDecodeEntities(ctxt, ent->content, what,
856: 0, 0, 0);
857: ctxt->depth--;
858: if (rep != NULL) {
859: current = rep;
1.222 veillard 860: while (*current != 0) { /* non input consuming loop */
1.185 daniel 861: buffer[nbchars++] = *current++;
862: if (nbchars >
863: buffer_size - XML_PARSER_BUFFER_SIZE) {
864: growBuffer(buffer);
865: }
1.135 daniel 866: }
1.185 daniel 867: xmlFree(rep);
1.135 daniel 868: }
869: } else if (ent != NULL) {
870: int i = xmlStrlen(ent->name);
871: const xmlChar *cur = ent->name;
872:
1.176 daniel 873: buffer[nbchars++] = '&';
874: if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 875: growBuffer(buffer);
876: }
877: for (;i > 0;i--)
1.176 daniel 878: buffer[nbchars++] = *cur++;
879: buffer[nbchars++] = ';';
1.135 daniel 880: }
1.176 daniel 881: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1.220 veillard 882: if (xmlParserDebugEntities)
1.241 veillard 883: xmlGenericError(xmlGenericErrorContext,
884: "String decoding PE Reference: %.30s\n", str);
1.135 daniel 885: ent = xmlParseStringPEReference(ctxt, &str);
886: if (ent != NULL) {
1.185 daniel 887: xmlChar *rep;
888:
889: ctxt->depth++;
890: rep = xmlStringDecodeEntities(ctxt, ent->content, what,
891: 0, 0, 0);
892: ctxt->depth--;
893: if (rep != NULL) {
894: current = rep;
1.222 veillard 895: while (*current != 0) { /* non input consuming loop */
1.185 daniel 896: buffer[nbchars++] = *current++;
897: if (nbchars >
898: buffer_size - XML_PARSER_BUFFER_SIZE) {
899: growBuffer(buffer);
900: }
1.135 daniel 901: }
1.185 daniel 902: xmlFree(rep);
1.135 daniel 903: }
904: }
905: } else {
1.176 daniel 906: COPY_BUF(l,buffer,nbchars,c);
907: str += l;
908: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 909: growBuffer(buffer);
910: }
911: }
1.176 daniel 912: c = CUR_SCHAR(str, l);
1.135 daniel 913: }
1.229 veillard 914: buffer[nbchars++] = 0;
915: return(buffer);
1.172 daniel 916: }
917:
1.229 veillard 918:
919: /************************************************************************
920: * *
1.123 daniel 921: * Commodity functions to handle xmlChars *
1.28 daniel 922: * *
923: ************************************************************************/
924:
1.50 daniel 925: /**
926: * xmlStrndup:
1.123 daniel 927: * @cur: the input xmlChar *
1.50 daniel 928: * @len: the len of @cur
929: *
1.123 daniel 930: * a strndup for array of xmlChar's
1.68 daniel 931: *
1.123 daniel 932: * Returns a new xmlChar * or NULL
1.1 veillard 933: */
1.123 daniel 934: xmlChar *
935: xmlStrndup(const xmlChar *cur, int len) {
1.135 daniel 936: xmlChar *ret;
937:
938: if ((cur == NULL) || (len < 0)) return(NULL);
1.204 veillard 939: ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1.1 veillard 940: if (ret == NULL) {
1.241 veillard 941: xmlGenericError(xmlGenericErrorContext,
942: "malloc of %ld byte failed\n",
1.123 daniel 943: (len + 1) * (long)sizeof(xmlChar));
1.1 veillard 944: return(NULL);
945: }
1.123 daniel 946: memcpy(ret, cur, len * sizeof(xmlChar));
1.1 veillard 947: ret[len] = 0;
948: return(ret);
949: }
950:
1.50 daniel 951: /**
952: * xmlStrdup:
1.123 daniel 953: * @cur: the input xmlChar *
1.50 daniel 954: *
1.152 daniel 955: * a strdup for array of xmlChar's. Since they are supposed to be
956: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
957: * a termination mark of '0'.
1.68 daniel 958: *
1.123 daniel 959: * Returns a new xmlChar * or NULL
1.1 veillard 960: */
1.123 daniel 961: xmlChar *
962: xmlStrdup(const xmlChar *cur) {
963: const xmlChar *p = cur;
1.1 veillard 964:
1.135 daniel 965: if (cur == NULL) return(NULL);
1.222 veillard 966: while (*p != 0) p++; /* non input consuming */
1.1 veillard 967: return(xmlStrndup(cur, p - cur));
968: }
969:
1.50 daniel 970: /**
971: * xmlCharStrndup:
972: * @cur: the input char *
973: * @len: the len of @cur
974: *
1.123 daniel 975: * a strndup for char's to xmlChar's
1.68 daniel 976: *
1.123 daniel 977: * Returns a new xmlChar * or NULL
1.45 daniel 978: */
979:
1.123 daniel 980: xmlChar *
1.55 daniel 981: xmlCharStrndup(const char *cur, int len) {
1.45 daniel 982: int i;
1.135 daniel 983: xmlChar *ret;
984:
985: if ((cur == NULL) || (len < 0)) return(NULL);
1.204 veillard 986: ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1.45 daniel 987: if (ret == NULL) {
1.241 veillard 988: xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1.123 daniel 989: (len + 1) * (long)sizeof(xmlChar));
1.45 daniel 990: return(NULL);
991: }
992: for (i = 0;i < len;i++)
1.123 daniel 993: ret[i] = (xmlChar) cur[i];
1.45 daniel 994: ret[len] = 0;
995: return(ret);
996: }
997:
1.50 daniel 998: /**
999: * xmlCharStrdup:
1000: * @cur: the input char *
1001: * @len: the len of @cur
1002: *
1.123 daniel 1003: * a strdup for char's to xmlChar's
1.68 daniel 1004: *
1.123 daniel 1005: * Returns a new xmlChar * or NULL
1.45 daniel 1006: */
1007:
1.123 daniel 1008: xmlChar *
1.55 daniel 1009: xmlCharStrdup(const char *cur) {
1.45 daniel 1010: const char *p = cur;
1011:
1.135 daniel 1012: if (cur == NULL) return(NULL);
1.222 veillard 1013: while (*p != '\0') p++; /* non input consuming */
1.45 daniel 1014: return(xmlCharStrndup(cur, p - cur));
1015: }
1016:
1.50 daniel 1017: /**
1018: * xmlStrcmp:
1.123 daniel 1019: * @str1: the first xmlChar *
1020: * @str2: the second xmlChar *
1.50 daniel 1021: *
1.123 daniel 1022: * a strcmp for xmlChar's
1.68 daniel 1023: *
1024: * Returns the integer result of the comparison
1.14 veillard 1025: */
1026:
1.55 daniel 1027: int
1.123 daniel 1028: xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1.14 veillard 1029: register int tmp;
1030:
1.229 veillard 1031: if (str1 == str2) return(0);
1.135 daniel 1032: if (str1 == NULL) return(-1);
1033: if (str2 == NULL) return(1);
1.14 veillard 1034: do {
1.232 veillard 1035: tmp = *str1++ - *str2;
1.14 veillard 1036: if (tmp != 0) return(tmp);
1.232 veillard 1037: } while (*str2++ != 0);
1038: return 0;
1.14 veillard 1039: }
1040:
1.50 daniel 1041: /**
1.236 veillard 1042: * xmlStrEqual:
1043: * @str1: the first xmlChar *
1044: * @str2: the second xmlChar *
1045: *
1046: * Check if both string are equal of have same content
1047: * Should be a bit more readable and faster than xmlStrEqual()
1048: *
1049: * Returns 1 if they are equal, 0 if they are different
1050: */
1051:
1052: int
1053: xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1054: if (str1 == str2) return(1);
1055: if (str1 == NULL) return(0);
1056: if (str2 == NULL) return(0);
1057: do {
1058: if (*str1++ != *str2) return(0);
1059: } while (*str2++);
1060: return(1);
1061: }
1062:
1063: /**
1.50 daniel 1064: * xmlStrncmp:
1.123 daniel 1065: * @str1: the first xmlChar *
1066: * @str2: the second xmlChar *
1.50 daniel 1067: * @len: the max comparison length
1068: *
1.123 daniel 1069: * a strncmp for xmlChar's
1.68 daniel 1070: *
1071: * Returns the integer result of the comparison
1.14 veillard 1072: */
1073:
1.55 daniel 1074: int
1.123 daniel 1075: xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1.14 veillard 1076: register int tmp;
1077:
1078: if (len <= 0) return(0);
1.232 veillard 1079: if (str1 == str2) return(0);
1.135 daniel 1080: if (str1 == NULL) return(-1);
1081: if (str2 == NULL) return(1);
1.14 veillard 1082: do {
1.232 veillard 1083: tmp = *str1++ - *str2;
1084: if (tmp != 0 || --len == 0) return(tmp);
1085: } while (*str2++ != 0);
1086: return 0;
1087: }
1088:
1089: static xmlChar casemap[256] = {
1090: 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1091: 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1092: 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1093: 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1094: 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1095: 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1096: 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1097: 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1098: 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1099: 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1100: 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1101: 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1102: 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1103: 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1104: 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1105: 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1106: 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1107: 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1108: 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1109: 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1110: 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1111: 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1112: 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1113: 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1114: 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1115: 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1116: 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1117: 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1118: 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1119: 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1120: 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1121: 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1122: };
1123:
1124: /**
1125: * xmlStrcasecmp:
1126: * @str1: the first xmlChar *
1127: * @str2: the second xmlChar *
1128: *
1129: * a strcasecmp for xmlChar's
1130: *
1131: * Returns the integer result of the comparison
1132: */
1133:
1134: int
1135: xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1136: register int tmp;
1137:
1138: if (str1 == str2) return(0);
1139: if (str1 == NULL) return(-1);
1140: if (str2 == NULL) return(1);
1141: do {
1142: tmp = casemap[*str1++] - casemap[*str2];
1.14 veillard 1143: if (tmp != 0) return(tmp);
1.232 veillard 1144: } while (*str2++ != 0);
1145: return 0;
1146: }
1147:
1148: /**
1149: * xmlStrncasecmp:
1150: * @str1: the first xmlChar *
1151: * @str2: the second xmlChar *
1152: * @len: the max comparison length
1153: *
1154: * a strncasecmp for xmlChar's
1155: *
1156: * Returns the integer result of the comparison
1157: */
1158:
1159: int
1160: xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1161: register int tmp;
1162:
1163: if (len <= 0) return(0);
1164: if (str1 == str2) return(0);
1165: if (str1 == NULL) return(-1);
1166: if (str2 == NULL) return(1);
1167: do {
1168: tmp = casemap[*str1++] - casemap[*str2];
1169: if (tmp != 0 || --len == 0) return(tmp);
1170: } while (*str2++ != 0);
1171: return 0;
1.14 veillard 1172: }
1173:
1.50 daniel 1174: /**
1175: * xmlStrchr:
1.123 daniel 1176: * @str: the xmlChar * array
1177: * @val: the xmlChar to search
1.50 daniel 1178: *
1.123 daniel 1179: * a strchr for xmlChar's
1.68 daniel 1180: *
1.123 daniel 1181: * Returns the xmlChar * for the first occurence or NULL.
1.14 veillard 1182: */
1183:
1.123 daniel 1184: const xmlChar *
1185: xmlStrchr(const xmlChar *str, xmlChar val) {
1.135 daniel 1186: if (str == NULL) return(NULL);
1.222 veillard 1187: while (*str != 0) { /* non input consuming */
1.123 daniel 1188: if (*str == val) return((xmlChar *) str);
1.14 veillard 1189: str++;
1190: }
1191: return(NULL);
1.89 daniel 1192: }
1193:
1194: /**
1195: * xmlStrstr:
1.123 daniel 1196: * @str: the xmlChar * array (haystack)
1197: * @val: the xmlChar to search (needle)
1.89 daniel 1198: *
1.123 daniel 1199: * a strstr for xmlChar's
1.89 daniel 1200: *
1.123 daniel 1201: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 1202: */
1203:
1.123 daniel 1204: const xmlChar *
1205: xmlStrstr(const xmlChar *str, xmlChar *val) {
1.89 daniel 1206: int n;
1207:
1208: if (str == NULL) return(NULL);
1209: if (val == NULL) return(NULL);
1210: n = xmlStrlen(val);
1211:
1212: if (n == 0) return(str);
1.222 veillard 1213: while (*str != 0) { /* non input consuming */
1.89 daniel 1214: if (*str == *val) {
1.123 daniel 1215: if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1.89 daniel 1216: }
1.232 veillard 1217: str++;
1218: }
1219: return(NULL);
1220: }
1221:
1222: /**
1223: * xmlStrcasestr:
1224: * @str: the xmlChar * array (haystack)
1225: * @val: the xmlChar to search (needle)
1226: *
1227: * a case-ignoring strstr for xmlChar's
1228: *
1229: * Returns the xmlChar * for the first occurence or NULL.
1230: */
1231:
1232: const xmlChar *
1233: xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1234: int n;
1235:
1236: if (str == NULL) return(NULL);
1237: if (val == NULL) return(NULL);
1238: n = xmlStrlen(val);
1239:
1240: if (n == 0) return(str);
1241: while (*str != 0) { /* non input consuming */
1242: if (casemap[*str] == casemap[*val])
1243: if (!xmlStrncasecmp(str, val, n)) return(str);
1.89 daniel 1244: str++;
1245: }
1246: return(NULL);
1247: }
1248:
1249: /**
1250: * xmlStrsub:
1.123 daniel 1251: * @str: the xmlChar * array (haystack)
1.89 daniel 1252: * @start: the index of the first char (zero based)
1253: * @len: the length of the substring
1254: *
1255: * Extract a substring of a given string
1256: *
1.123 daniel 1257: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 1258: */
1259:
1.123 daniel 1260: xmlChar *
1261: xmlStrsub(const xmlChar *str, int start, int len) {
1.89 daniel 1262: int i;
1263:
1264: if (str == NULL) return(NULL);
1265: if (start < 0) return(NULL);
1.90 daniel 1266: if (len < 0) return(NULL);
1.89 daniel 1267:
1268: for (i = 0;i < start;i++) {
1269: if (*str == 0) return(NULL);
1270: str++;
1271: }
1272: if (*str == 0) return(NULL);
1273: return(xmlStrndup(str, len));
1.14 veillard 1274: }
1.28 daniel 1275:
1.50 daniel 1276: /**
1277: * xmlStrlen:
1.123 daniel 1278: * @str: the xmlChar * array
1.50 daniel 1279: *
1.127 daniel 1280: * length of a xmlChar's string
1.68 daniel 1281: *
1.123 daniel 1282: * Returns the number of xmlChar contained in the ARRAY.
1.45 daniel 1283: */
1284:
1.55 daniel 1285: int
1.123 daniel 1286: xmlStrlen(const xmlChar *str) {
1.45 daniel 1287: int len = 0;
1288:
1289: if (str == NULL) return(0);
1.222 veillard 1290: while (*str != 0) { /* non input consuming */
1.45 daniel 1291: str++;
1292: len++;
1293: }
1294: return(len);
1295: }
1296:
1.50 daniel 1297: /**
1298: * xmlStrncat:
1.123 daniel 1299: * @cur: the original xmlChar * array
1300: * @add: the xmlChar * array added
1.50 daniel 1301: * @len: the length of @add
1302: *
1.123 daniel 1303: * a strncat for array of xmlChar's
1.68 daniel 1304: *
1.123 daniel 1305: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 1306: */
1307:
1.123 daniel 1308: xmlChar *
1309: xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1.45 daniel 1310: int size;
1.123 daniel 1311: xmlChar *ret;
1.45 daniel 1312:
1313: if ((add == NULL) || (len == 0))
1314: return(cur);
1315: if (cur == NULL)
1316: return(xmlStrndup(add, len));
1317:
1318: size = xmlStrlen(cur);
1.204 veillard 1319: ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1.45 daniel 1320: if (ret == NULL) {
1.241 veillard 1321: xmlGenericError(xmlGenericErrorContext,
1322: "xmlStrncat: realloc of %ld byte failed\n",
1.123 daniel 1323: (size + len + 1) * (long)sizeof(xmlChar));
1.45 daniel 1324: return(cur);
1325: }
1.123 daniel 1326: memcpy(&ret[size], add, len * sizeof(xmlChar));
1.45 daniel 1327: ret[size + len] = 0;
1328: return(ret);
1329: }
1330:
1.50 daniel 1331: /**
1332: * xmlStrcat:
1.123 daniel 1333: * @cur: the original xmlChar * array
1334: * @add: the xmlChar * array added
1.50 daniel 1335: *
1.152 daniel 1336: * a strcat for array of xmlChar's. Since they are supposed to be
1337: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1338: * a termination mark of '0'.
1.68 daniel 1339: *
1.123 daniel 1340: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 1341: */
1.123 daniel 1342: xmlChar *
1343: xmlStrcat(xmlChar *cur, const xmlChar *add) {
1344: const xmlChar *p = add;
1.45 daniel 1345:
1346: if (add == NULL) return(cur);
1347: if (cur == NULL)
1348: return(xmlStrdup(add));
1349:
1.222 veillard 1350: while (*p != 0) p++; /* non input consuming */
1.45 daniel 1351: return(xmlStrncat(cur, add, p - add));
1352: }
1353:
1354: /************************************************************************
1355: * *
1356: * Commodity functions, cleanup needed ? *
1357: * *
1358: ************************************************************************/
1359:
1.50 daniel 1360: /**
1361: * areBlanks:
1362: * @ctxt: an XML parser context
1.123 daniel 1363: * @str: a xmlChar *
1.50 daniel 1364: * @len: the size of @str
1365: *
1.45 daniel 1366: * Is this a sequence of blank chars that one can ignore ?
1.50 daniel 1367: *
1.68 daniel 1368: * Returns 1 if ignorable 0 otherwise.
1.45 daniel 1369: */
1370:
1.123 daniel 1371: static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1.104 daniel 1372: int i, ret;
1.45 daniel 1373: xmlNodePtr lastChild;
1374:
1.176 daniel 1375: /*
1376: * Check for xml:space value.
1377: */
1378: if (*(ctxt->space) == 1)
1379: return(0);
1380:
1381: /*
1382: * Check that the string is made of blanks
1383: */
1.45 daniel 1384: for (i = 0;i < len;i++)
1385: if (!(IS_BLANK(str[i]))) return(0);
1386:
1.176 daniel 1387: /*
1388: * Look if the element is mixed content in the Dtd if available
1389: */
1.104 daniel 1390: if (ctxt->myDoc != NULL) {
1391: ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1392: if (ret == 0) return(1);
1393: if (ret == 1) return(0);
1394: }
1.176 daniel 1395:
1.104 daniel 1396: /*
1.176 daniel 1397: * Otherwise, heuristic :-\
1.104 daniel 1398: */
1.179 daniel 1399: if (ctxt->keepBlanks)
1400: return(0);
1401: if (RAW != '<') return(0);
1402: if (ctxt->node == NULL) return(0);
1403: if ((ctxt->node->children == NULL) &&
1404: (RAW == '<') && (NXT(1) == '/')) return(0);
1405:
1.45 daniel 1406: lastChild = xmlGetLastChild(ctxt->node);
1407: if (lastChild == NULL) {
1408: if (ctxt->node->content != NULL) return(0);
1409: } else if (xmlNodeIsText(lastChild))
1410: return(0);
1.157 daniel 1411: else if ((ctxt->node->children != NULL) &&
1412: (xmlNodeIsText(ctxt->node->children)))
1.104 daniel 1413: return(0);
1.45 daniel 1414: return(1);
1415: }
1416:
1417: /*
1418: * Forward definition for recursive behaviour.
1419: */
1.77 daniel 1420: void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1421: void xmlParseReference(xmlParserCtxtPtr ctxt);
1.45 daniel 1422:
1.28 daniel 1423: /************************************************************************
1424: * *
1425: * Extra stuff for namespace support *
1426: * Relates to http://www.w3.org/TR/WD-xml-names *
1427: * *
1428: ************************************************************************/
1429:
1.50 daniel 1430: /**
1.72 daniel 1431: * xmlSplitQName:
1.162 daniel 1432: * @ctxt: an XML parser context
1.72 daniel 1433: * @name: an XML parser context
1.123 daniel 1434: * @prefix: a xmlChar **
1.72 daniel 1435: *
1.206 veillard 1436: * parse an UTF8 encoded XML qualified name string
1.72 daniel 1437: *
1438: * [NS 5] QName ::= (Prefix ':')? LocalPart
1439: *
1440: * [NS 6] Prefix ::= NCName
1441: *
1442: * [NS 7] LocalPart ::= NCName
1443: *
1.127 daniel 1444: * Returns the local part, and prefix is updated
1.72 daniel 1445: * to get the Prefix if any.
1446: */
1447:
1.123 daniel 1448: xmlChar *
1.162 daniel 1449: xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1450: xmlChar buf[XML_MAX_NAMELEN + 5];
1.222 veillard 1451: xmlChar *buffer = NULL;
1.162 daniel 1452: int len = 0;
1.222 veillard 1453: int max = XML_MAX_NAMELEN;
1.123 daniel 1454: xmlChar *ret = NULL;
1455: const xmlChar *cur = name;
1.206 veillard 1456: int c;
1.72 daniel 1457:
1458: *prefix = NULL;
1.113 daniel 1459:
1460: /* xml: prefix is not really a namespace */
1461: if ((cur[0] == 'x') && (cur[1] == 'm') &&
1462: (cur[2] == 'l') && (cur[3] == ':'))
1463: return(xmlStrdup(name));
1464:
1.162 daniel 1465: /* nasty but valid */
1466: if (cur[0] == ':')
1467: return(xmlStrdup(name));
1468:
1.206 veillard 1469: c = *cur++;
1.222 veillard 1470: while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1.206 veillard 1471: buf[len++] = c;
1472: c = *cur++;
1.162 daniel 1473: }
1.222 veillard 1474: if (len >= max) {
1475: /*
1476: * Okay someone managed to make a huge name, so he's ready to pay
1477: * for the processing speed.
1478: */
1479: max = len * 2;
1480:
1481: buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1482: if (buffer == NULL) {
1483: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1484: ctxt->sax->error(ctxt->userData,
1485: "xmlSplitQName: out of memory\n");
1486: return(NULL);
1487: }
1488: memcpy(buffer, buf, len);
1489: while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1490: if (len + 10 > max) {
1491: max *= 2;
1492: buffer = (xmlChar *) xmlRealloc(buffer,
1493: max * sizeof(xmlChar));
1494: if (buffer == NULL) {
1495: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1496: ctxt->sax->error(ctxt->userData,
1497: "xmlSplitQName: out of memory\n");
1498: return(NULL);
1499: }
1500: }
1501: buffer[len++] = c;
1502: c = *cur++;
1503: }
1504: buffer[len] = 0;
1505: }
1.72 daniel 1506:
1.222 veillard 1507: if (buffer == NULL)
1508: ret = xmlStrndup(buf, len);
1509: else {
1510: ret = buffer;
1511: buffer = NULL;
1512: max = XML_MAX_NAMELEN;
1513: }
1514:
1.72 daniel 1515:
1.162 daniel 1516: if (c == ':') {
1.206 veillard 1517: c = *cur++;
1518: if (c == 0) return(ret);
1.72 daniel 1519: *prefix = ret;
1.162 daniel 1520: len = 0;
1.72 daniel 1521:
1.222 veillard 1522: while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1.206 veillard 1523: buf[len++] = c;
1524: c = *cur++;
1.162 daniel 1525: }
1.222 veillard 1526: if (len >= max) {
1527: /*
1528: * Okay someone managed to make a huge name, so he's ready to pay
1529: * for the processing speed.
1530: */
1.229 veillard 1531: max = len * 2;
1532:
1533: buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1534: if (buffer == NULL) {
1.55 daniel 1535: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 1536: ctxt->sax->error(ctxt->userData,
1.229 veillard 1537: "xmlSplitQName: out of memory\n");
1538: return(NULL);
1539: }
1540: memcpy(buffer, buf, len);
1541: while (c != 0) { /* tested bigname2.xml */
1542: if (len + 10 > max) {
1543: max *= 2;
1544: buffer = (xmlChar *) xmlRealloc(buffer,
1545: max * sizeof(xmlChar));
1546: if (buffer == NULL) {
1547: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1548: ctxt->sax->error(ctxt->userData,
1549: "xmlSplitQName: out of memory\n");
1550: return(NULL);
1551: }
1552: }
1553: buffer[len++] = c;
1554: c = *cur++;
1.122 daniel 1555: }
1.229 veillard 1556: buffer[len] = 0;
1557: }
1558:
1559: if (buffer == NULL)
1560: ret = xmlStrndup(buf, len);
1561: else {
1562: ret = buffer;
1563: }
1.45 daniel 1564: }
1565:
1.229 veillard 1566: return(ret);
1.45 daniel 1567: }
1568:
1.28 daniel 1569: /************************************************************************
1570: * *
1571: * The parser itself *
1572: * Relates to http://www.w3.org/TR/REC-xml *
1573: * *
1574: ************************************************************************/
1.14 veillard 1575:
1.50 daniel 1576: /**
1577: * xmlParseName:
1578: * @ctxt: an XML parser context
1579: *
1580: * parse an XML name.
1.22 daniel 1581: *
1582: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1583: * CombiningChar | Extender
1584: *
1585: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1586: *
1587: * [6] Names ::= Name (S Name)*
1.68 daniel 1588: *
1589: * Returns the Name parsed or NULL
1.1 veillard 1590: */
1591:
1.123 daniel 1592: xmlChar *
1.55 daniel 1593: xmlParseName(xmlParserCtxtPtr ctxt) {
1.160 daniel 1594: xmlChar buf[XML_MAX_NAMELEN + 5];
1595: int len = 0, l;
1596: int c;
1.222 veillard 1597: int count = 0;
1.1 veillard 1598:
1.91 daniel 1599: GROW;
1.160 daniel 1600: c = CUR_CHAR(l);
1.190 daniel 1601: if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1602: (!IS_LETTER(c) && (c != '_') &&
1603: (c != ':'))) {
1.91 daniel 1604: return(NULL);
1605: }
1.40 daniel 1606:
1.222 veillard 1607: while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1.190 daniel 1608: ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1609: (c == '.') || (c == '-') ||
1610: (c == '_') || (c == ':') ||
1611: (IS_COMBINING(c)) ||
1612: (IS_EXTENDER(c)))) {
1.222 veillard 1613: if (count++ > 100) {
1614: count = 0;
1615: GROW;
1616: }
1.160 daniel 1617: COPY_BUF(l,buf,len,c);
1618: NEXTL(l);
1619: c = CUR_CHAR(l);
1.91 daniel 1620: if (len >= XML_MAX_NAMELEN) {
1.222 veillard 1621: /*
1622: * Okay someone managed to make a huge name, so he's ready to pay
1623: * for the processing speed.
1624: */
1625: xmlChar *buffer;
1626: int max = len * 2;
1627:
1628: buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1629: if (buffer == NULL) {
1630: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1631: ctxt->sax->error(ctxt->userData,
1632: "xmlParseName: out of memory\n");
1633: return(NULL);
1634: }
1635: memcpy(buffer, buf, len);
1636: while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1.160 daniel 1637: (c == '.') || (c == '-') ||
1638: (c == '_') || (c == ':') ||
1639: (IS_COMBINING(c)) ||
1640: (IS_EXTENDER(c))) {
1.222 veillard 1641: if (count++ > 100) {
1642: count = 0;
1643: GROW;
1644: }
1645: if (len + 10 > max) {
1646: max *= 2;
1647: buffer = (xmlChar *) xmlRealloc(buffer,
1648: max * sizeof(xmlChar));
1649: if (buffer == NULL) {
1650: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1651: ctxt->sax->error(ctxt->userData,
1652: "xmlParseName: out of memory\n");
1653: return(NULL);
1654: }
1655: }
1656: COPY_BUF(l,buffer,len,c);
1.160 daniel 1657: NEXTL(l);
1658: c = CUR_CHAR(l);
1.97 daniel 1659: }
1.222 veillard 1660: buffer[len] = 0;
1661: return(buffer);
1.91 daniel 1662: }
1663: }
1664: return(xmlStrndup(buf, len));
1.22 daniel 1665: }
1666:
1.50 daniel 1667: /**
1.135 daniel 1668: * xmlParseStringName:
1669: * @ctxt: an XML parser context
1.229 veillard 1670: * @str: a pointer to the string pointer (IN/OUT)
1.135 daniel 1671: *
1672: * parse an XML name.
1673: *
1674: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1675: * CombiningChar | Extender
1676: *
1677: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1678: *
1679: * [6] Names ::= Name (S Name)*
1680: *
1681: * Returns the Name parsed or NULL. The str pointer
1682: * is updated to the current location in the string.
1683: */
1684:
1685: xmlChar *
1686: xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1.176 daniel 1687: xmlChar buf[XML_MAX_NAMELEN + 5];
1688: const xmlChar *cur = *str;
1689: int len = 0, l;
1690: int c;
1.135 daniel 1691:
1.176 daniel 1692: c = CUR_SCHAR(cur, l);
1693: if (!IS_LETTER(c) && (c != '_') &&
1694: (c != ':')) {
1.135 daniel 1695: return(NULL);
1696: }
1697:
1.222 veillard 1698: while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1.176 daniel 1699: (c == '.') || (c == '-') ||
1700: (c == '_') || (c == ':') ||
1701: (IS_COMBINING(c)) ||
1702: (IS_EXTENDER(c))) {
1703: COPY_BUF(l,buf,len,c);
1704: cur += l;
1705: c = CUR_SCHAR(cur, l);
1.222 veillard 1706: if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1707: /*
1708: * Okay someone managed to make a huge name, so he's ready to pay
1709: * for the processing speed.
1710: */
1711: xmlChar *buffer;
1712: int max = len * 2;
1713:
1714: buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1715: if (buffer == NULL) {
1716: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1717: ctxt->sax->error(ctxt->userData,
1718: "xmlParseStringName: out of memory\n");
1719: return(NULL);
1720: }
1721: memcpy(buffer, buf, len);
1722: while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1.176 daniel 1723: (c == '.') || (c == '-') ||
1724: (c == '_') || (c == ':') ||
1725: (IS_COMBINING(c)) ||
1726: (IS_EXTENDER(c))) {
1.222 veillard 1727: if (len + 10 > max) {
1728: max *= 2;
1729: buffer = (xmlChar *) xmlRealloc(buffer,
1730: max * sizeof(xmlChar));
1731: if (buffer == NULL) {
1732: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1733: ctxt->sax->error(ctxt->userData,
1734: "xmlParseStringName: out of memory\n");
1735: return(NULL);
1736: }
1737: }
1738: COPY_BUF(l,buffer,len,c);
1.176 daniel 1739: cur += l;
1740: c = CUR_SCHAR(cur, l);
1741: }
1.222 veillard 1742: buffer[len] = 0;
1743: *str = cur;
1744: return(buffer);
1.176 daniel 1745: }
1.135 daniel 1746: }
1.176 daniel 1747: *str = cur;
1748: return(xmlStrndup(buf, len));
1.135 daniel 1749: }
1750:
1751: /**
1.50 daniel 1752: * xmlParseNmtoken:
1753: * @ctxt: an XML parser context
1754: *
1755: * parse an XML Nmtoken.
1.22 daniel 1756: *
1757: * [7] Nmtoken ::= (NameChar)+
1758: *
1759: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1.68 daniel 1760: *
1761: * Returns the Nmtoken parsed or NULL
1.22 daniel 1762: */
1763:
1.123 daniel 1764: xmlChar *
1.55 daniel 1765: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1.222 veillard 1766: xmlChar buf[XML_MAX_NAMELEN + 5];
1767: int len = 0, l;
1768: int c;
1769: int count = 0;
1.22 daniel 1770:
1.91 daniel 1771: GROW;
1.160 daniel 1772: c = CUR_CHAR(l);
1.222 veillard 1773:
1774: while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1.160 daniel 1775: (c == '.') || (c == '-') ||
1776: (c == '_') || (c == ':') ||
1777: (IS_COMBINING(c)) ||
1778: (IS_EXTENDER(c))) {
1.222 veillard 1779: if (count++ > 100) {
1780: count = 0;
1781: GROW;
1782: }
1.160 daniel 1783: COPY_BUF(l,buf,len,c);
1784: NEXTL(l);
1785: c = CUR_CHAR(l);
1.91 daniel 1786: if (len >= XML_MAX_NAMELEN) {
1.222 veillard 1787: /*
1788: * Okay someone managed to make a huge token, so he's ready to pay
1789: * for the processing speed.
1790: */
1791: xmlChar *buffer;
1792: int max = len * 2;
1793:
1794: buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1795: if (buffer == NULL) {
1796: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1797: ctxt->sax->error(ctxt->userData,
1798: "xmlParseNmtoken: out of memory\n");
1799: return(NULL);
1800: }
1801: memcpy(buffer, buf, len);
1802: while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1.160 daniel 1803: (c == '.') || (c == '-') ||
1804: (c == '_') || (c == ':') ||
1805: (IS_COMBINING(c)) ||
1806: (IS_EXTENDER(c))) {
1.222 veillard 1807: if (count++ > 100) {
1808: count = 0;
1809: GROW;
1810: }
1811: if (len + 10 > max) {
1812: max *= 2;
1813: buffer = (xmlChar *) xmlRealloc(buffer,
1814: max * sizeof(xmlChar));
1815: if (buffer == NULL) {
1816: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1817: ctxt->sax->error(ctxt->userData,
1818: "xmlParseName: out of memory\n");
1819: return(NULL);
1820: }
1821: }
1822: COPY_BUF(l,buffer,len,c);
1.160 daniel 1823: NEXTL(l);
1824: c = CUR_CHAR(l);
1825: }
1.222 veillard 1826: buffer[len] = 0;
1827: return(buffer);
1.91 daniel 1828: }
1829: }
1.168 daniel 1830: if (len == 0)
1831: return(NULL);
1.91 daniel 1832: return(xmlStrndup(buf, len));
1.1 veillard 1833: }
1834:
1.50 daniel 1835: /**
1836: * xmlParseEntityValue:
1837: * @ctxt: an XML parser context
1.78 daniel 1838: * @orig: if non-NULL store a copy of the original entity value
1.50 daniel 1839: *
1.229 veillard 1840: * parse a value for ENTITY declarations
1.24 daniel 1841: *
1842: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1843: * "'" ([^%&'] | PEReference | Reference)* "'"
1.68 daniel 1844: *
1.78 daniel 1845: * Returns the EntityValue parsed with reference substituted or NULL
1.24 daniel 1846: */
1847:
1.123 daniel 1848: xmlChar *
1849: xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1.135 daniel 1850: xmlChar *buf = NULL;
1851: int len = 0;
1.140 daniel 1852: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 1853: int c, l;
1.135 daniel 1854: xmlChar stop;
1.123 daniel 1855: xmlChar *ret = NULL;
1.176 daniel 1856: const xmlChar *cur = NULL;
1.98 daniel 1857: xmlParserInputPtr input;
1.24 daniel 1858:
1.152 daniel 1859: if (RAW == '"') stop = '"';
1860: else if (RAW == '\'') stop = '\'';
1.135 daniel 1861: else {
1862: ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
1863: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1864: ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
1865: ctxt->wellFormed = 0;
1.180 daniel 1866: ctxt->disableSAX = 1;
1.135 daniel 1867: return(NULL);
1868: }
1869: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
1870: if (buf == NULL) {
1.241 veillard 1871: xmlGenericError(xmlGenericErrorContext,
1872: "malloc of %d byte failed\n", size);
1.135 daniel 1873: return(NULL);
1874: }
1.94 daniel 1875:
1.135 daniel 1876: /*
1877: * The content of the entity definition is copied in a buffer.
1878: */
1.94 daniel 1879:
1.135 daniel 1880: ctxt->instate = XML_PARSER_ENTITY_VALUE;
1881: input = ctxt->input;
1882: GROW;
1883: NEXT;
1.152 daniel 1884: c = CUR_CHAR(l);
1.135 daniel 1885: /*
1886: * NOTE: 4.4.5 Included in Literal
1887: * When a parameter entity reference appears in a literal entity
1888: * value, ... a single or double quote character in the replacement
1889: * text is always treated as a normal data character and will not
1890: * terminate the literal.
1891: * In practice it means we stop the loop only when back at parsing
1892: * the initial entity and the quote is found
1893: */
1.222 veillard 1894: while ((IS_CHAR(c)) && ((c != stop) || /* checked */
1895: (ctxt->input != input))) {
1.152 daniel 1896: if (len + 5 >= size) {
1.135 daniel 1897: size *= 2;
1.204 veillard 1898: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 1899: if (buf == NULL) {
1.241 veillard 1900: xmlGenericError(xmlGenericErrorContext,
1901: "realloc of %d byte failed\n", size);
1.135 daniel 1902: return(NULL);
1.94 daniel 1903: }
1.79 daniel 1904: }
1.152 daniel 1905: COPY_BUF(l,buf,len,c);
1906: NEXTL(l);
1.98 daniel 1907: /*
1.135 daniel 1908: * Pop-up of finished entities.
1.98 daniel 1909: */
1.222 veillard 1910: while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
1.135 daniel 1911: xmlPopInput(ctxt);
1.152 daniel 1912:
1.221 veillard 1913: GROW;
1.152 daniel 1914: c = CUR_CHAR(l);
1.135 daniel 1915: if (c == 0) {
1.94 daniel 1916: GROW;
1.152 daniel 1917: c = CUR_CHAR(l);
1.79 daniel 1918: }
1.135 daniel 1919: }
1920: buf[len] = 0;
1921:
1922: /*
1.176 daniel 1923: * Raise problem w.r.t. '&' and '%' being used in non-entities
1924: * reference constructs. Note Charref will be handled in
1925: * xmlStringDecodeEntities()
1926: */
1927: cur = buf;
1.223 veillard 1928: while (*cur != 0) { /* non input consuming */
1.176 daniel 1929: if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
1930: xmlChar *name;
1931: xmlChar tmp = *cur;
1932:
1933: cur++;
1934: name = xmlParseStringName(ctxt, &cur);
1935: if ((name == NULL) || (*cur != ';')) {
1.230 veillard 1936: ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
1.176 daniel 1937: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1938: ctxt->sax->error(ctxt->userData,
1939: "EntityValue: '%c' forbidden except for entities references\n",
1940: tmp);
1941: ctxt->wellFormed = 0;
1.180 daniel 1942: ctxt->disableSAX = 1;
1.176 daniel 1943: }
1944: if ((ctxt->inSubset == 1) && (tmp == '%')) {
1.230 veillard 1945: ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
1.176 daniel 1946: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1947: ctxt->sax->error(ctxt->userData,
1948: "EntityValue: PEReferences forbidden in internal subset\n",
1949: tmp);
1950: ctxt->wellFormed = 0;
1.180 daniel 1951: ctxt->disableSAX = 1;
1.176 daniel 1952: }
1953: if (name != NULL)
1954: xmlFree(name);
1955: }
1956: cur++;
1957: }
1958:
1959: /*
1.135 daniel 1960: * Then PEReference entities are substituted.
1961: */
1962: if (c != stop) {
1963: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.55 daniel 1964: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 1965: ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
1.59 daniel 1966: ctxt->wellFormed = 0;
1.180 daniel 1967: ctxt->disableSAX = 1;
1.170 daniel 1968: xmlFree(buf);
1.135 daniel 1969: } else {
1970: NEXT;
1971: /*
1972: * NOTE: 4.4.7 Bypassed
1973: * When a general entity reference appears in the EntityValue in
1974: * an entity declaration, it is bypassed and left as is.
1.176 daniel 1975: * so XML_SUBSTITUTE_REF is not set here.
1.135 daniel 1976: */
1977: ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
1978: 0, 0, 0);
1979: if (orig != NULL)
1980: *orig = buf;
1981: else
1982: xmlFree(buf);
1.24 daniel 1983: }
1984:
1985: return(ret);
1986: }
1987:
1.50 daniel 1988: /**
1989: * xmlParseAttValue:
1990: * @ctxt: an XML parser context
1991: *
1992: * parse a value for an attribute
1.78 daniel 1993: * Note: the parser won't do substitution of entities here, this
1.113 daniel 1994: * will be handled later in xmlStringGetNodeList
1.29 daniel 1995: *
1996: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
1997: * "'" ([^<&'] | Reference)* "'"
1.68 daniel 1998: *
1.129 daniel 1999: * 3.3.3 Attribute-Value Normalization:
2000: * Before the value of an attribute is passed to the application or
2001: * checked for validity, the XML processor must normalize it as follows:
2002: * - a character reference is processed by appending the referenced
2003: * character to the attribute value
2004: * - an entity reference is processed by recursively processing the
2005: * replacement text of the entity
2006: * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2007: * appending #x20 to the normalized value, except that only a single
2008: * #x20 is appended for a "#xD#xA" sequence that is part of an external
2009: * parsed entity or the literal entity value of an internal parsed entity
2010: * - other characters are processed by appending them to the normalized value
1.130 daniel 2011: * If the declared value is not CDATA, then the XML processor must further
2012: * process the normalized attribute value by discarding any leading and
2013: * trailing space (#x20) characters, and by replacing sequences of space
2014: * (#x20) characters by a single space (#x20) character.
2015: * All attributes for which no declaration has been read should be treated
2016: * by a non-validating parser as if declared CDATA.
1.129 daniel 2017: *
2018: * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
1.29 daniel 2019: */
2020:
1.123 daniel 2021: xmlChar *
1.55 daniel 2022: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.129 daniel 2023: xmlChar limit = 0;
1.198 daniel 2024: xmlChar *buf = NULL;
2025: int len = 0;
2026: int buf_size = 0;
2027: int c, l;
1.129 daniel 2028: xmlChar *current = NULL;
2029: xmlEntityPtr ent;
2030:
1.29 daniel 2031:
1.91 daniel 2032: SHRINK;
1.151 daniel 2033: if (NXT(0) == '"') {
1.96 daniel 2034: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.129 daniel 2035: limit = '"';
1.40 daniel 2036: NEXT;
1.151 daniel 2037: } else if (NXT(0) == '\'') {
1.129 daniel 2038: limit = '\'';
1.96 daniel 2039: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.40 daniel 2040: NEXT;
1.29 daniel 2041: } else {
1.123 daniel 2042: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
1.55 daniel 2043: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2044: ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
1.59 daniel 2045: ctxt->wellFormed = 0;
1.180 daniel 2046: ctxt->disableSAX = 1;
1.129 daniel 2047: return(NULL);
1.29 daniel 2048: }
2049:
1.129 daniel 2050: /*
2051: * allocate a translation buffer.
2052: */
1.198 daniel 2053: buf_size = XML_PARSER_BUFFER_SIZE;
2054: buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2055: if (buf == NULL) {
1.129 daniel 2056: perror("xmlParseAttValue: malloc failed");
2057: return(NULL);
2058: }
2059:
2060: /*
2061: * Ok loop until we reach one of the ending char or a size limit.
2062: */
1.198 daniel 2063: c = CUR_CHAR(l);
1.223 veillard 2064: while (((NXT(0) != limit) && /* checked */
2065: (c != '<')) || (ctxt->token != 0)) {
1.198 daniel 2066: if (c == 0) break;
1.205 veillard 2067: if (ctxt->token == '&') {
1.229 veillard 2068: /*
2069: * The reparsing will be done in xmlStringGetNodeList()
2070: * called by the attribute() function in SAX.c
2071: */
1.205 veillard 2072: static xmlChar buffer[6] = "&";
2073:
2074: if (len > buf_size - 10) {
2075: growBuffer(buf);
2076: }
2077: current = &buffer[0];
1.223 veillard 2078: while (*current != 0) { /* non input consuming */
1.205 veillard 2079: buf[len++] = *current++;
2080: }
2081: ctxt->token = 0;
2082: } else if ((c == '&') && (NXT(1) == '#')) {
1.129 daniel 2083: int val = xmlParseCharRef(ctxt);
1.229 veillard 2084: if (val == '&') {
2085: /*
2086: * The reparsing will be done in xmlStringGetNodeList()
2087: * called by the attribute() function in SAX.c
2088: */
2089: static xmlChar buffer[6] = "&";
2090:
2091: if (len > buf_size - 10) {
2092: growBuffer(buf);
2093: }
2094: current = &buffer[0];
2095: while (*current != 0) { /* non input consuming */
2096: buf[len++] = *current++;
2097: }
2098: } else {
1.239 veillard 2099: len += xmlCopyChar(0, &buf[len], val);
1.229 veillard 2100: }
1.198 daniel 2101: } else if (c == '&') {
1.129 daniel 2102: ent = xmlParseEntityRef(ctxt);
2103: if ((ent != NULL) &&
2104: (ctxt->replaceEntities != 0)) {
1.185 daniel 2105: xmlChar *rep;
2106:
1.186 daniel 2107: if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2108: rep = xmlStringDecodeEntities(ctxt, ent->content,
1.185 daniel 2109: XML_SUBSTITUTE_REF, 0, 0, 0);
1.186 daniel 2110: if (rep != NULL) {
2111: current = rep;
1.223 veillard 2112: while (*current != 0) { /* non input consuming */
1.198 daniel 2113: buf[len++] = *current++;
2114: if (len > buf_size - 10) {
2115: growBuffer(buf);
1.186 daniel 2116: }
1.185 daniel 2117: }
1.186 daniel 2118: xmlFree(rep);
1.129 daniel 2119: }
1.186 daniel 2120: } else {
2121: if (ent->content != NULL)
1.198 daniel 2122: buf[len++] = ent->content[0];
1.129 daniel 2123: }
2124: } else if (ent != NULL) {
2125: int i = xmlStrlen(ent->name);
2126: const xmlChar *cur = ent->name;
2127:
1.186 daniel 2128: /*
2129: * This may look absurd but is needed to detect
2130: * entities problems
2131: */
1.211 veillard 2132: if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2133: (ent->content != NULL)) {
1.186 daniel 2134: xmlChar *rep;
2135: rep = xmlStringDecodeEntities(ctxt, ent->content,
2136: XML_SUBSTITUTE_REF, 0, 0, 0);
2137: if (rep != NULL)
2138: xmlFree(rep);
2139: }
2140:
2141: /*
2142: * Just output the reference
2143: */
1.198 daniel 2144: buf[len++] = '&';
2145: if (len > buf_size - i - 10) {
2146: growBuffer(buf);
1.129 daniel 2147: }
2148: for (;i > 0;i--)
1.198 daniel 2149: buf[len++] = *cur++;
2150: buf[len++] = ';';
1.129 daniel 2151: }
2152: } else {
1.198 daniel 2153: if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2154: COPY_BUF(l,buf,len,0x20);
2155: if (len > buf_size - 10) {
2156: growBuffer(buf);
1.129 daniel 2157: }
2158: } else {
1.198 daniel 2159: COPY_BUF(l,buf,len,c);
2160: if (len > buf_size - 10) {
2161: growBuffer(buf);
1.129 daniel 2162: }
2163: }
1.198 daniel 2164: NEXTL(l);
1.129 daniel 2165: }
1.198 daniel 2166: GROW;
2167: c = CUR_CHAR(l);
1.129 daniel 2168: }
1.198 daniel 2169: buf[len++] = 0;
1.152 daniel 2170: if (RAW == '<') {
1.230 veillard 2171: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.129 daniel 2172: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2173: ctxt->sax->error(ctxt->userData,
2174: "Unescaped '<' not allowed in attributes values\n");
2175: ctxt->wellFormed = 0;
1.180 daniel 2176: ctxt->disableSAX = 1;
1.152 daniel 2177: } else if (RAW != limit) {
1.230 veillard 2178: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
1.129 daniel 2179: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2180: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2181: ctxt->wellFormed = 0;
1.180 daniel 2182: ctxt->disableSAX = 1;
1.129 daniel 2183: } else
2184: NEXT;
1.198 daniel 2185: return(buf);
1.29 daniel 2186: }
2187:
1.50 daniel 2188: /**
2189: * xmlParseSystemLiteral:
2190: * @ctxt: an XML parser context
2191: *
2192: * parse an XML Literal
1.21 daniel 2193: *
1.22 daniel 2194: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.68 daniel 2195: *
2196: * Returns the SystemLiteral parsed or NULL
1.21 daniel 2197: */
2198:
1.123 daniel 2199: xmlChar *
1.55 daniel 2200: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 2201: xmlChar *buf = NULL;
2202: int len = 0;
1.140 daniel 2203: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 2204: int cur, l;
1.135 daniel 2205: xmlChar stop;
1.168 daniel 2206: int state = ctxt->instate;
1.223 veillard 2207: int count = 0;
1.21 daniel 2208:
1.91 daniel 2209: SHRINK;
1.152 daniel 2210: if (RAW == '"') {
1.40 daniel 2211: NEXT;
1.135 daniel 2212: stop = '"';
1.152 daniel 2213: } else if (RAW == '\'') {
1.40 daniel 2214: NEXT;
1.135 daniel 2215: stop = '\'';
1.21 daniel 2216: } else {
1.230 veillard 2217: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.55 daniel 2218: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2219: ctxt->sax->error(ctxt->userData,
2220: "SystemLiteral \" or ' expected\n");
1.59 daniel 2221: ctxt->wellFormed = 0;
1.180 daniel 2222: ctxt->disableSAX = 1;
1.135 daniel 2223: return(NULL);
1.21 daniel 2224: }
2225:
1.135 daniel 2226: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2227: if (buf == NULL) {
1.241 veillard 2228: xmlGenericError(xmlGenericErrorContext,
2229: "malloc of %d byte failed\n", size);
1.135 daniel 2230: return(NULL);
2231: }
1.168 daniel 2232: ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
1.152 daniel 2233: cur = CUR_CHAR(l);
1.223 veillard 2234: while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
1.152 daniel 2235: if (len + 5 >= size) {
1.135 daniel 2236: size *= 2;
1.204 veillard 2237: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 2238: if (buf == NULL) {
1.241 veillard 2239: xmlGenericError(xmlGenericErrorContext,
2240: "realloc of %d byte failed\n", size);
1.204 veillard 2241: ctxt->instate = (xmlParserInputState) state;
1.135 daniel 2242: return(NULL);
2243: }
2244: }
1.223 veillard 2245: count++;
2246: if (count > 50) {
2247: GROW;
2248: count = 0;
2249: }
1.152 daniel 2250: COPY_BUF(l,buf,len,cur);
2251: NEXTL(l);
2252: cur = CUR_CHAR(l);
1.135 daniel 2253: if (cur == 0) {
2254: GROW;
2255: SHRINK;
1.152 daniel 2256: cur = CUR_CHAR(l);
1.135 daniel 2257: }
2258: }
2259: buf[len] = 0;
1.204 veillard 2260: ctxt->instate = (xmlParserInputState) state;
1.135 daniel 2261: if (!IS_CHAR(cur)) {
1.230 veillard 2262: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
1.135 daniel 2263: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2264: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2265: ctxt->wellFormed = 0;
1.180 daniel 2266: ctxt->disableSAX = 1;
1.135 daniel 2267: } else {
2268: NEXT;
2269: }
2270: return(buf);
1.21 daniel 2271: }
2272:
1.50 daniel 2273: /**
2274: * xmlParsePubidLiteral:
2275: * @ctxt: an XML parser context
1.21 daniel 2276: *
1.50 daniel 2277: * parse an XML public literal
1.68 daniel 2278: *
2279: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2280: *
2281: * Returns the PubidLiteral parsed or NULL.
1.21 daniel 2282: */
2283:
1.123 daniel 2284: xmlChar *
1.55 daniel 2285: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 2286: xmlChar *buf = NULL;
2287: int len = 0;
1.140 daniel 2288: int size = XML_PARSER_BUFFER_SIZE;
1.135 daniel 2289: xmlChar cur;
2290: xmlChar stop;
1.223 veillard 2291: int count = 0;
1.125 daniel 2292:
1.91 daniel 2293: SHRINK;
1.152 daniel 2294: if (RAW == '"') {
1.40 daniel 2295: NEXT;
1.135 daniel 2296: stop = '"';
1.152 daniel 2297: } else if (RAW == '\'') {
1.40 daniel 2298: NEXT;
1.135 daniel 2299: stop = '\'';
1.21 daniel 2300: } else {
1.230 veillard 2301: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.55 daniel 2302: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2303: ctxt->sax->error(ctxt->userData,
2304: "SystemLiteral \" or ' expected\n");
1.59 daniel 2305: ctxt->wellFormed = 0;
1.180 daniel 2306: ctxt->disableSAX = 1;
1.135 daniel 2307: return(NULL);
2308: }
2309: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2310: if (buf == NULL) {
1.241 veillard 2311: xmlGenericError(xmlGenericErrorContext,
2312: "malloc of %d byte failed\n", size);
1.135 daniel 2313: return(NULL);
2314: }
2315: cur = CUR;
1.223 veillard 2316: while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
1.135 daniel 2317: if (len + 1 >= size) {
2318: size *= 2;
1.204 veillard 2319: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 2320: if (buf == NULL) {
1.241 veillard 2321: xmlGenericError(xmlGenericErrorContext,
2322: "realloc of %d byte failed\n", size);
1.135 daniel 2323: return(NULL);
2324: }
2325: }
2326: buf[len++] = cur;
1.223 veillard 2327: count++;
2328: if (count > 50) {
2329: GROW;
2330: count = 0;
2331: }
1.135 daniel 2332: NEXT;
2333: cur = CUR;
2334: if (cur == 0) {
2335: GROW;
2336: SHRINK;
2337: cur = CUR;
2338: }
2339: }
2340: buf[len] = 0;
2341: if (cur != stop) {
1.230 veillard 2342: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
1.135 daniel 2343: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2344: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2345: ctxt->wellFormed = 0;
1.180 daniel 2346: ctxt->disableSAX = 1;
1.135 daniel 2347: } else {
2348: NEXT;
1.21 daniel 2349: }
1.135 daniel 2350: return(buf);
1.21 daniel 2351: }
2352:
1.50 daniel 2353: /**
2354: * xmlParseCharData:
2355: * @ctxt: an XML parser context
2356: * @cdata: int indicating whether we are within a CDATA section
2357: *
2358: * parse a CharData section.
2359: * if we are within a CDATA section ']]>' marks an end of section.
1.27 daniel 2360: *
1.151 daniel 2361: * The right angle bracket (>) may be represented using the string ">",
2362: * and must, for compatibility, be escaped using ">" or a character
2363: * reference when it appears in the string "]]>" in content, when that
2364: * string is not marking the end of a CDATA section.
2365: *
1.27 daniel 2366: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2367: */
2368:
1.55 daniel 2369: void
2370: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1.152 daniel 2371: xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
1.91 daniel 2372: int nbchar = 0;
1.152 daniel 2373: int cur, l;
1.223 veillard 2374: int count = 0;
1.27 daniel 2375:
1.91 daniel 2376: SHRINK;
1.223 veillard 2377: GROW;
1.152 daniel 2378: cur = CUR_CHAR(l);
1.223 veillard 2379: while (((cur != '<') || (ctxt->token == '<')) && /* checked */
1.190 daniel 2380: ((cur != '&') || (ctxt->token == '&')) &&
1.229 veillard 2381: (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
1.97 daniel 2382: if ((cur == ']') && (NXT(1) == ']') &&
1.59 daniel 2383: (NXT(2) == '>')) {
2384: if (cdata) break;
2385: else {
1.230 veillard 2386: ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
1.59 daniel 2387: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.151 daniel 2388: ctxt->sax->error(ctxt->userData,
1.59 daniel 2389: "Sequence ']]>' not allowed in content\n");
1.151 daniel 2390: /* Should this be relaxed ??? I see a "must here */
2391: ctxt->wellFormed = 0;
1.180 daniel 2392: ctxt->disableSAX = 1;
1.59 daniel 2393: }
2394: }
1.152 daniel 2395: COPY_BUF(l,buf,nbchar,cur);
2396: if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
1.91 daniel 2397: /*
2398: * Ok the segment is to be consumed as chars.
2399: */
1.171 daniel 2400: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 2401: if (areBlanks(ctxt, buf, nbchar)) {
2402: if (ctxt->sax->ignorableWhitespace != NULL)
2403: ctxt->sax->ignorableWhitespace(ctxt->userData,
2404: buf, nbchar);
2405: } else {
2406: if (ctxt->sax->characters != NULL)
2407: ctxt->sax->characters(ctxt->userData, buf, nbchar);
2408: }
2409: }
2410: nbchar = 0;
2411: }
1.223 veillard 2412: count++;
2413: if (count > 50) {
2414: GROW;
2415: count = 0;
2416: }
1.152 daniel 2417: NEXTL(l);
2418: cur = CUR_CHAR(l);
1.27 daniel 2419: }
1.91 daniel 2420: if (nbchar != 0) {
2421: /*
2422: * Ok the segment is to be consumed as chars.
2423: */
1.171 daniel 2424: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 2425: if (areBlanks(ctxt, buf, nbchar)) {
2426: if (ctxt->sax->ignorableWhitespace != NULL)
2427: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2428: } else {
2429: if (ctxt->sax->characters != NULL)
2430: ctxt->sax->characters(ctxt->userData, buf, nbchar);
2431: }
2432: }
1.45 daniel 2433: }
1.27 daniel 2434: }
2435:
1.50 daniel 2436: /**
2437: * xmlParseExternalID:
2438: * @ctxt: an XML parser context
1.123 daniel 2439: * @publicID: a xmlChar** receiving PubidLiteral
1.67 daniel 2440: * @strict: indicate whether we should restrict parsing to only
2441: * production [75], see NOTE below
1.50 daniel 2442: *
1.67 daniel 2443: * Parse an External ID or a Public ID
2444: *
2445: * NOTE: Productions [75] and [83] interract badly since [75] can generate
2446: * 'PUBLIC' S PubidLiteral S SystemLiteral
1.22 daniel 2447: *
2448: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2449: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1.67 daniel 2450: *
2451: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2452: *
1.68 daniel 2453: * Returns the function returns SystemLiteral and in the second
1.67 daniel 2454: * case publicID receives PubidLiteral, is strict is off
2455: * it is possible to return NULL and have publicID set.
1.22 daniel 2456: */
2457:
1.123 daniel 2458: xmlChar *
2459: xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2460: xmlChar *URI = NULL;
1.22 daniel 2461:
1.91 daniel 2462: SHRINK;
1.152 daniel 2463: if ((RAW == 'S') && (NXT(1) == 'Y') &&
1.40 daniel 2464: (NXT(2) == 'S') && (NXT(3) == 'T') &&
2465: (NXT(4) == 'E') && (NXT(5) == 'M')) {
2466: SKIP(6);
1.59 daniel 2467: if (!IS_BLANK(CUR)) {
1.230 veillard 2468: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 2469: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2470: ctxt->sax->error(ctxt->userData,
1.59 daniel 2471: "Space required after 'SYSTEM'\n");
2472: ctxt->wellFormed = 0;
1.180 daniel 2473: ctxt->disableSAX = 1;
1.59 daniel 2474: }
1.42 daniel 2475: SKIP_BLANKS;
1.39 daniel 2476: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 2477: if (URI == NULL) {
1.230 veillard 2478: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.55 daniel 2479: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2480: ctxt->sax->error(ctxt->userData,
1.39 daniel 2481: "xmlParseExternalID: SYSTEM, no URI\n");
1.59 daniel 2482: ctxt->wellFormed = 0;
1.180 daniel 2483: ctxt->disableSAX = 1;
1.59 daniel 2484: }
1.152 daniel 2485: } else if ((RAW == 'P') && (NXT(1) == 'U') &&
1.40 daniel 2486: (NXT(2) == 'B') && (NXT(3) == 'L') &&
2487: (NXT(4) == 'I') && (NXT(5) == 'C')) {
2488: SKIP(6);
1.59 daniel 2489: if (!IS_BLANK(CUR)) {
1.230 veillard 2490: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 2491: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2492: ctxt->sax->error(ctxt->userData,
1.59 daniel 2493: "Space required after 'PUBLIC'\n");
2494: ctxt->wellFormed = 0;
1.180 daniel 2495: ctxt->disableSAX = 1;
1.59 daniel 2496: }
1.42 daniel 2497: SKIP_BLANKS;
1.39 daniel 2498: *publicID = xmlParsePubidLiteral(ctxt);
1.59 daniel 2499: if (*publicID == NULL) {
1.230 veillard 2500: ctxt->errNo = XML_ERR_PUBID_REQUIRED;
1.55 daniel 2501: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2502: ctxt->sax->error(ctxt->userData,
1.39 daniel 2503: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.59 daniel 2504: ctxt->wellFormed = 0;
1.180 daniel 2505: ctxt->disableSAX = 1;
1.59 daniel 2506: }
1.67 daniel 2507: if (strict) {
2508: /*
2509: * We don't handle [83] so "S SystemLiteral" is required.
2510: */
2511: if (!IS_BLANK(CUR)) {
1.230 veillard 2512: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 2513: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2514: ctxt->sax->error(ctxt->userData,
1.67 daniel 2515: "Space required after the Public Identifier\n");
2516: ctxt->wellFormed = 0;
1.180 daniel 2517: ctxt->disableSAX = 1;
1.67 daniel 2518: }
2519: } else {
2520: /*
2521: * We handle [83] so we return immediately, if
2522: * "S SystemLiteral" is not detected. From a purely parsing
2523: * point of view that's a nice mess.
2524: */
1.135 daniel 2525: const xmlChar *ptr;
2526: GROW;
2527:
2528: ptr = CUR_PTR;
1.67 daniel 2529: if (!IS_BLANK(*ptr)) return(NULL);
2530:
1.223 veillard 2531: while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
1.173 daniel 2532: if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
1.59 daniel 2533: }
1.42 daniel 2534: SKIP_BLANKS;
1.39 daniel 2535: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 2536: if (URI == NULL) {
1.230 veillard 2537: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.55 daniel 2538: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2539: ctxt->sax->error(ctxt->userData,
1.39 daniel 2540: "xmlParseExternalID: PUBLIC, no URI\n");
1.59 daniel 2541: ctxt->wellFormed = 0;
1.180 daniel 2542: ctxt->disableSAX = 1;
1.59 daniel 2543: }
1.22 daniel 2544: }
1.39 daniel 2545: return(URI);
1.22 daniel 2546: }
2547:
1.50 daniel 2548: /**
2549: * xmlParseComment:
1.69 daniel 2550: * @ctxt: an XML parser context
1.50 daniel 2551: *
1.3 veillard 2552: * Skip an XML (SGML) comment <!-- .... -->
1.38 daniel 2553: * The spec says that "For compatibility, the string "--" (double-hyphen)
2554: * must not occur within comments. "
1.22 daniel 2555: *
2556: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 2557: */
1.72 daniel 2558: void
1.114 daniel 2559: xmlParseComment(xmlParserCtxtPtr ctxt) {
1.135 daniel 2560: xmlChar *buf = NULL;
1.195 daniel 2561: int len;
1.140 daniel 2562: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 2563: int q, ql;
2564: int r, rl;
2565: int cur, l;
1.140 daniel 2566: xmlParserInputState state;
1.187 daniel 2567: xmlParserInputPtr input = ctxt->input;
1.223 veillard 2568: int count = 0;
1.3 veillard 2569:
2570: /*
1.22 daniel 2571: * Check that there is a comment right here.
1.3 veillard 2572: */
1.152 daniel 2573: if ((RAW != '<') || (NXT(1) != '!') ||
1.72 daniel 2574: (NXT(2) != '-') || (NXT(3) != '-')) return;
1.3 veillard 2575:
1.140 daniel 2576: state = ctxt->instate;
1.97 daniel 2577: ctxt->instate = XML_PARSER_COMMENT;
1.91 daniel 2578: SHRINK;
1.40 daniel 2579: SKIP(4);
1.135 daniel 2580: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2581: if (buf == NULL) {
1.241 veillard 2582: xmlGenericError(xmlGenericErrorContext,
2583: "malloc of %d byte failed\n", size);
1.140 daniel 2584: ctxt->instate = state;
1.135 daniel 2585: return;
2586: }
1.152 daniel 2587: q = CUR_CHAR(ql);
2588: NEXTL(ql);
2589: r = CUR_CHAR(rl);
2590: NEXTL(rl);
2591: cur = CUR_CHAR(l);
1.195 daniel 2592: len = 0;
1.223 veillard 2593: while (IS_CHAR(cur) && /* checked */
1.135 daniel 2594: ((cur != '>') ||
2595: (r != '-') || (q != '-'))) {
1.195 daniel 2596: if ((r == '-') && (q == '-') && (len > 1)) {
1.230 veillard 2597: ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
1.55 daniel 2598: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2599: ctxt->sax->error(ctxt->userData,
1.38 daniel 2600: "Comment must not contain '--' (double-hyphen)`\n");
1.59 daniel 2601: ctxt->wellFormed = 0;
1.180 daniel 2602: ctxt->disableSAX = 1;
1.59 daniel 2603: }
1.152 daniel 2604: if (len + 5 >= size) {
1.135 daniel 2605: size *= 2;
1.204 veillard 2606: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 2607: if (buf == NULL) {
1.241 veillard 2608: xmlGenericError(xmlGenericErrorContext,
2609: "realloc of %d byte failed\n", size);
1.140 daniel 2610: ctxt->instate = state;
1.135 daniel 2611: return;
2612: }
2613: }
1.152 daniel 2614: COPY_BUF(ql,buf,len,q);
1.135 daniel 2615: q = r;
1.152 daniel 2616: ql = rl;
1.135 daniel 2617: r = cur;
1.152 daniel 2618: rl = l;
1.223 veillard 2619:
2620: count++;
2621: if (count > 50) {
2622: GROW;
2623: count = 0;
2624: }
1.152 daniel 2625: NEXTL(l);
2626: cur = CUR_CHAR(l);
1.135 daniel 2627: if (cur == 0) {
2628: SHRINK;
2629: GROW;
1.152 daniel 2630: cur = CUR_CHAR(l);
1.135 daniel 2631: }
1.3 veillard 2632: }
1.135 daniel 2633: buf[len] = 0;
2634: if (!IS_CHAR(cur)) {
1.230 veillard 2635: ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
1.55 daniel 2636: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2637: ctxt->sax->error(ctxt->userData,
1.135 daniel 2638: "Comment not terminated \n<!--%.50s\n", buf);
1.59 daniel 2639: ctxt->wellFormed = 0;
1.180 daniel 2640: ctxt->disableSAX = 1;
1.178 daniel 2641: xmlFree(buf);
1.3 veillard 2642: } else {
1.187 daniel 2643: if (input != ctxt->input) {
1.230 veillard 2644: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 2645: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2646: ctxt->sax->error(ctxt->userData,
2647: "Comment doesn't start and stop in the same entity\n");
2648: ctxt->wellFormed = 0;
2649: ctxt->disableSAX = 1;
2650: }
1.40 daniel 2651: NEXT;
1.171 daniel 2652: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2653: (!ctxt->disableSAX))
1.135 daniel 2654: ctxt->sax->comment(ctxt->userData, buf);
2655: xmlFree(buf);
1.3 veillard 2656: }
1.140 daniel 2657: ctxt->instate = state;
1.3 veillard 2658: }
2659:
1.50 daniel 2660: /**
2661: * xmlParsePITarget:
2662: * @ctxt: an XML parser context
2663: *
2664: * parse the name of a PI
1.22 daniel 2665: *
2666: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1.68 daniel 2667: *
2668: * Returns the PITarget name or NULL
1.22 daniel 2669: */
2670:
1.123 daniel 2671: xmlChar *
1.55 daniel 2672: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1.123 daniel 2673: xmlChar *name;
1.22 daniel 2674:
2675: name = xmlParseName(ctxt);
1.139 daniel 2676: if ((name != NULL) &&
1.22 daniel 2677: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 2678: ((name[1] == 'm') || (name[1] == 'M')) &&
2679: ((name[2] == 'l') || (name[2] == 'L'))) {
1.139 daniel 2680: int i;
1.177 daniel 2681: if ((name[0] == 'x') && (name[1] == 'm') &&
2682: (name[2] == 'l') && (name[3] == 0)) {
1.230 veillard 2683: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
1.151 daniel 2684: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2685: ctxt->sax->error(ctxt->userData,
2686: "XML declaration allowed only at the start of the document\n");
2687: ctxt->wellFormed = 0;
1.180 daniel 2688: ctxt->disableSAX = 1;
1.151 daniel 2689: return(name);
2690: } else if (name[3] == 0) {
1.230 veillard 2691: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
1.151 daniel 2692: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2693: ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2694: ctxt->wellFormed = 0;
1.180 daniel 2695: ctxt->disableSAX = 1;
1.151 daniel 2696: return(name);
2697: }
1.139 daniel 2698: for (i = 0;;i++) {
2699: if (xmlW3CPIs[i] == NULL) break;
1.236 veillard 2700: if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
1.139 daniel 2701: return(name);
2702: }
2703: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
1.230 veillard 2704: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
1.139 daniel 2705: ctxt->sax->warning(ctxt->userData,
1.122 daniel 2706: "xmlParsePItarget: invalid name prefix 'xml'\n");
2707: }
1.22 daniel 2708: }
2709: return(name);
2710: }
2711:
1.50 daniel 2712: /**
2713: * xmlParsePI:
2714: * @ctxt: an XML parser context
2715: *
2716: * parse an XML Processing Instruction.
1.22 daniel 2717: *
2718: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.68 daniel 2719: *
1.69 daniel 2720: * The processing is transfered to SAX once parsed.
1.3 veillard 2721: */
2722:
1.55 daniel 2723: void
2724: xmlParsePI(xmlParserCtxtPtr ctxt) {
1.135 daniel 2725: xmlChar *buf = NULL;
2726: int len = 0;
1.140 daniel 2727: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 2728: int cur, l;
1.123 daniel 2729: xmlChar *target;
1.140 daniel 2730: xmlParserInputState state;
1.223 veillard 2731: int count = 0;
1.22 daniel 2732:
1.152 daniel 2733: if ((RAW == '<') && (NXT(1) == '?')) {
1.187 daniel 2734: xmlParserInputPtr input = ctxt->input;
1.140 daniel 2735: state = ctxt->instate;
2736: ctxt->instate = XML_PARSER_PI;
1.3 veillard 2737: /*
2738: * this is a Processing Instruction.
2739: */
1.40 daniel 2740: SKIP(2);
1.91 daniel 2741: SHRINK;
1.3 veillard 2742:
2743: /*
1.22 daniel 2744: * Parse the target name and check for special support like
2745: * namespace.
1.3 veillard 2746: */
1.22 daniel 2747: target = xmlParsePITarget(ctxt);
2748: if (target != NULL) {
1.156 daniel 2749: if ((RAW == '?') && (NXT(1) == '>')) {
1.187 daniel 2750: if (input != ctxt->input) {
1.230 veillard 2751: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 2752: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2753: ctxt->sax->error(ctxt->userData,
2754: "PI declaration doesn't start and stop in the same entity\n");
2755: ctxt->wellFormed = 0;
2756: ctxt->disableSAX = 1;
2757: }
1.156 daniel 2758: SKIP(2);
2759:
2760: /*
2761: * SAX: PI detected.
2762: */
1.171 daniel 2763: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.156 daniel 2764: (ctxt->sax->processingInstruction != NULL))
2765: ctxt->sax->processingInstruction(ctxt->userData,
2766: target, NULL);
2767: ctxt->instate = state;
1.170 daniel 2768: xmlFree(target);
1.156 daniel 2769: return;
2770: }
1.135 daniel 2771: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2772: if (buf == NULL) {
1.241 veillard 2773: xmlGenericError(xmlGenericErrorContext,
2774: "malloc of %d byte failed\n", size);
1.140 daniel 2775: ctxt->instate = state;
1.135 daniel 2776: return;
2777: }
2778: cur = CUR;
2779: if (!IS_BLANK(cur)) {
1.230 veillard 2780: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.114 daniel 2781: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2782: ctxt->sax->error(ctxt->userData,
2783: "xmlParsePI: PI %s space expected\n", target);
2784: ctxt->wellFormed = 0;
1.180 daniel 2785: ctxt->disableSAX = 1;
1.114 daniel 2786: }
2787: SKIP_BLANKS;
1.152 daniel 2788: cur = CUR_CHAR(l);
1.223 veillard 2789: while (IS_CHAR(cur) && /* checked */
1.135 daniel 2790: ((cur != '?') || (NXT(1) != '>'))) {
1.152 daniel 2791: if (len + 5 >= size) {
1.135 daniel 2792: size *= 2;
1.204 veillard 2793: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 2794: if (buf == NULL) {
1.241 veillard 2795: xmlGenericError(xmlGenericErrorContext,
2796: "realloc of %d byte failed\n", size);
1.140 daniel 2797: ctxt->instate = state;
1.135 daniel 2798: return;
2799: }
1.223 veillard 2800: }
2801: count++;
2802: if (count > 50) {
2803: GROW;
2804: count = 0;
1.135 daniel 2805: }
1.152 daniel 2806: COPY_BUF(l,buf,len,cur);
2807: NEXTL(l);
2808: cur = CUR_CHAR(l);
1.135 daniel 2809: if (cur == 0) {
2810: SHRINK;
2811: GROW;
1.152 daniel 2812: cur = CUR_CHAR(l);
1.135 daniel 2813: }
2814: }
2815: buf[len] = 0;
1.152 daniel 2816: if (cur != '?') {
1.230 veillard 2817: ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
1.72 daniel 2818: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2819: ctxt->sax->error(ctxt->userData,
1.72 daniel 2820: "xmlParsePI: PI %s never end ...\n", target);
2821: ctxt->wellFormed = 0;
1.180 daniel 2822: ctxt->disableSAX = 1;
1.22 daniel 2823: } else {
1.187 daniel 2824: if (input != ctxt->input) {
1.230 veillard 2825: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 2826: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2827: ctxt->sax->error(ctxt->userData,
2828: "PI declaration doesn't start and stop in the same entity\n");
2829: ctxt->wellFormed = 0;
2830: ctxt->disableSAX = 1;
2831: }
1.72 daniel 2832: SKIP(2);
1.44 daniel 2833:
1.72 daniel 2834: /*
2835: * SAX: PI detected.
2836: */
1.171 daniel 2837: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.72 daniel 2838: (ctxt->sax->processingInstruction != NULL))
1.99 daniel 2839: ctxt->sax->processingInstruction(ctxt->userData,
1.135 daniel 2840: target, buf);
1.22 daniel 2841: }
1.135 daniel 2842: xmlFree(buf);
1.119 daniel 2843: xmlFree(target);
1.3 veillard 2844: } else {
1.230 veillard 2845: ctxt->errNo = XML_ERR_PI_NOT_STARTED;
1.55 daniel 2846: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.99 daniel 2847: ctxt->sax->error(ctxt->userData,
2848: "xmlParsePI : no target name\n");
1.59 daniel 2849: ctxt->wellFormed = 0;
1.180 daniel 2850: ctxt->disableSAX = 1;
1.22 daniel 2851: }
1.140 daniel 2852: ctxt->instate = state;
1.22 daniel 2853: }
2854: }
2855:
1.50 daniel 2856: /**
2857: * xmlParseNotationDecl:
2858: * @ctxt: an XML parser context
2859: *
2860: * parse a notation declaration
1.22 daniel 2861: *
2862: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
2863: *
2864: * Hence there is actually 3 choices:
2865: * 'PUBLIC' S PubidLiteral
2866: * 'PUBLIC' S PubidLiteral S SystemLiteral
2867: * and 'SYSTEM' S SystemLiteral
1.50 daniel 2868: *
1.67 daniel 2869: * See the NOTE on xmlParseExternalID().
1.22 daniel 2870: */
2871:
1.55 daniel 2872: void
2873: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 2874: xmlChar *name;
2875: xmlChar *Pubid;
2876: xmlChar *Systemid;
1.22 daniel 2877:
1.152 daniel 2878: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 2879: (NXT(2) == 'N') && (NXT(3) == 'O') &&
2880: (NXT(4) == 'T') && (NXT(5) == 'A') &&
2881: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1.67 daniel 2882: (NXT(8) == 'O') && (NXT(9) == 'N')) {
1.187 daniel 2883: xmlParserInputPtr input = ctxt->input;
1.91 daniel 2884: SHRINK;
1.40 daniel 2885: SKIP(10);
1.67 daniel 2886: if (!IS_BLANK(CUR)) {
1.230 veillard 2887: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 2888: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2889: ctxt->sax->error(ctxt->userData,
2890: "Space required after '<!NOTATION'\n");
1.67 daniel 2891: ctxt->wellFormed = 0;
1.180 daniel 2892: ctxt->disableSAX = 1;
1.67 daniel 2893: return;
2894: }
2895: SKIP_BLANKS;
1.22 daniel 2896:
2897: name = xmlParseName(ctxt);
2898: if (name == NULL) {
1.230 veillard 2899: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.55 daniel 2900: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2901: ctxt->sax->error(ctxt->userData,
2902: "NOTATION: Name expected here\n");
1.67 daniel 2903: ctxt->wellFormed = 0;
1.180 daniel 2904: ctxt->disableSAX = 1;
1.67 daniel 2905: return;
2906: }
2907: if (!IS_BLANK(CUR)) {
1.230 veillard 2908: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 2909: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2910: ctxt->sax->error(ctxt->userData,
1.67 daniel 2911: "Space required after the NOTATION name'\n");
1.59 daniel 2912: ctxt->wellFormed = 0;
1.180 daniel 2913: ctxt->disableSAX = 1;
1.22 daniel 2914: return;
2915: }
1.42 daniel 2916: SKIP_BLANKS;
1.67 daniel 2917:
1.22 daniel 2918: /*
1.67 daniel 2919: * Parse the IDs.
1.22 daniel 2920: */
1.160 daniel 2921: Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
1.67 daniel 2922: SKIP_BLANKS;
2923:
1.152 daniel 2924: if (RAW == '>') {
1.187 daniel 2925: if (input != ctxt->input) {
1.230 veillard 2926: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 2927: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2928: ctxt->sax->error(ctxt->userData,
2929: "Notation declaration doesn't start and stop in the same entity\n");
2930: ctxt->wellFormed = 0;
2931: ctxt->disableSAX = 1;
2932: }
1.40 daniel 2933: NEXT;
1.171 daniel 2934: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
2935: (ctxt->sax->notationDecl != NULL))
1.74 daniel 2936: ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
1.67 daniel 2937: } else {
1.230 veillard 2938: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.67 daniel 2939: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2940: ctxt->sax->error(ctxt->userData,
1.67 daniel 2941: "'>' required to close NOTATION declaration\n");
2942: ctxt->wellFormed = 0;
1.180 daniel 2943: ctxt->disableSAX = 1;
1.67 daniel 2944: }
1.119 daniel 2945: xmlFree(name);
2946: if (Systemid != NULL) xmlFree(Systemid);
2947: if (Pubid != NULL) xmlFree(Pubid);
1.22 daniel 2948: }
2949: }
2950:
1.50 daniel 2951: /**
2952: * xmlParseEntityDecl:
2953: * @ctxt: an XML parser context
2954: *
2955: * parse <!ENTITY declarations
1.22 daniel 2956: *
2957: * [70] EntityDecl ::= GEDecl | PEDecl
2958: *
2959: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
2960: *
2961: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
2962: *
2963: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
2964: *
2965: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 2966: *
2967: * [76] NDataDecl ::= S 'NDATA' S Name
1.99 daniel 2968: *
2969: * [ VC: Notation Declared ]
1.116 daniel 2970: * The Name must match the declared name of a notation.
1.22 daniel 2971: */
2972:
1.55 daniel 2973: void
2974: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 2975: xmlChar *name = NULL;
2976: xmlChar *value = NULL;
2977: xmlChar *URI = NULL, *literal = NULL;
2978: xmlChar *ndata = NULL;
1.39 daniel 2979: int isParameter = 0;
1.123 daniel 2980: xmlChar *orig = NULL;
1.22 daniel 2981:
1.94 daniel 2982: GROW;
1.152 daniel 2983: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 2984: (NXT(2) == 'E') && (NXT(3) == 'N') &&
2985: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1.59 daniel 2986: (NXT(6) == 'T') && (NXT(7) == 'Y')) {
1.187 daniel 2987: xmlParserInputPtr input = ctxt->input;
1.96 daniel 2988: ctxt->instate = XML_PARSER_ENTITY_DECL;
1.91 daniel 2989: SHRINK;
1.40 daniel 2990: SKIP(8);
1.59 daniel 2991: if (!IS_BLANK(CUR)) {
1.230 veillard 2992: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 2993: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2994: ctxt->sax->error(ctxt->userData,
2995: "Space required after '<!ENTITY'\n");
1.59 daniel 2996: ctxt->wellFormed = 0;
1.180 daniel 2997: ctxt->disableSAX = 1;
1.59 daniel 2998: }
2999: SKIP_BLANKS;
1.40 daniel 3000:
1.152 daniel 3001: if (RAW == '%') {
1.40 daniel 3002: NEXT;
1.59 daniel 3003: if (!IS_BLANK(CUR)) {
1.230 veillard 3004: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3005: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3006: ctxt->sax->error(ctxt->userData,
3007: "Space required after '%'\n");
1.59 daniel 3008: ctxt->wellFormed = 0;
1.180 daniel 3009: ctxt->disableSAX = 1;
1.59 daniel 3010: }
1.42 daniel 3011: SKIP_BLANKS;
1.39 daniel 3012: isParameter = 1;
1.22 daniel 3013: }
3014:
3015: name = xmlParseName(ctxt);
1.24 daniel 3016: if (name == NULL) {
1.230 veillard 3017: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 3018: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3019: ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
1.59 daniel 3020: ctxt->wellFormed = 0;
1.180 daniel 3021: ctxt->disableSAX = 1;
1.24 daniel 3022: return;
3023: }
1.59 daniel 3024: if (!IS_BLANK(CUR)) {
1.230 veillard 3025: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3026: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3027: ctxt->sax->error(ctxt->userData,
1.59 daniel 3028: "Space required after the entity name\n");
3029: ctxt->wellFormed = 0;
1.180 daniel 3030: ctxt->disableSAX = 1;
1.59 daniel 3031: }
1.42 daniel 3032: SKIP_BLANKS;
1.24 daniel 3033:
1.22 daniel 3034: /*
1.68 daniel 3035: * handle the various case of definitions...
1.22 daniel 3036: */
1.39 daniel 3037: if (isParameter) {
1.225 veillard 3038: if ((RAW == '"') || (RAW == '\'')) {
1.78 daniel 3039: value = xmlParseEntityValue(ctxt, &orig);
1.39 daniel 3040: if (value) {
1.171 daniel 3041: if ((ctxt->sax != NULL) &&
3042: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3043: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3044: XML_INTERNAL_PARAMETER_ENTITY,
3045: NULL, NULL, value);
3046: }
1.225 veillard 3047: } else {
1.67 daniel 3048: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 3049: if ((URI == NULL) && (literal == NULL)) {
1.230 veillard 3050: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
1.169 daniel 3051: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3052: ctxt->sax->error(ctxt->userData,
3053: "Entity value required\n");
3054: ctxt->wellFormed = 0;
1.180 daniel 3055: ctxt->disableSAX = 1;
1.169 daniel 3056: }
1.39 daniel 3057: if (URI) {
1.193 daniel 3058: xmlURIPtr uri;
3059:
3060: uri = xmlParseURI((const char *) URI);
3061: if (uri == NULL) {
1.230 veillard 3062: ctxt->errNo = XML_ERR_INVALID_URI;
1.193 daniel 3063: if ((ctxt->sax != NULL) &&
3064: (!ctxt->disableSAX) &&
3065: (ctxt->sax->error != NULL))
3066: ctxt->sax->error(ctxt->userData,
3067: "Invalid URI: %s\n", URI);
3068: ctxt->wellFormed = 0;
3069: } else {
3070: if (uri->fragment != NULL) {
1.230 veillard 3071: ctxt->errNo = XML_ERR_URI_FRAGMENT;
1.193 daniel 3072: if ((ctxt->sax != NULL) &&
3073: (!ctxt->disableSAX) &&
3074: (ctxt->sax->error != NULL))
3075: ctxt->sax->error(ctxt->userData,
3076: "Fragment not allowed: %s\n", URI);
3077: ctxt->wellFormed = 0;
3078: } else {
3079: if ((ctxt->sax != NULL) &&
3080: (!ctxt->disableSAX) &&
3081: (ctxt->sax->entityDecl != NULL))
3082: ctxt->sax->entityDecl(ctxt->userData, name,
3083: XML_EXTERNAL_PARAMETER_ENTITY,
3084: literal, URI, NULL);
3085: }
3086: xmlFreeURI(uri);
3087: }
1.39 daniel 3088: }
1.24 daniel 3089: }
3090: } else {
1.152 daniel 3091: if ((RAW == '"') || (RAW == '\'')) {
1.78 daniel 3092: value = xmlParseEntityValue(ctxt, &orig);
1.171 daniel 3093: if ((ctxt->sax != NULL) &&
3094: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3095: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3096: XML_INTERNAL_GENERAL_ENTITY,
3097: NULL, NULL, value);
3098: } else {
1.67 daniel 3099: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 3100: if ((URI == NULL) && (literal == NULL)) {
1.230 veillard 3101: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
1.169 daniel 3102: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3103: ctxt->sax->error(ctxt->userData,
3104: "Entity value required\n");
3105: ctxt->wellFormed = 0;
1.180 daniel 3106: ctxt->disableSAX = 1;
1.169 daniel 3107: }
1.193 daniel 3108: if (URI) {
3109: xmlURIPtr uri;
3110:
3111: uri = xmlParseURI((const char *)URI);
3112: if (uri == NULL) {
1.230 veillard 3113: ctxt->errNo = XML_ERR_INVALID_URI;
1.193 daniel 3114: if ((ctxt->sax != NULL) &&
3115: (!ctxt->disableSAX) &&
3116: (ctxt->sax->error != NULL))
3117: ctxt->sax->error(ctxt->userData,
3118: "Invalid URI: %s\n", URI);
3119: ctxt->wellFormed = 0;
3120: } else {
3121: if (uri->fragment != NULL) {
1.230 veillard 3122: ctxt->errNo = XML_ERR_URI_FRAGMENT;
1.193 daniel 3123: if ((ctxt->sax != NULL) &&
3124: (!ctxt->disableSAX) &&
3125: (ctxt->sax->error != NULL))
3126: ctxt->sax->error(ctxt->userData,
3127: "Fragment not allowed: %s\n", URI);
3128: ctxt->wellFormed = 0;
3129: }
3130: xmlFreeURI(uri);
3131: }
3132: }
1.152 daniel 3133: if ((RAW != '>') && (!IS_BLANK(CUR))) {
1.230 veillard 3134: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3135: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3136: ctxt->sax->error(ctxt->userData,
1.59 daniel 3137: "Space required before 'NDATA'\n");
3138: ctxt->wellFormed = 0;
1.180 daniel 3139: ctxt->disableSAX = 1;
1.59 daniel 3140: }
1.42 daniel 3141: SKIP_BLANKS;
1.152 daniel 3142: if ((RAW == 'N') && (NXT(1) == 'D') &&
1.40 daniel 3143: (NXT(2) == 'A') && (NXT(3) == 'T') &&
3144: (NXT(4) == 'A')) {
3145: SKIP(5);
1.59 daniel 3146: if (!IS_BLANK(CUR)) {
1.230 veillard 3147: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3148: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3149: ctxt->sax->error(ctxt->userData,
1.59 daniel 3150: "Space required after 'NDATA'\n");
3151: ctxt->wellFormed = 0;
1.180 daniel 3152: ctxt->disableSAX = 1;
1.59 daniel 3153: }
1.42 daniel 3154: SKIP_BLANKS;
1.24 daniel 3155: ndata = xmlParseName(ctxt);
1.171 daniel 3156: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.116 daniel 3157: (ctxt->sax->unparsedEntityDecl != NULL))
3158: ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
1.39 daniel 3159: literal, URI, ndata);
3160: } else {
1.171 daniel 3161: if ((ctxt->sax != NULL) &&
3162: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3163: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3164: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3165: literal, URI, NULL);
1.24 daniel 3166: }
3167: }
3168: }
1.42 daniel 3169: SKIP_BLANKS;
1.152 daniel 3170: if (RAW != '>') {
1.230 veillard 3171: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.55 daniel 3172: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3173: ctxt->sax->error(ctxt->userData,
1.31 daniel 3174: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.59 daniel 3175: ctxt->wellFormed = 0;
1.180 daniel 3176: ctxt->disableSAX = 1;
1.187 daniel 3177: } else {
3178: if (input != ctxt->input) {
1.230 veillard 3179: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 3180: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3181: ctxt->sax->error(ctxt->userData,
3182: "Entity declaration doesn't start and stop in the same entity\n");
3183: ctxt->wellFormed = 0;
3184: ctxt->disableSAX = 1;
3185: }
1.40 daniel 3186: NEXT;
1.187 daniel 3187: }
1.78 daniel 3188: if (orig != NULL) {
3189: /*
1.98 daniel 3190: * Ugly mechanism to save the raw entity value.
1.78 daniel 3191: */
3192: xmlEntityPtr cur = NULL;
3193:
1.98 daniel 3194: if (isParameter) {
3195: if ((ctxt->sax != NULL) &&
3196: (ctxt->sax->getParameterEntity != NULL))
1.120 daniel 3197: cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.98 daniel 3198: } else {
3199: if ((ctxt->sax != NULL) &&
3200: (ctxt->sax->getEntity != NULL))
1.120 daniel 3201: cur = ctxt->sax->getEntity(ctxt->userData, name);
1.98 daniel 3202: }
3203: if (cur != NULL) {
3204: if (cur->orig != NULL)
1.119 daniel 3205: xmlFree(orig);
1.98 daniel 3206: else
3207: cur->orig = orig;
3208: } else
1.119 daniel 3209: xmlFree(orig);
1.78 daniel 3210: }
1.119 daniel 3211: if (name != NULL) xmlFree(name);
3212: if (value != NULL) xmlFree(value);
3213: if (URI != NULL) xmlFree(URI);
3214: if (literal != NULL) xmlFree(literal);
3215: if (ndata != NULL) xmlFree(ndata);
1.22 daniel 3216: }
3217: }
3218:
1.50 daniel 3219: /**
1.59 daniel 3220: * xmlParseDefaultDecl:
3221: * @ctxt: an XML parser context
3222: * @value: Receive a possible fixed default value for the attribute
3223: *
3224: * Parse an attribute default declaration
3225: *
3226: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3227: *
1.99 daniel 3228: * [ VC: Required Attribute ]
1.117 daniel 3229: * if the default declaration is the keyword #REQUIRED, then the
3230: * attribute must be specified for all elements of the type in the
3231: * attribute-list declaration.
1.99 daniel 3232: *
3233: * [ VC: Attribute Default Legal ]
1.102 daniel 3234: * The declared default value must meet the lexical constraints of
3235: * the declared attribute type c.f. xmlValidateAttributeDecl()
1.99 daniel 3236: *
3237: * [ VC: Fixed Attribute Default ]
1.117 daniel 3238: * if an attribute has a default value declared with the #FIXED
3239: * keyword, instances of that attribute must match the default value.
1.99 daniel 3240: *
3241: * [ WFC: No < in Attribute Values ]
3242: * handled in xmlParseAttValue()
3243: *
1.59 daniel 3244: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3245: * or XML_ATTRIBUTE_FIXED.
3246: */
3247:
3248: int
1.123 daniel 3249: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
1.59 daniel 3250: int val;
1.123 daniel 3251: xmlChar *ret;
1.59 daniel 3252:
3253: *value = NULL;
1.152 daniel 3254: if ((RAW == '#') && (NXT(1) == 'R') &&
1.59 daniel 3255: (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3256: (NXT(4) == 'U') && (NXT(5) == 'I') &&
3257: (NXT(6) == 'R') && (NXT(7) == 'E') &&
3258: (NXT(8) == 'D')) {
3259: SKIP(9);
3260: return(XML_ATTRIBUTE_REQUIRED);
3261: }
1.152 daniel 3262: if ((RAW == '#') && (NXT(1) == 'I') &&
1.59 daniel 3263: (NXT(2) == 'M') && (NXT(3) == 'P') &&
3264: (NXT(4) == 'L') && (NXT(5) == 'I') &&
3265: (NXT(6) == 'E') && (NXT(7) == 'D')) {
3266: SKIP(8);
3267: return(XML_ATTRIBUTE_IMPLIED);
3268: }
3269: val = XML_ATTRIBUTE_NONE;
1.152 daniel 3270: if ((RAW == '#') && (NXT(1) == 'F') &&
1.59 daniel 3271: (NXT(2) == 'I') && (NXT(3) == 'X') &&
3272: (NXT(4) == 'E') && (NXT(5) == 'D')) {
3273: SKIP(6);
3274: val = XML_ATTRIBUTE_FIXED;
3275: if (!IS_BLANK(CUR)) {
1.230 veillard 3276: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3277: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3278: ctxt->sax->error(ctxt->userData,
3279: "Space required after '#FIXED'\n");
1.59 daniel 3280: ctxt->wellFormed = 0;
1.180 daniel 3281: ctxt->disableSAX = 1;
1.59 daniel 3282: }
3283: SKIP_BLANKS;
3284: }
3285: ret = xmlParseAttValue(ctxt);
1.96 daniel 3286: ctxt->instate = XML_PARSER_DTD;
1.59 daniel 3287: if (ret == NULL) {
3288: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3289: ctxt->sax->error(ctxt->userData,
1.59 daniel 3290: "Attribute default value declaration error\n");
3291: ctxt->wellFormed = 0;
1.180 daniel 3292: ctxt->disableSAX = 1;
1.59 daniel 3293: } else
3294: *value = ret;
3295: return(val);
3296: }
3297:
3298: /**
1.66 daniel 3299: * xmlParseNotationType:
3300: * @ctxt: an XML parser context
3301: *
3302: * parse an Notation attribute type.
3303: *
1.99 daniel 3304: * Note: the leading 'NOTATION' S part has already being parsed...
3305: *
1.66 daniel 3306: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3307: *
1.99 daniel 3308: * [ VC: Notation Attributes ]
1.117 daniel 3309: * Values of this type must match one of the notation names included
1.99 daniel 3310: * in the declaration; all notation names in the declaration must be declared.
1.66 daniel 3311: *
3312: * Returns: the notation attribute tree built while parsing
3313: */
3314:
3315: xmlEnumerationPtr
3316: xmlParseNotationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 3317: xmlChar *name;
1.66 daniel 3318: xmlEnumerationPtr ret = NULL, last = NULL, cur;
3319:
1.152 daniel 3320: if (RAW != '(') {
1.230 veillard 3321: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.66 daniel 3322: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3323: ctxt->sax->error(ctxt->userData,
3324: "'(' required to start 'NOTATION'\n");
1.66 daniel 3325: ctxt->wellFormed = 0;
1.180 daniel 3326: ctxt->disableSAX = 1;
1.66 daniel 3327: return(NULL);
3328: }
1.91 daniel 3329: SHRINK;
1.66 daniel 3330: do {
3331: NEXT;
3332: SKIP_BLANKS;
3333: name = xmlParseName(ctxt);
3334: if (name == NULL) {
1.230 veillard 3335: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.66 daniel 3336: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3337: ctxt->sax->error(ctxt->userData,
1.66 daniel 3338: "Name expected in NOTATION declaration\n");
3339: ctxt->wellFormed = 0;
1.180 daniel 3340: ctxt->disableSAX = 1;
1.66 daniel 3341: return(ret);
3342: }
3343: cur = xmlCreateEnumeration(name);
1.119 daniel 3344: xmlFree(name);
1.66 daniel 3345: if (cur == NULL) return(ret);
3346: if (last == NULL) ret = last = cur;
3347: else {
3348: last->next = cur;
3349: last = cur;
3350: }
3351: SKIP_BLANKS;
1.152 daniel 3352: } while (RAW == '|');
3353: if (RAW != ')') {
1.230 veillard 3354: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.66 daniel 3355: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3356: ctxt->sax->error(ctxt->userData,
1.66 daniel 3357: "')' required to finish NOTATION declaration\n");
3358: ctxt->wellFormed = 0;
1.180 daniel 3359: ctxt->disableSAX = 1;
1.170 daniel 3360: if ((last != NULL) && (last != ret))
3361: xmlFreeEnumeration(last);
1.66 daniel 3362: return(ret);
3363: }
3364: NEXT;
3365: return(ret);
3366: }
3367:
3368: /**
3369: * xmlParseEnumerationType:
3370: * @ctxt: an XML parser context
3371: *
3372: * parse an Enumeration attribute type.
3373: *
3374: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3375: *
1.99 daniel 3376: * [ VC: Enumeration ]
1.117 daniel 3377: * Values of this type must match one of the Nmtoken tokens in
1.99 daniel 3378: * the declaration
3379: *
1.66 daniel 3380: * Returns: the enumeration attribute tree built while parsing
3381: */
3382:
3383: xmlEnumerationPtr
3384: xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 3385: xmlChar *name;
1.66 daniel 3386: xmlEnumerationPtr ret = NULL, last = NULL, cur;
3387:
1.152 daniel 3388: if (RAW != '(') {
1.230 veillard 3389: ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
1.66 daniel 3390: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3391: ctxt->sax->error(ctxt->userData,
1.66 daniel 3392: "'(' required to start ATTLIST enumeration\n");
3393: ctxt->wellFormed = 0;
1.180 daniel 3394: ctxt->disableSAX = 1;
1.66 daniel 3395: return(NULL);
3396: }
1.91 daniel 3397: SHRINK;
1.66 daniel 3398: do {
3399: NEXT;
3400: SKIP_BLANKS;
3401: name = xmlParseNmtoken(ctxt);
3402: if (name == NULL) {
1.230 veillard 3403: ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
1.66 daniel 3404: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3405: ctxt->sax->error(ctxt->userData,
1.66 daniel 3406: "NmToken expected in ATTLIST enumeration\n");
3407: ctxt->wellFormed = 0;
1.180 daniel 3408: ctxt->disableSAX = 1;
1.66 daniel 3409: return(ret);
3410: }
3411: cur = xmlCreateEnumeration(name);
1.119 daniel 3412: xmlFree(name);
1.66 daniel 3413: if (cur == NULL) return(ret);
3414: if (last == NULL) ret = last = cur;
3415: else {
3416: last->next = cur;
3417: last = cur;
3418: }
3419: SKIP_BLANKS;
1.152 daniel 3420: } while (RAW == '|');
3421: if (RAW != ')') {
1.230 veillard 3422: ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
1.66 daniel 3423: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3424: ctxt->sax->error(ctxt->userData,
1.66 daniel 3425: "')' required to finish ATTLIST enumeration\n");
3426: ctxt->wellFormed = 0;
1.180 daniel 3427: ctxt->disableSAX = 1;
1.66 daniel 3428: return(ret);
3429: }
3430: NEXT;
3431: return(ret);
3432: }
3433:
3434: /**
1.50 daniel 3435: * xmlParseEnumeratedType:
3436: * @ctxt: an XML parser context
1.66 daniel 3437: * @tree: the enumeration tree built while parsing
1.50 daniel 3438: *
1.66 daniel 3439: * parse an Enumerated attribute type.
1.22 daniel 3440: *
3441: * [57] EnumeratedType ::= NotationType | Enumeration
3442: *
3443: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3444: *
1.50 daniel 3445: *
1.66 daniel 3446: * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
1.22 daniel 3447: */
3448:
1.66 daniel 3449: int
3450: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.152 daniel 3451: if ((RAW == 'N') && (NXT(1) == 'O') &&
1.66 daniel 3452: (NXT(2) == 'T') && (NXT(3) == 'A') &&
3453: (NXT(4) == 'T') && (NXT(5) == 'I') &&
3454: (NXT(6) == 'O') && (NXT(7) == 'N')) {
3455: SKIP(8);
3456: if (!IS_BLANK(CUR)) {
1.230 veillard 3457: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.66 daniel 3458: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3459: ctxt->sax->error(ctxt->userData,
3460: "Space required after 'NOTATION'\n");
1.66 daniel 3461: ctxt->wellFormed = 0;
1.180 daniel 3462: ctxt->disableSAX = 1;
1.66 daniel 3463: return(0);
3464: }
3465: SKIP_BLANKS;
3466: *tree = xmlParseNotationType(ctxt);
3467: if (*tree == NULL) return(0);
3468: return(XML_ATTRIBUTE_NOTATION);
3469: }
3470: *tree = xmlParseEnumerationType(ctxt);
3471: if (*tree == NULL) return(0);
3472: return(XML_ATTRIBUTE_ENUMERATION);
1.22 daniel 3473: }
3474:
1.50 daniel 3475: /**
3476: * xmlParseAttributeType:
3477: * @ctxt: an XML parser context
1.66 daniel 3478: * @tree: the enumeration tree built while parsing
1.50 daniel 3479: *
1.59 daniel 3480: * parse the Attribute list def for an element
1.22 daniel 3481: *
3482: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3483: *
3484: * [55] StringType ::= 'CDATA'
3485: *
3486: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3487: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1.50 daniel 3488: *
1.102 daniel 3489: * Validity constraints for attribute values syntax are checked in
3490: * xmlValidateAttributeValue()
3491: *
1.99 daniel 3492: * [ VC: ID ]
1.117 daniel 3493: * Values of type ID must match the Name production. A name must not
1.99 daniel 3494: * appear more than once in an XML document as a value of this type;
3495: * i.e., ID values must uniquely identify the elements which bear them.
3496: *
3497: * [ VC: One ID per Element Type ]
1.117 daniel 3498: * No element type may have more than one ID attribute specified.
1.99 daniel 3499: *
3500: * [ VC: ID Attribute Default ]
1.117 daniel 3501: * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
1.99 daniel 3502: *
3503: * [ VC: IDREF ]
1.102 daniel 3504: * Values of type IDREF must match the Name production, and values
1.140 daniel 3505: * of type IDREFS must match Names; each IDREF Name must match the value
1.117 daniel 3506: * of an ID attribute on some element in the XML document; i.e. IDREF
1.99 daniel 3507: * values must match the value of some ID attribute.
3508: *
3509: * [ VC: Entity Name ]
1.102 daniel 3510: * Values of type ENTITY must match the Name production, values
1.140 daniel 3511: * of type ENTITIES must match Names; each Entity Name must match the
1.117 daniel 3512: * name of an unparsed entity declared in the DTD.
1.99 daniel 3513: *
3514: * [ VC: Name Token ]
1.102 daniel 3515: * Values of type NMTOKEN must match the Nmtoken production; values
1.99 daniel 3516: * of type NMTOKENS must match Nmtokens.
3517: *
1.69 daniel 3518: * Returns the attribute type
1.22 daniel 3519: */
1.59 daniel 3520: int
1.66 daniel 3521: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.91 daniel 3522: SHRINK;
1.152 daniel 3523: if ((RAW == 'C') && (NXT(1) == 'D') &&
1.40 daniel 3524: (NXT(2) == 'A') && (NXT(3) == 'T') &&
3525: (NXT(4) == 'A')) {
3526: SKIP(5);
1.66 daniel 3527: return(XML_ATTRIBUTE_CDATA);
1.152 daniel 3528: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.40 daniel 3529: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.97 daniel 3530: (NXT(4) == 'F') && (NXT(5) == 'S')) {
3531: SKIP(6);
3532: return(XML_ATTRIBUTE_IDREFS);
1.152 daniel 3533: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.97 daniel 3534: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.40 daniel 3535: (NXT(4) == 'F')) {
3536: SKIP(5);
1.59 daniel 3537: return(XML_ATTRIBUTE_IDREF);
1.152 daniel 3538: } else if ((RAW == 'I') && (NXT(1) == 'D')) {
1.66 daniel 3539: SKIP(2);
3540: return(XML_ATTRIBUTE_ID);
1.152 daniel 3541: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 3542: (NXT(2) == 'T') && (NXT(3) == 'I') &&
3543: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3544: SKIP(6);
1.59 daniel 3545: return(XML_ATTRIBUTE_ENTITY);
1.152 daniel 3546: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 3547: (NXT(2) == 'T') && (NXT(3) == 'I') &&
3548: (NXT(4) == 'T') && (NXT(5) == 'I') &&
3549: (NXT(6) == 'E') && (NXT(7) == 'S')) {
3550: SKIP(8);
1.59 daniel 3551: return(XML_ATTRIBUTE_ENTITIES);
1.152 daniel 3552: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.40 daniel 3553: (NXT(2) == 'T') && (NXT(3) == 'O') &&
3554: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.66 daniel 3555: (NXT(6) == 'N') && (NXT(7) == 'S')) {
3556: SKIP(8);
3557: return(XML_ATTRIBUTE_NMTOKENS);
1.152 daniel 3558: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.66 daniel 3559: (NXT(2) == 'T') && (NXT(3) == 'O') &&
3560: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.40 daniel 3561: (NXT(6) == 'N')) {
3562: SKIP(7);
1.59 daniel 3563: return(XML_ATTRIBUTE_NMTOKEN);
1.22 daniel 3564: }
1.66 daniel 3565: return(xmlParseEnumeratedType(ctxt, tree));
1.22 daniel 3566: }
3567:
1.50 daniel 3568: /**
3569: * xmlParseAttributeListDecl:
3570: * @ctxt: an XML parser context
3571: *
3572: * Parse the attribute list declaration for an element
1.22 daniel 3573: *
3574: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3575: *
3576: * [53] AttDef ::= S Name S AttType S DefaultDecl
1.50 daniel 3577: *
1.22 daniel 3578: */
1.55 daniel 3579: void
3580: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 3581: xmlChar *elemName;
3582: xmlChar *attrName;
1.103 daniel 3583: xmlEnumerationPtr tree;
1.22 daniel 3584:
1.152 daniel 3585: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 3586: (NXT(2) == 'A') && (NXT(3) == 'T') &&
3587: (NXT(4) == 'T') && (NXT(5) == 'L') &&
3588: (NXT(6) == 'I') && (NXT(7) == 'S') &&
1.59 daniel 3589: (NXT(8) == 'T')) {
1.187 daniel 3590: xmlParserInputPtr input = ctxt->input;
3591:
1.40 daniel 3592: SKIP(9);
1.59 daniel 3593: if (!IS_BLANK(CUR)) {
1.230 veillard 3594: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3595: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3596: ctxt->sax->error(ctxt->userData,
3597: "Space required after '<!ATTLIST'\n");
1.59 daniel 3598: ctxt->wellFormed = 0;
1.180 daniel 3599: ctxt->disableSAX = 1;
1.59 daniel 3600: }
1.42 daniel 3601: SKIP_BLANKS;
1.59 daniel 3602: elemName = xmlParseName(ctxt);
3603: if (elemName == NULL) {
1.230 veillard 3604: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 3605: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3606: ctxt->sax->error(ctxt->userData,
3607: "ATTLIST: no name for Element\n");
1.59 daniel 3608: ctxt->wellFormed = 0;
1.180 daniel 3609: ctxt->disableSAX = 1;
1.22 daniel 3610: return;
3611: }
1.42 daniel 3612: SKIP_BLANKS;
1.220 veillard 3613: GROW;
1.152 daniel 3614: while (RAW != '>') {
1.123 daniel 3615: const xmlChar *check = CUR_PTR;
1.59 daniel 3616: int type;
3617: int def;
1.123 daniel 3618: xmlChar *defaultValue = NULL;
1.59 daniel 3619:
1.220 veillard 3620: GROW;
1.103 daniel 3621: tree = NULL;
1.59 daniel 3622: attrName = xmlParseName(ctxt);
3623: if (attrName == NULL) {
1.230 veillard 3624: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 3625: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3626: ctxt->sax->error(ctxt->userData,
3627: "ATTLIST: no name for Attribute\n");
1.59 daniel 3628: ctxt->wellFormed = 0;
1.180 daniel 3629: ctxt->disableSAX = 1;
1.59 daniel 3630: break;
3631: }
1.97 daniel 3632: GROW;
1.59 daniel 3633: if (!IS_BLANK(CUR)) {
1.230 veillard 3634: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3635: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3636: ctxt->sax->error(ctxt->userData,
1.59 daniel 3637: "Space required after the attribute name\n");
3638: ctxt->wellFormed = 0;
1.180 daniel 3639: ctxt->disableSAX = 1;
1.170 daniel 3640: if (attrName != NULL)
3641: xmlFree(attrName);
3642: if (defaultValue != NULL)
3643: xmlFree(defaultValue);
1.59 daniel 3644: break;
3645: }
3646: SKIP_BLANKS;
3647:
1.66 daniel 3648: type = xmlParseAttributeType(ctxt, &tree);
1.170 daniel 3649: if (type <= 0) {
3650: if (attrName != NULL)
3651: xmlFree(attrName);
3652: if (defaultValue != NULL)
3653: xmlFree(defaultValue);
3654: break;
3655: }
1.22 daniel 3656:
1.97 daniel 3657: GROW;
1.59 daniel 3658: if (!IS_BLANK(CUR)) {
1.230 veillard 3659: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3660: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3661: ctxt->sax->error(ctxt->userData,
1.59 daniel 3662: "Space required after the attribute type\n");
3663: ctxt->wellFormed = 0;
1.180 daniel 3664: ctxt->disableSAX = 1;
1.170 daniel 3665: if (attrName != NULL)
3666: xmlFree(attrName);
3667: if (defaultValue != NULL)
3668: xmlFree(defaultValue);
3669: if (tree != NULL)
3670: xmlFreeEnumeration(tree);
1.59 daniel 3671: break;
3672: }
1.42 daniel 3673: SKIP_BLANKS;
1.59 daniel 3674:
3675: def = xmlParseDefaultDecl(ctxt, &defaultValue);
1.170 daniel 3676: if (def <= 0) {
3677: if (attrName != NULL)
3678: xmlFree(attrName);
3679: if (defaultValue != NULL)
3680: xmlFree(defaultValue);
3681: if (tree != NULL)
3682: xmlFreeEnumeration(tree);
3683: break;
3684: }
1.59 daniel 3685:
1.97 daniel 3686: GROW;
1.152 daniel 3687: if (RAW != '>') {
1.59 daniel 3688: if (!IS_BLANK(CUR)) {
1.230 veillard 3689: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3690: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3691: ctxt->sax->error(ctxt->userData,
1.59 daniel 3692: "Space required after the attribute default value\n");
3693: ctxt->wellFormed = 0;
1.180 daniel 3694: ctxt->disableSAX = 1;
1.170 daniel 3695: if (attrName != NULL)
3696: xmlFree(attrName);
3697: if (defaultValue != NULL)
3698: xmlFree(defaultValue);
3699: if (tree != NULL)
3700: xmlFreeEnumeration(tree);
1.59 daniel 3701: break;
3702: }
3703: SKIP_BLANKS;
3704: }
1.40 daniel 3705: if (check == CUR_PTR) {
1.230 veillard 3706: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 3707: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3708: ctxt->sax->error(ctxt->userData,
1.59 daniel 3709: "xmlParseAttributeListDecl: detected internal error\n");
1.170 daniel 3710: if (attrName != NULL)
3711: xmlFree(attrName);
3712: if (defaultValue != NULL)
3713: xmlFree(defaultValue);
3714: if (tree != NULL)
3715: xmlFreeEnumeration(tree);
1.22 daniel 3716: break;
3717: }
1.171 daniel 3718: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3719: (ctxt->sax->attributeDecl != NULL))
1.74 daniel 3720: ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
1.66 daniel 3721: type, def, defaultValue, tree);
1.59 daniel 3722: if (attrName != NULL)
1.119 daniel 3723: xmlFree(attrName);
1.59 daniel 3724: if (defaultValue != NULL)
1.119 daniel 3725: xmlFree(defaultValue);
1.97 daniel 3726: GROW;
1.22 daniel 3727: }
1.187 daniel 3728: if (RAW == '>') {
3729: if (input != ctxt->input) {
1.230 veillard 3730: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 3731: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3732: ctxt->sax->error(ctxt->userData,
3733: "Attribute list declaration doesn't start and stop in the same entity\n");
3734: ctxt->wellFormed = 0;
3735: ctxt->disableSAX = 1;
3736: }
1.40 daniel 3737: NEXT;
1.187 daniel 3738: }
1.22 daniel 3739:
1.119 daniel 3740: xmlFree(elemName);
1.22 daniel 3741: }
3742: }
3743:
1.50 daniel 3744: /**
1.61 daniel 3745: * xmlParseElementMixedContentDecl:
3746: * @ctxt: an XML parser context
3747: *
3748: * parse the declaration for a Mixed Element content
3749: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3750: *
3751: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
3752: * '(' S? '#PCDATA' S? ')'
3753: *
1.99 daniel 3754: * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
3755: *
3756: * [ VC: No Duplicate Types ]
1.117 daniel 3757: * The same name must not appear more than once in a single
3758: * mixed-content declaration.
1.99 daniel 3759: *
1.61 daniel 3760: * returns: the list of the xmlElementContentPtr describing the element choices
3761: */
3762: xmlElementContentPtr
1.62 daniel 3763: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
1.64 daniel 3764: xmlElementContentPtr ret = NULL, cur = NULL, n;
1.123 daniel 3765: xmlChar *elem = NULL;
1.61 daniel 3766:
1.97 daniel 3767: GROW;
1.152 daniel 3768: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 3769: (NXT(2) == 'C') && (NXT(3) == 'D') &&
3770: (NXT(4) == 'A') && (NXT(5) == 'T') &&
3771: (NXT(6) == 'A')) {
3772: SKIP(7);
3773: SKIP_BLANKS;
1.91 daniel 3774: SHRINK;
1.152 daniel 3775: if (RAW == ')') {
1.187 daniel 3776: ctxt->entity = ctxt->input;
1.63 daniel 3777: NEXT;
3778: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
1.152 daniel 3779: if (RAW == '*') {
1.136 daniel 3780: ret->ocur = XML_ELEMENT_CONTENT_MULT;
3781: NEXT;
3782: }
1.63 daniel 3783: return(ret);
3784: }
1.152 daniel 3785: if ((RAW == '(') || (RAW == '|')) {
1.61 daniel 3786: ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3787: if (ret == NULL) return(NULL);
1.99 daniel 3788: }
1.152 daniel 3789: while (RAW == '|') {
1.64 daniel 3790: NEXT;
1.61 daniel 3791: if (elem == NULL) {
3792: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3793: if (ret == NULL) return(NULL);
3794: ret->c1 = cur;
1.64 daniel 3795: cur = ret;
1.61 daniel 3796: } else {
1.64 daniel 3797: n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3798: if (n == NULL) return(NULL);
3799: n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
3800: cur->c2 = n;
3801: cur = n;
1.119 daniel 3802: xmlFree(elem);
1.61 daniel 3803: }
3804: SKIP_BLANKS;
3805: elem = xmlParseName(ctxt);
3806: if (elem == NULL) {
1.230 veillard 3807: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.61 daniel 3808: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3809: ctxt->sax->error(ctxt->userData,
1.61 daniel 3810: "xmlParseElementMixedContentDecl : Name expected\n");
3811: ctxt->wellFormed = 0;
1.180 daniel 3812: ctxt->disableSAX = 1;
1.61 daniel 3813: xmlFreeElementContent(cur);
3814: return(NULL);
3815: }
3816: SKIP_BLANKS;
1.97 daniel 3817: GROW;
1.61 daniel 3818: }
1.152 daniel 3819: if ((RAW == ')') && (NXT(1) == '*')) {
1.66 daniel 3820: if (elem != NULL) {
1.61 daniel 3821: cur->c2 = xmlNewElementContent(elem,
3822: XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 3823: xmlFree(elem);
1.66 daniel 3824: }
1.65 daniel 3825: ret->ocur = XML_ELEMENT_CONTENT_MULT;
1.187 daniel 3826: ctxt->entity = ctxt->input;
1.64 daniel 3827: SKIP(2);
1.61 daniel 3828: } else {
1.119 daniel 3829: if (elem != NULL) xmlFree(elem);
1.230 veillard 3830: xmlFreeElementContent(ret);
3831: ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
1.61 daniel 3832: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3833: ctxt->sax->error(ctxt->userData,
1.63 daniel 3834: "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
1.61 daniel 3835: ctxt->wellFormed = 0;
1.180 daniel 3836: ctxt->disableSAX = 1;
1.61 daniel 3837: return(NULL);
3838: }
3839:
3840: } else {
1.230 veillard 3841: ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
1.61 daniel 3842: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3843: ctxt->sax->error(ctxt->userData,
1.61 daniel 3844: "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
3845: ctxt->wellFormed = 0;
1.180 daniel 3846: ctxt->disableSAX = 1;
1.61 daniel 3847: }
3848: return(ret);
3849: }
3850:
3851: /**
3852: * xmlParseElementChildrenContentDecl:
1.50 daniel 3853: * @ctxt: an XML parser context
3854: *
1.61 daniel 3855: * parse the declaration for a Mixed Element content
3856: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
1.22 daniel 3857: *
1.61 daniel 3858: *
1.22 daniel 3859: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
3860: *
3861: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
3862: *
3863: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
3864: *
3865: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
3866: *
1.99 daniel 3867: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
3868: * TODO Parameter-entity replacement text must be properly nested
3869: * with parenthetized groups. That is to say, if either of the
3870: * opening or closing parentheses in a choice, seq, or Mixed
3871: * construct is contained in the replacement text for a parameter
3872: * entity, both must be contained in the same replacement text. For
3873: * interoperability, if a parameter-entity reference appears in a
3874: * choice, seq, or Mixed construct, its replacement text should not
3875: * be empty, and neither the first nor last non-blank character of
3876: * the replacement text should be a connector (| or ,).
3877: *
1.62 daniel 3878: * returns: the tree of xmlElementContentPtr describing the element
1.61 daniel 3879: * hierarchy.
3880: */
3881: xmlElementContentPtr
1.62 daniel 3882: xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
1.63 daniel 3883: xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
1.123 daniel 3884: xmlChar *elem;
3885: xmlChar type = 0;
1.62 daniel 3886:
3887: SKIP_BLANKS;
1.94 daniel 3888: GROW;
1.152 daniel 3889: if (RAW == '(') {
1.63 daniel 3890: /* Recurse on first child */
1.62 daniel 3891: NEXT;
3892: SKIP_BLANKS;
3893: cur = ret = xmlParseElementChildrenContentDecl(ctxt);
3894: SKIP_BLANKS;
1.101 daniel 3895: GROW;
1.62 daniel 3896: } else {
3897: elem = xmlParseName(ctxt);
3898: if (elem == NULL) {
1.230 veillard 3899: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 3900: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3901: ctxt->sax->error(ctxt->userData,
1.62 daniel 3902: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
3903: ctxt->wellFormed = 0;
1.180 daniel 3904: ctxt->disableSAX = 1;
1.62 daniel 3905: return(NULL);
3906: }
3907: cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.101 daniel 3908: GROW;
1.152 daniel 3909: if (RAW == '?') {
1.104 daniel 3910: cur->ocur = XML_ELEMENT_CONTENT_OPT;
1.62 daniel 3911: NEXT;
1.152 daniel 3912: } else if (RAW == '*') {
1.104 daniel 3913: cur->ocur = XML_ELEMENT_CONTENT_MULT;
1.62 daniel 3914: NEXT;
1.152 daniel 3915: } else if (RAW == '+') {
1.104 daniel 3916: cur->ocur = XML_ELEMENT_CONTENT_PLUS;
1.62 daniel 3917: NEXT;
3918: } else {
1.104 daniel 3919: cur->ocur = XML_ELEMENT_CONTENT_ONCE;
1.62 daniel 3920: }
1.119 daniel 3921: xmlFree(elem);
1.101 daniel 3922: GROW;
1.62 daniel 3923: }
3924: SKIP_BLANKS;
1.91 daniel 3925: SHRINK;
1.152 daniel 3926: while (RAW != ')') {
1.63 daniel 3927: /*
3928: * Each loop we parse one separator and one element.
3929: */
1.152 daniel 3930: if (RAW == ',') {
1.62 daniel 3931: if (type == 0) type = CUR;
3932:
3933: /*
3934: * Detect "Name | Name , Name" error
3935: */
3936: else if (type != CUR) {
1.230 veillard 3937: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 3938: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3939: ctxt->sax->error(ctxt->userData,
1.62 daniel 3940: "xmlParseElementChildrenContentDecl : '%c' expected\n",
3941: type);
3942: ctxt->wellFormed = 0;
1.180 daniel 3943: ctxt->disableSAX = 1;
1.170 daniel 3944: if ((op != NULL) && (op != ret))
3945: xmlFreeElementContent(op);
1.211 veillard 3946: if ((last != NULL) && (last != ret) &&
3947: (last != ret->c1) && (last != ret->c2))
1.170 daniel 3948: xmlFreeElementContent(last);
3949: if (ret != NULL)
3950: xmlFreeElementContent(ret);
1.62 daniel 3951: return(NULL);
3952: }
1.64 daniel 3953: NEXT;
1.62 daniel 3954:
1.63 daniel 3955: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
3956: if (op == NULL) {
3957: xmlFreeElementContent(ret);
3958: return(NULL);
3959: }
3960: if (last == NULL) {
3961: op->c1 = ret;
1.65 daniel 3962: ret = cur = op;
1.63 daniel 3963: } else {
3964: cur->c2 = op;
3965: op->c1 = last;
3966: cur =op;
1.65 daniel 3967: last = NULL;
1.63 daniel 3968: }
1.152 daniel 3969: } else if (RAW == '|') {
1.62 daniel 3970: if (type == 0) type = CUR;
3971:
3972: /*
1.63 daniel 3973: * Detect "Name , Name | Name" error
1.62 daniel 3974: */
3975: else if (type != CUR) {
1.230 veillard 3976: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 3977: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3978: ctxt->sax->error(ctxt->userData,
1.62 daniel 3979: "xmlParseElementChildrenContentDecl : '%c' expected\n",
3980: type);
3981: ctxt->wellFormed = 0;
1.180 daniel 3982: ctxt->disableSAX = 1;
1.211 veillard 3983: if ((op != NULL) && (op != ret) && (op != last))
1.170 daniel 3984: xmlFreeElementContent(op);
1.211 veillard 3985: if ((last != NULL) && (last != ret) &&
3986: (last != ret->c1) && (last != ret->c2))
1.170 daniel 3987: xmlFreeElementContent(last);
3988: if (ret != NULL)
3989: xmlFreeElementContent(ret);
1.62 daniel 3990: return(NULL);
3991: }
1.64 daniel 3992: NEXT;
1.62 daniel 3993:
1.63 daniel 3994: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3995: if (op == NULL) {
1.170 daniel 3996: if ((op != NULL) && (op != ret))
3997: xmlFreeElementContent(op);
1.211 veillard 3998: if ((last != NULL) && (last != ret) &&
3999: (last != ret->c1) && (last != ret->c2))
1.170 daniel 4000: xmlFreeElementContent(last);
4001: if (ret != NULL)
4002: xmlFreeElementContent(ret);
1.63 daniel 4003: return(NULL);
4004: }
4005: if (last == NULL) {
4006: op->c1 = ret;
1.65 daniel 4007: ret = cur = op;
1.63 daniel 4008: } else {
4009: cur->c2 = op;
4010: op->c1 = last;
4011: cur =op;
1.65 daniel 4012: last = NULL;
1.63 daniel 4013: }
1.62 daniel 4014: } else {
1.230 veillard 4015: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
1.62 daniel 4016: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4017: ctxt->sax->error(ctxt->userData,
1.62 daniel 4018: "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4019: ctxt->wellFormed = 0;
1.180 daniel 4020: ctxt->disableSAX = 1;
1.170 daniel 4021: if ((op != NULL) && (op != ret))
4022: xmlFreeElementContent(op);
1.211 veillard 4023: if ((last != NULL) && (last != ret) &&
4024: (last != ret->c1) && (last != ret->c2))
1.170 daniel 4025: xmlFreeElementContent(last);
4026: if (ret != NULL)
4027: xmlFreeElementContent(ret);
1.62 daniel 4028: return(NULL);
4029: }
1.101 daniel 4030: GROW;
1.62 daniel 4031: SKIP_BLANKS;
1.101 daniel 4032: GROW;
1.152 daniel 4033: if (RAW == '(') {
1.63 daniel 4034: /* Recurse on second child */
1.62 daniel 4035: NEXT;
4036: SKIP_BLANKS;
1.65 daniel 4037: last = xmlParseElementChildrenContentDecl(ctxt);
1.62 daniel 4038: SKIP_BLANKS;
4039: } else {
4040: elem = xmlParseName(ctxt);
4041: if (elem == NULL) {
1.230 veillard 4042: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 4043: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4044: ctxt->sax->error(ctxt->userData,
1.122 daniel 4045: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.62 daniel 4046: ctxt->wellFormed = 0;
1.180 daniel 4047: ctxt->disableSAX = 1;
1.170 daniel 4048: if ((op != NULL) && (op != ret))
4049: xmlFreeElementContent(op);
1.211 veillard 4050: if ((last != NULL) && (last != ret) &&
4051: (last != ret->c1) && (last != ret->c2))
1.170 daniel 4052: xmlFreeElementContent(last);
4053: if (ret != NULL)
4054: xmlFreeElementContent(ret);
1.62 daniel 4055: return(NULL);
4056: }
1.65 daniel 4057: last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 4058: xmlFree(elem);
1.152 daniel 4059: if (RAW == '?') {
1.105 daniel 4060: last->ocur = XML_ELEMENT_CONTENT_OPT;
4061: NEXT;
1.152 daniel 4062: } else if (RAW == '*') {
1.105 daniel 4063: last->ocur = XML_ELEMENT_CONTENT_MULT;
4064: NEXT;
1.152 daniel 4065: } else if (RAW == '+') {
1.105 daniel 4066: last->ocur = XML_ELEMENT_CONTENT_PLUS;
4067: NEXT;
4068: } else {
4069: last->ocur = XML_ELEMENT_CONTENT_ONCE;
4070: }
1.63 daniel 4071: }
4072: SKIP_BLANKS;
1.97 daniel 4073: GROW;
1.64 daniel 4074: }
1.65 daniel 4075: if ((cur != NULL) && (last != NULL)) {
4076: cur->c2 = last;
1.62 daniel 4077: }
1.187 daniel 4078: ctxt->entity = ctxt->input;
1.62 daniel 4079: NEXT;
1.152 daniel 4080: if (RAW == '?') {
1.62 daniel 4081: ret->ocur = XML_ELEMENT_CONTENT_OPT;
4082: NEXT;
1.152 daniel 4083: } else if (RAW == '*') {
1.62 daniel 4084: ret->ocur = XML_ELEMENT_CONTENT_MULT;
4085: NEXT;
1.152 daniel 4086: } else if (RAW == '+') {
1.62 daniel 4087: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
4088: NEXT;
4089: }
4090: return(ret);
1.61 daniel 4091: }
4092:
4093: /**
4094: * xmlParseElementContentDecl:
4095: * @ctxt: an XML parser context
4096: * @name: the name of the element being defined.
4097: * @result: the Element Content pointer will be stored here if any
1.22 daniel 4098: *
1.61 daniel 4099: * parse the declaration for an Element content either Mixed or Children,
4100: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4101: *
4102: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1.50 daniel 4103: *
1.61 daniel 4104: * returns: the type of element content XML_ELEMENT_TYPE_xxx
1.22 daniel 4105: */
4106:
1.61 daniel 4107: int
1.123 daniel 4108: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
1.61 daniel 4109: xmlElementContentPtr *result) {
4110:
4111: xmlElementContentPtr tree = NULL;
1.187 daniel 4112: xmlParserInputPtr input = ctxt->input;
1.61 daniel 4113: int res;
4114:
4115: *result = NULL;
4116:
1.152 daniel 4117: if (RAW != '(') {
1.230 veillard 4118: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.61 daniel 4119: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4120: ctxt->sax->error(ctxt->userData,
1.61 daniel 4121: "xmlParseElementContentDecl : '(' expected\n");
4122: ctxt->wellFormed = 0;
1.180 daniel 4123: ctxt->disableSAX = 1;
1.61 daniel 4124: return(-1);
4125: }
4126: NEXT;
1.97 daniel 4127: GROW;
1.61 daniel 4128: SKIP_BLANKS;
1.152 daniel 4129: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 4130: (NXT(2) == 'C') && (NXT(3) == 'D') &&
4131: (NXT(4) == 'A') && (NXT(5) == 'T') &&
4132: (NXT(6) == 'A')) {
1.62 daniel 4133: tree = xmlParseElementMixedContentDecl(ctxt);
1.61 daniel 4134: res = XML_ELEMENT_TYPE_MIXED;
4135: } else {
1.62 daniel 4136: tree = xmlParseElementChildrenContentDecl(ctxt);
1.61 daniel 4137: res = XML_ELEMENT_TYPE_ELEMENT;
4138: }
1.187 daniel 4139: if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
1.230 veillard 4140: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 4141: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4142: ctxt->sax->error(ctxt->userData,
4143: "Element content declaration doesn't start and stop in the same entity\n");
4144: ctxt->wellFormed = 0;
4145: ctxt->disableSAX = 1;
4146: }
1.61 daniel 4147: SKIP_BLANKS;
1.63 daniel 4148: *result = tree;
1.61 daniel 4149: return(res);
1.22 daniel 4150: }
4151:
1.50 daniel 4152: /**
4153: * xmlParseElementDecl:
4154: * @ctxt: an XML parser context
4155: *
4156: * parse an Element declaration.
1.22 daniel 4157: *
4158: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4159: *
1.99 daniel 4160: * [ VC: Unique Element Type Declaration ]
1.117 daniel 4161: * No element type may be declared more than once
1.69 daniel 4162: *
4163: * Returns the type of the element, or -1 in case of error
1.22 daniel 4164: */
1.59 daniel 4165: int
1.55 daniel 4166: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4167: xmlChar *name;
1.59 daniel 4168: int ret = -1;
1.61 daniel 4169: xmlElementContentPtr content = NULL;
1.22 daniel 4170:
1.97 daniel 4171: GROW;
1.152 daniel 4172: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4173: (NXT(2) == 'E') && (NXT(3) == 'L') &&
4174: (NXT(4) == 'E') && (NXT(5) == 'M') &&
4175: (NXT(6) == 'E') && (NXT(7) == 'N') &&
1.59 daniel 4176: (NXT(8) == 'T')) {
1.187 daniel 4177: xmlParserInputPtr input = ctxt->input;
4178:
1.40 daniel 4179: SKIP(9);
1.59 daniel 4180: if (!IS_BLANK(CUR)) {
1.230 veillard 4181: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4182: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4183: ctxt->sax->error(ctxt->userData,
1.59 daniel 4184: "Space required after 'ELEMENT'\n");
4185: ctxt->wellFormed = 0;
1.180 daniel 4186: ctxt->disableSAX = 1;
1.59 daniel 4187: }
1.42 daniel 4188: SKIP_BLANKS;
1.22 daniel 4189: name = xmlParseName(ctxt);
4190: if (name == NULL) {
1.230 veillard 4191: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 4192: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4193: ctxt->sax->error(ctxt->userData,
1.59 daniel 4194: "xmlParseElementDecl: no name for Element\n");
4195: ctxt->wellFormed = 0;
1.180 daniel 4196: ctxt->disableSAX = 1;
1.59 daniel 4197: return(-1);
4198: }
4199: if (!IS_BLANK(CUR)) {
1.230 veillard 4200: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4201: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4202: ctxt->sax->error(ctxt->userData,
1.59 daniel 4203: "Space required after the element name\n");
4204: ctxt->wellFormed = 0;
1.180 daniel 4205: ctxt->disableSAX = 1;
1.22 daniel 4206: }
1.42 daniel 4207: SKIP_BLANKS;
1.152 daniel 4208: if ((RAW == 'E') && (NXT(1) == 'M') &&
1.40 daniel 4209: (NXT(2) == 'P') && (NXT(3) == 'T') &&
4210: (NXT(4) == 'Y')) {
4211: SKIP(5);
1.22 daniel 4212: /*
4213: * Element must always be empty.
4214: */
1.59 daniel 4215: ret = XML_ELEMENT_TYPE_EMPTY;
1.152 daniel 4216: } else if ((RAW == 'A') && (NXT(1) == 'N') &&
1.40 daniel 4217: (NXT(2) == 'Y')) {
4218: SKIP(3);
1.22 daniel 4219: /*
4220: * Element is a generic container.
4221: */
1.59 daniel 4222: ret = XML_ELEMENT_TYPE_ANY;
1.152 daniel 4223: } else if (RAW == '(') {
1.61 daniel 4224: ret = xmlParseElementContentDecl(ctxt, name, &content);
1.22 daniel 4225: } else {
1.98 daniel 4226: /*
4227: * [ WFC: PEs in Internal Subset ] error handling.
4228: */
1.152 daniel 4229: if ((RAW == '%') && (ctxt->external == 0) &&
1.98 daniel 4230: (ctxt->inputNr == 1)) {
1.230 veillard 4231: ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
1.98 daniel 4232: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4233: ctxt->sax->error(ctxt->userData,
4234: "PEReference: forbidden within markup decl in internal subset\n");
4235: } else {
1.230 veillard 4236: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.98 daniel 4237: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4238: ctxt->sax->error(ctxt->userData,
4239: "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4240: }
1.61 daniel 4241: ctxt->wellFormed = 0;
1.180 daniel 4242: ctxt->disableSAX = 1;
1.119 daniel 4243: if (name != NULL) xmlFree(name);
1.61 daniel 4244: return(-1);
1.22 daniel 4245: }
1.142 daniel 4246:
4247: SKIP_BLANKS;
4248: /*
4249: * Pop-up of finished entities.
4250: */
1.152 daniel 4251: while ((RAW == 0) && (ctxt->inputNr > 1))
1.142 daniel 4252: xmlPopInput(ctxt);
1.42 daniel 4253: SKIP_BLANKS;
1.142 daniel 4254:
1.152 daniel 4255: if (RAW != '>') {
1.230 veillard 4256: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.55 daniel 4257: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4258: ctxt->sax->error(ctxt->userData,
1.31 daniel 4259: "xmlParseElementDecl: expected '>' at the end\n");
1.59 daniel 4260: ctxt->wellFormed = 0;
1.180 daniel 4261: ctxt->disableSAX = 1;
1.61 daniel 4262: } else {
1.187 daniel 4263: if (input != ctxt->input) {
1.230 veillard 4264: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 4265: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4266: ctxt->sax->error(ctxt->userData,
4267: "Element declaration doesn't start and stop in the same entity\n");
4268: ctxt->wellFormed = 0;
4269: ctxt->disableSAX = 1;
4270: }
4271:
1.40 daniel 4272: NEXT;
1.171 daniel 4273: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4274: (ctxt->sax->elementDecl != NULL))
1.76 daniel 4275: ctxt->sax->elementDecl(ctxt->userData, name, ret,
4276: content);
1.61 daniel 4277: }
1.84 daniel 4278: if (content != NULL) {
4279: xmlFreeElementContent(content);
4280: }
1.61 daniel 4281: if (name != NULL) {
1.119 daniel 4282: xmlFree(name);
1.61 daniel 4283: }
1.22 daniel 4284: }
1.59 daniel 4285: return(ret);
1.22 daniel 4286: }
4287:
1.50 daniel 4288: /**
4289: * xmlParseMarkupDecl:
4290: * @ctxt: an XML parser context
4291: *
4292: * parse Markup declarations
1.22 daniel 4293: *
4294: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4295: * NotationDecl | PI | Comment
4296: *
1.98 daniel 4297: * [ VC: Proper Declaration/PE Nesting ]
1.229 veillard 4298: * Parameter-entity replacement text must be properly nested with
1.98 daniel 4299: * markup declarations. That is to say, if either the first character
4300: * or the last character of a markup declaration (markupdecl above) is
4301: * contained in the replacement text for a parameter-entity reference,
4302: * both must be contained in the same replacement text.
4303: *
4304: * [ WFC: PEs in Internal Subset ]
4305: * In the internal DTD subset, parameter-entity references can occur
4306: * only where markup declarations can occur, not within markup declarations.
4307: * (This does not apply to references that occur in external parameter
4308: * entities or to the external subset.)
1.22 daniel 4309: */
1.55 daniel 4310: void
4311: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1.94 daniel 4312: GROW;
1.22 daniel 4313: xmlParseElementDecl(ctxt);
4314: xmlParseAttributeListDecl(ctxt);
4315: xmlParseEntityDecl(ctxt);
4316: xmlParseNotationDecl(ctxt);
4317: xmlParsePI(ctxt);
1.114 daniel 4318: xmlParseComment(ctxt);
1.98 daniel 4319: /*
4320: * This is only for internal subset. On external entities,
4321: * the replacement is done before parsing stage
4322: */
4323: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4324: xmlParsePEReference(ctxt);
1.97 daniel 4325: ctxt->instate = XML_PARSER_DTD;
1.22 daniel 4326: }
4327:
1.50 daniel 4328: /**
1.76 daniel 4329: * xmlParseTextDecl:
4330: * @ctxt: an XML parser context
4331: *
4332: * parse an XML declaration header for external entities
4333: *
4334: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
1.176 daniel 4335: *
4336: * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
1.76 daniel 4337: */
4338:
1.172 daniel 4339: void
1.76 daniel 4340: xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4341: xmlChar *version;
1.76 daniel 4342:
4343: /*
4344: * We know that '<?xml' is here.
4345: */
1.193 daniel 4346: if ((RAW == '<') && (NXT(1) == '?') &&
4347: (NXT(2) == 'x') && (NXT(3) == 'm') &&
4348: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4349: SKIP(5);
4350: } else {
1.230 veillard 4351: ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
1.193 daniel 4352: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4353: ctxt->sax->error(ctxt->userData,
4354: "Text declaration '<?xml' required\n");
4355: ctxt->wellFormed = 0;
4356: ctxt->disableSAX = 1;
4357:
4358: return;
4359: }
1.76 daniel 4360:
4361: if (!IS_BLANK(CUR)) {
1.230 veillard 4362: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 4363: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4364: ctxt->sax->error(ctxt->userData,
4365: "Space needed after '<?xml'\n");
1.76 daniel 4366: ctxt->wellFormed = 0;
1.180 daniel 4367: ctxt->disableSAX = 1;
1.76 daniel 4368: }
4369: SKIP_BLANKS;
4370:
4371: /*
4372: * We may have the VersionInfo here.
4373: */
4374: version = xmlParseVersionInfo(ctxt);
4375: if (version == NULL)
4376: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.172 daniel 4377: ctxt->input->version = version;
1.76 daniel 4378:
4379: /*
4380: * We must have the encoding declaration
4381: */
4382: if (!IS_BLANK(CUR)) {
1.230 veillard 4383: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 4384: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4385: ctxt->sax->error(ctxt->userData, "Space needed here\n");
1.76 daniel 4386: ctxt->wellFormed = 0;
1.180 daniel 4387: ctxt->disableSAX = 1;
1.76 daniel 4388: }
1.195 daniel 4389: xmlParseEncodingDecl(ctxt);
1.193 daniel 4390: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4391: /*
4392: * The XML REC instructs us to stop parsing right here
4393: */
4394: return;
4395: }
1.76 daniel 4396:
4397: SKIP_BLANKS;
1.152 daniel 4398: if ((RAW == '?') && (NXT(1) == '>')) {
1.76 daniel 4399: SKIP(2);
1.152 daniel 4400: } else if (RAW == '>') {
1.76 daniel 4401: /* Deprecated old WD ... */
1.230 veillard 4402: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 4403: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4404: ctxt->sax->error(ctxt->userData,
4405: "XML declaration must end-up with '?>'\n");
1.76 daniel 4406: ctxt->wellFormed = 0;
1.180 daniel 4407: ctxt->disableSAX = 1;
1.76 daniel 4408: NEXT;
4409: } else {
1.230 veillard 4410: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 4411: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4412: ctxt->sax->error(ctxt->userData,
4413: "parsing XML declaration: '?>' expected\n");
1.76 daniel 4414: ctxt->wellFormed = 0;
1.180 daniel 4415: ctxt->disableSAX = 1;
1.76 daniel 4416: MOVETO_ENDTAG(CUR_PTR);
4417: NEXT;
4418: }
4419: }
4420:
4421: /*
4422: * xmlParseConditionalSections
4423: * @ctxt: an XML parser context
4424: *
4425: * [61] conditionalSect ::= includeSect | ignoreSect
4426: * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4427: * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4428: * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4429: * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4430: */
4431:
4432: void
4433: xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
1.165 daniel 4434: SKIP(3);
4435: SKIP_BLANKS;
1.168 daniel 4436: if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4437: (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4438: (NXT(6) == 'E')) {
1.165 daniel 4439: SKIP(7);
1.168 daniel 4440: SKIP_BLANKS;
4441: if (RAW != '[') {
1.230 veillard 4442: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
1.168 daniel 4443: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4444: ctxt->sax->error(ctxt->userData,
4445: "XML conditional section '[' expected\n");
4446: ctxt->wellFormed = 0;
1.180 daniel 4447: ctxt->disableSAX = 1;
1.168 daniel 4448: } else {
4449: NEXT;
4450: }
1.220 veillard 4451: if (xmlParserDebugEntities) {
4452: if ((ctxt->input != NULL) && (ctxt->input->filename))
1.241 veillard 4453: xmlGenericError(xmlGenericErrorContext,
4454: "%s(%d): ", ctxt->input->filename,
1.220 veillard 4455: ctxt->input->line);
1.241 veillard 4456: xmlGenericError(xmlGenericErrorContext,
4457: "Entering INCLUDE Conditional Section\n");
1.220 veillard 4458: }
4459:
1.165 daniel 4460: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4461: (NXT(2) != '>'))) {
4462: const xmlChar *check = CUR_PTR;
4463: int cons = ctxt->input->consumed;
4464: int tok = ctxt->token;
4465:
4466: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4467: xmlParseConditionalSections(ctxt);
4468: } else if (IS_BLANK(CUR)) {
4469: NEXT;
4470: } else if (RAW == '%') {
4471: xmlParsePEReference(ctxt);
4472: } else
4473: xmlParseMarkupDecl(ctxt);
4474:
4475: /*
4476: * Pop-up of finished entities.
4477: */
4478: while ((RAW == 0) && (ctxt->inputNr > 1))
4479: xmlPopInput(ctxt);
4480:
4481: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4482: (tok == ctxt->token)) {
1.230 veillard 4483: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.165 daniel 4484: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4485: ctxt->sax->error(ctxt->userData,
4486: "Content error in the external subset\n");
4487: ctxt->wellFormed = 0;
1.180 daniel 4488: ctxt->disableSAX = 1;
1.165 daniel 4489: break;
4490: }
4491: }
1.220 veillard 4492: if (xmlParserDebugEntities) {
4493: if ((ctxt->input != NULL) && (ctxt->input->filename))
1.241 veillard 4494: xmlGenericError(xmlGenericErrorContext,
4495: "%s(%d): ", ctxt->input->filename,
1.220 veillard 4496: ctxt->input->line);
1.241 veillard 4497: xmlGenericError(xmlGenericErrorContext,
4498: "Leaving INCLUDE Conditional Section\n");
1.220 veillard 4499: }
4500:
1.168 daniel 4501: } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4502: (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
1.171 daniel 4503: int state;
4504:
1.168 daniel 4505: SKIP(6);
4506: SKIP_BLANKS;
4507: if (RAW != '[') {
1.230 veillard 4508: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
1.168 daniel 4509: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4510: ctxt->sax->error(ctxt->userData,
4511: "XML conditional section '[' expected\n");
4512: ctxt->wellFormed = 0;
1.180 daniel 4513: ctxt->disableSAX = 1;
1.168 daniel 4514: } else {
4515: NEXT;
4516: }
1.220 veillard 4517: if (xmlParserDebugEntities) {
4518: if ((ctxt->input != NULL) && (ctxt->input->filename))
1.241 veillard 4519: xmlGenericError(xmlGenericErrorContext,
4520: "%s(%d): ", ctxt->input->filename,
1.220 veillard 4521: ctxt->input->line);
1.241 veillard 4522: xmlGenericError(xmlGenericErrorContext,
4523: "Entering IGNORE Conditional Section\n");
1.220 veillard 4524: }
1.171 daniel 4525:
1.143 daniel 4526: /*
1.171 daniel 4527: * Parse up to the end of the conditionnal section
4528: * But disable SAX event generating DTD building in the meantime
1.143 daniel 4529: */
1.171 daniel 4530: state = ctxt->disableSAX;
1.220 veillard 4531: ctxt->disableSAX = 1;
1.165 daniel 4532: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4533: (NXT(2) != '>'))) {
1.171 daniel 4534: const xmlChar *check = CUR_PTR;
4535: int cons = ctxt->input->consumed;
4536: int tok = ctxt->token;
4537:
4538: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4539: xmlParseConditionalSections(ctxt);
4540: } else if (IS_BLANK(CUR)) {
4541: NEXT;
4542: } else if (RAW == '%') {
4543: xmlParsePEReference(ctxt);
4544: } else
4545: xmlParseMarkupDecl(ctxt);
4546:
1.165 daniel 4547: /*
4548: * Pop-up of finished entities.
4549: */
4550: while ((RAW == 0) && (ctxt->inputNr > 1))
4551: xmlPopInput(ctxt);
1.143 daniel 4552:
1.171 daniel 4553: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4554: (tok == ctxt->token)) {
1.230 veillard 4555: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.171 daniel 4556: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4557: ctxt->sax->error(ctxt->userData,
4558: "Content error in the external subset\n");
4559: ctxt->wellFormed = 0;
1.180 daniel 4560: ctxt->disableSAX = 1;
1.171 daniel 4561: break;
4562: }
1.165 daniel 4563: }
1.171 daniel 4564: ctxt->disableSAX = state;
1.220 veillard 4565: if (xmlParserDebugEntities) {
4566: if ((ctxt->input != NULL) && (ctxt->input->filename))
1.241 veillard 4567: xmlGenericError(xmlGenericErrorContext,
4568: "%s(%d): ", ctxt->input->filename,
1.220 veillard 4569: ctxt->input->line);
1.241 veillard 4570: xmlGenericError(xmlGenericErrorContext,
4571: "Leaving IGNORE Conditional Section\n");
1.220 veillard 4572: }
4573:
1.168 daniel 4574: } else {
1.230 veillard 4575: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
1.168 daniel 4576: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4577: ctxt->sax->error(ctxt->userData,
4578: "XML conditional section INCLUDE or IGNORE keyword expected\n");
4579: ctxt->wellFormed = 0;
1.180 daniel 4580: ctxt->disableSAX = 1;
1.143 daniel 4581: }
4582:
1.152 daniel 4583: if (RAW == 0)
1.143 daniel 4584: SHRINK;
4585:
1.152 daniel 4586: if (RAW == 0) {
1.230 veillard 4587: ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
1.76 daniel 4588: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4589: ctxt->sax->error(ctxt->userData,
4590: "XML conditional section not closed\n");
4591: ctxt->wellFormed = 0;
1.180 daniel 4592: ctxt->disableSAX = 1;
1.143 daniel 4593: } else {
4594: SKIP(3);
1.76 daniel 4595: }
4596: }
4597:
4598: /**
1.124 daniel 4599: * xmlParseExternalSubset:
1.76 daniel 4600: * @ctxt: an XML parser context
1.124 daniel 4601: * @ExternalID: the external identifier
4602: * @SystemID: the system identifier (or URL)
1.76 daniel 4603: *
4604: * parse Markup declarations from an external subset
4605: *
4606: * [30] extSubset ::= textDecl? extSubsetDecl
4607: *
4608: * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
4609: */
4610: void
1.123 daniel 4611: xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
4612: const xmlChar *SystemID) {
1.132 daniel 4613: GROW;
1.152 daniel 4614: if ((RAW == '<') && (NXT(1) == '?') &&
1.76 daniel 4615: (NXT(2) == 'x') && (NXT(3) == 'm') &&
4616: (NXT(4) == 'l')) {
1.172 daniel 4617: xmlParseTextDecl(ctxt);
1.193 daniel 4618: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4619: /*
4620: * The XML REC instructs us to stop parsing right here
4621: */
4622: ctxt->instate = XML_PARSER_EOF;
4623: return;
4624: }
1.76 daniel 4625: }
1.79 daniel 4626: if (ctxt->myDoc == NULL) {
1.116 daniel 4627: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
1.79 daniel 4628: }
4629: if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4630: xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4631:
1.96 daniel 4632: ctxt->instate = XML_PARSER_DTD;
1.101 daniel 4633: ctxt->external = 1;
1.152 daniel 4634: while (((RAW == '<') && (NXT(1) == '?')) ||
4635: ((RAW == '<') && (NXT(1) == '!')) ||
1.164 daniel 4636: IS_BLANK(CUR)) {
1.123 daniel 4637: const xmlChar *check = CUR_PTR;
1.115 daniel 4638: int cons = ctxt->input->consumed;
1.164 daniel 4639: int tok = ctxt->token;
1.115 daniel 4640:
1.221 veillard 4641: GROW;
1.152 daniel 4642: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
1.76 daniel 4643: xmlParseConditionalSections(ctxt);
4644: } else if (IS_BLANK(CUR)) {
4645: NEXT;
1.152 daniel 4646: } else if (RAW == '%') {
1.76 daniel 4647: xmlParsePEReference(ctxt);
4648: } else
4649: xmlParseMarkupDecl(ctxt);
1.77 daniel 4650:
4651: /*
4652: * Pop-up of finished entities.
4653: */
1.166 daniel 4654: while ((RAW == 0) && (ctxt->inputNr > 1))
1.77 daniel 4655: xmlPopInput(ctxt);
4656:
1.164 daniel 4657: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4658: (tok == ctxt->token)) {
1.230 veillard 4659: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.115 daniel 4660: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4661: ctxt->sax->error(ctxt->userData,
4662: "Content error in the external subset\n");
4663: ctxt->wellFormed = 0;
1.180 daniel 4664: ctxt->disableSAX = 1;
1.115 daniel 4665: break;
4666: }
1.76 daniel 4667: }
4668:
1.152 daniel 4669: if (RAW != 0) {
1.230 veillard 4670: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.76 daniel 4671: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4672: ctxt->sax->error(ctxt->userData,
4673: "Extra content at the end of the document\n");
4674: ctxt->wellFormed = 0;
1.180 daniel 4675: ctxt->disableSAX = 1;
1.76 daniel 4676: }
4677:
4678: }
4679:
4680: /**
1.77 daniel 4681: * xmlParseReference:
4682: * @ctxt: an XML parser context
4683: *
4684: * parse and handle entity references in content, depending on the SAX
4685: * interface, this may end-up in a call to character() if this is a
1.79 daniel 4686: * CharRef, a predefined entity, if there is no reference() callback.
4687: * or if the parser was asked to switch to that mode.
1.77 daniel 4688: *
4689: * [67] Reference ::= EntityRef | CharRef
4690: */
4691: void
4692: xmlParseReference(xmlParserCtxtPtr ctxt) {
4693: xmlEntityPtr ent;
1.123 daniel 4694: xmlChar *val;
1.152 daniel 4695: if (RAW != '&') return;
1.77 daniel 4696:
4697: if (NXT(1) == '#') {
1.152 daniel 4698: int i = 0;
1.153 daniel 4699: xmlChar out[10];
4700: int hex = NXT(2);
1.77 daniel 4701: int val = xmlParseCharRef(ctxt);
1.152 daniel 4702:
1.198 daniel 4703: if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
1.153 daniel 4704: /*
4705: * So we are using non-UTF-8 buffers
4706: * Check that the char fit on 8bits, if not
4707: * generate a CharRef.
4708: */
4709: if (val <= 0xFF) {
4710: out[0] = val;
4711: out[1] = 0;
1.171 daniel 4712: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4713: (!ctxt->disableSAX))
1.153 daniel 4714: ctxt->sax->characters(ctxt->userData, out, 1);
4715: } else {
4716: if ((hex == 'x') || (hex == 'X'))
4717: sprintf((char *)out, "#x%X", val);
4718: else
4719: sprintf((char *)out, "#%d", val);
1.171 daniel 4720: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4721: (!ctxt->disableSAX))
1.153 daniel 4722: ctxt->sax->reference(ctxt->userData, out);
4723: }
4724: } else {
4725: /*
4726: * Just encode the value in UTF-8
4727: */
4728: COPY_BUF(0 ,out, i, val);
4729: out[i] = 0;
1.171 daniel 4730: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4731: (!ctxt->disableSAX))
1.153 daniel 4732: ctxt->sax->characters(ctxt->userData, out, i);
4733: }
1.77 daniel 4734: } else {
4735: ent = xmlParseEntityRef(ctxt);
4736: if (ent == NULL) return;
4737: if ((ent->name != NULL) &&
1.159 daniel 4738: (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
1.180 daniel 4739: xmlNodePtr list = NULL;
4740: int ret;
4741:
4742:
4743: /*
4744: * The first reference to the entity trigger a parsing phase
4745: * where the ent->children is filled with the result from
4746: * the parsing.
4747: */
4748: if (ent->children == NULL) {
4749: xmlChar *value;
4750: value = ent->content;
4751:
4752: /*
4753: * Check that this entity is well formed
4754: */
4755: if ((value != NULL) &&
4756: (value[1] == 0) && (value[0] == '<') &&
1.236 veillard 4757: (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
1.180 daniel 4758: /*
1.222 veillard 4759: * DONE: get definite answer on this !!!
1.180 daniel 4760: * Lots of entity decls are used to declare a single
4761: * char
4762: * <!ENTITY lt "<">
4763: * Which seems to be valid since
4764: * 2.4: The ampersand character (&) and the left angle
4765: * bracket (<) may appear in their literal form only
4766: * when used ... They are also legal within the literal
4767: * entity value of an internal entity declaration;i
4768: * see "4.3.2 Well-Formed Parsed Entities".
4769: * IMHO 2.4 and 4.3.2 are directly in contradiction.
4770: * Looking at the OASIS test suite and James Clark
4771: * tests, this is broken. However the XML REC uses
4772: * it. Is the XML REC not well-formed ????
4773: * This is a hack to avoid this problem
1.222 veillard 4774: *
4775: * ANSWER: since lt gt amp .. are already defined,
4776: * this is a redefinition and hence the fact that the
4777: * contentis not well balanced is not a Wf error, this
4778: * is lousy but acceptable.
1.180 daniel 4779: */
4780: list = xmlNewDocText(ctxt->myDoc, value);
4781: if (list != NULL) {
4782: if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4783: (ent->children == NULL)) {
4784: ent->children = list;
4785: ent->last = list;
4786: list->parent = (xmlNodePtr) ent;
4787: } else {
4788: xmlFreeNodeList(list);
4789: }
4790: } else if (list != NULL) {
4791: xmlFreeNodeList(list);
4792: }
1.181 daniel 4793: } else {
1.180 daniel 4794: /*
4795: * 4.3.2: An internal general parsed entity is well-formed
4796: * if its replacement text matches the production labeled
4797: * content.
4798: */
1.185 daniel 4799: if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
4800: ctxt->depth++;
1.180 daniel 4801: ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
1.185 daniel 4802: ctxt->sax, NULL, ctxt->depth,
4803: value, &list);
4804: ctxt->depth--;
4805: } else if (ent->etype ==
4806: XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
4807: ctxt->depth++;
1.180 daniel 4808: ret = xmlParseExternalEntity(ctxt->myDoc,
1.185 daniel 4809: ctxt->sax, NULL, ctxt->depth,
1.228 veillard 4810: ent->URI, ent->ExternalID, &list);
1.185 daniel 4811: ctxt->depth--;
4812: } else {
1.180 daniel 4813: ret = -1;
4814: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4815: ctxt->sax->error(ctxt->userData,
4816: "Internal: invalid entity type\n");
4817: }
1.185 daniel 4818: if (ret == XML_ERR_ENTITY_LOOP) {
1.230 veillard 4819: ctxt->errNo = XML_ERR_ENTITY_LOOP;
1.185 daniel 4820: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4821: ctxt->sax->error(ctxt->userData,
4822: "Detected entity reference loop\n");
4823: ctxt->wellFormed = 0;
4824: ctxt->disableSAX = 1;
4825: } else if ((ret == 0) && (list != NULL)) {
1.180 daniel 4826: if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4827: (ent->children == NULL)) {
4828: ent->children = list;
4829: while (list != NULL) {
4830: list->parent = (xmlNodePtr) ent;
4831: if (list->next == NULL)
4832: ent->last = list;
4833: list = list->next;
4834: }
4835: } else {
4836: xmlFreeNodeList(list);
4837: }
4838: } else if (ret > 0) {
1.230 veillard 4839: ctxt->errNo = ret;
1.180 daniel 4840: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4841: ctxt->sax->error(ctxt->userData,
4842: "Entity value required\n");
4843: ctxt->wellFormed = 0;
4844: ctxt->disableSAX = 1;
4845: } else if (list != NULL) {
4846: xmlFreeNodeList(list);
4847: }
4848: }
4849: }
1.113 daniel 4850: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
1.171 daniel 4851: (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
1.113 daniel 4852: /*
4853: * Create a node.
4854: */
4855: ctxt->sax->reference(ctxt->userData, ent->name);
4856: return;
4857: } else if (ctxt->replaceEntities) {
1.222 veillard 4858: if ((ctxt->node != NULL) && (ent->children != NULL)) {
4859: /*
4860: * Seems we are generating the DOM content, do
4861: * a simple tree copy
4862: */
4863: xmlNodePtr new;
4864: new = xmlCopyNodeList(ent->children);
4865:
4866: xmlAddChildList(ctxt->node, new);
4867: /*
4868: * This is to avoid a nasty side effect, see
4869: * characters() in SAX.c
4870: */
4871: ctxt->nodemem = 0;
4872: ctxt->nodelen = 0;
4873: return;
4874: } else {
4875: /*
4876: * Probably running in SAX mode
4877: */
4878: xmlParserInputPtr input;
1.79 daniel 4879:
1.222 veillard 4880: input = xmlNewEntityInputStream(ctxt, ent);
4881: xmlPushInput(ctxt, input);
4882: if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
4883: (RAW == '<') && (NXT(1) == '?') &&
4884: (NXT(2) == 'x') && (NXT(3) == 'm') &&
4885: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4886: xmlParseTextDecl(ctxt);
4887: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4888: /*
4889: * The XML REC instructs us to stop parsing right here
4890: */
4891: ctxt->instate = XML_PARSER_EOF;
4892: return;
4893: }
4894: if (input->standalone == 1) {
1.230 veillard 4895: ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
1.222 veillard 4896: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4897: ctxt->sax->error(ctxt->userData,
4898: "external parsed entities cannot be standalone\n");
4899: ctxt->wellFormed = 0;
4900: ctxt->disableSAX = 1;
4901: }
1.167 daniel 4902: }
1.222 veillard 4903: return;
1.167 daniel 4904: }
1.113 daniel 4905: }
1.222 veillard 4906: } else {
4907: val = ent->content;
4908: if (val == NULL) return;
4909: /*
4910: * inline the entity.
4911: */
4912: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4913: (!ctxt->disableSAX))
4914: ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
1.77 daniel 4915: }
4916: }
1.24 daniel 4917: }
4918:
1.50 daniel 4919: /**
4920: * xmlParseEntityRef:
4921: * @ctxt: an XML parser context
4922: *
4923: * parse ENTITY references declarations
1.24 daniel 4924: *
4925: * [68] EntityRef ::= '&' Name ';'
1.68 daniel 4926: *
1.98 daniel 4927: * [ WFC: Entity Declared ]
4928: * In a document without any DTD, a document with only an internal DTD
4929: * subset which contains no parameter entity references, or a document
4930: * with "standalone='yes'", the Name given in the entity reference
4931: * must match that in an entity declaration, except that well-formed
4932: * documents need not declare any of the following entities: amp, lt,
4933: * gt, apos, quot. The declaration of a parameter entity must precede
4934: * any reference to it. Similarly, the declaration of a general entity
4935: * must precede any reference to it which appears in a default value in an
4936: * attribute-list declaration. Note that if entities are declared in the
4937: * external subset or in external parameter entities, a non-validating
4938: * processor is not obligated to read and process their declarations;
4939: * for such documents, the rule that an entity must be declared is a
4940: * well-formedness constraint only if standalone='yes'.
4941: *
4942: * [ WFC: Parsed Entity ]
4943: * An entity reference must not contain the name of an unparsed entity
4944: *
1.77 daniel 4945: * Returns the xmlEntityPtr if found, or NULL otherwise.
1.24 daniel 4946: */
1.77 daniel 4947: xmlEntityPtr
1.55 daniel 4948: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1.123 daniel 4949: xmlChar *name;
1.72 daniel 4950: xmlEntityPtr ent = NULL;
1.24 daniel 4951:
1.91 daniel 4952: GROW;
1.111 daniel 4953:
1.152 daniel 4954: if (RAW == '&') {
1.40 daniel 4955: NEXT;
1.24 daniel 4956: name = xmlParseName(ctxt);
4957: if (name == NULL) {
1.230 veillard 4958: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 4959: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 4960: ctxt->sax->error(ctxt->userData,
4961: "xmlParseEntityRef: no name\n");
1.59 daniel 4962: ctxt->wellFormed = 0;
1.180 daniel 4963: ctxt->disableSAX = 1;
1.24 daniel 4964: } else {
1.152 daniel 4965: if (RAW == ';') {
1.40 daniel 4966: NEXT;
1.24 daniel 4967: /*
1.77 daniel 4968: * Ask first SAX for entity resolution, otherwise try the
4969: * predefined set.
4970: */
4971: if (ctxt->sax != NULL) {
4972: if (ctxt->sax->getEntity != NULL)
4973: ent = ctxt->sax->getEntity(ctxt->userData, name);
4974: if (ent == NULL)
4975: ent = xmlGetPredefinedEntity(name);
4976: }
4977: /*
1.98 daniel 4978: * [ WFC: Entity Declared ]
4979: * In a document without any DTD, a document with only an
4980: * internal DTD subset which contains no parameter entity
4981: * references, or a document with "standalone='yes'", the
4982: * Name given in the entity reference must match that in an
4983: * entity declaration, except that well-formed documents
4984: * need not declare any of the following entities: amp, lt,
4985: * gt, apos, quot.
4986: * The declaration of a parameter entity must precede any
4987: * reference to it.
4988: * Similarly, the declaration of a general entity must
4989: * precede any reference to it which appears in a default
4990: * value in an attribute-list declaration. Note that if
4991: * entities are declared in the external subset or in
4992: * external parameter entities, a non-validating processor
4993: * is not obligated to read and process their declarations;
4994: * for such documents, the rule that an entity must be
4995: * declared is a well-formedness constraint only if
4996: * standalone='yes'.
1.59 daniel 4997: */
1.77 daniel 4998: if (ent == NULL) {
1.98 daniel 4999: if ((ctxt->standalone == 1) ||
5000: ((ctxt->hasExternalSubset == 0) &&
5001: (ctxt->hasPErefs == 0))) {
1.230 veillard 5002: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.98 daniel 5003: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 5004: ctxt->sax->error(ctxt->userData,
5005: "Entity '%s' not defined\n", name);
5006: ctxt->wellFormed = 0;
1.180 daniel 5007: ctxt->disableSAX = 1;
1.77 daniel 5008: } else {
1.230 veillard 5009: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
1.98 daniel 5010: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5011: ctxt->sax->warning(ctxt->userData,
5012: "Entity '%s' not defined\n", name);
1.59 daniel 5013: }
1.77 daniel 5014: }
1.59 daniel 5015:
5016: /*
1.98 daniel 5017: * [ WFC: Parsed Entity ]
5018: * An entity reference must not contain the name of an
5019: * unparsed entity
5020: */
1.159 daniel 5021: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.230 veillard 5022: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 5023: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5024: ctxt->sax->error(ctxt->userData,
5025: "Entity reference to unparsed entity %s\n", name);
5026: ctxt->wellFormed = 0;
1.180 daniel 5027: ctxt->disableSAX = 1;
1.98 daniel 5028: }
5029:
5030: /*
5031: * [ WFC: No External Entity References ]
5032: * Attribute values cannot contain direct or indirect
5033: * entity references to external entities.
5034: */
5035: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 5036: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.230 veillard 5037: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
1.98 daniel 5038: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5039: ctxt->sax->error(ctxt->userData,
5040: "Attribute references external entity '%s'\n", name);
5041: ctxt->wellFormed = 0;
1.180 daniel 5042: ctxt->disableSAX = 1;
1.98 daniel 5043: }
5044: /*
5045: * [ WFC: No < in Attribute Values ]
5046: * The replacement text of any entity referred to directly or
5047: * indirectly in an attribute value (other than "<") must
5048: * not contain a <.
1.59 daniel 5049: */
1.98 daniel 5050: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.116 daniel 5051: (ent != NULL) &&
1.236 veillard 5052: (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
1.98 daniel 5053: (ent->content != NULL) &&
5054: (xmlStrchr(ent->content, '<'))) {
1.230 veillard 5055: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.98 daniel 5056: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5057: ctxt->sax->error(ctxt->userData,
5058: "'<' in entity '%s' is not allowed in attributes values\n", name);
5059: ctxt->wellFormed = 0;
1.180 daniel 5060: ctxt->disableSAX = 1;
1.98 daniel 5061: }
5062:
5063: /*
5064: * Internal check, no parameter entities here ...
5065: */
5066: else {
1.159 daniel 5067: switch (ent->etype) {
1.59 daniel 5068: case XML_INTERNAL_PARAMETER_ENTITY:
5069: case XML_EXTERNAL_PARAMETER_ENTITY:
1.230 veillard 5070: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
1.59 daniel 5071: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5072: ctxt->sax->error(ctxt->userData,
1.59 daniel 5073: "Attempt to reference the parameter entity '%s'\n", name);
5074: ctxt->wellFormed = 0;
1.180 daniel 5075: ctxt->disableSAX = 1;
5076: break;
5077: default:
1.59 daniel 5078: break;
5079: }
5080: }
5081:
5082: /*
1.98 daniel 5083: * [ WFC: No Recursion ]
1.229 veillard 5084: * A parsed entity must not contain a recursive reference
1.117 daniel 5085: * to itself, either directly or indirectly.
1.229 veillard 5086: * Done somewhere else
1.59 daniel 5087: */
1.77 daniel 5088:
1.24 daniel 5089: } else {
1.230 veillard 5090: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.55 daniel 5091: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5092: ctxt->sax->error(ctxt->userData,
1.59 daniel 5093: "xmlParseEntityRef: expecting ';'\n");
5094: ctxt->wellFormed = 0;
1.180 daniel 5095: ctxt->disableSAX = 1;
1.24 daniel 5096: }
1.119 daniel 5097: xmlFree(name);
1.24 daniel 5098: }
5099: }
1.77 daniel 5100: return(ent);
1.24 daniel 5101: }
1.229 veillard 5102:
1.135 daniel 5103: /**
5104: * xmlParseStringEntityRef:
5105: * @ctxt: an XML parser context
5106: * @str: a pointer to an index in the string
5107: *
5108: * parse ENTITY references declarations, but this version parses it from
5109: * a string value.
5110: *
5111: * [68] EntityRef ::= '&' Name ';'
5112: *
5113: * [ WFC: Entity Declared ]
5114: * In a document without any DTD, a document with only an internal DTD
5115: * subset which contains no parameter entity references, or a document
5116: * with "standalone='yes'", the Name given in the entity reference
5117: * must match that in an entity declaration, except that well-formed
5118: * documents need not declare any of the following entities: amp, lt,
5119: * gt, apos, quot. The declaration of a parameter entity must precede
5120: * any reference to it. Similarly, the declaration of a general entity
5121: * must precede any reference to it which appears in a default value in an
5122: * attribute-list declaration. Note that if entities are declared in the
5123: * external subset or in external parameter entities, a non-validating
5124: * processor is not obligated to read and process their declarations;
5125: * for such documents, the rule that an entity must be declared is a
5126: * well-formedness constraint only if standalone='yes'.
5127: *
5128: * [ WFC: Parsed Entity ]
5129: * An entity reference must not contain the name of an unparsed entity
5130: *
5131: * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5132: * is updated to the current location in the string.
5133: */
5134: xmlEntityPtr
5135: xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5136: xmlChar *name;
5137: const xmlChar *ptr;
5138: xmlChar cur;
5139: xmlEntityPtr ent = NULL;
5140:
1.156 daniel 5141: if ((str == NULL) || (*str == NULL))
5142: return(NULL);
1.135 daniel 5143: ptr = *str;
5144: cur = *ptr;
5145: if (cur == '&') {
5146: ptr++;
5147: cur = *ptr;
5148: name = xmlParseStringName(ctxt, &ptr);
5149: if (name == NULL) {
1.230 veillard 5150: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.135 daniel 5151: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5152: ctxt->sax->error(ctxt->userData,
5153: "xmlParseEntityRef: no name\n");
5154: ctxt->wellFormed = 0;
1.180 daniel 5155: ctxt->disableSAX = 1;
1.135 daniel 5156: } else {
1.185 daniel 5157: if (*ptr == ';') {
5158: ptr++;
1.135 daniel 5159: /*
5160: * Ask first SAX for entity resolution, otherwise try the
5161: * predefined set.
5162: */
5163: if (ctxt->sax != NULL) {
5164: if (ctxt->sax->getEntity != NULL)
5165: ent = ctxt->sax->getEntity(ctxt->userData, name);
5166: if (ent == NULL)
5167: ent = xmlGetPredefinedEntity(name);
5168: }
5169: /*
5170: * [ WFC: Entity Declared ]
5171: * In a document without any DTD, a document with only an
5172: * internal DTD subset which contains no parameter entity
5173: * references, or a document with "standalone='yes'", the
5174: * Name given in the entity reference must match that in an
5175: * entity declaration, except that well-formed documents
5176: * need not declare any of the following entities: amp, lt,
5177: * gt, apos, quot.
5178: * The declaration of a parameter entity must precede any
5179: * reference to it.
5180: * Similarly, the declaration of a general entity must
5181: * precede any reference to it which appears in a default
5182: * value in an attribute-list declaration. Note that if
5183: * entities are declared in the external subset or in
5184: * external parameter entities, a non-validating processor
5185: * is not obligated to read and process their declarations;
5186: * for such documents, the rule that an entity must be
5187: * declared is a well-formedness constraint only if
5188: * standalone='yes'.
5189: */
5190: if (ent == NULL) {
5191: if ((ctxt->standalone == 1) ||
5192: ((ctxt->hasExternalSubset == 0) &&
5193: (ctxt->hasPErefs == 0))) {
1.230 veillard 5194: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.135 daniel 5195: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5196: ctxt->sax->error(ctxt->userData,
5197: "Entity '%s' not defined\n", name);
5198: ctxt->wellFormed = 0;
1.180 daniel 5199: ctxt->disableSAX = 1;
1.135 daniel 5200: } else {
1.230 veillard 5201: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
1.135 daniel 5202: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5203: ctxt->sax->warning(ctxt->userData,
5204: "Entity '%s' not defined\n", name);
5205: }
5206: }
5207:
5208: /*
5209: * [ WFC: Parsed Entity ]
5210: * An entity reference must not contain the name of an
5211: * unparsed entity
5212: */
1.159 daniel 5213: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.230 veillard 5214: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.135 daniel 5215: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5216: ctxt->sax->error(ctxt->userData,
5217: "Entity reference to unparsed entity %s\n", name);
5218: ctxt->wellFormed = 0;
1.180 daniel 5219: ctxt->disableSAX = 1;
1.135 daniel 5220: }
5221:
5222: /*
5223: * [ WFC: No External Entity References ]
5224: * Attribute values cannot contain direct or indirect
5225: * entity references to external entities.
5226: */
5227: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 5228: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.230 veillard 5229: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
1.135 daniel 5230: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5231: ctxt->sax->error(ctxt->userData,
5232: "Attribute references external entity '%s'\n", name);
5233: ctxt->wellFormed = 0;
1.180 daniel 5234: ctxt->disableSAX = 1;
1.135 daniel 5235: }
5236: /*
5237: * [ WFC: No < in Attribute Values ]
5238: * The replacement text of any entity referred to directly or
5239: * indirectly in an attribute value (other than "<") must
5240: * not contain a <.
5241: */
5242: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5243: (ent != NULL) &&
1.236 veillard 5244: (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
1.135 daniel 5245: (ent->content != NULL) &&
5246: (xmlStrchr(ent->content, '<'))) {
1.230 veillard 5247: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.135 daniel 5248: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5249: ctxt->sax->error(ctxt->userData,
5250: "'<' in entity '%s' is not allowed in attributes values\n", name);
5251: ctxt->wellFormed = 0;
1.180 daniel 5252: ctxt->disableSAX = 1;
1.135 daniel 5253: }
5254:
5255: /*
5256: * Internal check, no parameter entities here ...
5257: */
5258: else {
1.159 daniel 5259: switch (ent->etype) {
1.135 daniel 5260: case XML_INTERNAL_PARAMETER_ENTITY:
5261: case XML_EXTERNAL_PARAMETER_ENTITY:
1.230 veillard 5262: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
1.135 daniel 5263: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5264: ctxt->sax->error(ctxt->userData,
5265: "Attempt to reference the parameter entity '%s'\n", name);
5266: ctxt->wellFormed = 0;
1.180 daniel 5267: ctxt->disableSAX = 1;
5268: break;
5269: default:
1.135 daniel 5270: break;
5271: }
5272: }
5273:
5274: /*
5275: * [ WFC: No Recursion ]
1.229 veillard 5276: * A parsed entity must not contain a recursive reference
1.135 daniel 5277: * to itself, either directly or indirectly.
1.229 veillard 5278: * Done somewhere else
1.135 daniel 5279: */
5280:
5281: } else {
1.230 veillard 5282: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.135 daniel 5283: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5284: ctxt->sax->error(ctxt->userData,
5285: "xmlParseEntityRef: expecting ';'\n");
5286: ctxt->wellFormed = 0;
1.180 daniel 5287: ctxt->disableSAX = 1;
1.135 daniel 5288: }
5289: xmlFree(name);
5290: }
5291: }
1.185 daniel 5292: *str = ptr;
1.135 daniel 5293: return(ent);
5294: }
1.24 daniel 5295:
1.50 daniel 5296: /**
5297: * xmlParsePEReference:
5298: * @ctxt: an XML parser context
5299: *
5300: * parse PEReference declarations
1.77 daniel 5301: * The entity content is handled directly by pushing it's content as
5302: * a new input stream.
1.22 daniel 5303: *
5304: * [69] PEReference ::= '%' Name ';'
1.68 daniel 5305: *
1.98 daniel 5306: * [ WFC: No Recursion ]
1.229 veillard 5307: * A parsed entity must not contain a recursive
1.98 daniel 5308: * reference to itself, either directly or indirectly.
5309: *
5310: * [ WFC: Entity Declared ]
5311: * In a document without any DTD, a document with only an internal DTD
5312: * subset which contains no parameter entity references, or a document
5313: * with "standalone='yes'", ... ... The declaration of a parameter
5314: * entity must precede any reference to it...
5315: *
5316: * [ VC: Entity Declared ]
5317: * In a document with an external subset or external parameter entities
5318: * with "standalone='no'", ... ... The declaration of a parameter entity
5319: * must precede any reference to it...
5320: *
5321: * [ WFC: In DTD ]
5322: * Parameter-entity references may only appear in the DTD.
5323: * NOTE: misleading but this is handled.
1.22 daniel 5324: */
1.77 daniel 5325: void
1.55 daniel 5326: xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 5327: xmlChar *name;
1.72 daniel 5328: xmlEntityPtr entity = NULL;
1.50 daniel 5329: xmlParserInputPtr input;
1.22 daniel 5330:
1.152 daniel 5331: if (RAW == '%') {
1.40 daniel 5332: NEXT;
1.22 daniel 5333: name = xmlParseName(ctxt);
5334: if (name == NULL) {
1.230 veillard 5335: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 5336: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5337: ctxt->sax->error(ctxt->userData,
5338: "xmlParsePEReference: no name\n");
1.59 daniel 5339: ctxt->wellFormed = 0;
1.180 daniel 5340: ctxt->disableSAX = 1;
1.22 daniel 5341: } else {
1.152 daniel 5342: if (RAW == ';') {
1.40 daniel 5343: NEXT;
1.98 daniel 5344: if ((ctxt->sax != NULL) &&
5345: (ctxt->sax->getParameterEntity != NULL))
5346: entity = ctxt->sax->getParameterEntity(ctxt->userData,
5347: name);
1.45 daniel 5348: if (entity == NULL) {
1.98 daniel 5349: /*
5350: * [ WFC: Entity Declared ]
5351: * In a document without any DTD, a document with only an
5352: * internal DTD subset which contains no parameter entity
5353: * references, or a document with "standalone='yes'", ...
5354: * ... The declaration of a parameter entity must precede
5355: * any reference to it...
5356: */
5357: if ((ctxt->standalone == 1) ||
5358: ((ctxt->hasExternalSubset == 0) &&
5359: (ctxt->hasPErefs == 0))) {
1.230 veillard 5360: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.220 veillard 5361: if ((!ctxt->disableSAX) &&
5362: (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 5363: ctxt->sax->error(ctxt->userData,
5364: "PEReference: %%%s; not found\n", name);
5365: ctxt->wellFormed = 0;
1.180 daniel 5366: ctxt->disableSAX = 1;
1.98 daniel 5367: } else {
5368: /*
5369: * [ VC: Entity Declared ]
5370: * In a document with an external subset or external
5371: * parameter entities with "standalone='no'", ...
5372: * ... The declaration of a parameter entity must precede
5373: * any reference to it...
5374: */
1.220 veillard 5375: if ((!ctxt->disableSAX) &&
5376: (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1.98 daniel 5377: ctxt->sax->warning(ctxt->userData,
5378: "PEReference: %%%s; not found\n", name);
5379: ctxt->valid = 0;
5380: }
1.50 daniel 5381: } else {
1.98 daniel 5382: /*
5383: * Internal checking in case the entity quest barfed
5384: */
1.159 daniel 5385: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5386: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.98 daniel 5387: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5388: ctxt->sax->warning(ctxt->userData,
5389: "Internal: %%%s; is not a parameter entity\n", name);
5390: } else {
1.164 daniel 5391: /*
5392: * TODO !!!
5393: * handle the extra spaces added before and after
5394: * c.f. http://www.w3.org/TR/REC-xml#as-PE
5395: */
1.98 daniel 5396: input = xmlNewEntityInputStream(ctxt, entity);
5397: xmlPushInput(ctxt, input);
1.164 daniel 5398: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5399: (RAW == '<') && (NXT(1) == '?') &&
5400: (NXT(2) == 'x') && (NXT(3) == 'm') &&
5401: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 5402: xmlParseTextDecl(ctxt);
1.193 daniel 5403: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5404: /*
5405: * The XML REC instructs us to stop parsing
5406: * right here
5407: */
5408: ctxt->instate = XML_PARSER_EOF;
5409: xmlFree(name);
5410: return;
5411: }
1.164 daniel 5412: }
5413: if (ctxt->token == 0)
5414: ctxt->token = ' ';
1.98 daniel 5415: }
1.45 daniel 5416: }
1.98 daniel 5417: ctxt->hasPErefs = 1;
1.22 daniel 5418: } else {
1.230 veillard 5419: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.55 daniel 5420: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5421: ctxt->sax->error(ctxt->userData,
1.59 daniel 5422: "xmlParsePEReference: expecting ';'\n");
5423: ctxt->wellFormed = 0;
1.180 daniel 5424: ctxt->disableSAX = 1;
1.22 daniel 5425: }
1.119 daniel 5426: xmlFree(name);
1.3 veillard 5427: }
5428: }
5429: }
5430:
1.50 daniel 5431: /**
1.135 daniel 5432: * xmlParseStringPEReference:
5433: * @ctxt: an XML parser context
5434: * @str: a pointer to an index in the string
5435: *
5436: * parse PEReference declarations
5437: *
5438: * [69] PEReference ::= '%' Name ';'
5439: *
5440: * [ WFC: No Recursion ]
1.229 veillard 5441: * A parsed entity must not contain a recursive
1.135 daniel 5442: * reference to itself, either directly or indirectly.
5443: *
5444: * [ WFC: Entity Declared ]
5445: * In a document without any DTD, a document with only an internal DTD
5446: * subset which contains no parameter entity references, or a document
5447: * with "standalone='yes'", ... ... The declaration of a parameter
5448: * entity must precede any reference to it...
5449: *
5450: * [ VC: Entity Declared ]
5451: * In a document with an external subset or external parameter entities
5452: * with "standalone='no'", ... ... The declaration of a parameter entity
5453: * must precede any reference to it...
5454: *
5455: * [ WFC: In DTD ]
5456: * Parameter-entity references may only appear in the DTD.
5457: * NOTE: misleading but this is handled.
5458: *
5459: * Returns the string of the entity content.
5460: * str is updated to the current value of the index
5461: */
5462: xmlEntityPtr
5463: xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5464: const xmlChar *ptr;
5465: xmlChar cur;
5466: xmlChar *name;
5467: xmlEntityPtr entity = NULL;
5468:
5469: if ((str == NULL) || (*str == NULL)) return(NULL);
5470: ptr = *str;
5471: cur = *ptr;
5472: if (cur == '%') {
5473: ptr++;
5474: cur = *ptr;
5475: name = xmlParseStringName(ctxt, &ptr);
5476: if (name == NULL) {
1.230 veillard 5477: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.135 daniel 5478: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5479: ctxt->sax->error(ctxt->userData,
5480: "xmlParseStringPEReference: no name\n");
5481: ctxt->wellFormed = 0;
1.180 daniel 5482: ctxt->disableSAX = 1;
1.135 daniel 5483: } else {
5484: cur = *ptr;
5485: if (cur == ';') {
5486: ptr++;
5487: cur = *ptr;
5488: if ((ctxt->sax != NULL) &&
5489: (ctxt->sax->getParameterEntity != NULL))
5490: entity = ctxt->sax->getParameterEntity(ctxt->userData,
5491: name);
5492: if (entity == NULL) {
5493: /*
5494: * [ WFC: Entity Declared ]
5495: * In a document without any DTD, a document with only an
5496: * internal DTD subset which contains no parameter entity
5497: * references, or a document with "standalone='yes'", ...
5498: * ... The declaration of a parameter entity must precede
5499: * any reference to it...
5500: */
5501: if ((ctxt->standalone == 1) ||
5502: ((ctxt->hasExternalSubset == 0) &&
5503: (ctxt->hasPErefs == 0))) {
1.230 veillard 5504: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.135 daniel 5505: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5506: ctxt->sax->error(ctxt->userData,
5507: "PEReference: %%%s; not found\n", name);
5508: ctxt->wellFormed = 0;
1.180 daniel 5509: ctxt->disableSAX = 1;
1.135 daniel 5510: } else {
5511: /*
5512: * [ VC: Entity Declared ]
5513: * In a document with an external subset or external
5514: * parameter entities with "standalone='no'", ...
5515: * ... The declaration of a parameter entity must
5516: * precede any reference to it...
5517: */
5518: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5519: ctxt->sax->warning(ctxt->userData,
5520: "PEReference: %%%s; not found\n", name);
5521: ctxt->valid = 0;
5522: }
5523: } else {
5524: /*
5525: * Internal checking in case the entity quest barfed
5526: */
1.159 daniel 5527: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5528: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.135 daniel 5529: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5530: ctxt->sax->warning(ctxt->userData,
5531: "Internal: %%%s; is not a parameter entity\n", name);
5532: }
5533: }
5534: ctxt->hasPErefs = 1;
5535: } else {
1.230 veillard 5536: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.135 daniel 5537: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5538: ctxt->sax->error(ctxt->userData,
5539: "xmlParseStringPEReference: expecting ';'\n");
5540: ctxt->wellFormed = 0;
1.180 daniel 5541: ctxt->disableSAX = 1;
1.135 daniel 5542: }
5543: xmlFree(name);
5544: }
5545: }
5546: *str = ptr;
5547: return(entity);
5548: }
5549:
5550: /**
1.181 daniel 5551: * xmlParseDocTypeDecl:
1.50 daniel 5552: * @ctxt: an XML parser context
5553: *
5554: * parse a DOCTYPE declaration
1.21 daniel 5555: *
1.22 daniel 5556: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
5557: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.98 daniel 5558: *
5559: * [ VC: Root Element Type ]
1.99 daniel 5560: * The Name in the document type declaration must match the element
1.98 daniel 5561: * type of the root element.
1.21 daniel 5562: */
5563:
1.55 daniel 5564: void
5565: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.165 daniel 5566: xmlChar *name = NULL;
1.123 daniel 5567: xmlChar *ExternalID = NULL;
5568: xmlChar *URI = NULL;
1.21 daniel 5569:
5570: /*
5571: * We know that '<!DOCTYPE' has been detected.
5572: */
1.40 daniel 5573: SKIP(9);
1.21 daniel 5574:
1.42 daniel 5575: SKIP_BLANKS;
1.21 daniel 5576:
5577: /*
5578: * Parse the DOCTYPE name.
5579: */
5580: name = xmlParseName(ctxt);
5581: if (name == NULL) {
1.230 veillard 5582: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 5583: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5584: ctxt->sax->error(ctxt->userData,
5585: "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.59 daniel 5586: ctxt->wellFormed = 0;
1.180 daniel 5587: ctxt->disableSAX = 1;
1.21 daniel 5588: }
1.165 daniel 5589: ctxt->intSubName = name;
1.21 daniel 5590:
1.42 daniel 5591: SKIP_BLANKS;
1.21 daniel 5592:
5593: /*
1.22 daniel 5594: * Check for SystemID and ExternalID
5595: */
1.67 daniel 5596: URI = xmlParseExternalID(ctxt, &ExternalID, 1);
1.98 daniel 5597:
5598: if ((URI != NULL) || (ExternalID != NULL)) {
5599: ctxt->hasExternalSubset = 1;
5600: }
1.165 daniel 5601: ctxt->extSubURI = URI;
5602: ctxt->extSubSystem = ExternalID;
1.98 daniel 5603:
1.42 daniel 5604: SKIP_BLANKS;
1.36 daniel 5605:
1.76 daniel 5606: /*
1.165 daniel 5607: * Create and update the internal subset.
1.76 daniel 5608: */
1.171 daniel 5609: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
5610: (!ctxt->disableSAX))
1.74 daniel 5611: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
1.22 daniel 5612:
5613: /*
1.140 daniel 5614: * Is there any internal subset declarations ?
5615: * they are handled separately in xmlParseInternalSubset()
5616: */
1.152 daniel 5617: if (RAW == '[')
1.140 daniel 5618: return;
5619:
5620: /*
5621: * We should be at the end of the DOCTYPE declaration.
5622: */
1.152 daniel 5623: if (RAW != '>') {
1.230 veillard 5624: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
1.140 daniel 5625: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5626: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5627: ctxt->wellFormed = 0;
1.180 daniel 5628: ctxt->disableSAX = 1;
1.140 daniel 5629: }
5630: NEXT;
5631: }
5632:
5633: /**
1.181 daniel 5634: * xmlParseInternalsubset:
1.140 daniel 5635: * @ctxt: an XML parser context
5636: *
5637: * parse the internal subset declaration
5638: *
5639: * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5640: */
5641:
5642: void
5643: xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
5644: /*
1.22 daniel 5645: * Is there any DTD definition ?
5646: */
1.152 daniel 5647: if (RAW == '[') {
1.96 daniel 5648: ctxt->instate = XML_PARSER_DTD;
1.40 daniel 5649: NEXT;
1.22 daniel 5650: /*
5651: * Parse the succession of Markup declarations and
5652: * PEReferences.
5653: * Subsequence (markupdecl | PEReference | S)*
5654: */
1.152 daniel 5655: while (RAW != ']') {
1.123 daniel 5656: const xmlChar *check = CUR_PTR;
1.115 daniel 5657: int cons = ctxt->input->consumed;
1.22 daniel 5658:
1.42 daniel 5659: SKIP_BLANKS;
1.22 daniel 5660: xmlParseMarkupDecl(ctxt);
1.50 daniel 5661: xmlParsePEReference(ctxt);
1.22 daniel 5662:
1.115 daniel 5663: /*
5664: * Pop-up of finished entities.
5665: */
1.152 daniel 5666: while ((RAW == 0) && (ctxt->inputNr > 1))
1.115 daniel 5667: xmlPopInput(ctxt);
5668:
1.118 daniel 5669: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
1.230 veillard 5670: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 5671: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5672: ctxt->sax->error(ctxt->userData,
1.140 daniel 5673: "xmlParseInternalSubset: error detected in Markup declaration\n");
1.59 daniel 5674: ctxt->wellFormed = 0;
1.180 daniel 5675: ctxt->disableSAX = 1;
1.22 daniel 5676: break;
5677: }
5678: }
1.209 veillard 5679: if (RAW == ']') {
5680: NEXT;
5681: SKIP_BLANKS;
5682: }
1.22 daniel 5683: }
5684:
5685: /*
5686: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 5687: */
1.152 daniel 5688: if (RAW != '>') {
1.230 veillard 5689: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
1.55 daniel 5690: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5691: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
1.59 daniel 5692: ctxt->wellFormed = 0;
1.180 daniel 5693: ctxt->disableSAX = 1;
1.21 daniel 5694: }
1.40 daniel 5695: NEXT;
1.21 daniel 5696: }
5697:
1.50 daniel 5698: /**
5699: * xmlParseAttribute:
5700: * @ctxt: an XML parser context
1.123 daniel 5701: * @value: a xmlChar ** used to store the value of the attribute
1.50 daniel 5702: *
5703: * parse an attribute
1.3 veillard 5704: *
1.22 daniel 5705: * [41] Attribute ::= Name Eq AttValue
5706: *
1.98 daniel 5707: * [ WFC: No External Entity References ]
5708: * Attribute values cannot contain direct or indirect entity references
5709: * to external entities.
5710: *
5711: * [ WFC: No < in Attribute Values ]
5712: * The replacement text of any entity referred to directly or indirectly in
5713: * an attribute value (other than "<") must not contain a <.
5714: *
5715: * [ VC: Attribute Value Type ]
1.117 daniel 5716: * The attribute must have been declared; the value must be of the type
1.99 daniel 5717: * declared for it.
1.98 daniel 5718: *
1.22 daniel 5719: * [25] Eq ::= S? '=' S?
5720: *
1.29 daniel 5721: * With namespace:
5722: *
5723: * [NS 11] Attribute ::= QName Eq AttValue
1.43 daniel 5724: *
5725: * Also the case QName == xmlns:??? is handled independently as a namespace
5726: * definition.
1.69 daniel 5727: *
1.72 daniel 5728: * Returns the attribute name, and the value in *value.
1.3 veillard 5729: */
5730:
1.123 daniel 5731: xmlChar *
5732: xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
5733: xmlChar *name, *val;
1.3 veillard 5734:
1.72 daniel 5735: *value = NULL;
5736: name = xmlParseName(ctxt);
1.22 daniel 5737: if (name == NULL) {
1.230 veillard 5738: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 5739: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5740: ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
1.59 daniel 5741: ctxt->wellFormed = 0;
1.180 daniel 5742: ctxt->disableSAX = 1;
1.52 daniel 5743: return(NULL);
1.3 veillard 5744: }
5745:
5746: /*
1.29 daniel 5747: * read the value
1.3 veillard 5748: */
1.42 daniel 5749: SKIP_BLANKS;
1.152 daniel 5750: if (RAW == '=') {
1.40 daniel 5751: NEXT;
1.42 daniel 5752: SKIP_BLANKS;
1.72 daniel 5753: val = xmlParseAttValue(ctxt);
1.96 daniel 5754: ctxt->instate = XML_PARSER_CONTENT;
1.29 daniel 5755: } else {
1.230 veillard 5756: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
1.55 daniel 5757: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5758: ctxt->sax->error(ctxt->userData,
1.59 daniel 5759: "Specification mandate value for attribute %s\n", name);
5760: ctxt->wellFormed = 0;
1.180 daniel 5761: ctxt->disableSAX = 1;
1.170 daniel 5762: xmlFree(name);
1.52 daniel 5763: return(NULL);
1.43 daniel 5764: }
5765:
1.172 daniel 5766: /*
5767: * Check that xml:lang conforms to the specification
1.222 veillard 5768: * No more registered as an error, just generate a warning now
5769: * since this was deprecated in XML second edition
1.172 daniel 5770: */
1.236 veillard 5771: if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
1.172 daniel 5772: if (!xmlCheckLanguageID(val)) {
1.222 veillard 5773: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5774: ctxt->sax->warning(ctxt->userData,
5775: "Malformed value for xml:lang : %s\n", val);
1.172 daniel 5776: }
5777: }
5778:
1.176 daniel 5779: /*
5780: * Check that xml:space conforms to the specification
5781: */
1.236 veillard 5782: if (xmlStrEqual(name, BAD_CAST "xml:space")) {
5783: if (xmlStrEqual(val, BAD_CAST "default"))
1.176 daniel 5784: *(ctxt->space) = 0;
1.236 veillard 5785: else if (xmlStrEqual(val, BAD_CAST "preserve"))
1.176 daniel 5786: *(ctxt->space) = 1;
5787: else {
1.230 veillard 5788: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
1.176 daniel 5789: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5790: ctxt->sax->error(ctxt->userData,
5791: "Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
5792: val);
5793: ctxt->wellFormed = 0;
1.180 daniel 5794: ctxt->disableSAX = 1;
1.176 daniel 5795: }
5796: }
5797:
1.72 daniel 5798: *value = val;
5799: return(name);
1.3 veillard 5800: }
5801:
1.50 daniel 5802: /**
5803: * xmlParseStartTag:
5804: * @ctxt: an XML parser context
5805: *
5806: * parse a start of tag either for rule element or
5807: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 5808: *
5809: * [40] STag ::= '<' Name (S Attribute)* S? '>'
5810: *
1.98 daniel 5811: * [ WFC: Unique Att Spec ]
5812: * No attribute name may appear more than once in the same start-tag or
5813: * empty-element tag.
5814: *
1.29 daniel 5815: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
5816: *
1.98 daniel 5817: * [ WFC: Unique Att Spec ]
5818: * No attribute name may appear more than once in the same start-tag or
5819: * empty-element tag.
5820: *
1.29 daniel 5821: * With namespace:
5822: *
5823: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
5824: *
5825: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.83 daniel 5826: *
1.192 daniel 5827: * Returns the element name parsed
1.2 veillard 5828: */
5829:
1.123 daniel 5830: xmlChar *
1.69 daniel 5831: xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 5832: xmlChar *name;
5833: xmlChar *attname;
5834: xmlChar *attvalue;
5835: const xmlChar **atts = NULL;
1.72 daniel 5836: int nbatts = 0;
5837: int maxatts = 0;
5838: int i;
1.2 veillard 5839:
1.152 daniel 5840: if (RAW != '<') return(NULL);
1.40 daniel 5841: NEXT;
1.3 veillard 5842:
1.72 daniel 5843: name = xmlParseName(ctxt);
1.59 daniel 5844: if (name == NULL) {
1.230 veillard 5845: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5846: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5847: ctxt->sax->error(ctxt->userData,
1.59 daniel 5848: "xmlParseStartTag: invalid element name\n");
5849: ctxt->wellFormed = 0;
1.180 daniel 5850: ctxt->disableSAX = 1;
1.83 daniel 5851: return(NULL);
1.50 daniel 5852: }
5853:
5854: /*
1.3 veillard 5855: * Now parse the attributes, it ends up with the ending
5856: *
5857: * (S Attribute)* S?
5858: */
1.42 daniel 5859: SKIP_BLANKS;
1.91 daniel 5860: GROW;
1.168 daniel 5861:
1.153 daniel 5862: while ((IS_CHAR(RAW)) &&
1.152 daniel 5863: (RAW != '>') &&
5864: ((RAW != '/') || (NXT(1) != '>'))) {
1.123 daniel 5865: const xmlChar *q = CUR_PTR;
1.91 daniel 5866: int cons = ctxt->input->consumed;
1.29 daniel 5867:
1.72 daniel 5868: attname = xmlParseAttribute(ctxt, &attvalue);
5869: if ((attname != NULL) && (attvalue != NULL)) {
5870: /*
1.98 daniel 5871: * [ WFC: Unique Att Spec ]
5872: * No attribute name may appear more than once in the same
5873: * start-tag or empty-element tag.
1.72 daniel 5874: */
5875: for (i = 0; i < nbatts;i += 2) {
1.236 veillard 5876: if (xmlStrEqual(atts[i], attname)) {
1.230 veillard 5877: ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
1.72 daniel 5878: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 5879: ctxt->sax->error(ctxt->userData,
5880: "Attribute %s redefined\n",
5881: attname);
1.72 daniel 5882: ctxt->wellFormed = 0;
1.180 daniel 5883: ctxt->disableSAX = 1;
1.119 daniel 5884: xmlFree(attname);
5885: xmlFree(attvalue);
1.98 daniel 5886: goto failed;
1.72 daniel 5887: }
5888: }
5889:
5890: /*
5891: * Add the pair to atts
5892: */
5893: if (atts == NULL) {
5894: maxatts = 10;
1.123 daniel 5895: atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
1.72 daniel 5896: if (atts == NULL) {
1.241 veillard 5897: xmlGenericError(xmlGenericErrorContext,
5898: "malloc of %ld byte failed\n",
1.123 daniel 5899: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 5900: return(NULL);
1.72 daniel 5901: }
1.127 daniel 5902: } else if (nbatts + 4 > maxatts) {
1.72 daniel 5903: maxatts *= 2;
1.233 veillard 5904: atts = (const xmlChar **) xmlRealloc((void *) atts,
5905: maxatts * sizeof(xmlChar *));
1.72 daniel 5906: if (atts == NULL) {
1.241 veillard 5907: xmlGenericError(xmlGenericErrorContext,
5908: "realloc of %ld byte failed\n",
1.123 daniel 5909: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 5910: return(NULL);
1.72 daniel 5911: }
5912: }
5913: atts[nbatts++] = attname;
5914: atts[nbatts++] = attvalue;
5915: atts[nbatts] = NULL;
5916: atts[nbatts + 1] = NULL;
1.176 daniel 5917: } else {
5918: if (attname != NULL)
5919: xmlFree(attname);
5920: if (attvalue != NULL)
5921: xmlFree(attvalue);
1.72 daniel 5922: }
5923:
1.116 daniel 5924: failed:
1.168 daniel 5925:
5926: if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
5927: break;
5928: if (!IS_BLANK(RAW)) {
1.230 veillard 5929: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.168 daniel 5930: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5931: ctxt->sax->error(ctxt->userData,
5932: "attributes construct error\n");
5933: ctxt->wellFormed = 0;
1.180 daniel 5934: ctxt->disableSAX = 1;
1.168 daniel 5935: }
1.42 daniel 5936: SKIP_BLANKS;
1.91 daniel 5937: if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
1.230 veillard 5938: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 5939: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5940: ctxt->sax->error(ctxt->userData,
1.31 daniel 5941: "xmlParseStartTag: problem parsing attributes\n");
1.59 daniel 5942: ctxt->wellFormed = 0;
1.180 daniel 5943: ctxt->disableSAX = 1;
1.29 daniel 5944: break;
1.3 veillard 5945: }
1.91 daniel 5946: GROW;
1.3 veillard 5947: }
5948:
1.43 daniel 5949: /*
1.72 daniel 5950: * SAX: Start of Element !
1.43 daniel 5951: */
1.171 daniel 5952: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
5953: (!ctxt->disableSAX))
1.74 daniel 5954: ctxt->sax->startElement(ctxt->userData, name, atts);
1.43 daniel 5955:
1.72 daniel 5956: if (atts != NULL) {
1.123 daniel 5957: for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
1.233 veillard 5958: xmlFree((void *) atts);
1.72 daniel 5959: }
1.83 daniel 5960: return(name);
1.3 veillard 5961: }
5962:
1.50 daniel 5963: /**
5964: * xmlParseEndTag:
5965: * @ctxt: an XML parser context
5966: *
5967: * parse an end of tag
1.27 daniel 5968: *
5969: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 5970: *
5971: * With namespace
5972: *
1.72 daniel 5973: * [NS 9] ETag ::= '</' QName S? '>'
1.7 veillard 5974: */
5975:
1.55 daniel 5976: void
1.140 daniel 5977: xmlParseEndTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 5978: xmlChar *name;
1.140 daniel 5979: xmlChar *oldname;
1.7 veillard 5980:
1.91 daniel 5981: GROW;
1.152 daniel 5982: if ((RAW != '<') || (NXT(1) != '/')) {
1.230 veillard 5983: ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
1.55 daniel 5984: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5985: ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
1.59 daniel 5986: ctxt->wellFormed = 0;
1.180 daniel 5987: ctxt->disableSAX = 1;
1.27 daniel 5988: return;
5989: }
1.40 daniel 5990: SKIP(2);
1.7 veillard 5991:
1.72 daniel 5992: name = xmlParseName(ctxt);
1.7 veillard 5993:
5994: /*
5995: * We should definitely be at the ending "S? '>'" part
5996: */
1.91 daniel 5997: GROW;
1.42 daniel 5998: SKIP_BLANKS;
1.153 daniel 5999: if ((!IS_CHAR(RAW)) || (RAW != '>')) {
1.230 veillard 6000: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.55 daniel 6001: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6002: ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
1.59 daniel 6003: ctxt->wellFormed = 0;
1.180 daniel 6004: ctxt->disableSAX = 1;
1.7 veillard 6005: } else
1.40 daniel 6006: NEXT;
1.7 veillard 6007:
1.72 daniel 6008: /*
1.98 daniel 6009: * [ WFC: Element Type Match ]
6010: * The Name in an element's end-tag must match the element type in the
6011: * start-tag.
6012: *
1.83 daniel 6013: */
1.147 daniel 6014: if ((name == NULL) || (ctxt->name == NULL) ||
1.236 veillard 6015: (!xmlStrEqual(name, ctxt->name))) {
1.230 veillard 6016: ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
1.147 daniel 6017: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6018: if ((name != NULL) && (ctxt->name != NULL)) {
6019: ctxt->sax->error(ctxt->userData,
6020: "Opening and ending tag mismatch: %s and %s\n",
6021: ctxt->name, name);
6022: } else if (ctxt->name != NULL) {
6023: ctxt->sax->error(ctxt->userData,
6024: "Ending tag eror for: %s\n", ctxt->name);
6025: } else {
6026: ctxt->sax->error(ctxt->userData,
6027: "Ending tag error: internal error ???\n");
6028: }
1.122 daniel 6029:
1.147 daniel 6030: }
1.83 daniel 6031: ctxt->wellFormed = 0;
1.180 daniel 6032: ctxt->disableSAX = 1;
1.83 daniel 6033: }
6034:
6035: /*
1.72 daniel 6036: * SAX: End of Tag
6037: */
1.171 daniel 6038: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6039: (!ctxt->disableSAX))
1.74 daniel 6040: ctxt->sax->endElement(ctxt->userData, name);
1.72 daniel 6041:
6042: if (name != NULL)
1.119 daniel 6043: xmlFree(name);
1.140 daniel 6044: oldname = namePop(ctxt);
1.176 daniel 6045: spacePop(ctxt);
1.140 daniel 6046: if (oldname != NULL) {
6047: #ifdef DEBUG_STACK
1.241 veillard 6048: xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
1.140 daniel 6049: #endif
6050: xmlFree(oldname);
6051: }
1.7 veillard 6052: return;
6053: }
6054:
1.50 daniel 6055: /**
6056: * xmlParseCDSect:
6057: * @ctxt: an XML parser context
6058: *
6059: * Parse escaped pure raw content.
1.29 daniel 6060: *
6061: * [18] CDSect ::= CDStart CData CDEnd
6062: *
6063: * [19] CDStart ::= '<![CDATA['
6064: *
6065: * [20] Data ::= (Char* - (Char* ']]>' Char*))
6066: *
6067: * [21] CDEnd ::= ']]>'
1.3 veillard 6068: */
1.55 daniel 6069: void
6070: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.135 daniel 6071: xmlChar *buf = NULL;
6072: int len = 0;
1.140 daniel 6073: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 6074: int r, rl;
6075: int s, sl;
6076: int cur, l;
1.234 veillard 6077: int count = 0;
1.3 veillard 6078:
1.106 daniel 6079: if ((NXT(0) == '<') && (NXT(1) == '!') &&
1.40 daniel 6080: (NXT(2) == '[') && (NXT(3) == 'C') &&
6081: (NXT(4) == 'D') && (NXT(5) == 'A') &&
6082: (NXT(6) == 'T') && (NXT(7) == 'A') &&
6083: (NXT(8) == '[')) {
6084: SKIP(9);
1.29 daniel 6085: } else
1.45 daniel 6086: return;
1.109 daniel 6087:
6088: ctxt->instate = XML_PARSER_CDATA_SECTION;
1.152 daniel 6089: r = CUR_CHAR(rl);
6090: if (!IS_CHAR(r)) {
1.230 veillard 6091: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.55 daniel 6092: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6093: ctxt->sax->error(ctxt->userData,
1.135 daniel 6094: "CData section not finished\n");
1.59 daniel 6095: ctxt->wellFormed = 0;
1.180 daniel 6096: ctxt->disableSAX = 1;
1.109 daniel 6097: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 6098: return;
1.3 veillard 6099: }
1.152 daniel 6100: NEXTL(rl);
6101: s = CUR_CHAR(sl);
6102: if (!IS_CHAR(s)) {
1.230 veillard 6103: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.55 daniel 6104: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6105: ctxt->sax->error(ctxt->userData,
1.135 daniel 6106: "CData section not finished\n");
1.59 daniel 6107: ctxt->wellFormed = 0;
1.180 daniel 6108: ctxt->disableSAX = 1;
1.109 daniel 6109: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 6110: return;
1.3 veillard 6111: }
1.152 daniel 6112: NEXTL(sl);
6113: cur = CUR_CHAR(l);
1.135 daniel 6114: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6115: if (buf == NULL) {
1.241 veillard 6116: xmlGenericError(xmlGenericErrorContext,
6117: "malloc of %d byte failed\n", size);
1.135 daniel 6118: return;
6119: }
1.108 veillard 6120: while (IS_CHAR(cur) &&
1.110 daniel 6121: ((r != ']') || (s != ']') || (cur != '>'))) {
1.152 daniel 6122: if (len + 5 >= size) {
1.135 daniel 6123: size *= 2;
1.204 veillard 6124: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 6125: if (buf == NULL) {
1.241 veillard 6126: xmlGenericError(xmlGenericErrorContext,
6127: "realloc of %d byte failed\n", size);
1.135 daniel 6128: return;
6129: }
6130: }
1.152 daniel 6131: COPY_BUF(rl,buf,len,r);
1.110 daniel 6132: r = s;
1.152 daniel 6133: rl = sl;
1.110 daniel 6134: s = cur;
1.152 daniel 6135: sl = l;
1.234 veillard 6136: count++;
6137: if (count > 50) {
6138: GROW;
6139: count = 0;
6140: }
1.152 daniel 6141: NEXTL(l);
6142: cur = CUR_CHAR(l);
1.3 veillard 6143: }
1.135 daniel 6144: buf[len] = 0;
1.109 daniel 6145: ctxt->instate = XML_PARSER_CONTENT;
1.152 daniel 6146: if (cur != '>') {
1.230 veillard 6147: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.55 daniel 6148: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6149: ctxt->sax->error(ctxt->userData,
1.135 daniel 6150: "CData section not finished\n%.50s\n", buf);
1.59 daniel 6151: ctxt->wellFormed = 0;
1.180 daniel 6152: ctxt->disableSAX = 1;
1.135 daniel 6153: xmlFree(buf);
1.45 daniel 6154: return;
1.3 veillard 6155: }
1.152 daniel 6156: NEXTL(l);
1.16 daniel 6157:
1.45 daniel 6158: /*
1.135 daniel 6159: * Ok the buffer is to be consumed as cdata.
1.45 daniel 6160: */
1.171 daniel 6161: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.107 daniel 6162: if (ctxt->sax->cdataBlock != NULL)
1.135 daniel 6163: ctxt->sax->cdataBlock(ctxt->userData, buf, len);
1.45 daniel 6164: }
1.135 daniel 6165: xmlFree(buf);
1.2 veillard 6166: }
6167:
1.50 daniel 6168: /**
6169: * xmlParseContent:
6170: * @ctxt: an XML parser context
6171: *
6172: * Parse a content:
1.2 veillard 6173: *
1.27 daniel 6174: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 6175: */
6176:
1.55 daniel 6177: void
6178: xmlParseContent(xmlParserCtxtPtr ctxt) {
1.97 daniel 6179: GROW;
1.176 daniel 6180: while (((RAW != 0) || (ctxt->token != 0)) &&
6181: ((RAW != '<') || (NXT(1) != '/'))) {
1.123 daniel 6182: const xmlChar *test = CUR_PTR;
1.91 daniel 6183: int cons = ctxt->input->consumed;
1.123 daniel 6184: xmlChar tok = ctxt->token;
1.27 daniel 6185:
6186: /*
1.152 daniel 6187: * Handle possible processed charrefs.
6188: */
6189: if (ctxt->token != 0) {
6190: xmlParseCharData(ctxt, 0);
6191: }
6192: /*
1.27 daniel 6193: * First case : a Processing Instruction.
6194: */
1.152 daniel 6195: else if ((RAW == '<') && (NXT(1) == '?')) {
1.27 daniel 6196: xmlParsePI(ctxt);
6197: }
1.72 daniel 6198:
1.27 daniel 6199: /*
6200: * Second case : a CDSection
6201: */
1.152 daniel 6202: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 6203: (NXT(2) == '[') && (NXT(3) == 'C') &&
6204: (NXT(4) == 'D') && (NXT(5) == 'A') &&
6205: (NXT(6) == 'T') && (NXT(7) == 'A') &&
6206: (NXT(8) == '[')) {
1.45 daniel 6207: xmlParseCDSect(ctxt);
1.27 daniel 6208: }
1.72 daniel 6209:
1.27 daniel 6210: /*
6211: * Third case : a comment
6212: */
1.152 daniel 6213: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 6214: (NXT(2) == '-') && (NXT(3) == '-')) {
1.114 daniel 6215: xmlParseComment(ctxt);
1.97 daniel 6216: ctxt->instate = XML_PARSER_CONTENT;
1.27 daniel 6217: }
1.72 daniel 6218:
1.27 daniel 6219: /*
6220: * Fourth case : a sub-element.
6221: */
1.152 daniel 6222: else if (RAW == '<') {
1.72 daniel 6223: xmlParseElement(ctxt);
1.45 daniel 6224: }
1.72 daniel 6225:
1.45 daniel 6226: /*
1.50 daniel 6227: * Fifth case : a reference. If if has not been resolved,
6228: * parsing returns it's Name, create the node
1.45 daniel 6229: */
1.97 daniel 6230:
1.152 daniel 6231: else if (RAW == '&') {
1.77 daniel 6232: xmlParseReference(ctxt);
1.27 daniel 6233: }
1.72 daniel 6234:
1.27 daniel 6235: /*
6236: * Last case, text. Note that References are handled directly.
6237: */
6238: else {
1.45 daniel 6239: xmlParseCharData(ctxt, 0);
1.3 veillard 6240: }
1.14 veillard 6241:
1.91 daniel 6242: GROW;
1.14 veillard 6243: /*
1.45 daniel 6244: * Pop-up of finished entities.
1.14 veillard 6245: */
1.152 daniel 6246: while ((RAW == 0) && (ctxt->inputNr > 1))
1.69 daniel 6247: xmlPopInput(ctxt);
1.135 daniel 6248: SHRINK;
1.45 daniel 6249:
1.113 daniel 6250: if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6251: (tok == ctxt->token)) {
1.230 veillard 6252: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 6253: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6254: ctxt->sax->error(ctxt->userData,
1.59 daniel 6255: "detected an error in element content\n");
6256: ctxt->wellFormed = 0;
1.180 daniel 6257: ctxt->disableSAX = 1;
1.224 veillard 6258: ctxt->instate = XML_PARSER_EOF;
1.29 daniel 6259: break;
6260: }
1.3 veillard 6261: }
1.2 veillard 6262: }
6263:
1.50 daniel 6264: /**
6265: * xmlParseElement:
6266: * @ctxt: an XML parser context
6267: *
6268: * parse an XML element, this is highly recursive
1.26 daniel 6269: *
6270: * [39] element ::= EmptyElemTag | STag content ETag
6271: *
1.98 daniel 6272: * [ WFC: Element Type Match ]
6273: * The Name in an element's end-tag must match the element type in the
6274: * start-tag.
6275: *
6276: * [ VC: Element Valid ]
1.117 daniel 6277: * An element is valid if there is a declaration matching elementdecl
1.99 daniel 6278: * where the Name matches the element type and one of the following holds:
6279: * - The declaration matches EMPTY and the element has no content.
6280: * - The declaration matches children and the sequence of child elements
6281: * belongs to the language generated by the regular expression in the
6282: * content model, with optional white space (characters matching the
6283: * nonterminal S) between each pair of child elements.
6284: * - The declaration matches Mixed and the content consists of character
6285: * data and child elements whose types match names in the content model.
6286: * - The declaration matches ANY, and the types of any child elements have
6287: * been declared.
1.2 veillard 6288: */
1.26 daniel 6289:
1.72 daniel 6290: void
1.69 daniel 6291: xmlParseElement(xmlParserCtxtPtr ctxt) {
1.123 daniel 6292: const xmlChar *openTag = CUR_PTR;
6293: xmlChar *name;
1.140 daniel 6294: xmlChar *oldname;
1.32 daniel 6295: xmlParserNodeInfo node_info;
1.118 daniel 6296: xmlNodePtr ret;
1.2 veillard 6297:
1.32 daniel 6298: /* Capture start position */
1.118 daniel 6299: if (ctxt->record_info) {
6300: node_info.begin_pos = ctxt->input->consumed +
6301: (CUR_PTR - ctxt->input->base);
6302: node_info.begin_line = ctxt->input->line;
6303: }
1.32 daniel 6304:
1.176 daniel 6305: if (ctxt->spaceNr == 0)
6306: spacePush(ctxt, -1);
6307: else
6308: spacePush(ctxt, *ctxt->space);
6309:
1.83 daniel 6310: name = xmlParseStartTag(ctxt);
6311: if (name == NULL) {
1.176 daniel 6312: spacePop(ctxt);
1.83 daniel 6313: return;
6314: }
1.140 daniel 6315: namePush(ctxt, name);
1.118 daniel 6316: ret = ctxt->node;
1.2 veillard 6317:
6318: /*
1.99 daniel 6319: * [ VC: Root Element Type ]
6320: * The Name in the document type declaration must match the element
6321: * type of the root element.
6322: */
1.105 daniel 6323: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 6324: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.102 daniel 6325: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
1.99 daniel 6326:
6327: /*
1.2 veillard 6328: * Check for an Empty Element.
6329: */
1.152 daniel 6330: if ((RAW == '/') && (NXT(1) == '>')) {
1.40 daniel 6331: SKIP(2);
1.171 daniel 6332: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6333: (!ctxt->disableSAX))
1.83 daniel 6334: ctxt->sax->endElement(ctxt->userData, name);
1.140 daniel 6335: oldname = namePop(ctxt);
1.176 daniel 6336: spacePop(ctxt);
1.140 daniel 6337: if (oldname != NULL) {
6338: #ifdef DEBUG_STACK
1.241 veillard 6339: xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
1.140 daniel 6340: #endif
6341: xmlFree(oldname);
1.211 veillard 6342: }
6343: if ( ret != NULL && ctxt->record_info ) {
6344: node_info.end_pos = ctxt->input->consumed +
6345: (CUR_PTR - ctxt->input->base);
6346: node_info.end_line = ctxt->input->line;
6347: node_info.node = ret;
6348: xmlParserAddNodeInfo(ctxt, &node_info);
1.140 daniel 6349: }
1.72 daniel 6350: return;
1.2 veillard 6351: }
1.152 daniel 6352: if (RAW == '>') {
1.91 daniel 6353: NEXT;
6354: } else {
1.230 veillard 6355: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.55 daniel 6356: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6357: ctxt->sax->error(ctxt->userData,
6358: "Couldn't find end of Start Tag\n%.30s\n",
1.57 daniel 6359: openTag);
1.59 daniel 6360: ctxt->wellFormed = 0;
1.180 daniel 6361: ctxt->disableSAX = 1;
1.45 daniel 6362:
6363: /*
6364: * end of parsing of this node.
6365: */
6366: nodePop(ctxt);
1.140 daniel 6367: oldname = namePop(ctxt);
1.176 daniel 6368: spacePop(ctxt);
1.140 daniel 6369: if (oldname != NULL) {
6370: #ifdef DEBUG_STACK
1.241 veillard 6371: xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
1.140 daniel 6372: #endif
6373: xmlFree(oldname);
6374: }
1.118 daniel 6375:
6376: /*
6377: * Capture end position and add node
6378: */
6379: if ( ret != NULL && ctxt->record_info ) {
6380: node_info.end_pos = ctxt->input->consumed +
6381: (CUR_PTR - ctxt->input->base);
6382: node_info.end_line = ctxt->input->line;
6383: node_info.node = ret;
6384: xmlParserAddNodeInfo(ctxt, &node_info);
6385: }
1.72 daniel 6386: return;
1.2 veillard 6387: }
6388:
6389: /*
6390: * Parse the content of the element:
6391: */
1.45 daniel 6392: xmlParseContent(ctxt);
1.153 daniel 6393: if (!IS_CHAR(RAW)) {
1.230 veillard 6394: ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
1.55 daniel 6395: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6396: ctxt->sax->error(ctxt->userData,
1.57 daniel 6397: "Premature end of data in tag %.30s\n", openTag);
1.59 daniel 6398: ctxt->wellFormed = 0;
1.180 daniel 6399: ctxt->disableSAX = 1;
1.45 daniel 6400:
6401: /*
6402: * end of parsing of this node.
6403: */
6404: nodePop(ctxt);
1.140 daniel 6405: oldname = namePop(ctxt);
1.176 daniel 6406: spacePop(ctxt);
1.140 daniel 6407: if (oldname != NULL) {
6408: #ifdef DEBUG_STACK
1.241 veillard 6409: xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
1.140 daniel 6410: #endif
6411: xmlFree(oldname);
6412: }
1.72 daniel 6413: return;
1.2 veillard 6414: }
6415:
6416: /*
1.27 daniel 6417: * parse the end of tag: '</' should be here.
1.2 veillard 6418: */
1.140 daniel 6419: xmlParseEndTag(ctxt);
1.118 daniel 6420:
6421: /*
6422: * Capture end position and add node
6423: */
6424: if ( ret != NULL && ctxt->record_info ) {
6425: node_info.end_pos = ctxt->input->consumed +
6426: (CUR_PTR - ctxt->input->base);
6427: node_info.end_line = ctxt->input->line;
6428: node_info.node = ret;
6429: xmlParserAddNodeInfo(ctxt, &node_info);
6430: }
1.2 veillard 6431: }
6432:
1.50 daniel 6433: /**
6434: * xmlParseVersionNum:
6435: * @ctxt: an XML parser context
6436: *
6437: * parse the XML version value.
1.29 daniel 6438: *
6439: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
1.68 daniel 6440: *
6441: * Returns the string giving the XML version number, or NULL
1.29 daniel 6442: */
1.123 daniel 6443: xmlChar *
1.55 daniel 6444: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.135 daniel 6445: xmlChar *buf = NULL;
6446: int len = 0;
6447: int size = 10;
6448: xmlChar cur;
1.29 daniel 6449:
1.135 daniel 6450: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6451: if (buf == NULL) {
1.241 veillard 6452: xmlGenericError(xmlGenericErrorContext,
6453: "malloc of %d byte failed\n", size);
1.135 daniel 6454: return(NULL);
6455: }
6456: cur = CUR;
1.152 daniel 6457: while (((cur >= 'a') && (cur <= 'z')) ||
6458: ((cur >= 'A') && (cur <= 'Z')) ||
6459: ((cur >= '0') && (cur <= '9')) ||
6460: (cur == '_') || (cur == '.') ||
6461: (cur == ':') || (cur == '-')) {
1.135 daniel 6462: if (len + 1 >= size) {
6463: size *= 2;
1.204 veillard 6464: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 6465: if (buf == NULL) {
1.241 veillard 6466: xmlGenericError(xmlGenericErrorContext,
6467: "realloc of %d byte failed\n", size);
1.135 daniel 6468: return(NULL);
6469: }
6470: }
6471: buf[len++] = cur;
6472: NEXT;
6473: cur=CUR;
6474: }
6475: buf[len] = 0;
6476: return(buf);
1.29 daniel 6477: }
6478:
1.50 daniel 6479: /**
6480: * xmlParseVersionInfo:
6481: * @ctxt: an XML parser context
6482: *
6483: * parse the XML version.
1.29 daniel 6484: *
6485: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6486: *
6487: * [25] Eq ::= S? '=' S?
1.50 daniel 6488: *
1.68 daniel 6489: * Returns the version string, e.g. "1.0"
1.29 daniel 6490: */
6491:
1.123 daniel 6492: xmlChar *
1.55 daniel 6493: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
1.123 daniel 6494: xmlChar *version = NULL;
6495: const xmlChar *q;
1.29 daniel 6496:
1.152 daniel 6497: if ((RAW == 'v') && (NXT(1) == 'e') &&
1.40 daniel 6498: (NXT(2) == 'r') && (NXT(3) == 's') &&
6499: (NXT(4) == 'i') && (NXT(5) == 'o') &&
6500: (NXT(6) == 'n')) {
6501: SKIP(7);
1.42 daniel 6502: SKIP_BLANKS;
1.152 daniel 6503: if (RAW != '=') {
1.230 veillard 6504: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.55 daniel 6505: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6506: ctxt->sax->error(ctxt->userData,
6507: "xmlParseVersionInfo : expected '='\n");
1.59 daniel 6508: ctxt->wellFormed = 0;
1.180 daniel 6509: ctxt->disableSAX = 1;
1.31 daniel 6510: return(NULL);
6511: }
1.40 daniel 6512: NEXT;
1.42 daniel 6513: SKIP_BLANKS;
1.152 daniel 6514: if (RAW == '"') {
1.40 daniel 6515: NEXT;
6516: q = CUR_PTR;
1.29 daniel 6517: version = xmlParseVersionNum(ctxt);
1.152 daniel 6518: if (RAW != '"') {
1.230 veillard 6519: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 6520: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6521: ctxt->sax->error(ctxt->userData,
6522: "String not closed\n%.50s\n", q);
1.59 daniel 6523: ctxt->wellFormed = 0;
1.180 daniel 6524: ctxt->disableSAX = 1;
1.55 daniel 6525: } else
1.40 daniel 6526: NEXT;
1.152 daniel 6527: } else if (RAW == '\''){
1.40 daniel 6528: NEXT;
6529: q = CUR_PTR;
1.29 daniel 6530: version = xmlParseVersionNum(ctxt);
1.152 daniel 6531: if (RAW != '\'') {
1.230 veillard 6532: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 6533: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6534: ctxt->sax->error(ctxt->userData,
6535: "String not closed\n%.50s\n", q);
1.59 daniel 6536: ctxt->wellFormed = 0;
1.180 daniel 6537: ctxt->disableSAX = 1;
1.55 daniel 6538: } else
1.40 daniel 6539: NEXT;
1.31 daniel 6540: } else {
1.230 veillard 6541: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.55 daniel 6542: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6543: ctxt->sax->error(ctxt->userData,
1.59 daniel 6544: "xmlParseVersionInfo : expected ' or \"\n");
1.122 daniel 6545: ctxt->wellFormed = 0;
1.180 daniel 6546: ctxt->disableSAX = 1;
1.29 daniel 6547: }
6548: }
6549: return(version);
6550: }
6551:
1.50 daniel 6552: /**
6553: * xmlParseEncName:
6554: * @ctxt: an XML parser context
6555: *
6556: * parse the XML encoding name
1.29 daniel 6557: *
6558: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1.50 daniel 6559: *
1.68 daniel 6560: * Returns the encoding name value or NULL
1.29 daniel 6561: */
1.123 daniel 6562: xmlChar *
1.55 daniel 6563: xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.135 daniel 6564: xmlChar *buf = NULL;
6565: int len = 0;
6566: int size = 10;
6567: xmlChar cur;
1.29 daniel 6568:
1.135 daniel 6569: cur = CUR;
6570: if (((cur >= 'a') && (cur <= 'z')) ||
6571: ((cur >= 'A') && (cur <= 'Z'))) {
6572: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6573: if (buf == NULL) {
1.241 veillard 6574: xmlGenericError(xmlGenericErrorContext,
6575: "malloc of %d byte failed\n", size);
1.135 daniel 6576: return(NULL);
6577: }
6578:
6579: buf[len++] = cur;
1.40 daniel 6580: NEXT;
1.135 daniel 6581: cur = CUR;
1.152 daniel 6582: while (((cur >= 'a') && (cur <= 'z')) ||
6583: ((cur >= 'A') && (cur <= 'Z')) ||
6584: ((cur >= '0') && (cur <= '9')) ||
6585: (cur == '.') || (cur == '_') ||
6586: (cur == '-')) {
1.135 daniel 6587: if (len + 1 >= size) {
6588: size *= 2;
1.204 veillard 6589: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 6590: if (buf == NULL) {
1.241 veillard 6591: xmlGenericError(xmlGenericErrorContext,
6592: "realloc of %d byte failed\n", size);
1.135 daniel 6593: return(NULL);
6594: }
6595: }
6596: buf[len++] = cur;
6597: NEXT;
6598: cur = CUR;
6599: if (cur == 0) {
6600: SHRINK;
6601: GROW;
6602: cur = CUR;
6603: }
6604: }
6605: buf[len] = 0;
1.29 daniel 6606: } else {
1.230 veillard 6607: ctxt->errNo = XML_ERR_ENCODING_NAME;
1.55 daniel 6608: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6609: ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
1.59 daniel 6610: ctxt->wellFormed = 0;
1.180 daniel 6611: ctxt->disableSAX = 1;
1.29 daniel 6612: }
1.135 daniel 6613: return(buf);
1.29 daniel 6614: }
6615:
1.50 daniel 6616: /**
6617: * xmlParseEncodingDecl:
6618: * @ctxt: an XML parser context
6619: *
6620: * parse the XML encoding declaration
1.29 daniel 6621: *
6622: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
1.50 daniel 6623: *
1.229 veillard 6624: * this setups the conversion filters.
1.50 daniel 6625: *
1.68 daniel 6626: * Returns the encoding value or NULL
1.29 daniel 6627: */
6628:
1.123 daniel 6629: xmlChar *
1.55 daniel 6630: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 6631: xmlChar *encoding = NULL;
6632: const xmlChar *q;
1.29 daniel 6633:
1.42 daniel 6634: SKIP_BLANKS;
1.152 daniel 6635: if ((RAW == 'e') && (NXT(1) == 'n') &&
1.40 daniel 6636: (NXT(2) == 'c') && (NXT(3) == 'o') &&
6637: (NXT(4) == 'd') && (NXT(5) == 'i') &&
6638: (NXT(6) == 'n') && (NXT(7) == 'g')) {
6639: SKIP(8);
1.42 daniel 6640: SKIP_BLANKS;
1.152 daniel 6641: if (RAW != '=') {
1.230 veillard 6642: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.55 daniel 6643: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6644: ctxt->sax->error(ctxt->userData,
6645: "xmlParseEncodingDecl : expected '='\n");
1.59 daniel 6646: ctxt->wellFormed = 0;
1.180 daniel 6647: ctxt->disableSAX = 1;
1.31 daniel 6648: return(NULL);
6649: }
1.40 daniel 6650: NEXT;
1.42 daniel 6651: SKIP_BLANKS;
1.152 daniel 6652: if (RAW == '"') {
1.40 daniel 6653: NEXT;
6654: q = CUR_PTR;
1.29 daniel 6655: encoding = xmlParseEncName(ctxt);
1.152 daniel 6656: if (RAW != '"') {
1.230 veillard 6657: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 6658: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6659: ctxt->sax->error(ctxt->userData,
6660: "String not closed\n%.50s\n", q);
1.59 daniel 6661: ctxt->wellFormed = 0;
1.180 daniel 6662: ctxt->disableSAX = 1;
1.55 daniel 6663: } else
1.40 daniel 6664: NEXT;
1.152 daniel 6665: } else if (RAW == '\''){
1.40 daniel 6666: NEXT;
6667: q = CUR_PTR;
1.29 daniel 6668: encoding = xmlParseEncName(ctxt);
1.152 daniel 6669: if (RAW != '\'') {
1.230 veillard 6670: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 6671: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6672: ctxt->sax->error(ctxt->userData,
6673: "String not closed\n%.50s\n", q);
1.59 daniel 6674: ctxt->wellFormed = 0;
1.180 daniel 6675: ctxt->disableSAX = 1;
1.55 daniel 6676: } else
1.40 daniel 6677: NEXT;
1.152 daniel 6678: } else if (RAW == '"'){
1.230 veillard 6679: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.55 daniel 6680: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6681: ctxt->sax->error(ctxt->userData,
1.59 daniel 6682: "xmlParseEncodingDecl : expected ' or \"\n");
6683: ctxt->wellFormed = 0;
1.180 daniel 6684: ctxt->disableSAX = 1;
1.29 daniel 6685: }
1.193 daniel 6686: if (encoding != NULL) {
6687: xmlCharEncoding enc;
6688: xmlCharEncodingHandlerPtr handler;
6689:
1.195 daniel 6690: if (ctxt->input->encoding != NULL)
6691: xmlFree((xmlChar *) ctxt->input->encoding);
6692: ctxt->input->encoding = encoding;
6693:
1.193 daniel 6694: enc = xmlParseCharEncoding((const char *) encoding);
6695: /*
6696: * registered set of known encodings
6697: */
6698: if (enc != XML_CHAR_ENCODING_ERROR) {
6699: xmlSwitchEncoding(ctxt, enc);
6700: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6701: xmlFree(encoding);
6702: return(NULL);
6703: }
6704: } else {
6705: /*
6706: * fallback for unknown encodings
6707: */
6708: handler = xmlFindCharEncodingHandler((const char *) encoding);
6709: if (handler != NULL) {
6710: xmlSwitchToEncoding(ctxt, handler);
6711: } else {
6712: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.208 veillard 6713: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6714: ctxt->sax->error(ctxt->userData,
6715: "Unsupported encoding %s\n", encoding);
1.193 daniel 6716: return(NULL);
6717: }
6718: }
6719: }
1.29 daniel 6720: }
6721: return(encoding);
6722: }
6723:
1.50 daniel 6724: /**
6725: * xmlParseSDDecl:
6726: * @ctxt: an XML parser context
6727: *
6728: * parse the XML standalone declaration
1.29 daniel 6729: *
6730: * [32] SDDecl ::= S 'standalone' Eq
6731: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
1.99 daniel 6732: *
6733: * [ VC: Standalone Document Declaration ]
6734: * TODO The standalone document declaration must have the value "no"
6735: * if any external markup declarations contain declarations of:
6736: * - attributes with default values, if elements to which these
6737: * attributes apply appear in the document without specifications
6738: * of values for these attributes, or
6739: * - entities (other than amp, lt, gt, apos, quot), if references
6740: * to those entities appear in the document, or
6741: * - attributes with values subject to normalization, where the
6742: * attribute appears in the document with a value which will change
6743: * as a result of normalization, or
6744: * - element types with element content, if white space occurs directly
6745: * within any instance of those types.
1.68 daniel 6746: *
6747: * Returns 1 if standalone, 0 otherwise
1.29 daniel 6748: */
6749:
1.55 daniel 6750: int
6751: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 6752: int standalone = -1;
6753:
1.42 daniel 6754: SKIP_BLANKS;
1.152 daniel 6755: if ((RAW == 's') && (NXT(1) == 't') &&
1.40 daniel 6756: (NXT(2) == 'a') && (NXT(3) == 'n') &&
6757: (NXT(4) == 'd') && (NXT(5) == 'a') &&
6758: (NXT(6) == 'l') && (NXT(7) == 'o') &&
6759: (NXT(8) == 'n') && (NXT(9) == 'e')) {
6760: SKIP(10);
1.81 daniel 6761: SKIP_BLANKS;
1.152 daniel 6762: if (RAW != '=') {
1.230 veillard 6763: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.55 daniel 6764: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6765: ctxt->sax->error(ctxt->userData,
1.59 daniel 6766: "XML standalone declaration : expected '='\n");
6767: ctxt->wellFormed = 0;
1.180 daniel 6768: ctxt->disableSAX = 1;
1.32 daniel 6769: return(standalone);
6770: }
1.40 daniel 6771: NEXT;
1.42 daniel 6772: SKIP_BLANKS;
1.152 daniel 6773: if (RAW == '\''){
1.40 daniel 6774: NEXT;
1.152 daniel 6775: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 6776: standalone = 0;
1.40 daniel 6777: SKIP(2);
1.152 daniel 6778: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 6779: (NXT(2) == 's')) {
1.29 daniel 6780: standalone = 1;
1.40 daniel 6781: SKIP(3);
1.29 daniel 6782: } else {
1.230 veillard 6783: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.55 daniel 6784: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6785: ctxt->sax->error(ctxt->userData,
6786: "standalone accepts only 'yes' or 'no'\n");
1.59 daniel 6787: ctxt->wellFormed = 0;
1.180 daniel 6788: ctxt->disableSAX = 1;
1.29 daniel 6789: }
1.152 daniel 6790: if (RAW != '\'') {
1.230 veillard 6791: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 6792: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6793: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 6794: ctxt->wellFormed = 0;
1.180 daniel 6795: ctxt->disableSAX = 1;
1.55 daniel 6796: } else
1.40 daniel 6797: NEXT;
1.152 daniel 6798: } else if (RAW == '"'){
1.40 daniel 6799: NEXT;
1.152 daniel 6800: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 6801: standalone = 0;
1.40 daniel 6802: SKIP(2);
1.152 daniel 6803: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 6804: (NXT(2) == 's')) {
1.29 daniel 6805: standalone = 1;
1.40 daniel 6806: SKIP(3);
1.29 daniel 6807: } else {
1.230 veillard 6808: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.55 daniel 6809: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6810: ctxt->sax->error(ctxt->userData,
1.59 daniel 6811: "standalone accepts only 'yes' or 'no'\n");
6812: ctxt->wellFormed = 0;
1.180 daniel 6813: ctxt->disableSAX = 1;
1.29 daniel 6814: }
1.152 daniel 6815: if (RAW != '"') {
1.230 veillard 6816: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 6817: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6818: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 6819: ctxt->wellFormed = 0;
1.180 daniel 6820: ctxt->disableSAX = 1;
1.55 daniel 6821: } else
1.40 daniel 6822: NEXT;
1.37 daniel 6823: } else {
1.230 veillard 6824: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.55 daniel 6825: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6826: ctxt->sax->error(ctxt->userData,
6827: "Standalone value not found\n");
1.59 daniel 6828: ctxt->wellFormed = 0;
1.180 daniel 6829: ctxt->disableSAX = 1;
1.37 daniel 6830: }
1.29 daniel 6831: }
6832: return(standalone);
6833: }
6834:
1.50 daniel 6835: /**
6836: * xmlParseXMLDecl:
6837: * @ctxt: an XML parser context
6838: *
6839: * parse an XML declaration header
1.29 daniel 6840: *
6841: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 6842: */
6843:
1.55 daniel 6844: void
6845: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 6846: xmlChar *version;
1.1 veillard 6847:
6848: /*
1.19 daniel 6849: * We know that '<?xml' is here.
1.1 veillard 6850: */
1.40 daniel 6851: SKIP(5);
1.1 veillard 6852:
1.153 daniel 6853: if (!IS_BLANK(RAW)) {
1.230 veillard 6854: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 6855: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6856: ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
1.59 daniel 6857: ctxt->wellFormed = 0;
1.180 daniel 6858: ctxt->disableSAX = 1;
1.59 daniel 6859: }
1.42 daniel 6860: SKIP_BLANKS;
1.1 veillard 6861:
6862: /*
1.29 daniel 6863: * We should have the VersionInfo here.
1.1 veillard 6864: */
1.29 daniel 6865: version = xmlParseVersionInfo(ctxt);
6866: if (version == NULL)
1.45 daniel 6867: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.72 daniel 6868: ctxt->version = xmlStrdup(version);
1.119 daniel 6869: xmlFree(version);
1.29 daniel 6870:
6871: /*
6872: * We may have the encoding declaration
6873: */
1.153 daniel 6874: if (!IS_BLANK(RAW)) {
1.152 daniel 6875: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 6876: SKIP(2);
6877: return;
6878: }
1.230 veillard 6879: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 6880: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6881: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 6882: ctxt->wellFormed = 0;
1.180 daniel 6883: ctxt->disableSAX = 1;
1.59 daniel 6884: }
1.195 daniel 6885: xmlParseEncodingDecl(ctxt);
1.193 daniel 6886: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6887: /*
6888: * The XML REC instructs us to stop parsing right here
6889: */
6890: return;
6891: }
1.1 veillard 6892:
6893: /*
1.29 daniel 6894: * We may have the standalone status.
1.1 veillard 6895: */
1.164 daniel 6896: if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
1.152 daniel 6897: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 6898: SKIP(2);
6899: return;
6900: }
1.230 veillard 6901: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 6902: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6903: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 6904: ctxt->wellFormed = 0;
1.180 daniel 6905: ctxt->disableSAX = 1;
1.59 daniel 6906: }
6907: SKIP_BLANKS;
1.167 daniel 6908: ctxt->input->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 6909:
1.42 daniel 6910: SKIP_BLANKS;
1.152 daniel 6911: if ((RAW == '?') && (NXT(1) == '>')) {
1.40 daniel 6912: SKIP(2);
1.152 daniel 6913: } else if (RAW == '>') {
1.31 daniel 6914: /* Deprecated old WD ... */
1.230 veillard 6915: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.55 daniel 6916: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6917: ctxt->sax->error(ctxt->userData,
6918: "XML declaration must end-up with '?>'\n");
1.59 daniel 6919: ctxt->wellFormed = 0;
1.180 daniel 6920: ctxt->disableSAX = 1;
1.40 daniel 6921: NEXT;
1.29 daniel 6922: } else {
1.230 veillard 6923: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.55 daniel 6924: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6925: ctxt->sax->error(ctxt->userData,
6926: "parsing XML declaration: '?>' expected\n");
1.59 daniel 6927: ctxt->wellFormed = 0;
1.180 daniel 6928: ctxt->disableSAX = 1;
1.40 daniel 6929: MOVETO_ENDTAG(CUR_PTR);
6930: NEXT;
1.29 daniel 6931: }
1.1 veillard 6932: }
6933:
1.50 daniel 6934: /**
6935: * xmlParseMisc:
6936: * @ctxt: an XML parser context
6937: *
6938: * parse an XML Misc* optionnal field.
1.21 daniel 6939: *
1.22 daniel 6940: * [27] Misc ::= Comment | PI | S
1.1 veillard 6941: */
6942:
1.55 daniel 6943: void
6944: xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.152 daniel 6945: while (((RAW == '<') && (NXT(1) == '?')) ||
6946: ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 6947: (NXT(2) == '-') && (NXT(3) == '-')) ||
6948: IS_BLANK(CUR)) {
1.152 daniel 6949: if ((RAW == '<') && (NXT(1) == '?')) {
1.16 daniel 6950: xmlParsePI(ctxt);
1.40 daniel 6951: } else if (IS_BLANK(CUR)) {
6952: NEXT;
1.1 veillard 6953: } else
1.114 daniel 6954: xmlParseComment(ctxt);
1.1 veillard 6955: }
6956: }
6957:
1.50 daniel 6958: /**
1.181 daniel 6959: * xmlParseDocument:
1.50 daniel 6960: * @ctxt: an XML parser context
6961: *
6962: * parse an XML document (and build a tree if using the standard SAX
6963: * interface).
1.21 daniel 6964: *
1.22 daniel 6965: * [1] document ::= prolog element Misc*
1.29 daniel 6966: *
6967: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.50 daniel 6968: *
1.68 daniel 6969: * Returns 0, -1 in case of error. the parser context is augmented
1.50 daniel 6970: * as a result of the parsing.
1.1 veillard 6971: */
6972:
1.55 daniel 6973: int
6974: xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.156 daniel 6975: xmlChar start[4];
6976: xmlCharEncoding enc;
6977:
1.235 veillard 6978: xmlInitParser();
1.45 daniel 6979:
1.91 daniel 6980: GROW;
6981:
1.14 veillard 6982: /*
1.44 daniel 6983: * SAX: beginning of the document processing.
6984: */
1.72 daniel 6985: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
1.74 daniel 6986: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
1.44 daniel 6987:
1.156 daniel 6988: /*
6989: * Get the 4 first bytes and decode the charset
6990: * if enc != XML_CHAR_ENCODING_NONE
6991: * plug some encoding conversion routines.
6992: */
6993: start[0] = RAW;
6994: start[1] = NXT(1);
6995: start[2] = NXT(2);
6996: start[3] = NXT(3);
6997: enc = xmlDetectCharEncoding(start, 4);
6998: if (enc != XML_CHAR_ENCODING_NONE) {
6999: xmlSwitchEncoding(ctxt, enc);
7000: }
7001:
1.1 veillard 7002:
1.59 daniel 7003: if (CUR == 0) {
1.230 veillard 7004: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 7005: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7006: ctxt->sax->error(ctxt->userData, "Document is empty\n");
1.59 daniel 7007: ctxt->wellFormed = 0;
1.180 daniel 7008: ctxt->disableSAX = 1;
1.59 daniel 7009: }
1.1 veillard 7010:
7011: /*
7012: * Check for the XMLDecl in the Prolog.
7013: */
1.91 daniel 7014: GROW;
1.152 daniel 7015: if ((RAW == '<') && (NXT(1) == '?') &&
1.40 daniel 7016: (NXT(2) == 'x') && (NXT(3) == 'm') &&
1.142 daniel 7017: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.196 daniel 7018:
7019: /*
7020: * Note that we will switch encoding on the fly.
7021: */
1.19 daniel 7022: xmlParseXMLDecl(ctxt);
1.193 daniel 7023: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7024: /*
7025: * The XML REC instructs us to stop parsing right here
7026: */
7027: return(-1);
7028: }
1.167 daniel 7029: ctxt->standalone = ctxt->input->standalone;
1.42 daniel 7030: SKIP_BLANKS;
1.1 veillard 7031: } else {
1.72 daniel 7032: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.1 veillard 7033: }
1.171 daniel 7034: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
1.74 daniel 7035: ctxt->sax->startDocument(ctxt->userData);
1.1 veillard 7036:
7037: /*
7038: * The Misc part of the Prolog
7039: */
1.91 daniel 7040: GROW;
1.16 daniel 7041: xmlParseMisc(ctxt);
1.1 veillard 7042:
7043: /*
1.29 daniel 7044: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 7045: * (doctypedecl Misc*)?
7046: */
1.91 daniel 7047: GROW;
1.152 daniel 7048: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7049: (NXT(2) == 'D') && (NXT(3) == 'O') &&
7050: (NXT(4) == 'C') && (NXT(5) == 'T') &&
7051: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7052: (NXT(8) == 'E')) {
1.165 daniel 7053:
1.166 daniel 7054: ctxt->inSubset = 1;
1.22 daniel 7055: xmlParseDocTypeDecl(ctxt);
1.152 daniel 7056: if (RAW == '[') {
1.140 daniel 7057: ctxt->instate = XML_PARSER_DTD;
7058: xmlParseInternalSubset(ctxt);
7059: }
1.165 daniel 7060:
7061: /*
7062: * Create and update the external subset.
7063: */
1.166 daniel 7064: ctxt->inSubset = 2;
1.171 daniel 7065: if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7066: (!ctxt->disableSAX))
1.165 daniel 7067: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7068: ctxt->extSubSystem, ctxt->extSubURI);
1.166 daniel 7069: ctxt->inSubset = 0;
1.165 daniel 7070:
7071:
1.96 daniel 7072: ctxt->instate = XML_PARSER_PROLOG;
1.22 daniel 7073: xmlParseMisc(ctxt);
1.21 daniel 7074: }
7075:
7076: /*
7077: * Time to start parsing the tree itself
1.1 veillard 7078: */
1.91 daniel 7079: GROW;
1.152 daniel 7080: if (RAW != '<') {
1.230 veillard 7081: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 7082: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7083: ctxt->sax->error(ctxt->userData,
1.151 daniel 7084: "Start tag expected, '<' not found\n");
1.59 daniel 7085: ctxt->wellFormed = 0;
1.180 daniel 7086: ctxt->disableSAX = 1;
1.140 daniel 7087: ctxt->instate = XML_PARSER_EOF;
7088: } else {
7089: ctxt->instate = XML_PARSER_CONTENT;
7090: xmlParseElement(ctxt);
7091: ctxt->instate = XML_PARSER_EPILOG;
7092:
7093:
7094: /*
7095: * The Misc part at the end
7096: */
7097: xmlParseMisc(ctxt);
7098:
1.152 daniel 7099: if (RAW != 0) {
1.230 veillard 7100: ctxt->errNo = XML_ERR_DOCUMENT_END;
1.140 daniel 7101: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7102: ctxt->sax->error(ctxt->userData,
7103: "Extra content at the end of the document\n");
7104: ctxt->wellFormed = 0;
1.180 daniel 7105: ctxt->disableSAX = 1;
1.140 daniel 7106: }
7107: ctxt->instate = XML_PARSER_EOF;
1.59 daniel 7108: }
7109:
1.44 daniel 7110: /*
7111: * SAX: end of the document processing.
7112: */
1.171 daniel 7113: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7114: (!ctxt->disableSAX))
1.74 daniel 7115: ctxt->sax->endDocument(ctxt->userData);
1.151 daniel 7116:
1.59 daniel 7117: if (! ctxt->wellFormed) return(-1);
1.16 daniel 7118: return(0);
7119: }
7120:
1.229 veillard 7121: /**
7122: * xmlParseExtParsedEnt:
7123: * @ctxt: an XML parser context
7124: *
7125: * parse a genreral parsed entity
7126: * An external general parsed entity is well-formed if it matches the
7127: * production labeled extParsedEnt.
7128: *
7129: * [78] extParsedEnt ::= TextDecl? content
7130: *
7131: * Returns 0, -1 in case of error. the parser context is augmented
7132: * as a result of the parsing.
7133: */
7134:
7135: int
7136: xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7137: xmlChar start[4];
7138: xmlCharEncoding enc;
7139:
7140: xmlDefaultSAXHandlerInit();
7141:
7142: GROW;
7143:
7144: /*
7145: * SAX: beginning of the document processing.
7146: */
7147: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7148: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7149:
7150: /*
7151: * Get the 4 first bytes and decode the charset
7152: * if enc != XML_CHAR_ENCODING_NONE
7153: * plug some encoding conversion routines.
7154: */
7155: start[0] = RAW;
7156: start[1] = NXT(1);
7157: start[2] = NXT(2);
7158: start[3] = NXT(3);
7159: enc = xmlDetectCharEncoding(start, 4);
7160: if (enc != XML_CHAR_ENCODING_NONE) {
7161: xmlSwitchEncoding(ctxt, enc);
7162: }
7163:
7164:
7165: if (CUR == 0) {
1.230 veillard 7166: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.229 veillard 7167: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7168: ctxt->sax->error(ctxt->userData, "Document is empty\n");
7169: ctxt->wellFormed = 0;
7170: ctxt->disableSAX = 1;
7171: }
7172:
7173: /*
7174: * Check for the XMLDecl in the Prolog.
7175: */
7176: GROW;
7177: if ((RAW == '<') && (NXT(1) == '?') &&
7178: (NXT(2) == 'x') && (NXT(3) == 'm') &&
7179: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7180:
7181: /*
7182: * Note that we will switch encoding on the fly.
7183: */
7184: xmlParseXMLDecl(ctxt);
7185: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7186: /*
7187: * The XML REC instructs us to stop parsing right here
7188: */
7189: return(-1);
7190: }
7191: SKIP_BLANKS;
7192: } else {
7193: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7194: }
7195: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7196: ctxt->sax->startDocument(ctxt->userData);
7197:
7198: /*
7199: * Doing validity checking on chunk doesn't make sense
7200: */
7201: ctxt->instate = XML_PARSER_CONTENT;
7202: ctxt->validate = 0;
7203: ctxt->depth = 0;
7204:
7205: xmlParseContent(ctxt);
7206:
7207: if ((RAW == '<') && (NXT(1) == '/')) {
1.230 veillard 7208: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.229 veillard 7209: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7210: ctxt->sax->error(ctxt->userData,
7211: "chunk is not well balanced\n");
7212: ctxt->wellFormed = 0;
7213: ctxt->disableSAX = 1;
7214: } else if (RAW != 0) {
1.230 veillard 7215: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
1.229 veillard 7216: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7217: ctxt->sax->error(ctxt->userData,
7218: "extra content at the end of well balanced chunk\n");
7219: ctxt->wellFormed = 0;
7220: ctxt->disableSAX = 1;
7221: }
7222:
7223: /*
7224: * SAX: end of the document processing.
7225: */
7226: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7227: (!ctxt->disableSAX))
7228: ctxt->sax->endDocument(ctxt->userData);
7229:
7230: if (! ctxt->wellFormed) return(-1);
7231: return(0);
7232: }
7233:
1.98 daniel 7234: /************************************************************************
7235: * *
1.128 daniel 7236: * Progressive parsing interfaces *
7237: * *
7238: ************************************************************************/
7239:
7240: /**
7241: * xmlParseLookupSequence:
7242: * @ctxt: an XML parser context
7243: * @first: the first char to lookup
1.140 daniel 7244: * @next: the next char to lookup or zero
7245: * @third: the next char to lookup or zero
1.128 daniel 7246: *
1.140 daniel 7247: * Try to find if a sequence (first, next, third) or just (first next) or
7248: * (first) is available in the input stream.
7249: * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7250: * to avoid rescanning sequences of bytes, it DOES change the state of the
7251: * parser, do not use liberally.
1.128 daniel 7252: *
1.140 daniel 7253: * Returns the index to the current parsing point if the full sequence
7254: * is available, -1 otherwise.
1.128 daniel 7255: */
7256: int
1.140 daniel 7257: xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7258: xmlChar next, xmlChar third) {
7259: int base, len;
7260: xmlParserInputPtr in;
7261: const xmlChar *buf;
7262:
7263: in = ctxt->input;
7264: if (in == NULL) return(-1);
7265: base = in->cur - in->base;
7266: if (base < 0) return(-1);
7267: if (ctxt->checkIndex > base)
7268: base = ctxt->checkIndex;
7269: if (in->buf == NULL) {
7270: buf = in->base;
7271: len = in->length;
7272: } else {
7273: buf = in->buf->buffer->content;
7274: len = in->buf->buffer->use;
7275: }
7276: /* take into account the sequence length */
7277: if (third) len -= 2;
7278: else if (next) len --;
7279: for (;base < len;base++) {
7280: if (buf[base] == first) {
7281: if (third != 0) {
7282: if ((buf[base + 1] != next) ||
7283: (buf[base + 2] != third)) continue;
7284: } else if (next != 0) {
7285: if (buf[base + 1] != next) continue;
7286: }
7287: ctxt->checkIndex = 0;
7288: #ifdef DEBUG_PUSH
7289: if (next == 0)
1.241 veillard 7290: xmlGenericError(xmlGenericErrorContext,
7291: "PP: lookup '%c' found at %d\n",
1.140 daniel 7292: first, base);
7293: else if (third == 0)
1.241 veillard 7294: xmlGenericError(xmlGenericErrorContext,
7295: "PP: lookup '%c%c' found at %d\n",
1.140 daniel 7296: first, next, base);
7297: else
1.241 veillard 7298: xmlGenericError(xmlGenericErrorContext,
7299: "PP: lookup '%c%c%c' found at %d\n",
1.140 daniel 7300: first, next, third, base);
7301: #endif
7302: return(base - (in->cur - in->base));
7303: }
7304: }
7305: ctxt->checkIndex = base;
7306: #ifdef DEBUG_PUSH
7307: if (next == 0)
1.241 veillard 7308: xmlGenericError(xmlGenericErrorContext,
7309: "PP: lookup '%c' failed\n", first);
1.140 daniel 7310: else if (third == 0)
1.241 veillard 7311: xmlGenericError(xmlGenericErrorContext,
7312: "PP: lookup '%c%c' failed\n", first, next);
1.140 daniel 7313: else
1.241 veillard 7314: xmlGenericError(xmlGenericErrorContext,
7315: "PP: lookup '%c%c%c' failed\n", first, next, third);
1.140 daniel 7316: #endif
7317: return(-1);
1.128 daniel 7318: }
7319:
7320: /**
1.143 daniel 7321: * xmlParseTryOrFinish:
1.128 daniel 7322: * @ctxt: an XML parser context
1.143 daniel 7323: * @terminate: last chunk indicator
1.128 daniel 7324: *
7325: * Try to progress on parsing
7326: *
7327: * Returns zero if no parsing was possible
7328: */
7329: int
1.143 daniel 7330: xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
1.128 daniel 7331: int ret = 0;
1.140 daniel 7332: int avail;
7333: xmlChar cur, next;
7334:
7335: #ifdef DEBUG_PUSH
7336: switch (ctxt->instate) {
7337: case XML_PARSER_EOF:
1.241 veillard 7338: xmlGenericError(xmlGenericErrorContext,
7339: "PP: try EOF\n"); break;
1.140 daniel 7340: case XML_PARSER_START:
1.241 veillard 7341: xmlGenericError(xmlGenericErrorContext,
7342: "PP: try START\n"); break;
1.140 daniel 7343: case XML_PARSER_MISC:
1.241 veillard 7344: xmlGenericError(xmlGenericErrorContext,
7345: "PP: try MISC\n");break;
1.140 daniel 7346: case XML_PARSER_COMMENT:
1.241 veillard 7347: xmlGenericError(xmlGenericErrorContext,
7348: "PP: try COMMENT\n");break;
1.140 daniel 7349: case XML_PARSER_PROLOG:
1.241 veillard 7350: xmlGenericError(xmlGenericErrorContext,
7351: "PP: try PROLOG\n");break;
1.140 daniel 7352: case XML_PARSER_START_TAG:
1.241 veillard 7353: xmlGenericError(xmlGenericErrorContext,
7354: "PP: try START_TAG\n");break;
1.140 daniel 7355: case XML_PARSER_CONTENT:
1.241 veillard 7356: xmlGenericError(xmlGenericErrorContext,
7357: "PP: try CONTENT\n");break;
1.140 daniel 7358: case XML_PARSER_CDATA_SECTION:
1.241 veillard 7359: xmlGenericError(xmlGenericErrorContext,
7360: "PP: try CDATA_SECTION\n");break;
1.140 daniel 7361: case XML_PARSER_END_TAG:
1.241 veillard 7362: xmlGenericError(xmlGenericErrorContext,
7363: "PP: try END_TAG\n");break;
1.140 daniel 7364: case XML_PARSER_ENTITY_DECL:
1.241 veillard 7365: xmlGenericError(xmlGenericErrorContext,
7366: "PP: try ENTITY_DECL\n");break;
1.140 daniel 7367: case XML_PARSER_ENTITY_VALUE:
1.241 veillard 7368: xmlGenericError(xmlGenericErrorContext,
7369: "PP: try ENTITY_VALUE\n");break;
1.140 daniel 7370: case XML_PARSER_ATTRIBUTE_VALUE:
1.241 veillard 7371: xmlGenericError(xmlGenericErrorContext,
7372: "PP: try ATTRIBUTE_VALUE\n");break;
1.140 daniel 7373: case XML_PARSER_DTD:
1.241 veillard 7374: xmlGenericError(xmlGenericErrorContext,
7375: "PP: try DTD\n");break;
1.140 daniel 7376: case XML_PARSER_EPILOG:
1.241 veillard 7377: xmlGenericError(xmlGenericErrorContext,
7378: "PP: try EPILOG\n");break;
1.140 daniel 7379: case XML_PARSER_PI:
1.241 veillard 7380: xmlGenericError(xmlGenericErrorContext,
7381: "PP: try PI\n");break;
1.140 daniel 7382: }
7383: #endif
1.128 daniel 7384:
7385: while (1) {
1.140 daniel 7386: /*
7387: * Pop-up of finished entities.
7388: */
1.152 daniel 7389: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 7390: xmlPopInput(ctxt);
7391:
1.184 daniel 7392: if (ctxt->input ==NULL) break;
7393: if (ctxt->input->buf == NULL)
7394: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7395: else
1.184 daniel 7396: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7397: if (avail < 1)
7398: goto done;
1.128 daniel 7399: switch (ctxt->instate) {
7400: case XML_PARSER_EOF:
1.140 daniel 7401: /*
7402: * Document parsing is done !
7403: */
7404: goto done;
7405: case XML_PARSER_START:
7406: /*
7407: * Very first chars read from the document flow.
7408: */
1.184 daniel 7409: cur = ctxt->input->cur[0];
1.140 daniel 7410: if (IS_BLANK(cur)) {
7411: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7412: ctxt->sax->setDocumentLocator(ctxt->userData,
7413: &xmlDefaultSAXLocator);
1.230 veillard 7414: ctxt->errNo = XML_ERR_DOCUMENT_START;
1.140 daniel 7415: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7416: ctxt->sax->error(ctxt->userData,
7417: "Extra spaces at the beginning of the document are not allowed\n");
7418: ctxt->wellFormed = 0;
1.180 daniel 7419: ctxt->disableSAX = 1;
1.140 daniel 7420: SKIP_BLANKS;
7421: ret++;
1.184 daniel 7422: if (ctxt->input->buf == NULL)
7423: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7424: else
1.184 daniel 7425: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7426: }
7427: if (avail < 2)
7428: goto done;
7429:
1.184 daniel 7430: cur = ctxt->input->cur[0];
7431: next = ctxt->input->cur[1];
1.140 daniel 7432: if (cur == 0) {
7433: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7434: ctxt->sax->setDocumentLocator(ctxt->userData,
7435: &xmlDefaultSAXLocator);
1.230 veillard 7436: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.140 daniel 7437: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7438: ctxt->sax->error(ctxt->userData, "Document is empty\n");
7439: ctxt->wellFormed = 0;
1.180 daniel 7440: ctxt->disableSAX = 1;
1.140 daniel 7441: ctxt->instate = XML_PARSER_EOF;
7442: #ifdef DEBUG_PUSH
1.241 veillard 7443: xmlGenericError(xmlGenericErrorContext,
7444: "PP: entering EOF\n");
1.140 daniel 7445: #endif
7446: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7447: ctxt->sax->endDocument(ctxt->userData);
7448: goto done;
7449: }
7450: if ((cur == '<') && (next == '?')) {
7451: /* PI or XML decl */
7452: if (avail < 5) return(ret);
1.143 daniel 7453: if ((!terminate) &&
7454: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 7455: return(ret);
7456: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7457: ctxt->sax->setDocumentLocator(ctxt->userData,
7458: &xmlDefaultSAXLocator);
1.184 daniel 7459: if ((ctxt->input->cur[2] == 'x') &&
7460: (ctxt->input->cur[3] == 'm') &&
7461: (ctxt->input->cur[4] == 'l') &&
7462: (IS_BLANK(ctxt->input->cur[5]))) {
1.140 daniel 7463: ret += 5;
7464: #ifdef DEBUG_PUSH
1.241 veillard 7465: xmlGenericError(xmlGenericErrorContext,
7466: "PP: Parsing XML Decl\n");
1.140 daniel 7467: #endif
7468: xmlParseXMLDecl(ctxt);
1.193 daniel 7469: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7470: /*
7471: * The XML REC instructs us to stop parsing right
7472: * here
7473: */
7474: ctxt->instate = XML_PARSER_EOF;
7475: return(0);
7476: }
1.167 daniel 7477: ctxt->standalone = ctxt->input->standalone;
1.164 daniel 7478: if ((ctxt->encoding == NULL) &&
7479: (ctxt->input->encoding != NULL))
7480: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1.171 daniel 7481: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7482: (!ctxt->disableSAX))
1.140 daniel 7483: ctxt->sax->startDocument(ctxt->userData);
7484: ctxt->instate = XML_PARSER_MISC;
7485: #ifdef DEBUG_PUSH
1.241 veillard 7486: xmlGenericError(xmlGenericErrorContext,
7487: "PP: entering MISC\n");
1.140 daniel 7488: #endif
7489: } else {
7490: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 daniel 7491: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7492: (!ctxt->disableSAX))
1.140 daniel 7493: ctxt->sax->startDocument(ctxt->userData);
7494: ctxt->instate = XML_PARSER_MISC;
7495: #ifdef DEBUG_PUSH
1.241 veillard 7496: xmlGenericError(xmlGenericErrorContext,
7497: "PP: entering MISC\n");
1.140 daniel 7498: #endif
7499: }
7500: } else {
7501: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7502: ctxt->sax->setDocumentLocator(ctxt->userData,
7503: &xmlDefaultSAXLocator);
7504: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 daniel 7505: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7506: (!ctxt->disableSAX))
1.140 daniel 7507: ctxt->sax->startDocument(ctxt->userData);
7508: ctxt->instate = XML_PARSER_MISC;
7509: #ifdef DEBUG_PUSH
1.241 veillard 7510: xmlGenericError(xmlGenericErrorContext,
7511: "PP: entering MISC\n");
1.140 daniel 7512: #endif
7513: }
7514: break;
7515: case XML_PARSER_MISC:
7516: SKIP_BLANKS;
1.184 daniel 7517: if (ctxt->input->buf == NULL)
7518: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7519: else
1.184 daniel 7520: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7521: if (avail < 2)
7522: goto done;
1.184 daniel 7523: cur = ctxt->input->cur[0];
7524: next = ctxt->input->cur[1];
1.140 daniel 7525: if ((cur == '<') && (next == '?')) {
1.143 daniel 7526: if ((!terminate) &&
7527: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 7528: goto done;
7529: #ifdef DEBUG_PUSH
1.241 veillard 7530: xmlGenericError(xmlGenericErrorContext,
7531: "PP: Parsing PI\n");
1.140 daniel 7532: #endif
7533: xmlParsePI(ctxt);
7534: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 7535: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 7536: if ((!terminate) &&
7537: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 7538: goto done;
7539: #ifdef DEBUG_PUSH
1.241 veillard 7540: xmlGenericError(xmlGenericErrorContext,
7541: "PP: Parsing Comment\n");
1.140 daniel 7542: #endif
7543: xmlParseComment(ctxt);
7544: ctxt->instate = XML_PARSER_MISC;
7545: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 7546: (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
7547: (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
7548: (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
7549: (ctxt->input->cur[8] == 'E')) {
1.143 daniel 7550: if ((!terminate) &&
7551: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 7552: goto done;
7553: #ifdef DEBUG_PUSH
1.241 veillard 7554: xmlGenericError(xmlGenericErrorContext,
7555: "PP: Parsing internal subset\n");
1.140 daniel 7556: #endif
1.166 daniel 7557: ctxt->inSubset = 1;
1.140 daniel 7558: xmlParseDocTypeDecl(ctxt);
1.152 daniel 7559: if (RAW == '[') {
1.140 daniel 7560: ctxt->instate = XML_PARSER_DTD;
7561: #ifdef DEBUG_PUSH
1.241 veillard 7562: xmlGenericError(xmlGenericErrorContext,
7563: "PP: entering DTD\n");
1.140 daniel 7564: #endif
7565: } else {
1.166 daniel 7566: /*
7567: * Create and update the external subset.
7568: */
7569: ctxt->inSubset = 2;
1.171 daniel 7570: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 7571: (ctxt->sax->externalSubset != NULL))
7572: ctxt->sax->externalSubset(ctxt->userData,
7573: ctxt->intSubName, ctxt->extSubSystem,
7574: ctxt->extSubURI);
7575: ctxt->inSubset = 0;
1.140 daniel 7576: ctxt->instate = XML_PARSER_PROLOG;
7577: #ifdef DEBUG_PUSH
1.241 veillard 7578: xmlGenericError(xmlGenericErrorContext,
7579: "PP: entering PROLOG\n");
1.140 daniel 7580: #endif
7581: }
7582: } else if ((cur == '<') && (next == '!') &&
7583: (avail < 9)) {
7584: goto done;
7585: } else {
7586: ctxt->instate = XML_PARSER_START_TAG;
7587: #ifdef DEBUG_PUSH
1.241 veillard 7588: xmlGenericError(xmlGenericErrorContext,
7589: "PP: entering START_TAG\n");
1.140 daniel 7590: #endif
7591: }
7592: break;
1.128 daniel 7593: case XML_PARSER_PROLOG:
1.140 daniel 7594: SKIP_BLANKS;
1.184 daniel 7595: if (ctxt->input->buf == NULL)
7596: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7597: else
1.184 daniel 7598: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7599: if (avail < 2)
7600: goto done;
1.184 daniel 7601: cur = ctxt->input->cur[0];
7602: next = ctxt->input->cur[1];
1.140 daniel 7603: if ((cur == '<') && (next == '?')) {
1.143 daniel 7604: if ((!terminate) &&
7605: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 7606: goto done;
7607: #ifdef DEBUG_PUSH
1.241 veillard 7608: xmlGenericError(xmlGenericErrorContext,
7609: "PP: Parsing PI\n");
1.140 daniel 7610: #endif
7611: xmlParsePI(ctxt);
7612: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 7613: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 7614: if ((!terminate) &&
7615: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 7616: goto done;
7617: #ifdef DEBUG_PUSH
1.241 veillard 7618: xmlGenericError(xmlGenericErrorContext,
7619: "PP: Parsing Comment\n");
1.140 daniel 7620: #endif
7621: xmlParseComment(ctxt);
7622: ctxt->instate = XML_PARSER_PROLOG;
7623: } else if ((cur == '<') && (next == '!') &&
7624: (avail < 4)) {
7625: goto done;
7626: } else {
7627: ctxt->instate = XML_PARSER_START_TAG;
7628: #ifdef DEBUG_PUSH
1.241 veillard 7629: xmlGenericError(xmlGenericErrorContext,
7630: "PP: entering START_TAG\n");
1.140 daniel 7631: #endif
7632: }
7633: break;
7634: case XML_PARSER_EPILOG:
7635: SKIP_BLANKS;
1.184 daniel 7636: if (ctxt->input->buf == NULL)
7637: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7638: else
1.184 daniel 7639: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7640: if (avail < 2)
7641: goto done;
1.184 daniel 7642: cur = ctxt->input->cur[0];
7643: next = ctxt->input->cur[1];
1.140 daniel 7644: if ((cur == '<') && (next == '?')) {
1.143 daniel 7645: if ((!terminate) &&
7646: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 7647: goto done;
7648: #ifdef DEBUG_PUSH
1.241 veillard 7649: xmlGenericError(xmlGenericErrorContext,
7650: "PP: Parsing PI\n");
1.140 daniel 7651: #endif
7652: xmlParsePI(ctxt);
7653: ctxt->instate = XML_PARSER_EPILOG;
7654: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 7655: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 7656: if ((!terminate) &&
7657: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 7658: goto done;
7659: #ifdef DEBUG_PUSH
1.241 veillard 7660: xmlGenericError(xmlGenericErrorContext,
7661: "PP: Parsing Comment\n");
1.140 daniel 7662: #endif
7663: xmlParseComment(ctxt);
7664: ctxt->instate = XML_PARSER_EPILOG;
7665: } else if ((cur == '<') && (next == '!') &&
7666: (avail < 4)) {
7667: goto done;
7668: } else {
1.230 veillard 7669: ctxt->errNo = XML_ERR_DOCUMENT_END;
1.140 daniel 7670: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7671: ctxt->sax->error(ctxt->userData,
7672: "Extra content at the end of the document\n");
7673: ctxt->wellFormed = 0;
1.180 daniel 7674: ctxt->disableSAX = 1;
1.140 daniel 7675: ctxt->instate = XML_PARSER_EOF;
7676: #ifdef DEBUG_PUSH
1.241 veillard 7677: xmlGenericError(xmlGenericErrorContext,
7678: "PP: entering EOF\n");
1.140 daniel 7679: #endif
1.171 daniel 7680: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7681: (!ctxt->disableSAX))
1.140 daniel 7682: ctxt->sax->endDocument(ctxt->userData);
7683: goto done;
7684: }
7685: break;
7686: case XML_PARSER_START_TAG: {
7687: xmlChar *name, *oldname;
7688:
1.184 daniel 7689: if ((avail < 2) && (ctxt->inputNr == 1))
1.140 daniel 7690: goto done;
1.184 daniel 7691: cur = ctxt->input->cur[0];
1.140 daniel 7692: if (cur != '<') {
1.230 veillard 7693: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.140 daniel 7694: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7695: ctxt->sax->error(ctxt->userData,
7696: "Start tag expect, '<' not found\n");
7697: ctxt->wellFormed = 0;
1.180 daniel 7698: ctxt->disableSAX = 1;
1.140 daniel 7699: ctxt->instate = XML_PARSER_EOF;
7700: #ifdef DEBUG_PUSH
1.241 veillard 7701: xmlGenericError(xmlGenericErrorContext,
7702: "PP: entering EOF\n");
1.140 daniel 7703: #endif
1.171 daniel 7704: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7705: (!ctxt->disableSAX))
1.140 daniel 7706: ctxt->sax->endDocument(ctxt->userData);
7707: goto done;
7708: }
1.143 daniel 7709: if ((!terminate) &&
7710: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 7711: goto done;
1.176 daniel 7712: if (ctxt->spaceNr == 0)
7713: spacePush(ctxt, -1);
7714: else
7715: spacePush(ctxt, *ctxt->space);
1.140 daniel 7716: name = xmlParseStartTag(ctxt);
7717: if (name == NULL) {
1.176 daniel 7718: spacePop(ctxt);
1.140 daniel 7719: ctxt->instate = XML_PARSER_EOF;
7720: #ifdef DEBUG_PUSH
1.241 veillard 7721: xmlGenericError(xmlGenericErrorContext,
7722: "PP: entering EOF\n");
1.140 daniel 7723: #endif
1.171 daniel 7724: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7725: (!ctxt->disableSAX))
1.140 daniel 7726: ctxt->sax->endDocument(ctxt->userData);
7727: goto done;
7728: }
7729: namePush(ctxt, xmlStrdup(name));
7730:
7731: /*
7732: * [ VC: Root Element Type ]
7733: * The Name in the document type declaration must match
7734: * the element type of the root element.
7735: */
7736: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 7737: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.140 daniel 7738: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7739:
7740: /*
7741: * Check for an Empty Element.
7742: */
1.152 daniel 7743: if ((RAW == '/') && (NXT(1) == '>')) {
1.140 daniel 7744: SKIP(2);
1.171 daniel 7745: if ((ctxt->sax != NULL) &&
7746: (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
1.140 daniel 7747: ctxt->sax->endElement(ctxt->userData, name);
7748: xmlFree(name);
7749: oldname = namePop(ctxt);
1.176 daniel 7750: spacePop(ctxt);
1.140 daniel 7751: if (oldname != NULL) {
7752: #ifdef DEBUG_STACK
1.241 veillard 7753: xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
1.140 daniel 7754: #endif
7755: xmlFree(oldname);
7756: }
7757: if (ctxt->name == NULL) {
7758: ctxt->instate = XML_PARSER_EPILOG;
7759: #ifdef DEBUG_PUSH
1.241 veillard 7760: xmlGenericError(xmlGenericErrorContext,
7761: "PP: entering EPILOG\n");
1.140 daniel 7762: #endif
7763: } else {
7764: ctxt->instate = XML_PARSER_CONTENT;
7765: #ifdef DEBUG_PUSH
1.241 veillard 7766: xmlGenericError(xmlGenericErrorContext,
7767: "PP: entering CONTENT\n");
1.140 daniel 7768: #endif
7769: }
7770: break;
7771: }
1.152 daniel 7772: if (RAW == '>') {
1.140 daniel 7773: NEXT;
7774: } else {
1.230 veillard 7775: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.140 daniel 7776: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7777: ctxt->sax->error(ctxt->userData,
7778: "Couldn't find end of Start Tag %s\n",
7779: name);
7780: ctxt->wellFormed = 0;
1.180 daniel 7781: ctxt->disableSAX = 1;
1.140 daniel 7782:
7783: /*
7784: * end of parsing of this node.
7785: */
7786: nodePop(ctxt);
7787: oldname = namePop(ctxt);
1.176 daniel 7788: spacePop(ctxt);
1.140 daniel 7789: if (oldname != NULL) {
7790: #ifdef DEBUG_STACK
1.241 veillard 7791: xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
1.140 daniel 7792: #endif
7793: xmlFree(oldname);
7794: }
7795: }
7796: xmlFree(name);
7797: ctxt->instate = XML_PARSER_CONTENT;
7798: #ifdef DEBUG_PUSH
1.241 veillard 7799: xmlGenericError(xmlGenericErrorContext,
7800: "PP: entering CONTENT\n");
1.140 daniel 7801: #endif
7802: break;
7803: }
1.224 veillard 7804: case XML_PARSER_CONTENT: {
7805: const xmlChar *test;
7806: int cons;
7807: xmlChar tok;
7808:
1.140 daniel 7809: /*
7810: * Handle preparsed entities and charRef
7811: */
7812: if (ctxt->token != 0) {
7813: xmlChar cur[2] = { 0 , 0 } ;
7814:
7815: cur[0] = (xmlChar) ctxt->token;
1.171 daniel 7816: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
7817: (ctxt->sax->characters != NULL))
1.140 daniel 7818: ctxt->sax->characters(ctxt->userData, cur, 1);
7819: ctxt->token = 0;
7820: }
1.184 daniel 7821: if ((avail < 2) && (ctxt->inputNr == 1))
1.140 daniel 7822: goto done;
1.184 daniel 7823: cur = ctxt->input->cur[0];
7824: next = ctxt->input->cur[1];
1.224 veillard 7825:
7826: test = CUR_PTR;
7827: cons = ctxt->input->consumed;
7828: tok = ctxt->token;
1.140 daniel 7829: if ((cur == '<') && (next == '?')) {
1.143 daniel 7830: if ((!terminate) &&
7831: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 7832: goto done;
7833: #ifdef DEBUG_PUSH
1.241 veillard 7834: xmlGenericError(xmlGenericErrorContext,
7835: "PP: Parsing PI\n");
1.140 daniel 7836: #endif
7837: xmlParsePI(ctxt);
7838: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 7839: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 7840: if ((!terminate) &&
7841: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 7842: goto done;
7843: #ifdef DEBUG_PUSH
1.241 veillard 7844: xmlGenericError(xmlGenericErrorContext,
7845: "PP: Parsing Comment\n");
1.140 daniel 7846: #endif
7847: xmlParseComment(ctxt);
7848: ctxt->instate = XML_PARSER_CONTENT;
1.184 daniel 7849: } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
7850: (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
7851: (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
7852: (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
7853: (ctxt->input->cur[8] == '[')) {
1.140 daniel 7854: SKIP(9);
7855: ctxt->instate = XML_PARSER_CDATA_SECTION;
7856: #ifdef DEBUG_PUSH
1.241 veillard 7857: xmlGenericError(xmlGenericErrorContext,
7858: "PP: entering CDATA_SECTION\n");
1.140 daniel 7859: #endif
7860: break;
7861: } else if ((cur == '<') && (next == '!') &&
7862: (avail < 9)) {
7863: goto done;
7864: } else if ((cur == '<') && (next == '/')) {
7865: ctxt->instate = XML_PARSER_END_TAG;
7866: #ifdef DEBUG_PUSH
1.241 veillard 7867: xmlGenericError(xmlGenericErrorContext,
7868: "PP: entering END_TAG\n");
1.140 daniel 7869: #endif
7870: break;
7871: } else if (cur == '<') {
7872: ctxt->instate = XML_PARSER_START_TAG;
7873: #ifdef DEBUG_PUSH
1.241 veillard 7874: xmlGenericError(xmlGenericErrorContext,
7875: "PP: entering START_TAG\n");
1.140 daniel 7876: #endif
7877: break;
7878: } else if (cur == '&') {
1.143 daniel 7879: if ((!terminate) &&
7880: (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
1.140 daniel 7881: goto done;
7882: #ifdef DEBUG_PUSH
1.241 veillard 7883: xmlGenericError(xmlGenericErrorContext,
7884: "PP: Parsing Reference\n");
1.140 daniel 7885: #endif
7886: xmlParseReference(ctxt);
7887: } else {
1.156 daniel 7888: /* TODO Avoid the extra copy, handle directly !!! */
1.140 daniel 7889: /*
1.181 daniel 7890: * Goal of the following test is:
1.140 daniel 7891: * - minimize calls to the SAX 'character' callback
7892: * when they are mergeable
7893: * - handle an problem for isBlank when we only parse
7894: * a sequence of blank chars and the next one is
7895: * not available to check against '<' presence.
7896: * - tries to homogenize the differences in SAX
7897: * callbacks beween the push and pull versions
7898: * of the parser.
7899: */
7900: if ((ctxt->inputNr == 1) &&
7901: (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
1.143 daniel 7902: if ((!terminate) &&
7903: (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
1.140 daniel 7904: goto done;
7905: }
7906: ctxt->checkIndex = 0;
7907: #ifdef DEBUG_PUSH
1.241 veillard 7908: xmlGenericError(xmlGenericErrorContext,
7909: "PP: Parsing char data\n");
1.140 daniel 7910: #endif
7911: xmlParseCharData(ctxt, 0);
7912: }
7913: /*
7914: * Pop-up of finished entities.
7915: */
1.152 daniel 7916: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 7917: xmlPopInput(ctxt);
1.224 veillard 7918: if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
7919: (tok == ctxt->token)) {
1.230 veillard 7920: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.224 veillard 7921: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7922: ctxt->sax->error(ctxt->userData,
7923: "detected an error in element content\n");
7924: ctxt->wellFormed = 0;
7925: ctxt->disableSAX = 1;
7926: ctxt->instate = XML_PARSER_EOF;
7927: break;
7928: }
1.140 daniel 7929: break;
1.224 veillard 7930: }
1.140 daniel 7931: case XML_PARSER_CDATA_SECTION: {
7932: /*
7933: * The Push mode need to have the SAX callback for
7934: * cdataBlock merge back contiguous callbacks.
7935: */
7936: int base;
7937:
7938: base = xmlParseLookupSequence(ctxt, ']', ']', '>');
7939: if (base < 0) {
7940: if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
1.171 daniel 7941: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.140 daniel 7942: if (ctxt->sax->cdataBlock != NULL)
1.184 daniel 7943: ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
1.140 daniel 7944: XML_PARSER_BIG_BUFFER_SIZE);
7945: }
7946: SKIP(XML_PARSER_BIG_BUFFER_SIZE);
7947: ctxt->checkIndex = 0;
7948: }
7949: goto done;
7950: } else {
1.171 daniel 7951: if ((ctxt->sax != NULL) && (base > 0) &&
7952: (!ctxt->disableSAX)) {
1.140 daniel 7953: if (ctxt->sax->cdataBlock != NULL)
7954: ctxt->sax->cdataBlock(ctxt->userData,
1.184 daniel 7955: ctxt->input->cur, base);
1.140 daniel 7956: }
7957: SKIP(base + 3);
7958: ctxt->checkIndex = 0;
7959: ctxt->instate = XML_PARSER_CONTENT;
7960: #ifdef DEBUG_PUSH
1.241 veillard 7961: xmlGenericError(xmlGenericErrorContext,
7962: "PP: entering CONTENT\n");
1.140 daniel 7963: #endif
7964: }
7965: break;
7966: }
1.141 daniel 7967: case XML_PARSER_END_TAG:
1.140 daniel 7968: if (avail < 2)
7969: goto done;
1.143 daniel 7970: if ((!terminate) &&
7971: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 7972: goto done;
7973: xmlParseEndTag(ctxt);
7974: if (ctxt->name == NULL) {
7975: ctxt->instate = XML_PARSER_EPILOG;
7976: #ifdef DEBUG_PUSH
1.241 veillard 7977: xmlGenericError(xmlGenericErrorContext,
7978: "PP: entering EPILOG\n");
1.140 daniel 7979: #endif
7980: } else {
7981: ctxt->instate = XML_PARSER_CONTENT;
7982: #ifdef DEBUG_PUSH
1.241 veillard 7983: xmlGenericError(xmlGenericErrorContext,
7984: "PP: entering CONTENT\n");
1.140 daniel 7985: #endif
7986: }
7987: break;
7988: case XML_PARSER_DTD: {
7989: /*
7990: * Sorry but progressive parsing of the internal subset
7991: * is not expected to be supported. We first check that
7992: * the full content of the internal subset is available and
7993: * the parsing is launched only at that point.
7994: * Internal subset ends up with "']' S? '>'" in an unescaped
7995: * section and not in a ']]>' sequence which are conditional
7996: * sections (whoever argued to keep that crap in XML deserve
7997: * a place in hell !).
7998: */
7999: int base, i;
8000: xmlChar *buf;
8001: xmlChar quote = 0;
8002:
1.184 daniel 8003: base = ctxt->input->cur - ctxt->input->base;
1.140 daniel 8004: if (base < 0) return(0);
8005: if (ctxt->checkIndex > base)
8006: base = ctxt->checkIndex;
1.184 daniel 8007: buf = ctxt->input->buf->buffer->content;
1.202 daniel 8008: for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8009: base++) {
1.140 daniel 8010: if (quote != 0) {
8011: if (buf[base] == quote)
8012: quote = 0;
8013: continue;
8014: }
8015: if (buf[base] == '"') {
8016: quote = '"';
8017: continue;
8018: }
8019: if (buf[base] == '\'') {
8020: quote = '\'';
8021: continue;
8022: }
8023: if (buf[base] == ']') {
1.202 daniel 8024: if ((unsigned int) base +1 >=
8025: ctxt->input->buf->buffer->use)
1.140 daniel 8026: break;
8027: if (buf[base + 1] == ']') {
8028: /* conditional crap, skip both ']' ! */
8029: base++;
8030: continue;
8031: }
1.202 daniel 8032: for (i = 0;
8033: (unsigned int) base + i < ctxt->input->buf->buffer->use;
8034: i++) {
1.140 daniel 8035: if (buf[base + i] == '>')
8036: goto found_end_int_subset;
8037: }
8038: break;
8039: }
8040: }
8041: /*
8042: * We didn't found the end of the Internal subset
8043: */
8044: if (quote == 0)
8045: ctxt->checkIndex = base;
8046: #ifdef DEBUG_PUSH
8047: if (next == 0)
1.241 veillard 8048: xmlGenericError(xmlGenericErrorContext,
8049: "PP: lookup of int subset end filed\n");
1.140 daniel 8050: #endif
8051: goto done;
8052:
8053: found_end_int_subset:
8054: xmlParseInternalSubset(ctxt);
1.166 daniel 8055: ctxt->inSubset = 2;
1.171 daniel 8056: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 8057: (ctxt->sax->externalSubset != NULL))
8058: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8059: ctxt->extSubSystem, ctxt->extSubURI);
8060: ctxt->inSubset = 0;
1.140 daniel 8061: ctxt->instate = XML_PARSER_PROLOG;
8062: ctxt->checkIndex = 0;
8063: #ifdef DEBUG_PUSH
1.241 veillard 8064: xmlGenericError(xmlGenericErrorContext,
8065: "PP: entering PROLOG\n");
1.140 daniel 8066: #endif
8067: break;
8068: }
8069: case XML_PARSER_COMMENT:
1.241 veillard 8070: xmlGenericError(xmlGenericErrorContext,
8071: "PP: internal error, state == COMMENT\n");
1.140 daniel 8072: ctxt->instate = XML_PARSER_CONTENT;
8073: #ifdef DEBUG_PUSH
1.241 veillard 8074: xmlGenericError(xmlGenericErrorContext,
8075: "PP: entering CONTENT\n");
1.140 daniel 8076: #endif
8077: break;
8078: case XML_PARSER_PI:
1.241 veillard 8079: xmlGenericError(xmlGenericErrorContext,
8080: "PP: internal error, state == PI\n");
1.140 daniel 8081: ctxt->instate = XML_PARSER_CONTENT;
8082: #ifdef DEBUG_PUSH
1.241 veillard 8083: xmlGenericError(xmlGenericErrorContext,
8084: "PP: entering CONTENT\n");
1.140 daniel 8085: #endif
8086: break;
1.128 daniel 8087: case XML_PARSER_ENTITY_DECL:
1.241 veillard 8088: xmlGenericError(xmlGenericErrorContext,
8089: "PP: internal error, state == ENTITY_DECL\n");
1.140 daniel 8090: ctxt->instate = XML_PARSER_DTD;
8091: #ifdef DEBUG_PUSH
1.241 veillard 8092: xmlGenericError(xmlGenericErrorContext,
8093: "PP: entering DTD\n");
1.140 daniel 8094: #endif
8095: break;
1.128 daniel 8096: case XML_PARSER_ENTITY_VALUE:
1.241 veillard 8097: xmlGenericError(xmlGenericErrorContext,
8098: "PP: internal error, state == ENTITY_VALUE\n");
1.140 daniel 8099: ctxt->instate = XML_PARSER_CONTENT;
8100: #ifdef DEBUG_PUSH
1.241 veillard 8101: xmlGenericError(xmlGenericErrorContext,
8102: "PP: entering DTD\n");
1.140 daniel 8103: #endif
8104: break;
1.128 daniel 8105: case XML_PARSER_ATTRIBUTE_VALUE:
1.241 veillard 8106: xmlGenericError(xmlGenericErrorContext,
8107: "PP: internal error, state == ATTRIBUTE_VALUE\n");
1.168 daniel 8108: ctxt->instate = XML_PARSER_START_TAG;
8109: #ifdef DEBUG_PUSH
1.241 veillard 8110: xmlGenericError(xmlGenericErrorContext,
8111: "PP: entering START_TAG\n");
1.168 daniel 8112: #endif
8113: break;
8114: case XML_PARSER_SYSTEM_LITERAL:
1.241 veillard 8115: xmlGenericError(xmlGenericErrorContext,
8116: "PP: internal error, state == SYSTEM_LITERAL\n");
1.140 daniel 8117: ctxt->instate = XML_PARSER_START_TAG;
8118: #ifdef DEBUG_PUSH
1.241 veillard 8119: xmlGenericError(xmlGenericErrorContext,
8120: "PP: entering START_TAG\n");
1.140 daniel 8121: #endif
8122: break;
1.128 daniel 8123: }
8124: }
1.140 daniel 8125: done:
8126: #ifdef DEBUG_PUSH
1.241 veillard 8127: xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
1.140 daniel 8128: #endif
1.128 daniel 8129: return(ret);
8130: }
8131:
8132: /**
1.143 daniel 8133: * xmlParseTry:
8134: * @ctxt: an XML parser context
8135: *
8136: * Try to progress on parsing
8137: *
8138: * Returns zero if no parsing was possible
8139: */
8140: int
8141: xmlParseTry(xmlParserCtxtPtr ctxt) {
8142: return(xmlParseTryOrFinish(ctxt, 0));
8143: }
8144:
8145: /**
1.128 daniel 8146: * xmlParseChunk:
8147: * @ctxt: an XML parser context
8148: * @chunk: an char array
8149: * @size: the size in byte of the chunk
8150: * @terminate: last chunk indicator
8151: *
8152: * Parse a Chunk of memory
8153: *
8154: * Returns zero if no error, the xmlParserErrors otherwise.
8155: */
1.140 daniel 8156: int
1.128 daniel 8157: xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8158: int terminate) {
1.132 daniel 8159: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
1.140 daniel 8160: (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8161: int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8162: int cur = ctxt->input->cur - ctxt->input->base;
8163:
1.132 daniel 8164: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
1.140 daniel 8165: ctxt->input->base = ctxt->input->buf->buffer->content + base;
8166: ctxt->input->cur = ctxt->input->base + cur;
8167: #ifdef DEBUG_PUSH
1.241 veillard 8168: xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
1.140 daniel 8169: #endif
8170:
1.150 daniel 8171: if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8172: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 8173: } else if (ctxt->instate != XML_PARSER_EOF)
1.143 daniel 8174: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 8175: if (terminate) {
1.151 daniel 8176: /*
8177: * Check for termination
8178: */
1.140 daniel 8179: if ((ctxt->instate != XML_PARSER_EOF) &&
8180: (ctxt->instate != XML_PARSER_EPILOG)) {
1.230 veillard 8181: ctxt->errNo = XML_ERR_DOCUMENT_END;
1.140 daniel 8182: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8183: ctxt->sax->error(ctxt->userData,
8184: "Extra content at the end of the document\n");
8185: ctxt->wellFormed = 0;
1.180 daniel 8186: ctxt->disableSAX = 1;
1.140 daniel 8187: }
8188: if (ctxt->instate != XML_PARSER_EOF) {
1.171 daniel 8189: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8190: (!ctxt->disableSAX))
1.140 daniel 8191: ctxt->sax->endDocument(ctxt->userData);
8192: }
8193: ctxt->instate = XML_PARSER_EOF;
1.128 daniel 8194: }
8195: return((xmlParserErrors) ctxt->errNo);
8196: }
8197:
8198: /************************************************************************
8199: * *
1.98 daniel 8200: * I/O front end functions to the parser *
8201: * *
8202: ************************************************************************/
1.201 daniel 8203:
8204: /**
1.229 veillard 8205: * xmlStopParser:
1.201 daniel 8206: * @ctxt: an XML parser context
8207: *
8208: * Blocks further parser processing
8209: */
8210: void
8211: xmlStopParser(xmlParserCtxtPtr ctxt) {
8212: ctxt->instate = XML_PARSER_EOF;
8213: if (ctxt->input != NULL)
8214: ctxt->input->cur = BAD_CAST"";
8215: }
1.98 daniel 8216:
1.50 daniel 8217: /**
1.181 daniel 8218: * xmlCreatePushParserCtxt:
1.140 daniel 8219: * @sax: a SAX handler
8220: * @user_data: The user data returned on SAX callbacks
8221: * @chunk: a pointer to an array of chars
8222: * @size: number of chars in the array
8223: * @filename: an optional file name or URI
8224: *
8225: * Create a parser context for using the XML parser in push mode
8226: * To allow content encoding detection, @size should be >= 4
8227: * The value of @filename is used for fetching external entities
8228: * and error/warning reports.
8229: *
8230: * Returns the new parser context or NULL
8231: */
8232: xmlParserCtxtPtr
8233: xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8234: const char *chunk, int size, const char *filename) {
8235: xmlParserCtxtPtr ctxt;
8236: xmlParserInputPtr inputStream;
8237: xmlParserInputBufferPtr buf;
8238: xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8239:
8240: /*
1.156 daniel 8241: * plug some encoding conversion routines
1.140 daniel 8242: */
8243: if ((chunk != NULL) && (size >= 4))
1.156 daniel 8244: enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
1.140 daniel 8245:
8246: buf = xmlAllocParserInputBuffer(enc);
8247: if (buf == NULL) return(NULL);
8248:
8249: ctxt = xmlNewParserCtxt();
8250: if (ctxt == NULL) {
8251: xmlFree(buf);
8252: return(NULL);
8253: }
8254: if (sax != NULL) {
8255: if (ctxt->sax != &xmlDefaultSAXHandler)
8256: xmlFree(ctxt->sax);
8257: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8258: if (ctxt->sax == NULL) {
8259: xmlFree(buf);
8260: xmlFree(ctxt);
8261: return(NULL);
8262: }
8263: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8264: if (user_data != NULL)
8265: ctxt->userData = user_data;
8266: }
8267: if (filename == NULL) {
8268: ctxt->directory = NULL;
8269: } else {
8270: ctxt->directory = xmlParserGetDirectory(filename);
8271: }
8272:
8273: inputStream = xmlNewInputStream(ctxt);
8274: if (inputStream == NULL) {
8275: xmlFreeParserCtxt(ctxt);
8276: return(NULL);
8277: }
8278:
8279: if (filename == NULL)
8280: inputStream->filename = NULL;
8281: else
8282: inputStream->filename = xmlMemStrdup(filename);
8283: inputStream->buf = buf;
8284: inputStream->base = inputStream->buf->buffer->content;
8285: inputStream->cur = inputStream->buf->buffer->content;
1.156 daniel 8286: if (enc != XML_CHAR_ENCODING_NONE) {
8287: xmlSwitchEncoding(ctxt, enc);
8288: }
1.140 daniel 8289:
8290: inputPush(ctxt, inputStream);
8291:
8292: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8293: (ctxt->input->buf != NULL)) {
8294: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8295: #ifdef DEBUG_PUSH
1.241 veillard 8296: xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
1.140 daniel 8297: #endif
8298: }
1.190 daniel 8299:
8300: return(ctxt);
8301: }
8302:
8303: /**
8304: * xmlCreateIOParserCtxt:
8305: * @sax: a SAX handler
8306: * @user_data: The user data returned on SAX callbacks
8307: * @ioread: an I/O read function
8308: * @ioclose: an I/O close function
8309: * @ioctx: an I/O handler
8310: * @enc: the charset encoding if known
8311: *
8312: * Create a parser context for using the XML parser with an existing
8313: * I/O stream
8314: *
8315: * Returns the new parser context or NULL
8316: */
8317: xmlParserCtxtPtr
8318: xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8319: xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8320: void *ioctx, xmlCharEncoding enc) {
8321: xmlParserCtxtPtr ctxt;
8322: xmlParserInputPtr inputStream;
8323: xmlParserInputBufferPtr buf;
8324:
8325: buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8326: if (buf == NULL) return(NULL);
8327:
8328: ctxt = xmlNewParserCtxt();
8329: if (ctxt == NULL) {
8330: xmlFree(buf);
8331: return(NULL);
8332: }
8333: if (sax != NULL) {
8334: if (ctxt->sax != &xmlDefaultSAXHandler)
8335: xmlFree(ctxt->sax);
8336: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8337: if (ctxt->sax == NULL) {
8338: xmlFree(buf);
8339: xmlFree(ctxt);
8340: return(NULL);
8341: }
8342: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8343: if (user_data != NULL)
8344: ctxt->userData = user_data;
8345: }
8346:
1.229 veillard 8347: inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8348: if (inputStream == NULL) {
8349: xmlFreeParserCtxt(ctxt);
8350: return(NULL);
1.74 daniel 8351: }
1.229 veillard 8352: inputPush(ctxt, inputStream);
1.69 daniel 8353:
1.229 veillard 8354: return(ctxt);
1.1 veillard 8355: }
8356:
1.229 veillard 8357: /************************************************************************
8358: * *
8359: * Front ends when parsing a Dtd *
8360: * *
8361: ************************************************************************/
1.76 daniel 8362:
8363: /**
1.242 veillard 8364: * xmlIOParseDTD:
8365: * @sax: the SAX handler block or NULL
8366: * @input: an Input Buffer
8367: * @enc: the charset encoding if known
8368: *
8369: * Load and parse a DTD
8370: *
8371: * Returns the resulting xmlDtdPtr or NULL in case of error.
1.243 ! veillard 8372: * @input will be freed at parsing end.
1.242 veillard 8373: */
8374:
8375: xmlDtdPtr
8376: xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
8377: xmlCharEncoding enc) {
8378: xmlDtdPtr ret = NULL;
8379: xmlParserCtxtPtr ctxt;
8380: xmlParserInputPtr pinput = NULL;
8381:
8382: if (input == NULL)
8383: return(NULL);
8384:
8385: ctxt = xmlNewParserCtxt();
8386: if (ctxt == NULL) {
8387: return(NULL);
8388: }
8389:
8390: /*
8391: * Set-up the SAX context
8392: */
8393: if (sax != NULL) {
8394: if (ctxt->sax != NULL)
8395: xmlFree(ctxt->sax);
8396: ctxt->sax = sax;
8397: ctxt->userData = NULL;
8398: }
8399:
8400: /*
8401: * generate a parser input from the I/O handler
8402: */
8403:
8404: pinput = xmlNewIOInputStream(ctxt, input, enc);
8405: if (pinput == NULL) {
8406: if (sax != NULL) ctxt->sax = NULL;
8407: xmlFreeParserCtxt(ctxt);
8408: return(NULL);
8409: }
8410:
8411: /*
8412: * plug some encoding conversion routines here.
8413: */
8414: xmlPushInput(ctxt, pinput);
8415:
8416: pinput->filename = NULL;
8417: pinput->line = 1;
8418: pinput->col = 1;
8419: pinput->base = ctxt->input->cur;
8420: pinput->cur = ctxt->input->cur;
8421: pinput->free = NULL;
8422:
8423: /*
8424: * let's parse that entity knowing it's an external subset.
8425: */
8426: ctxt->inSubset = 2;
8427: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8428: ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8429: BAD_CAST "none", BAD_CAST "none");
8430: xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
8431:
8432: if (ctxt->myDoc != NULL) {
8433: if (ctxt->wellFormed) {
8434: ret = ctxt->myDoc->extSubset;
8435: ctxt->myDoc->extSubset = NULL;
8436: } else {
8437: ret = NULL;
8438: }
8439: xmlFreeDoc(ctxt->myDoc);
8440: ctxt->myDoc = NULL;
8441: }
8442: if (sax != NULL) ctxt->sax = NULL;
8443: xmlFreeParserCtxt(ctxt);
8444:
8445: return(ret);
8446: }
8447:
8448: /**
1.181 daniel 8449: * xmlSAXParseDTD:
1.76 daniel 8450: * @sax: the SAX handler block
8451: * @ExternalID: a NAME* containing the External ID of the DTD
8452: * @SystemID: a NAME* containing the URL to the DTD
8453: *
8454: * Load and parse an external subset.
8455: *
8456: * Returns the resulting xmlDtdPtr or NULL in case of error.
8457: */
8458:
8459: xmlDtdPtr
1.123 daniel 8460: xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8461: const xmlChar *SystemID) {
1.76 daniel 8462: xmlDtdPtr ret = NULL;
8463: xmlParserCtxtPtr ctxt;
1.83 daniel 8464: xmlParserInputPtr input = NULL;
1.76 daniel 8465: xmlCharEncoding enc;
8466:
8467: if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
8468:
1.97 daniel 8469: ctxt = xmlNewParserCtxt();
1.76 daniel 8470: if (ctxt == NULL) {
8471: return(NULL);
8472: }
8473:
8474: /*
8475: * Set-up the SAX context
8476: */
8477: if (sax != NULL) {
1.93 veillard 8478: if (ctxt->sax != NULL)
1.119 daniel 8479: xmlFree(ctxt->sax);
1.76 daniel 8480: ctxt->sax = sax;
8481: ctxt->userData = NULL;
8482: }
8483:
8484: /*
8485: * Ask the Entity resolver to load the damn thing
8486: */
8487:
8488: if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
8489: input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
8490: if (input == NULL) {
1.86 daniel 8491: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 8492: xmlFreeParserCtxt(ctxt);
8493: return(NULL);
8494: }
8495:
8496: /*
1.156 daniel 8497: * plug some encoding conversion routines here.
1.76 daniel 8498: */
8499: xmlPushInput(ctxt, input);
1.156 daniel 8500: enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
1.76 daniel 8501: xmlSwitchEncoding(ctxt, enc);
8502:
1.95 veillard 8503: if (input->filename == NULL)
1.156 daniel 8504: input->filename = (char *) xmlStrdup(SystemID);
1.76 daniel 8505: input->line = 1;
8506: input->col = 1;
8507: input->base = ctxt->input->cur;
8508: input->cur = ctxt->input->cur;
8509: input->free = NULL;
8510:
8511: /*
8512: * let's parse that entity knowing it's an external subset.
8513: */
1.191 daniel 8514: ctxt->inSubset = 2;
8515: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8516: ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8517: ExternalID, SystemID);
1.79 daniel 8518: xmlParseExternalSubset(ctxt, ExternalID, SystemID);
1.76 daniel 8519:
8520: if (ctxt->myDoc != NULL) {
8521: if (ctxt->wellFormed) {
1.191 daniel 8522: ret = ctxt->myDoc->extSubset;
8523: ctxt->myDoc->extSubset = NULL;
1.76 daniel 8524: } else {
8525: ret = NULL;
8526: }
8527: xmlFreeDoc(ctxt->myDoc);
8528: ctxt->myDoc = NULL;
8529: }
1.86 daniel 8530: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 8531: xmlFreeParserCtxt(ctxt);
8532:
8533: return(ret);
8534: }
8535:
8536: /**
1.181 daniel 8537: * xmlParseDTD:
1.76 daniel 8538: * @ExternalID: a NAME* containing the External ID of the DTD
8539: * @SystemID: a NAME* containing the URL to the DTD
8540: *
8541: * Load and parse an external subset.
8542: *
8543: * Returns the resulting xmlDtdPtr or NULL in case of error.
8544: */
8545:
8546: xmlDtdPtr
1.123 daniel 8547: xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
1.76 daniel 8548: return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
1.59 daniel 8549: }
8550:
1.229 veillard 8551: /************************************************************************
8552: * *
8553: * Front ends when parsing an Entity *
8554: * *
8555: ************************************************************************/
8556:
1.59 daniel 8557: /**
1.181 daniel 8558: * xmlSAXParseBalancedChunk:
1.144 daniel 8559: * @ctx: an XML parser context (possibly NULL)
8560: * @sax: the SAX handler bloc (possibly NULL)
8561: * @user_data: The user data returned on SAX callbacks (possibly NULL)
8562: * @input: a parser input stream
8563: * @enc: the encoding
8564: *
8565: * Parse a well-balanced chunk of an XML document
8566: * The user has to provide SAX callback block whose routines will be
8567: * called by the parser
8568: * The allowed sequence for the Well Balanced Chunk is the one defined by
8569: * the content production in the XML grammar:
8570: *
8571: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8572: *
1.176 daniel 8573: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
1.144 daniel 8574: * the error code otherwise
8575: */
8576:
8577: int
8578: xmlSAXParseBalancedChunk(xmlParserCtxtPtr ctx, xmlSAXHandlerPtr sax,
8579: void *user_data, xmlParserInputPtr input,
8580: xmlCharEncoding enc) {
8581: xmlParserCtxtPtr ctxt;
8582: int ret;
8583:
8584: if (input == NULL) return(-1);
8585:
8586: if (ctx != NULL)
8587: ctxt = ctx;
8588: else {
8589: ctxt = xmlNewParserCtxt();
8590: if (ctxt == NULL)
8591: return(-1);
8592: if (sax == NULL)
8593: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8594: }
8595:
8596: /*
8597: * Set-up the SAX context
8598: */
8599: if (sax != NULL) {
8600: if (ctxt->sax != NULL)
8601: xmlFree(ctxt->sax);
8602: ctxt->sax = sax;
8603: ctxt->userData = user_data;
8604: }
8605:
8606: /*
8607: * plug some encoding conversion routines here.
8608: */
8609: xmlPushInput(ctxt, input);
8610: if (enc != XML_CHAR_ENCODING_NONE)
8611: xmlSwitchEncoding(ctxt, enc);
8612:
8613: /*
8614: * let's parse that entity knowing it's an external subset.
8615: */
8616: xmlParseContent(ctxt);
8617: ret = ctxt->errNo;
8618:
8619: if (ctx == NULL) {
8620: if (sax != NULL)
8621: ctxt->sax = NULL;
8622: else
8623: xmlFreeDoc(ctxt->myDoc);
8624: xmlFreeParserCtxt(ctxt);
8625: }
8626: return(ret);
8627: }
8628:
8629: /**
1.213 veillard 8630: * xmlParseCtxtExternalEntity:
8631: * @ctx: the existing parsing context
8632: * @URL: the URL for the entity to load
8633: * @ID: the System ID for the entity to load
8634: * @list: the return value for the set of parsed nodes
8635: *
8636: * Parse an external general entity within an existing parsing context
8637: * An external general parsed entity is well-formed if it matches the
8638: * production labeled extParsedEnt.
8639: *
8640: * [78] extParsedEnt ::= TextDecl? content
8641: *
8642: * Returns 0 if the entity is well formed, -1 in case of args problem and
8643: * the parser error code otherwise
8644: */
8645:
8646: int
8647: xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
8648: const xmlChar *ID, xmlNodePtr *list) {
8649: xmlParserCtxtPtr ctxt;
8650: xmlDocPtr newDoc;
8651: xmlSAXHandlerPtr oldsax = NULL;
8652: int ret = 0;
8653:
8654: if (ctx->depth > 40) {
8655: return(XML_ERR_ENTITY_LOOP);
8656: }
8657:
8658: if (list != NULL)
8659: *list = NULL;
8660: if ((URL == NULL) && (ID == NULL))
8661: return(-1);
8662: if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
8663: return(-1);
8664:
8665:
1.228 veillard 8666: ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
1.213 veillard 8667: if (ctxt == NULL) return(-1);
8668: ctxt->userData = ctxt;
8669: oldsax = ctxt->sax;
8670: ctxt->sax = ctx->sax;
8671: newDoc = xmlNewDoc(BAD_CAST "1.0");
8672: if (newDoc == NULL) {
8673: xmlFreeParserCtxt(ctxt);
8674: return(-1);
8675: }
8676: if (ctx->myDoc != NULL) {
8677: newDoc->intSubset = ctx->myDoc->intSubset;
8678: newDoc->extSubset = ctx->myDoc->extSubset;
8679: }
8680: if (ctx->myDoc->URL != NULL) {
8681: newDoc->URL = xmlStrdup(ctx->myDoc->URL);
8682: }
8683: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8684: if (newDoc->children == NULL) {
8685: ctxt->sax = oldsax;
8686: xmlFreeParserCtxt(ctxt);
8687: newDoc->intSubset = NULL;
8688: newDoc->extSubset = NULL;
8689: xmlFreeDoc(newDoc);
8690: return(-1);
8691: }
8692: nodePush(ctxt, newDoc->children);
8693: if (ctx->myDoc == NULL) {
8694: ctxt->myDoc = newDoc;
8695: } else {
8696: ctxt->myDoc = ctx->myDoc;
8697: newDoc->children->doc = ctx->myDoc;
8698: }
8699:
8700: /*
8701: * Parse a possible text declaration first
8702: */
8703: GROW;
8704: if ((RAW == '<') && (NXT(1) == '?') &&
8705: (NXT(2) == 'x') && (NXT(3) == 'm') &&
8706: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8707: xmlParseTextDecl(ctxt);
8708: }
8709:
8710: /*
8711: * Doing validity checking on chunk doesn't make sense
8712: */
8713: ctxt->instate = XML_PARSER_CONTENT;
8714: ctxt->validate = ctx->validate;
8715: ctxt->depth = ctx->depth + 1;
8716: ctxt->replaceEntities = ctx->replaceEntities;
8717: if (ctxt->validate) {
8718: ctxt->vctxt.error = ctx->vctxt.error;
8719: ctxt->vctxt.warning = ctx->vctxt.warning;
8720: /* Allocate the Node stack */
8721: ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
1.228 veillard 8722: if (ctxt->vctxt.nodeTab == NULL) {
1.241 veillard 8723: xmlGenericError(xmlGenericErrorContext,
8724: "xmlParseCtxtExternalEntity: out of memory\n");
1.228 veillard 8725: ctxt->validate = 0;
8726: ctxt->vctxt.error = NULL;
8727: ctxt->vctxt.warning = NULL;
8728: } else {
8729: ctxt->vctxt.nodeNr = 0;
8730: ctxt->vctxt.nodeMax = 4;
8731: ctxt->vctxt.node = NULL;
8732: }
1.213 veillard 8733: } else {
8734: ctxt->vctxt.error = NULL;
8735: ctxt->vctxt.warning = NULL;
8736: }
8737:
8738: xmlParseContent(ctxt);
8739:
8740: if ((RAW == '<') && (NXT(1) == '/')) {
1.230 veillard 8741: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.213 veillard 8742: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8743: ctxt->sax->error(ctxt->userData,
8744: "chunk is not well balanced\n");
8745: ctxt->wellFormed = 0;
8746: ctxt->disableSAX = 1;
8747: } else if (RAW != 0) {
1.230 veillard 8748: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
1.213 veillard 8749: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8750: ctxt->sax->error(ctxt->userData,
8751: "extra content at the end of well balanced chunk\n");
8752: ctxt->wellFormed = 0;
8753: ctxt->disableSAX = 1;
8754: }
8755: if (ctxt->node != newDoc->children) {
1.230 veillard 8756: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.213 veillard 8757: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8758: ctxt->sax->error(ctxt->userData,
8759: "chunk is not well balanced\n");
8760: ctxt->wellFormed = 0;
8761: ctxt->disableSAX = 1;
8762: }
8763:
8764: if (!ctxt->wellFormed) {
8765: if (ctxt->errNo == 0)
8766: ret = 1;
8767: else
8768: ret = ctxt->errNo;
8769: } else {
8770: if (list != NULL) {
8771: xmlNodePtr cur;
8772:
8773: /*
8774: * Return the newly created nodeset after unlinking it from
8775: * they pseudo parent.
8776: */
8777: cur = newDoc->children->children;
8778: *list = cur;
8779: while (cur != NULL) {
8780: cur->parent = NULL;
8781: cur = cur->next;
8782: }
8783: newDoc->children->children = NULL;
8784: }
8785: ret = 0;
8786: }
8787: ctxt->sax = oldsax;
8788: xmlFreeParserCtxt(ctxt);
8789: newDoc->intSubset = NULL;
8790: newDoc->extSubset = NULL;
8791: xmlFreeDoc(newDoc);
8792:
8793: return(ret);
8794: }
8795:
8796: /**
1.181 daniel 8797: * xmlParseExternalEntity:
8798: * @doc: the document the chunk pertains to
8799: * @sax: the SAX handler bloc (possibly NULL)
8800: * @user_data: The user data returned on SAX callbacks (possibly NULL)
1.185 daniel 8801: * @depth: Used for loop detection, use 0
1.181 daniel 8802: * @URL: the URL for the entity to load
8803: * @ID: the System ID for the entity to load
8804: * @list: the return value for the set of parsed nodes
8805: *
8806: * Parse an external general entity
8807: * An external general parsed entity is well-formed if it matches the
8808: * production labeled extParsedEnt.
8809: *
8810: * [78] extParsedEnt ::= TextDecl? content
8811: *
8812: * Returns 0 if the entity is well formed, -1 in case of args problem and
8813: * the parser error code otherwise
8814: */
8815:
8816: int
8817: xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
1.185 daniel 8818: int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
1.181 daniel 8819: xmlParserCtxtPtr ctxt;
8820: xmlDocPtr newDoc;
8821: xmlSAXHandlerPtr oldsax = NULL;
8822: int ret = 0;
8823:
1.185 daniel 8824: if (depth > 40) {
8825: return(XML_ERR_ENTITY_LOOP);
8826: }
8827:
8828:
1.181 daniel 8829:
8830: if (list != NULL)
8831: *list = NULL;
8832: if ((URL == NULL) && (ID == NULL))
1.213 veillard 8833: return(-1);
8834: if (doc == NULL) /* @@ relax but check for dereferences */
1.181 daniel 8835: return(-1);
8836:
8837:
1.228 veillard 8838: ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
1.181 daniel 8839: if (ctxt == NULL) return(-1);
8840: ctxt->userData = ctxt;
8841: if (sax != NULL) {
8842: oldsax = ctxt->sax;
8843: ctxt->sax = sax;
8844: if (user_data != NULL)
8845: ctxt->userData = user_data;
8846: }
8847: newDoc = xmlNewDoc(BAD_CAST "1.0");
8848: if (newDoc == NULL) {
8849: xmlFreeParserCtxt(ctxt);
8850: return(-1);
8851: }
8852: if (doc != NULL) {
8853: newDoc->intSubset = doc->intSubset;
8854: newDoc->extSubset = doc->extSubset;
8855: }
8856: if (doc->URL != NULL) {
8857: newDoc->URL = xmlStrdup(doc->URL);
8858: }
8859: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8860: if (newDoc->children == NULL) {
8861: if (sax != NULL)
8862: ctxt->sax = oldsax;
8863: xmlFreeParserCtxt(ctxt);
8864: newDoc->intSubset = NULL;
8865: newDoc->extSubset = NULL;
8866: xmlFreeDoc(newDoc);
8867: return(-1);
8868: }
8869: nodePush(ctxt, newDoc->children);
8870: if (doc == NULL) {
8871: ctxt->myDoc = newDoc;
8872: } else {
8873: ctxt->myDoc = doc;
8874: newDoc->children->doc = doc;
8875: }
8876:
8877: /*
8878: * Parse a possible text declaration first
8879: */
8880: GROW;
8881: if ((RAW == '<') && (NXT(1) == '?') &&
8882: (NXT(2) == 'x') && (NXT(3) == 'm') &&
8883: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8884: xmlParseTextDecl(ctxt);
8885: }
8886:
8887: /*
8888: * Doing validity checking on chunk doesn't make sense
8889: */
8890: ctxt->instate = XML_PARSER_CONTENT;
8891: ctxt->validate = 0;
1.185 daniel 8892: ctxt->depth = depth;
1.181 daniel 8893:
8894: xmlParseContent(ctxt);
8895:
8896: if ((RAW == '<') && (NXT(1) == '/')) {
1.230 veillard 8897: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.181 daniel 8898: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8899: ctxt->sax->error(ctxt->userData,
8900: "chunk is not well balanced\n");
8901: ctxt->wellFormed = 0;
8902: ctxt->disableSAX = 1;
8903: } else if (RAW != 0) {
1.230 veillard 8904: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
1.181 daniel 8905: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8906: ctxt->sax->error(ctxt->userData,
8907: "extra content at the end of well balanced chunk\n");
8908: ctxt->wellFormed = 0;
8909: ctxt->disableSAX = 1;
8910: }
8911: if (ctxt->node != newDoc->children) {
1.230 veillard 8912: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.181 daniel 8913: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8914: ctxt->sax->error(ctxt->userData,
8915: "chunk is not well balanced\n");
8916: ctxt->wellFormed = 0;
8917: ctxt->disableSAX = 1;
8918: }
8919:
8920: if (!ctxt->wellFormed) {
8921: if (ctxt->errNo == 0)
8922: ret = 1;
8923: else
8924: ret = ctxt->errNo;
8925: } else {
8926: if (list != NULL) {
8927: xmlNodePtr cur;
8928:
8929: /*
8930: * Return the newly created nodeset after unlinking it from
8931: * they pseudo parent.
8932: */
8933: cur = newDoc->children->children;
8934: *list = cur;
8935: while (cur != NULL) {
8936: cur->parent = NULL;
8937: cur = cur->next;
8938: }
8939: newDoc->children->children = NULL;
8940: }
8941: ret = 0;
8942: }
8943: if (sax != NULL)
8944: ctxt->sax = oldsax;
8945: xmlFreeParserCtxt(ctxt);
8946: newDoc->intSubset = NULL;
8947: newDoc->extSubset = NULL;
8948: xmlFreeDoc(newDoc);
8949:
8950: return(ret);
8951: }
8952:
8953: /**
8954: * xmlParseBalancedChunk:
1.176 daniel 8955: * @doc: the document the chunk pertains to
8956: * @sax: the SAX handler bloc (possibly NULL)
8957: * @user_data: The user data returned on SAX callbacks (possibly NULL)
1.185 daniel 8958: * @depth: Used for loop detection, use 0
1.176 daniel 8959: * @string: the input string in UTF8 or ISO-Latin (zero terminated)
8960: * @list: the return value for the set of parsed nodes
8961: *
8962: * Parse a well-balanced chunk of an XML document
8963: * called by the parser
8964: * The allowed sequence for the Well Balanced Chunk is the one defined by
8965: * the content production in the XML grammar:
1.144 daniel 8966: *
1.175 daniel 8967: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8968: *
1.176 daniel 8969: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
8970: * the parser error code otherwise
1.144 daniel 8971: */
8972:
1.175 daniel 8973: int
8974: xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
1.185 daniel 8975: void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
1.176 daniel 8976: xmlParserCtxtPtr ctxt;
1.175 daniel 8977: xmlDocPtr newDoc;
1.181 daniel 8978: xmlSAXHandlerPtr oldsax = NULL;
1.175 daniel 8979: int size;
1.176 daniel 8980: int ret = 0;
1.175 daniel 8981:
1.185 daniel 8982: if (depth > 40) {
8983: return(XML_ERR_ENTITY_LOOP);
8984: }
8985:
1.175 daniel 8986:
1.176 daniel 8987: if (list != NULL)
8988: *list = NULL;
8989: if (string == NULL)
8990: return(-1);
8991:
8992: size = xmlStrlen(string);
8993:
1.183 daniel 8994: ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
1.176 daniel 8995: if (ctxt == NULL) return(-1);
8996: ctxt->userData = ctxt;
1.175 daniel 8997: if (sax != NULL) {
1.176 daniel 8998: oldsax = ctxt->sax;
8999: ctxt->sax = sax;
9000: if (user_data != NULL)
9001: ctxt->userData = user_data;
1.175 daniel 9002: }
9003: newDoc = xmlNewDoc(BAD_CAST "1.0");
1.176 daniel 9004: if (newDoc == NULL) {
9005: xmlFreeParserCtxt(ctxt);
9006: return(-1);
9007: }
1.175 daniel 9008: if (doc != NULL) {
9009: newDoc->intSubset = doc->intSubset;
9010: newDoc->extSubset = doc->extSubset;
9011: }
1.176 daniel 9012: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9013: if (newDoc->children == NULL) {
9014: if (sax != NULL)
9015: ctxt->sax = oldsax;
9016: xmlFreeParserCtxt(ctxt);
9017: newDoc->intSubset = NULL;
9018: newDoc->extSubset = NULL;
9019: xmlFreeDoc(newDoc);
9020: return(-1);
9021: }
9022: nodePush(ctxt, newDoc->children);
9023: if (doc == NULL) {
9024: ctxt->myDoc = newDoc;
9025: } else {
9026: ctxt->myDoc = doc;
9027: newDoc->children->doc = doc;
9028: }
9029: ctxt->instate = XML_PARSER_CONTENT;
1.185 daniel 9030: ctxt->depth = depth;
1.176 daniel 9031:
9032: /*
9033: * Doing validity checking on chunk doesn't make sense
9034: */
9035: ctxt->validate = 0;
9036:
1.175 daniel 9037: xmlParseContent(ctxt);
1.176 daniel 9038:
9039: if ((RAW == '<') && (NXT(1) == '/')) {
1.230 veillard 9040: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.176 daniel 9041: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9042: ctxt->sax->error(ctxt->userData,
9043: "chunk is not well balanced\n");
9044: ctxt->wellFormed = 0;
1.180 daniel 9045: ctxt->disableSAX = 1;
1.176 daniel 9046: } else if (RAW != 0) {
1.230 veillard 9047: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
1.176 daniel 9048: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9049: ctxt->sax->error(ctxt->userData,
9050: "extra content at the end of well balanced chunk\n");
9051: ctxt->wellFormed = 0;
1.180 daniel 9052: ctxt->disableSAX = 1;
1.176 daniel 9053: }
9054: if (ctxt->node != newDoc->children) {
1.230 veillard 9055: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.176 daniel 9056: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9057: ctxt->sax->error(ctxt->userData,
9058: "chunk is not well balanced\n");
9059: ctxt->wellFormed = 0;
1.180 daniel 9060: ctxt->disableSAX = 1;
1.176 daniel 9061: }
1.175 daniel 9062:
1.176 daniel 9063: if (!ctxt->wellFormed) {
9064: if (ctxt->errNo == 0)
9065: ret = 1;
9066: else
9067: ret = ctxt->errNo;
9068: } else {
9069: if (list != NULL) {
9070: xmlNodePtr cur;
1.175 daniel 9071:
1.176 daniel 9072: /*
9073: * Return the newly created nodeset after unlinking it from
9074: * they pseudo parent.
9075: */
9076: cur = newDoc->children->children;
9077: *list = cur;
9078: while (cur != NULL) {
9079: cur->parent = NULL;
9080: cur = cur->next;
9081: }
9082: newDoc->children->children = NULL;
9083: }
9084: ret = 0;
1.175 daniel 9085: }
1.176 daniel 9086: if (sax != NULL)
9087: ctxt->sax = oldsax;
1.175 daniel 9088: xmlFreeParserCtxt(ctxt);
9089: newDoc->intSubset = NULL;
9090: newDoc->extSubset = NULL;
1.176 daniel 9091: xmlFreeDoc(newDoc);
1.175 daniel 9092:
1.176 daniel 9093: return(ret);
1.144 daniel 9094: }
9095:
9096: /**
1.229 veillard 9097: * xmlSAXParseEntity:
9098: * @sax: the SAX handler block
9099: * @filename: the filename
9100: *
9101: * parse an XML external entity out of context and build a tree.
9102: * It use the given SAX function block to handle the parsing callback.
9103: * If sax is NULL, fallback to the default DOM tree building routines.
9104: *
9105: * [78] extParsedEnt ::= TextDecl? content
9106: *
9107: * This correspond to a "Well Balanced" chunk
1.144 daniel 9108: *
1.229 veillard 9109: * Returns the resulting document tree
1.144 daniel 9110: */
9111:
1.229 veillard 9112: xmlDocPtr
9113: xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9114: xmlDocPtr ret;
9115: xmlParserCtxtPtr ctxt;
9116: char *directory = NULL;
9117:
9118: ctxt = xmlCreateFileParserCtxt(filename);
9119: if (ctxt == NULL) {
9120: return(NULL);
9121: }
9122: if (sax != NULL) {
9123: if (ctxt->sax != NULL)
9124: xmlFree(ctxt->sax);
9125: ctxt->sax = sax;
9126: ctxt->userData = NULL;
9127: }
9128:
9129: if ((ctxt->directory == NULL) && (directory == NULL))
9130: directory = xmlParserGetDirectory(filename);
9131:
9132: xmlParseExtParsedEnt(ctxt);
9133:
9134: if (ctxt->wellFormed)
9135: ret = ctxt->myDoc;
9136: else {
9137: ret = NULL;
9138: xmlFreeDoc(ctxt->myDoc);
9139: ctxt->myDoc = NULL;
9140: }
9141: if (sax != NULL)
9142: ctxt->sax = NULL;
9143: xmlFreeParserCtxt(ctxt);
9144:
9145: return(ret);
1.144 daniel 9146: }
9147:
9148: /**
1.229 veillard 9149: * xmlParseEntity:
9150: * @filename: the filename
9151: *
9152: * parse an XML external entity out of context and build a tree.
9153: *
9154: * [78] extParsedEnt ::= TextDecl? content
9155: *
9156: * This correspond to a "Well Balanced" chunk
1.59 daniel 9157: *
1.68 daniel 9158: * Returns the resulting document tree
1.59 daniel 9159: */
9160:
1.69 daniel 9161: xmlDocPtr
1.229 veillard 9162: xmlParseEntity(const char *filename) {
9163: return(xmlSAXParseEntity(NULL, filename));
1.55 daniel 9164: }
9165:
9166: /**
1.181 daniel 9167: * xmlCreateEntityParserCtxt:
9168: * @URL: the entity URL
9169: * @ID: the entity PUBLIC ID
9170: * @base: a posible base for the target URI
9171: *
9172: * Create a parser context for an external entity
9173: * Automatic support for ZLIB/Compress compressed document is provided
9174: * by default if found at compile-time.
9175: *
9176: * Returns the new parser context or NULL
9177: */
9178: xmlParserCtxtPtr
9179: xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9180: const xmlChar *base) {
9181: xmlParserCtxtPtr ctxt;
9182: xmlParserInputPtr inputStream;
9183: char *directory = NULL;
1.210 veillard 9184: xmlChar *uri;
9185:
1.181 daniel 9186: ctxt = xmlNewParserCtxt();
9187: if (ctxt == NULL) {
9188: return(NULL);
9189: }
9190:
1.210 veillard 9191: uri = xmlBuildURI(URL, base);
9192:
9193: if (uri == NULL) {
9194: inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9195: if (inputStream == NULL) {
9196: xmlFreeParserCtxt(ctxt);
9197: return(NULL);
9198: }
9199:
9200: inputPush(ctxt, inputStream);
9201:
9202: if ((ctxt->directory == NULL) && (directory == NULL))
9203: directory = xmlParserGetDirectory((char *)URL);
9204: if ((ctxt->directory == NULL) && (directory != NULL))
9205: ctxt->directory = directory;
9206: } else {
9207: inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9208: if (inputStream == NULL) {
9209: xmlFreeParserCtxt(ctxt);
9210: return(NULL);
9211: }
1.181 daniel 9212:
1.210 veillard 9213: inputPush(ctxt, inputStream);
1.181 daniel 9214:
1.210 veillard 9215: if ((ctxt->directory == NULL) && (directory == NULL))
9216: directory = xmlParserGetDirectory((char *)uri);
9217: if ((ctxt->directory == NULL) && (directory != NULL))
9218: ctxt->directory = directory;
9219: xmlFree(uri);
9220: }
1.181 daniel 9221:
9222: return(ctxt);
9223: }
9224:
1.229 veillard 9225: /************************************************************************
9226: * *
9227: * Front ends when parsing from a file *
9228: * *
9229: ************************************************************************/
9230:
1.181 daniel 9231: /**
9232: * xmlCreateFileParserCtxt:
1.50 daniel 9233: * @filename: the filename
9234: *
1.69 daniel 9235: * Create a parser context for a file content.
9236: * Automatic support for ZLIB/Compress compressed document is provided
9237: * by default if found at compile-time.
1.50 daniel 9238: *
1.69 daniel 9239: * Returns the new parser context or NULL
1.9 httpng 9240: */
1.69 daniel 9241: xmlParserCtxtPtr
9242: xmlCreateFileParserCtxt(const char *filename)
9243: {
9244: xmlParserCtxtPtr ctxt;
1.40 daniel 9245: xmlParserInputPtr inputStream;
1.91 daniel 9246: xmlParserInputBufferPtr buf;
1.111 daniel 9247: char *directory = NULL;
1.9 httpng 9248:
1.91 daniel 9249: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1.228 veillard 9250: if (buf == NULL) {
9251: return(NULL);
9252: }
1.9 httpng 9253:
1.97 daniel 9254: ctxt = xmlNewParserCtxt();
1.16 daniel 9255: if (ctxt == NULL) {
1.228 veillard 9256: if (xmlDefaultSAXHandler.error != NULL) {
9257: xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9258: }
1.16 daniel 9259: return(NULL);
9260: }
1.97 daniel 9261:
1.96 daniel 9262: inputStream = xmlNewInputStream(ctxt);
1.40 daniel 9263: if (inputStream == NULL) {
1.97 daniel 9264: xmlFreeParserCtxt(ctxt);
1.40 daniel 9265: return(NULL);
9266: }
9267:
1.119 daniel 9268: inputStream->filename = xmlMemStrdup(filename);
1.91 daniel 9269: inputStream->buf = buf;
9270: inputStream->base = inputStream->buf->buffer->content;
9271: inputStream->cur = inputStream->buf->buffer->content;
1.16 daniel 9272:
1.40 daniel 9273: inputPush(ctxt, inputStream);
1.110 daniel 9274: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 9275: directory = xmlParserGetDirectory(filename);
9276: if ((ctxt->directory == NULL) && (directory != NULL))
1.110 daniel 9277: ctxt->directory = directory;
1.106 daniel 9278:
1.69 daniel 9279: return(ctxt);
9280: }
9281:
9282: /**
1.181 daniel 9283: * xmlSAXParseFile:
1.69 daniel 9284: * @sax: the SAX handler block
9285: * @filename: the filename
9286: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9287: * documents
9288: *
9289: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9290: * compressed document is provided by default if found at compile-time.
9291: * It use the given SAX function block to handle the parsing callback.
9292: * If sax is NULL, fallback to the default DOM tree building routines.
9293: *
9294: * Returns the resulting document tree
9295: */
9296:
1.79 daniel 9297: xmlDocPtr
9298: xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
1.69 daniel 9299: int recovery) {
9300: xmlDocPtr ret;
1.229 veillard 9301: xmlParserCtxtPtr ctxt;
9302: char *directory = NULL;
9303:
9304: ctxt = xmlCreateFileParserCtxt(filename);
9305: if (ctxt == NULL) {
9306: return(NULL);
9307: }
9308: if (sax != NULL) {
9309: if (ctxt->sax != NULL)
9310: xmlFree(ctxt->sax);
9311: ctxt->sax = sax;
9312: ctxt->userData = NULL;
9313: }
9314:
9315: if ((ctxt->directory == NULL) && (directory == NULL))
9316: directory = xmlParserGetDirectory(filename);
9317: if ((ctxt->directory == NULL) && (directory != NULL))
9318: ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9319:
9320: xmlParseDocument(ctxt);
9321:
9322: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9323: else {
9324: ret = NULL;
9325: xmlFreeDoc(ctxt->myDoc);
9326: ctxt->myDoc = NULL;
9327: }
9328: if (sax != NULL)
9329: ctxt->sax = NULL;
9330: xmlFreeParserCtxt(ctxt);
9331:
9332: return(ret);
9333: }
9334:
9335: /**
9336: * xmlRecoverDoc:
9337: * @cur: a pointer to an array of xmlChar
9338: *
9339: * parse an XML in-memory document and build a tree.
9340: * In the case the document is not Well Formed, a tree is built anyway
9341: *
9342: * Returns the resulting document tree
9343: */
9344:
9345: xmlDocPtr
9346: xmlRecoverDoc(xmlChar *cur) {
9347: return(xmlSAXParseDoc(NULL, cur, 1));
9348: }
9349:
9350: /**
9351: * xmlParseFile:
9352: * @filename: the filename
9353: *
9354: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9355: * compressed document is provided by default if found at compile-time.
9356: *
9357: * Returns the resulting document tree
9358: */
9359:
9360: xmlDocPtr
9361: xmlParseFile(const char *filename) {
9362: return(xmlSAXParseFile(NULL, filename, 0));
9363: }
9364:
9365: /**
9366: * xmlRecoverFile:
9367: * @filename: the filename
9368: *
9369: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9370: * compressed document is provided by default if found at compile-time.
9371: * In the case the document is not Well Formed, a tree is built anyway
9372: *
9373: * Returns the resulting document tree
9374: */
9375:
9376: xmlDocPtr
9377: xmlRecoverFile(const char *filename) {
9378: return(xmlSAXParseFile(NULL, filename, 1));
9379: }
9380:
9381:
9382: /**
9383: * xmlSetupParserForBuffer:
9384: * @ctxt: an XML parser context
9385: * @buffer: a xmlChar * buffer
9386: * @filename: a file name
9387: *
9388: * Setup the parser context to parse a new buffer; Clears any prior
9389: * contents from the parser context. The buffer parameter must not be
9390: * NULL, but the filename parameter can be
9391: */
9392: void
9393: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9394: const char* filename)
9395: {
9396: xmlParserInputPtr input;
9397:
9398: input = xmlNewInputStream(ctxt);
9399: if (input == NULL) {
9400: perror("malloc");
9401: xmlFree(ctxt);
9402: return;
9403: }
9404:
9405: xmlClearParserCtxt(ctxt);
9406: if (filename != NULL)
9407: input->filename = xmlMemStrdup(filename);
9408: input->base = buffer;
9409: input->cur = buffer;
9410: inputPush(ctxt, input);
9411: }
9412:
9413: /**
9414: * xmlSAXUserParseFile:
9415: * @sax: a SAX handler
9416: * @user_data: The user data returned on SAX callbacks
9417: * @filename: a file name
9418: *
9419: * parse an XML file and call the given SAX handler routines.
9420: * Automatic support for ZLIB/Compress compressed document is provided
9421: *
9422: * Returns 0 in case of success or a error number otherwise
9423: */
9424: int
9425: xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9426: const char *filename) {
9427: int ret = 0;
9428: xmlParserCtxtPtr ctxt;
9429:
9430: ctxt = xmlCreateFileParserCtxt(filename);
9431: if (ctxt == NULL) return -1;
9432: if (ctxt->sax != &xmlDefaultSAXHandler)
9433: xmlFree(ctxt->sax);
9434: ctxt->sax = sax;
9435: if (user_data != NULL)
9436: ctxt->userData = user_data;
9437:
1.16 daniel 9438: xmlParseDocument(ctxt);
1.229 veillard 9439:
9440: if (ctxt->wellFormed)
9441: ret = 0;
1.59 daniel 9442: else {
1.229 veillard 9443: if (ctxt->errNo != 0)
9444: ret = ctxt->errNo;
9445: else
9446: ret = -1;
1.59 daniel 9447: }
1.86 daniel 9448: if (sax != NULL)
1.229 veillard 9449: ctxt->sax = NULL;
1.69 daniel 9450: xmlFreeParserCtxt(ctxt);
1.20 daniel 9451:
1.229 veillard 9452: return ret;
1.20 daniel 9453: }
9454:
1.229 veillard 9455: /************************************************************************
9456: * *
9457: * Front ends when parsing from memory *
9458: * *
9459: ************************************************************************/
1.32 daniel 9460:
1.50 daniel 9461: /**
1.181 daniel 9462: * xmlCreateMemoryParserCtxt:
1.229 veillard 9463: * @buffer: a pointer to a char array
9464: * @size: the size of the array
1.50 daniel 9465: *
1.69 daniel 9466: * Create a parser context for an XML in-memory document.
1.50 daniel 9467: *
1.69 daniel 9468: * Returns the new parser context or NULL
1.20 daniel 9469: */
1.69 daniel 9470: xmlParserCtxtPtr
9471: xmlCreateMemoryParserCtxt(char *buffer, int size) {
1.20 daniel 9472: xmlParserCtxtPtr ctxt;
1.40 daniel 9473: xmlParserInputPtr input;
1.209 veillard 9474: xmlParserInputBufferPtr buf;
1.40 daniel 9475:
1.229 veillard 9476: if (buffer == NULL)
9477: return(NULL);
9478: if (size <= 0)
1.181 daniel 9479: return(NULL);
1.40 daniel 9480:
1.97 daniel 9481: ctxt = xmlNewParserCtxt();
1.181 daniel 9482: if (ctxt == NULL)
1.20 daniel 9483: return(NULL);
1.97 daniel 9484:
1.209 veillard 9485: buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
9486: if (buf == NULL) return(NULL);
9487:
1.96 daniel 9488: input = xmlNewInputStream(ctxt);
1.40 daniel 9489: if (input == NULL) {
1.97 daniel 9490: xmlFreeParserCtxt(ctxt);
1.40 daniel 9491: return(NULL);
9492: }
1.20 daniel 9493:
1.40 daniel 9494: input->filename = NULL;
1.209 veillard 9495: input->buf = buf;
9496: input->base = input->buf->buffer->content;
9497: input->cur = input->buf->buffer->content;
1.20 daniel 9498:
1.40 daniel 9499: inputPush(ctxt, input);
1.69 daniel 9500: return(ctxt);
9501: }
9502:
9503: /**
1.181 daniel 9504: * xmlSAXParseMemory:
1.69 daniel 9505: * @sax: the SAX handler block
9506: * @buffer: an pointer to a char array
1.127 daniel 9507: * @size: the size of the array
9508: * @recovery: work in recovery mode, i.e. tries to read not Well Formed
1.69 daniel 9509: * documents
9510: *
9511: * parse an XML in-memory block and use the given SAX function block
9512: * to handle the parsing callback. If sax is NULL, fallback to the default
9513: * DOM tree building routines.
9514: *
9515: * Returns the resulting document tree
9516: */
9517: xmlDocPtr
9518: xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
9519: xmlDocPtr ret;
9520: xmlParserCtxtPtr ctxt;
9521:
9522: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9523: if (ctxt == NULL) return(NULL);
1.74 daniel 9524: if (sax != NULL) {
9525: ctxt->sax = sax;
9526: ctxt->userData = NULL;
9527: }
1.20 daniel 9528:
9529: xmlParseDocument(ctxt);
1.40 daniel 9530:
1.72 daniel 9531: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 9532: else {
9533: ret = NULL;
1.72 daniel 9534: xmlFreeDoc(ctxt->myDoc);
9535: ctxt->myDoc = NULL;
1.59 daniel 9536: }
1.86 daniel 9537: if (sax != NULL)
9538: ctxt->sax = NULL;
1.69 daniel 9539: xmlFreeParserCtxt(ctxt);
1.16 daniel 9540:
1.9 httpng 9541: return(ret);
1.17 daniel 9542: }
9543:
1.55 daniel 9544: /**
1.181 daniel 9545: * xmlParseMemory:
1.68 daniel 9546: * @buffer: an pointer to a char array
1.55 daniel 9547: * @size: the size of the array
9548: *
9549: * parse an XML in-memory block and build a tree.
9550: *
1.68 daniel 9551: * Returns the resulting document tree
1.55 daniel 9552: */
9553:
9554: xmlDocPtr xmlParseMemory(char *buffer, int size) {
1.59 daniel 9555: return(xmlSAXParseMemory(NULL, buffer, size, 0));
9556: }
9557:
9558: /**
1.181 daniel 9559: * xmlRecoverMemory:
1.68 daniel 9560: * @buffer: an pointer to a char array
1.59 daniel 9561: * @size: the size of the array
9562: *
9563: * parse an XML in-memory block and build a tree.
9564: * In the case the document is not Well Formed, a tree is built anyway
9565: *
1.68 daniel 9566: * Returns the resulting document tree
1.59 daniel 9567: */
9568:
9569: xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
9570: return(xmlSAXParseMemory(NULL, buffer, size, 1));
1.17 daniel 9571: }
9572:
1.123 daniel 9573: /**
9574: * xmlSAXUserParseMemory:
9575: * @sax: a SAX handler
9576: * @user_data: The user data returned on SAX callbacks
9577: * @buffer: an in-memory XML document input
1.127 daniel 9578: * @size: the length of the XML document in bytes
1.123 daniel 9579: *
9580: * A better SAX parsing routine.
9581: * parse an XML in-memory buffer and call the given SAX handler routines.
9582: *
9583: * Returns 0 in case of success or a error number otherwise
9584: */
9585: int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
9586: char *buffer, int size) {
9587: int ret = 0;
9588: xmlParserCtxtPtr ctxt;
1.218 veillard 9589: xmlSAXHandlerPtr oldsax = NULL;
1.123 daniel 9590:
9591: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9592: if (ctxt == NULL) return -1;
1.216 veillard 9593: if (sax != NULL) {
9594: oldsax = ctxt->sax;
9595: ctxt->sax = sax;
9596: }
1.123 daniel 9597: ctxt->userData = user_data;
9598:
9599: xmlParseDocument(ctxt);
9600:
9601: if (ctxt->wellFormed)
9602: ret = 0;
9603: else {
9604: if (ctxt->errNo != 0)
9605: ret = ctxt->errNo;
9606: else
9607: ret = -1;
9608: }
1.216 veillard 9609: if (sax != NULL) {
9610: ctxt->sax = oldsax;
9611: }
1.123 daniel 9612: xmlFreeParserCtxt(ctxt);
9613:
9614: return ret;
9615: }
9616:
1.132 daniel 9617: /**
1.229 veillard 9618: * xmlCreateDocParserCtxt:
9619: * @cur: a pointer to an array of xmlChar
9620: *
9621: * Creates a parser context for an XML in-memory document.
1.132 daniel 9622: *
1.229 veillard 9623: * Returns the new parser context or NULL
1.132 daniel 9624: */
1.229 veillard 9625: xmlParserCtxtPtr
9626: xmlCreateDocParserCtxt(xmlChar *cur) {
9627: int len;
1.132 daniel 9628:
1.229 veillard 9629: if (cur == NULL)
9630: return(NULL);
9631: len = xmlStrlen(cur);
9632: return(xmlCreateMemoryParserCtxt((char *)cur, len));
1.132 daniel 9633: }
1.98 daniel 9634:
1.50 daniel 9635: /**
1.229 veillard 9636: * xmlSAXParseDoc:
9637: * @sax: the SAX handler block
9638: * @cur: a pointer to an array of xmlChar
9639: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9640: * documents
1.50 daniel 9641: *
1.229 veillard 9642: * parse an XML in-memory document and build a tree.
9643: * It use the given SAX function block to handle the parsing callback.
9644: * If sax is NULL, fallback to the default DOM tree building routines.
1.50 daniel 9645: *
1.229 veillard 9646: * Returns the resulting document tree
1.32 daniel 9647: */
9648:
1.229 veillard 9649: xmlDocPtr
9650: xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
9651: xmlDocPtr ret;
9652: xmlParserCtxtPtr ctxt;
9653:
9654: if (cur == NULL) return(NULL);
1.32 daniel 9655:
9656:
1.229 veillard 9657: ctxt = xmlCreateDocParserCtxt(cur);
9658: if (ctxt == NULL) return(NULL);
9659: if (sax != NULL) {
9660: ctxt->sax = sax;
9661: ctxt->userData = NULL;
9662: }
1.32 daniel 9663:
1.229 veillard 9664: xmlParseDocument(ctxt);
9665: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9666: else {
9667: ret = NULL;
9668: xmlFreeDoc(ctxt->myDoc);
9669: ctxt->myDoc = NULL;
9670: }
9671: if (sax != NULL)
9672: ctxt->sax = NULL;
9673: xmlFreeParserCtxt(ctxt);
9674:
9675: return(ret);
1.32 daniel 9676: }
9677:
1.50 daniel 9678: /**
1.229 veillard 9679: * xmlParseDoc:
9680: * @cur: a pointer to an array of xmlChar
1.50 daniel 9681: *
1.229 veillard 9682: * parse an XML in-memory document and build a tree.
1.50 daniel 9683: *
1.229 veillard 9684: * Returns the resulting document tree
1.32 daniel 9685: */
9686:
1.229 veillard 9687: xmlDocPtr
9688: xmlParseDoc(xmlChar *cur) {
9689: return(xmlSAXParseDoc(NULL, cur, 0));
9690: }
1.32 daniel 9691:
9692:
1.229 veillard 9693: /************************************************************************
9694: * *
9695: * Miscellaneous *
9696: * *
9697: ************************************************************************/
1.32 daniel 9698:
1.237 veillard 9699: #ifdef LIBXML_XPATH_ENABLED
9700: #include <libxml/xpath.h>
9701: #endif
9702:
/* Set to 1 once xmlInitParser() has run, reset by xmlCleanupParser() */
static int xmlParserInitialized = 0;

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 *
 * Calls made while the library is already flagged as initialized do
 * nothing.
 */

void
xmlInitParser(void) {
    if (!xmlParserInitialized) {
        xmlInitCharEncodingHandlers();
        xmlInitializePredefinedEntities();
        xmlDefaultSAXHandlerInit();
        xmlRegisterDefaultInputCallbacks();
        xmlRegisterDefaultOutputCallbacks();
#ifdef LIBXML_HTML_ENABLED
        htmlInitAutoClose();
        htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
        xmlXPathInit();
#endif
        xmlParserInitialized = 1;
    }
}
9731:
1.50 daniel 9732: /**
1.229 veillard 9733: * xmlCleanupParser:
1.50 daniel 9734: *
1.229 veillard 9735: * Cleanup function for the XML parser. It tries to reclaim all
9736: * parsing related global memory allocated for the parser processing.
9737: * It doesn't deallocate any document related memory. Calling this
9738: * function should not prevent reusing the parser.
1.32 daniel 9739: */
1.229 veillard 9740:
1.55 daniel 9741: void
1.229 veillard 9742: xmlCleanupParser(void) {
1.235 veillard 9743: xmlParserInitialized = 0;
1.229 veillard 9744: xmlCleanupCharEncodingHandlers();
9745: xmlCleanupPredefinedEntities();
1.32 daniel 9746: }
1.220 veillard 9747:
9748: /**
9749: * xmlPedanticParserDefault:
9750: * @val: int 0 or 1
9751: *
9752: * Set and return the previous value for enabling pedantic warnings.
9753: *
9754: * Returns the last value for 0 for no substitution, 1 for substitution.
9755: */
9756:
9757: int
9758: xmlPedanticParserDefault(int val) {
9759: int old = xmlPedanticParserDefaultValue;
9760:
9761: xmlPedanticParserDefaultValue = val;
9762: return(old);
9763: }
1.98 daniel 9764:
9765: /**
1.181 daniel 9766: * xmlSubstituteEntitiesDefault:
1.98 daniel 9767: * @val: int 0 or 1
9768: *
9769: * Set and return the previous value for default entity support.
9770: * Initially the parser always keep entity references instead of substituting
9771: * entity values in the output. This function has to be used to change the
9772: * default parser behaviour
9773: * SAX::subtituteEntities() has to be used for changing that on a file by
9774: * file basis.
9775: *
9776: * Returns the last value for 0 for no substitution, 1 for substitution.
9777: */
9778:
9779: int
9780: xmlSubstituteEntitiesDefault(int val) {
9781: int old = xmlSubstituteEntitiesDefaultValue;
9782:
9783: xmlSubstituteEntitiesDefaultValue = val;
1.180 daniel 9784: return(old);
9785: }
9786:
9787: /**
9788: * xmlKeepBlanksDefault:
9789: * @val: int 0 or 1
9790: *
9791: * Set and return the previous value for default blanks text nodes support.
9792: * The 1.x version of the parser used an heuristic to try to detect
9793: * ignorable white spaces. As a result the SAX callback was generating
9794: * ignorableWhitespace() callbacks instead of characters() one, and when
9795: * using the DOM output text nodes containing those blanks were not generated.
9796: * The 2.x and later version will switch to the XML standard way and
9797: * ignorableWhitespace() are only generated when running the parser in
9798: * validating mode and when the current element doesn't allow CDATA or
9799: * mixed content.
9800: * This function is provided as a way to force the standard behaviour
9801: * on 1.X libs and to switch back to the old mode for compatibility when
9802: * running 1.X client code on 2.X . Upgrade of 1.X code should be done
9803: * by using xmlIsBlankNode() commodity function to detect the "empty"
9804: * nodes generated.
9805: * This value also affect autogeneration of indentation when saving code
9806: * if blanks sections are kept, indentation is not generated.
9807: *
9808: * Returns the last value for 0 for no substitution, 1 for substitution.
9809: */
9810:
9811: int
9812: xmlKeepBlanksDefault(int val) {
9813: int old = xmlKeepBlanksDefaultValue;
9814:
9815: xmlKeepBlanksDefaultValue = val;
9816: xmlIndentTreeOutput = !val;
1.98 daniel 9817: return(old);
9818: }
1.77 daniel 9819:
Webmaster