Annotation of XML/parser.c, revision 1.240
1.1 veillard 1: /*
1.229 veillard 2: * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3: * implemented on top of the SAX interfaces
1.15 veillard 4: *
1.222 veillard 5: * References:
6: * The XML specification:
7: * http://www.w3.org/TR/REC-xml
8: * Original 1.0 version:
9: * http://www.w3.org/TR/1998/REC-xml-19980210
10: * XML second edition working draft
11: * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12: *
1.229 veillard 13: * Okay this is a big file, the parser core is around 7000 lines, then it
14: * is followed by the progressive parser top routines, then the various
15: * high level APIs to call the parser and a few miscellaneous functions.
16: * A number of helper functions and deprecated ones have been moved to
17: * parserInternals.c to reduce this file size.
18: * As much as possible the functions are associated with their relative
19: * production in the XML specification. A few productions defining the
20: * different ranges of character are actually implanted either in
21: * parserInternals.h or parserInternals.c
22: * The DOM tree build is realized from the default SAX callbacks in
23: * the module SAX.c.
24: * The routines doing the validation checks are in valid.c and called either
25: * from the SAX callbacks or as standalone functions using a preparsed
26: * document.
27: *
1.15 veillard 28: * See Copyright for the status of this software.
29: *
1.60 daniel 30: * Daniel.Veillard@w3.org
1.1 veillard 31: */
32:
1.26 daniel 33: #ifdef WIN32
1.138 daniel 34: #include "win32config.h"
1.226 veillard 35: #define XML_DIR_SEP '\\'
1.26 daniel 36: #else
1.121 daniel 37: #include "config.h"
1.226 veillard 38: #define XML_DIR_SEP '/'
1.26 daniel 39: #endif
1.121 daniel 40:
1.1 veillard 41: #include <stdio.h>
1.238 veillard 42: #include <stdlib.h>
1.204 veillard 43: #include <string.h>
1.238 veillard 44: #include <libxml/xmlmemory.h>
45: #include <libxml/tree.h>
46: #include <libxml/parser.h>
47: #include <libxml/parserInternals.h>
48: #include <libxml/valid.h>
49: #include <libxml/entities.h>
50: #include <libxml/xmlerror.h>
51: #include <libxml/encoding.h>
52: #include <libxml/xmlIO.h>
53: #include <libxml/uri.h>
54:
1.121 daniel 55: #ifdef HAVE_CTYPE_H
1.1 veillard 56: #include <ctype.h>
1.121 daniel 57: #endif
58: #ifdef HAVE_STDLIB_H
1.50 daniel 59: #include <stdlib.h>
1.121 daniel 60: #endif
61: #ifdef HAVE_SYS_STAT_H
1.9 httpng 62: #include <sys/stat.h>
1.121 daniel 63: #endif
1.9 httpng 64: #ifdef HAVE_FCNTL_H
65: #include <fcntl.h>
66: #endif
1.10 httpng 67: #ifdef HAVE_UNISTD_H
68: #include <unistd.h>
69: #endif
1.20 daniel 70: #ifdef HAVE_ZLIB_H
71: #include <zlib.h>
72: #endif
1.1 veillard 73:
74:
/*
 * Sizes, counted in xmlChar units, used by the temporary buffers in this
 * file: the initial allocation and the safety margin kept free at the end.
 */
#define XML_PARSER_BIG_BUFFER_SIZE 1000
#define XML_PARSER_BUFFER_SIZE 100

/*
 * Various global defaults for parsing.
 * NOTE(review): apart from xmlParserDebugEntities the exact effect of each
 * flag is not visible in this part of the file; the descriptions below are
 * what the names suggest and should be confirmed against their users.
 */
int xmlGetWarningsDefaultValue = 1;        /* presumably: report warnings */
int xmlParserDebugEntities = 0;            /* nonzero: trace entity handling
                                              on stderr */
int xmlSubstituteEntitiesDefaultValue = 0; /* presumably: substitute
                                              entities */
int xmlDoValidityCheckingDefaultValue = 0; /* presumably: validate */
int xmlPedanticParserDefaultValue = 0;     /* presumably: pedantic mode */
int xmlKeepBlanksDefaultValue = 1;         /* presumably: keep blank nodes */

/*
 * NULL-terminated list of the XML prefixed PI targets allowed by the
 * W3C specs.
 */
const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
1.91 daniel 96:
1.229 veillard 97: /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
1.151 daniel 98: void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
99: xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
100: const xmlChar **str);
1.91 daniel 101:
102:
1.45 daniel 103: /************************************************************************
104: * *
105: * Parser stacks related functions and macros *
106: * *
107: ************************************************************************/
1.79 daniel 108:
1.135 daniel 109: xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
110: const xmlChar ** str);
1.79 daniel 111:
1.1 veillard 112: /*
1.40 daniel 113: * Generic function for accessing stacks in the Parser Context
1.1 veillard 114: */
115:
1.140 daniel 116: #define PUSH_AND_POP(scope, type, name) \
117: scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
1.31 daniel 118: if (ctxt->name##Nr >= ctxt->name##Max) { \
119: ctxt->name##Max *= 2; \
1.204 veillard 120: ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
1.40 daniel 121: ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
122: if (ctxt->name##Tab == NULL) { \
1.31 daniel 123: fprintf(stderr, "realloc failed !\n"); \
1.145 daniel 124: return(0); \
1.31 daniel 125: } \
126: } \
1.40 daniel 127: ctxt->name##Tab[ctxt->name##Nr] = value; \
128: ctxt->name = value; \
129: return(ctxt->name##Nr++); \
1.31 daniel 130: } \
1.140 daniel 131: scope type name##Pop(xmlParserCtxtPtr ctxt) { \
1.69 daniel 132: type ret; \
1.40 daniel 133: if (ctxt->name##Nr <= 0) return(0); \
134: ctxt->name##Nr--; \
1.50 daniel 135: if (ctxt->name##Nr > 0) \
136: ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
137: else \
138: ctxt->name = NULL; \
1.69 daniel 139: ret = ctxt->name##Tab[ctxt->name##Nr]; \
140: ctxt->name##Tab[ctxt->name##Nr] = 0; \
141: return(ret); \
1.31 daniel 142: } \
143:
1.229 veillard 144: /*
145: * Those macros actually generate the functions
146: */
1.140 daniel 147: PUSH_AND_POP(extern, xmlParserInputPtr, input)
148: PUSH_AND_POP(extern, xmlNodePtr, node)
149: PUSH_AND_POP(extern, xmlChar*, name)
1.40 daniel 150:
1.176 daniel 151: int spacePush(xmlParserCtxtPtr ctxt, int val) {
152: if (ctxt->spaceNr >= ctxt->spaceMax) {
153: ctxt->spaceMax *= 2;
1.204 veillard 154: ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1.176 daniel 155: ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
156: if (ctxt->spaceTab == NULL) {
157: fprintf(stderr, "realloc failed !\n");
158: return(0);
159: }
160: }
161: ctxt->spaceTab[ctxt->spaceNr] = val;
162: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
163: return(ctxt->spaceNr++);
164: }
165:
166: int spacePop(xmlParserCtxtPtr ctxt) {
167: int ret;
168: if (ctxt->spaceNr <= 0) return(0);
169: ctxt->spaceNr--;
170: if (ctxt->spaceNr > 0)
171: ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
172: else
173: ctxt->space = NULL;
174: ret = ctxt->spaceTab[ctxt->spaceNr];
175: ctxt->spaceTab[ctxt->spaceNr] = -1;
176: return(ret);
177: }
178:
1.55 daniel 179: /*
180: * Macros for accessing the content. Those should be used only by the parser,
181: * and not exported.
182: *
1.229 veillard 183: * Dirty macros, i.e. one often need to make assumption on the context to
184: * use them
1.55 daniel 185: *
1.123 daniel 186: * CUR_PTR return the current pointer to the xmlChar to be parsed.
1.151 daniel 187: * To be used with extreme caution since operations consuming
188: * characters may move the input buffer to a different location !
1.123 daniel 189: * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1.151 daniel 190: * This should be used internally by the parser
1.55 daniel 191: * only to compare to ASCII values otherwise it would break when
192: * running with UTF-8 encoding.
1.229 veillard 193: * RAW same as CUR but in the input buffer, bypass any token
194: * extraction that may have been done
1.123 daniel 195: * NXT(n) returns the n'th next xmlChar. Same as CUR, it should be used only
1.55 daniel 196: * to compare on ASCII based substring.
1.123 daniel 197: * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1.55 daniel 198: * strings within the parser.
199: *
1.77 daniel 200: * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1.55 daniel 201: *
202: * NEXT Skip to the next character, this does the proper decoding
203: * in UTF-8 mode. It also pop-up unfinished entities on the fly.
1.229 veillard 204: * NEXTL(l) Skip l xmlChars in the input buffer
205: * CUR_CHAR(l) returns the current unicode character (int), set l
206: * to the number of xmlChars used for the encoding [0-5].
207: * CUR_SCHAR same but operate on a string instead of the context
208: * COPY_BUF copy the current unicode char to the target buffer, increment
209: * the index
210: * GROW, SHRINK handling of input buffers
1.55 daniel 211: */
1.45 daniel 212:
/*
 * Current character accessors. They all expect a parser context variable
 * named ctxt to be in scope. A pending ctxt->token hides the input
 * buffer: RAW then yields -1 and CUR yields the token.
 */
#define RAW ((ctxt->token != 0) ? -1 : (*ctxt->input->cur))
#define CUR ((ctxt->token != 0) ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * SKIP(val): advance by val xmlChars, give a '%' found at the new
 * position to the PE reference handler, and pop the input when it is
 * exhausted and cannot be refilled.
 */
#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val);                                             \
    ctxt->input->cur += (val);                                          \
    if (*ctxt->input->cur == '%')                                       \
        xmlParserHandlePEReference(ctxt);                               \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
        xmlPopInput(ctxt);                                              \
  } while (0)

/*
 * SHRINK / GROW: shrink or grow the current input buffer, then pop the
 * input when it sits on a 0 byte and no more data can be read.
 */
#define SHRINK do {                                                     \
    xmlParserInputShrink(ctxt->input);                                  \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
        xmlPopInput(ctxt);                                              \
  } while (0)

#define GROW do {                                                       \
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);                       \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
        xmlPopInput(ctxt);                                              \
  } while (0)

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXTL(l): account for the current character in line/col, drop any
 * pending token, advance by l xmlChars and give a '%' found at the new
 * position to the PE reference handler.
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++;                                            \
        ctxt->input->col = 1;                                           \
    } else {                                                            \
        ctxt->input->col++;                                             \
    }                                                                   \
    ctxt->token = 0;                                                    \
    ctxt->input->cur += l;                                              \
    if (*ctxt->input->cur == '%')                                       \
        xmlParserHandlePEReference(ctxt);                               \
  } while (0)
1.154 daniel 253:
/*
 * CUR_CHAR(l) / CUR_SCHAR(s, l): decode the current character of the
 * context, respectively of the string s; l receives the value stored by
 * the callee through the pointer it is given.
 */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): store the character v in buffer b at index i and
 * advance i. When l is 1 the value is stored as a single xmlChar,
 * otherwise the work is delegated to xmlCopyChar() and i is advanced by
 * its return value.
 * The macro is a single statement and every argument is parenthesized, so
 * expressions such as "x >> 8" can be passed for v. i must be an lvalue.
 */
#define COPY_BUF(l,b,i,v) do {                                          \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);                           \
    else (i) += xmlCopyChar((l), &(b)[(i)], (v));                       \
  } while (0)
1.151 daniel 260:
261: /**
1.229 veillard 262: * xmlSkipBlankChars:
1.151 daniel 263: * @ctxt: the XML parser context
264: *
1.229 veillard 265: * skip all blanks character found at that point in the input streams.
266: * It pops up finished entities in the process if allowable at that point.
267: *
268: * Returns the number of space chars skipped
1.151 daniel 269: */
1.55 daniel 270:
1.229 veillard 271: int
272: xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
273: int cur, res = 0;
1.201 daniel 274:
1.176 daniel 275: /*
1.229 veillard 276: * It's Okay to use CUR/NEXT here since all the blanks are on
277: * the ASCII range.
278: */
279: do {
280: cur = CUR;
281: while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
282: NEXT;
283: cur = CUR;
284: res++;
1.151 daniel 285: }
1.229 veillard 286: while ((cur == 0) && (ctxt->inputNr > 1) &&
287: (ctxt->instate != XML_PARSER_COMMENT)) {
1.168 daniel 288: xmlPopInput(ctxt);
1.229 veillard 289: cur = CUR;
290: }
1.222 veillard 291: /*
292: * Need to handle support of entities branching here
293: */
1.155 daniel 294: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1.229 veillard 295: /* DEPR if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); */
1.222 veillard 296: } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1.155 daniel 297: return(res);
1.152 daniel 298: }
299:
1.97 daniel 300: /************************************************************************
301: * *
1.229 veillard 302: * Commodity functions to handle entities *
1.97 daniel 303: * *
304: ************************************************************************/
1.40 daniel 305:
1.50 daniel 306: /**
307: * xmlPopInput:
308: * @ctxt: an XML parser context
309: *
1.40 daniel 310: * xmlPopInput: the current input pointed by ctxt->input came to an end
311: * pop it and return the next char.
1.45 daniel 312: *
1.123 daniel 313: * Returns the current xmlChar in the parser context
1.40 daniel 314: */
1.123 daniel 315: xmlChar
1.55 daniel 316: xmlPopInput(xmlParserCtxtPtr ctxt) {
1.40 daniel 317: if (ctxt->inputNr == 1) return(0); /* End of main Input */
1.220 veillard 318: if (xmlParserDebugEntities)
319: fprintf(stderr, "Popping input %d\n", ctxt->inputNr);
1.69 daniel 320: xmlFreeInputStream(inputPop(ctxt));
1.97 daniel 321: if ((*ctxt->input->cur == 0) &&
322: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
323: return(xmlPopInput(ctxt));
1.40 daniel 324: return(CUR);
325: }
326:
1.50 daniel 327: /**
1.229 veillard 328: * xmlPushInput:
1.174 daniel 329: * @ctxt: an XML parser context
1.229 veillard 330: * @input: an XML parser input fragment (entity, XML fragment ...).
1.174 daniel 331: *
1.229 veillard 332: * xmlPushInput: switch to a new input stream which is stacked on top
333: * of the previous one(s).
1.174 daniel 334: */
1.229 veillard 335: void
336: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
337: if (input == NULL) return;
1.174 daniel 338:
1.229 veillard 339: if (xmlParserDebugEntities) {
340: if ((ctxt->input != NULL) && (ctxt->input->filename))
341: fprintf(stderr, "%s(%d): ", ctxt->input->filename,
342: ctxt->input->line);
343: fprintf(stderr, "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
344: }
345: inputPush(ctxt, input);
346: GROW;
1.174 daniel 347: }
1.97 daniel 348:
349: /**
350: * xmlParseCharRef:
351: * @ctxt: an XML parser context
352: *
353: * parse Reference declarations
354: *
355: * [66] CharRef ::= '&#' [0-9]+ ';' |
356: * '&#x' [0-9a-fA-F]+ ';'
357: *
1.98 daniel 358: * [ WFC: Legal Character ]
359: * Characters referred to using character references must match the
360: * production for Char.
361: *
1.135 daniel 362: * Returns the value parsed (as an int), 0 in case of error
1.77 daniel 363: */
1.97 daniel 364: int
365: xmlParseCharRef(xmlParserCtxtPtr ctxt) {
366: int val = 0;
1.222 veillard 367: int count = 0;
1.97 daniel 368:
1.111 daniel 369: if (ctxt->token != 0) {
370: val = ctxt->token;
371: ctxt->token = 0;
372: return(val);
373: }
1.222 veillard 374: /*
375: * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
376: */
1.152 daniel 377: if ((RAW == '&') && (NXT(1) == '#') &&
1.97 daniel 378: (NXT(2) == 'x')) {
379: SKIP(3);
1.222 veillard 380: GROW;
381: while (RAW != ';') { /* loop blocked by count */
382: if ((RAW >= '0') && (RAW <= '9') && (count < 20))
1.97 daniel 383: val = val * 16 + (CUR - '0');
1.222 veillard 384: else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1.97 daniel 385: val = val * 16 + (CUR - 'a') + 10;
1.222 veillard 386: else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1.97 daniel 387: val = val * 16 + (CUR - 'A') + 10;
388: else {
1.123 daniel 389: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1.97 daniel 390: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
391: ctxt->sax->error(ctxt->userData,
392: "xmlParseCharRef: invalid hexadecimal value\n");
393: ctxt->wellFormed = 0;
1.180 daniel 394: ctxt->disableSAX = 1;
1.97 daniel 395: val = 0;
396: break;
397: }
398: NEXT;
1.222 veillard 399: count++;
1.97 daniel 400: }
1.164 daniel 401: if (RAW == ';') {
402: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
403: ctxt->nbChars ++;
404: ctxt->input->cur++;
405: }
1.152 daniel 406: } else if ((RAW == '&') && (NXT(1) == '#')) {
1.97 daniel 407: SKIP(2);
1.222 veillard 408: GROW;
409: while (RAW != ';') { /* loop blocked by count */
410: if ((RAW >= '0') && (RAW <= '9') && (count < 20))
1.97 daniel 411: val = val * 10 + (CUR - '0');
412: else {
1.123 daniel 413: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1.97 daniel 414: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
415: ctxt->sax->error(ctxt->userData,
416: "xmlParseCharRef: invalid decimal value\n");
417: ctxt->wellFormed = 0;
1.180 daniel 418: ctxt->disableSAX = 1;
1.97 daniel 419: val = 0;
420: break;
421: }
422: NEXT;
1.222 veillard 423: count++;
1.97 daniel 424: }
1.164 daniel 425: if (RAW == ';') {
426: /* on purpose to avoid reentrancy problems with NEXT and SKIP */
427: ctxt->nbChars ++;
428: ctxt->input->cur++;
429: }
1.97 daniel 430: } else {
1.123 daniel 431: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1.97 daniel 432: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.229 veillard 433: ctxt->sax->error(ctxt->userData,
434: "xmlParseCharRef: invalid value\n");
1.97 daniel 435: ctxt->wellFormed = 0;
1.180 daniel 436: ctxt->disableSAX = 1;
1.97 daniel 437: }
1.229 veillard 438:
439: /*
440: * [ WFC: Legal Character ]
441: * Characters referred to using character references must match the
442: * production for Char.
443: */
444: if (IS_CHAR(val)) {
445: return(val);
446: } else {
447: ctxt->errNo = XML_ERR_INVALID_CHAR;
1.97 daniel 448: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.229 veillard 449: ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
450: val);
1.97 daniel 451: ctxt->wellFormed = 0;
1.180 daniel 452: ctxt->disableSAX = 1;
1.97 daniel 453: }
1.229 veillard 454: return(0);
455: }
456:
457: /**
458: * xmlParseStringCharRef:
459: * @ctxt: an XML parser context
460: * @str: a pointer to an index in the string
461: *
462: * parse Reference declarations, variant parsing from a string rather
463: * than an an input flow.
464: *
465: * [66] CharRef ::= '&#' [0-9]+ ';' |
466: * '&#x' [0-9a-fA-F]+ ';'
467: *
468: * [ WFC: Legal Character ]
469: * Characters referred to using character references must match the
470: * production for Char.
471: *
472: * Returns the value parsed (as an int), 0 in case of error, str will be
473: * updated to the current value of the index
474: */
475: int
476: xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
477: const xmlChar *ptr;
478: xmlChar cur;
479: int val = 0;
1.98 daniel 480:
1.229 veillard 481: if ((str == NULL) || (*str == NULL)) return(0);
482: ptr = *str;
483: cur = *ptr;
484: if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
485: ptr += 3;
486: cur = *ptr;
487: while (cur != ';') { /* Non input consuming loop */
488: if ((cur >= '0') && (cur <= '9'))
489: val = val * 16 + (cur - '0');
490: else if ((cur >= 'a') && (cur <= 'f'))
491: val = val * 16 + (cur - 'a') + 10;
492: else if ((cur >= 'A') && (cur <= 'F'))
493: val = val * 16 + (cur - 'A') + 10;
494: else {
495: ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
496: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
497: ctxt->sax->error(ctxt->userData,
498: "xmlParseStringCharRef: invalid hexadecimal value\n");
499: ctxt->wellFormed = 0;
500: ctxt->disableSAX = 1;
501: val = 0;
502: break;
503: }
504: ptr++;
505: cur = *ptr;
506: }
507: if (cur == ';')
508: ptr++;
509: } else if ((cur == '&') && (ptr[1] == '#')){
510: ptr += 2;
511: cur = *ptr;
512: while (cur != ';') { /* Non input consuming loops */
513: if ((cur >= '0') && (cur <= '9'))
514: val = val * 10 + (cur - '0');
515: else {
516: ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
517: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
518: ctxt->sax->error(ctxt->userData,
519: "xmlParseStringCharRef: invalid decimal value\n");
520: ctxt->wellFormed = 0;
521: ctxt->disableSAX = 1;
522: val = 0;
523: break;
524: }
525: ptr++;
526: cur = *ptr;
527: }
528: if (cur == ';')
529: ptr++;
530: } else {
531: ctxt->errNo = XML_ERR_INVALID_CHARREF;
1.97 daniel 532: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.229 veillard 533: ctxt->sax->error(ctxt->userData,
534: "xmlParseCharRef: invalid value\n");
1.97 daniel 535: ctxt->wellFormed = 0;
1.180 daniel 536: ctxt->disableSAX = 1;
1.229 veillard 537: return(0);
1.97 daniel 538: }
1.229 veillard 539: *str = ptr;
1.98 daniel 540:
541: /*
1.229 veillard 542: * [ WFC: Legal Character ]
543: * Characters referred to using character references must match the
544: * production for Char.
1.98 daniel 545: */
1.229 veillard 546: if (IS_CHAR(val)) {
547: return(val);
548: } else {
549: ctxt->errNo = XML_ERR_INVALID_CHAR;
1.98 daniel 550: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.229 veillard 551: ctxt->sax->error(ctxt->userData,
552: "CharRef: invalid xmlChar value %d\n", val);
1.98 daniel 553: ctxt->wellFormed = 0;
1.180 daniel 554: ctxt->disableSAX = 1;
1.98 daniel 555: }
1.229 veillard 556: return(0);
1.96 daniel 557: }
558:
559: /**
560: * xmlParserHandlePEReference:
561: * @ctxt: the parser context
562: *
563: * [69] PEReference ::= '%' Name ';'
564: *
1.98 daniel 565: * [ WFC: No Recursion ]
1.229 veillard 566: * A parsed entity must not contain a recursive
1.98 daniel 567: * reference to itself, either directly or indirectly.
568: *
569: * [ WFC: Entity Declared ]
570: * In a document without any DTD, a document with only an internal DTD
571: * subset which contains no parameter entity references, or a document
572: * with "standalone='yes'", ... ... The declaration of a parameter
573: * entity must precede any reference to it...
574: *
575: * [ VC: Entity Declared ]
576: * In a document with an external subset or external parameter entities
577: * with "standalone='no'", ... ... The declaration of a parameter entity
578: * must precede any reference to it...
579: *
580: * [ WFC: In DTD ]
581: * Parameter-entity references may only appear in the DTD.
582: * NOTE: misleading but this is handled.
583: *
584: * A PEReference may have been detected in the current input stream
1.96 daniel 585: * the handling is done accordingly to
586: * http://www.w3.org/TR/REC-xml#entproc
587: * i.e.
588: * - Included in literal in entity values
589: * - Included as Paraemeter Entity reference within DTDs
590: */
591: void
592: xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 593: xmlChar *name;
1.96 daniel 594: xmlEntityPtr entity = NULL;
595: xmlParserInputPtr input;
596:
1.126 daniel 597: if (ctxt->token != 0) {
598: return;
599: }
1.152 daniel 600: if (RAW != '%') return;
1.96 daniel 601: switch(ctxt->instate) {
1.109 daniel 602: case XML_PARSER_CDATA_SECTION:
603: return;
1.97 daniel 604: case XML_PARSER_COMMENT:
605: return;
1.140 daniel 606: case XML_PARSER_START_TAG:
607: return;
608: case XML_PARSER_END_TAG:
609: return;
1.96 daniel 610: case XML_PARSER_EOF:
1.123 daniel 611: ctxt->errNo = XML_ERR_PEREF_AT_EOF;
1.96 daniel 612: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
613: ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
614: ctxt->wellFormed = 0;
1.180 daniel 615: ctxt->disableSAX = 1;
1.96 daniel 616: return;
617: case XML_PARSER_PROLOG:
1.140 daniel 618: case XML_PARSER_START:
619: case XML_PARSER_MISC:
1.123 daniel 620: ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
1.96 daniel 621: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
622: ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
623: ctxt->wellFormed = 0;
1.180 daniel 624: ctxt->disableSAX = 1;
1.96 daniel 625: return;
1.97 daniel 626: case XML_PARSER_ENTITY_DECL:
1.96 daniel 627: case XML_PARSER_CONTENT:
628: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 629: case XML_PARSER_PI:
1.168 daniel 630: case XML_PARSER_SYSTEM_LITERAL:
1.96 daniel 631: /* we just ignore it there */
632: return;
633: case XML_PARSER_EPILOG:
1.123 daniel 634: ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
1.96 daniel 635: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.97 daniel 636: ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
1.96 daniel 637: ctxt->wellFormed = 0;
1.180 daniel 638: ctxt->disableSAX = 1;
1.96 daniel 639: return;
1.97 daniel 640: case XML_PARSER_ENTITY_VALUE:
641: /*
642: * NOTE: in the case of entity values, we don't do the
1.127 daniel 643: * substitution here since we need the literal
1.97 daniel 644: * entity value to be able to save the internal
645: * subset of the document.
1.222 veillard 646: * This will be handled by xmlStringDecodeEntities
1.97 daniel 647: */
648: return;
1.96 daniel 649: case XML_PARSER_DTD:
1.98 daniel 650: /*
651: * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
652: * In the internal DTD subset, parameter-entity references
653: * can occur only where markup declarations can occur, not
654: * within markup declarations.
655: * In that case this is handled in xmlParseMarkupDecl
656: */
657: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
658: return;
1.96 daniel 659: }
660:
661: NEXT;
662: name = xmlParseName(ctxt);
1.220 veillard 663: if (xmlParserDebugEntities)
664: fprintf(stderr, "PE Reference: %s\n", name);
1.96 daniel 665: if (name == NULL) {
1.123 daniel 666: ctxt->errNo = XML_ERR_PEREF_NO_NAME;
1.96 daniel 667: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
668: ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
669: ctxt->wellFormed = 0;
1.180 daniel 670: ctxt->disableSAX = 1;
1.96 daniel 671: } else {
1.152 daniel 672: if (RAW == ';') {
1.96 daniel 673: NEXT;
1.98 daniel 674: if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
675: entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.96 daniel 676: if (entity == NULL) {
1.98 daniel 677:
678: /*
679: * [ WFC: Entity Declared ]
680: * In a document without any DTD, a document with only an
681: * internal DTD subset which contains no parameter entity
682: * references, or a document with "standalone='yes'", ...
683: * ... The declaration of a parameter entity must precede
684: * any reference to it...
685: */
686: if ((ctxt->standalone == 1) ||
687: ((ctxt->hasExternalSubset == 0) &&
688: (ctxt->hasPErefs == 0))) {
689: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
690: ctxt->sax->error(ctxt->userData,
691: "PEReference: %%%s; not found\n", name);
692: ctxt->wellFormed = 0;
1.180 daniel 693: ctxt->disableSAX = 1;
1.98 daniel 694: } else {
695: /*
696: * [ VC: Entity Declared ]
697: * In a document with an external subset or external
698: * parameter entities with "standalone='no'", ...
699: * ... The declaration of a parameter entity must precede
700: * any reference to it...
701: */
1.220 veillard 702: if ((!ctxt->disableSAX) &&
703: (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
1.212 veillard 704: ctxt->vctxt.error(ctxt->vctxt.userData,
705: "PEReference: %%%s; not found\n", name);
1.220 veillard 706: } else if ((!ctxt->disableSAX) &&
707: (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1.98 daniel 708: ctxt->sax->warning(ctxt->userData,
709: "PEReference: %%%s; not found\n", name);
710: ctxt->valid = 0;
711: }
1.96 daniel 712: } else {
1.159 daniel 713: if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
714: (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
1.96 daniel 715: /*
1.229 veillard 716: * handle the extra spaces added before and after
1.96 daniel 717: * c.f. http://www.w3.org/TR/REC-xml#as-PE
1.229 veillard 718: * this is done independantly.
1.96 daniel 719: */
720: input = xmlNewEntityInputStream(ctxt, entity);
721: xmlPushInput(ctxt, input);
1.164 daniel 722: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
723: (RAW == '<') && (NXT(1) == '?') &&
724: (NXT(2) == 'x') && (NXT(3) == 'm') &&
725: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 726: xmlParseTextDecl(ctxt);
1.164 daniel 727: }
728: if (ctxt->token == 0)
729: ctxt->token = ' ';
1.96 daniel 730: } else {
731: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
732: ctxt->sax->error(ctxt->userData,
733: "xmlHandlePEReference: %s is not a parameter entity\n",
734: name);
735: ctxt->wellFormed = 0;
1.180 daniel 736: ctxt->disableSAX = 1;
1.96 daniel 737: }
738: }
739: } else {
1.123 daniel 740: ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
1.96 daniel 741: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
742: ctxt->sax->error(ctxt->userData,
743: "xmlHandlePEReference: expecting ';'\n");
744: ctxt->wellFormed = 0;
1.180 daniel 745: ctxt->disableSAX = 1;
1.96 daniel 746: }
1.119 daniel 747: xmlFree(name);
1.97 daniel 748: }
749: }
750:
/*
 * Macro used to grow the current buffer.
 *
 * Doubles the allocation of @buffer; the companion variable named
 * <buffer>_size is expected to hold its current size in xmlChars and is
 * only updated once the reallocation has succeeded.
 * On allocation failure the old buffer is released and the *enclosing
 * function* returns NULL, so this can only be used in functions returning
 * a pointer and for buffers that the function owns.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *grown_##buffer = (xmlChar *)                               \
        xmlRealloc(buffer, buffer##_size * 2 * sizeof(xmlChar));        \
    if (grown_##buffer == NULL) {                                       \
        perror("realloc failed");                                       \
        xmlFree(buffer);                                                \
        return(NULL);                                                   \
    }                                                                   \
    buffer = grown_##buffer;                                            \
    buffer##_size *= 2;                                                 \
}
1.77 daniel 763:
764: /**
1.135 daniel 765: * xmlStringDecodeEntities:
766: * @ctxt: the parser context
767: * @str: the input string
768: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
769: * @end: an end marker xmlChar, 0 if none
770: * @end2: an end marker xmlChar, 0 if none
771: * @end3: an end marker xmlChar, 0 if none
772: *
1.222 veillard 773: * Takes a entity string content and process to do the adequate subtitutions.
774: *
1.135 daniel 775: * [67] Reference ::= EntityRef | CharRef
776: *
777: * [69] PEReference ::= '%' Name ';'
778: *
779: * Returns A newly allocated string with the substitution done. The caller
780: * must deallocate it !
781: */
782: xmlChar *
783: xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
784: xmlChar end, xmlChar end2, xmlChar end3) {
785: xmlChar *buffer = NULL;
786: int buffer_size = 0;
787:
788: xmlChar *current = NULL;
789: xmlEntityPtr ent;
1.176 daniel 790: int c,l;
791: int nbchars = 0;
1.135 daniel 792:
1.211 veillard 793: if (str == NULL)
794: return(NULL);
795:
1.185 daniel 796: if (ctxt->depth > 40) {
1.230 veillard 797: ctxt->errNo = XML_ERR_ENTITY_LOOP;
1.185 daniel 798: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
799: ctxt->sax->error(ctxt->userData,
800: "Detected entity reference loop\n");
801: ctxt->wellFormed = 0;
802: ctxt->disableSAX = 1;
803: return(NULL);
804: }
805:
1.135 daniel 806: /*
807: * allocate a translation buffer.
808: */
1.140 daniel 809: buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
1.135 daniel 810: buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
811: if (buffer == NULL) {
812: perror("xmlDecodeEntities: malloc failed");
813: return(NULL);
814: }
815:
816: /*
817: * Ok loop until we reach one of the ending char or a size limit.
1.222 veillard 818: * we are operating on already parsed values.
1.135 daniel 819: */
1.176 daniel 820: c = CUR_SCHAR(str, l);
1.222 veillard 821: while ((c != 0) && (c != end) && /* non input consuming loop */
822: (c != end2) && (c != end3)) {
1.135 daniel 823:
1.176 daniel 824: if (c == 0) break;
825: if ((c == '&') && (str[1] == '#')) {
1.135 daniel 826: int val = xmlParseStringCharRef(ctxt, &str);
1.176 daniel 827: if (val != 0) {
828: COPY_BUF(0,buffer,nbchars,val);
829: }
830: } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1.220 veillard 831: if (xmlParserDebugEntities)
832: fprintf(stderr, "String decoding Entity Reference: %.30s\n",
833: str);
1.135 daniel 834: ent = xmlParseStringEntityRef(ctxt, &str);
1.222 veillard 835: if ((ent != NULL) &&
836: (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1.219 veillard 837: if (ent->content != NULL) {
838: COPY_BUF(0,buffer,nbchars,ent->content[0]);
839: } else {
840: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
841: ctxt->sax->error(ctxt->userData,
842: "internal error entity has no content\n");
843: }
844: } else if ((ent != NULL) && (ent->content != NULL)) {
1.185 daniel 845: xmlChar *rep;
846:
847: ctxt->depth++;
848: rep = xmlStringDecodeEntities(ctxt, ent->content, what,
849: 0, 0, 0);
850: ctxt->depth--;
851: if (rep != NULL) {
852: current = rep;
1.222 veillard 853: while (*current != 0) { /* non input consuming loop */
1.185 daniel 854: buffer[nbchars++] = *current++;
855: if (nbchars >
856: buffer_size - XML_PARSER_BUFFER_SIZE) {
857: growBuffer(buffer);
858: }
1.135 daniel 859: }
1.185 daniel 860: xmlFree(rep);
1.135 daniel 861: }
862: } else if (ent != NULL) {
863: int i = xmlStrlen(ent->name);
864: const xmlChar *cur = ent->name;
865:
1.176 daniel 866: buffer[nbchars++] = '&';
867: if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 868: growBuffer(buffer);
869: }
870: for (;i > 0;i--)
1.176 daniel 871: buffer[nbchars++] = *cur++;
872: buffer[nbchars++] = ';';
1.135 daniel 873: }
1.176 daniel 874: } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1.220 veillard 875: if (xmlParserDebugEntities)
876: fprintf(stderr, "String decoding PE Reference: %.30s\n", str);
1.135 daniel 877: ent = xmlParseStringPEReference(ctxt, &str);
878: if (ent != NULL) {
1.185 daniel 879: xmlChar *rep;
880:
881: ctxt->depth++;
882: rep = xmlStringDecodeEntities(ctxt, ent->content, what,
883: 0, 0, 0);
884: ctxt->depth--;
885: if (rep != NULL) {
886: current = rep;
1.222 veillard 887: while (*current != 0) { /* non input consuming loop */
1.185 daniel 888: buffer[nbchars++] = *current++;
889: if (nbchars >
890: buffer_size - XML_PARSER_BUFFER_SIZE) {
891: growBuffer(buffer);
892: }
1.135 daniel 893: }
1.185 daniel 894: xmlFree(rep);
1.135 daniel 895: }
896: }
897: } else {
1.176 daniel 898: COPY_BUF(l,buffer,nbchars,c);
899: str += l;
900: if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1.135 daniel 901: growBuffer(buffer);
902: }
903: }
1.176 daniel 904: c = CUR_SCHAR(str, l);
1.135 daniel 905: }
1.229 veillard 906: buffer[nbchars++] = 0;
907: return(buffer);
1.172 daniel 908: }
909:
1.229 veillard 910:
911: /************************************************************************
912: * *
1.123 daniel 913: * Commodity functions to handle xmlChars *
1.28 daniel 914: * *
915: ************************************************************************/
916:
1.50 daniel 917: /**
918: * xmlStrndup:
1.123 daniel 919: * @cur: the input xmlChar *
1.50 daniel 920: * @len: the len of @cur
921: *
1.123 daniel 922: * a strndup for array of xmlChar's
1.68 daniel 923: *
1.123 daniel 924: * Returns a new xmlChar * or NULL
1.1 veillard 925: */
1.123 daniel 926: xmlChar *
927: xmlStrndup(const xmlChar *cur, int len) {
1.135 daniel 928: xmlChar *ret;
929:
930: if ((cur == NULL) || (len < 0)) return(NULL);
1.204 veillard 931: ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1.1 veillard 932: if (ret == NULL) {
1.86 daniel 933: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 934: (len + 1) * (long)sizeof(xmlChar));
1.1 veillard 935: return(NULL);
936: }
1.123 daniel 937: memcpy(ret, cur, len * sizeof(xmlChar));
1.1 veillard 938: ret[len] = 0;
939: return(ret);
940: }
941:
1.50 daniel 942: /**
943: * xmlStrdup:
1.123 daniel 944: * @cur: the input xmlChar *
1.50 daniel 945: *
1.152 daniel 946: * a strdup for array of xmlChar's. Since they are supposed to be
947: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
948: * a termination mark of '0'.
1.68 daniel 949: *
1.123 daniel 950: * Returns a new xmlChar * or NULL
1.1 veillard 951: */
1.123 daniel 952: xmlChar *
953: xmlStrdup(const xmlChar *cur) {
954: const xmlChar *p = cur;
1.1 veillard 955:
1.135 daniel 956: if (cur == NULL) return(NULL);
1.222 veillard 957: while (*p != 0) p++; /* non input consuming */
1.1 veillard 958: return(xmlStrndup(cur, p - cur));
959: }
960:
1.50 daniel 961: /**
962: * xmlCharStrndup:
963: * @cur: the input char *
964: * @len: the len of @cur
965: *
1.123 daniel 966: * a strndup for char's to xmlChar's
1.68 daniel 967: *
1.123 daniel 968: * Returns a new xmlChar * or NULL
1.45 daniel 969: */
970:
1.123 daniel 971: xmlChar *
1.55 daniel 972: xmlCharStrndup(const char *cur, int len) {
1.45 daniel 973: int i;
1.135 daniel 974: xmlChar *ret;
975:
976: if ((cur == NULL) || (len < 0)) return(NULL);
1.204 veillard 977: ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1.45 daniel 978: if (ret == NULL) {
1.86 daniel 979: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 980: (len + 1) * (long)sizeof(xmlChar));
1.45 daniel 981: return(NULL);
982: }
983: for (i = 0;i < len;i++)
1.123 daniel 984: ret[i] = (xmlChar) cur[i];
1.45 daniel 985: ret[len] = 0;
986: return(ret);
987: }
988:
1.50 daniel 989: /**
990: * xmlCharStrdup:
991: * @cur: the input char *
992: * @len: the len of @cur
993: *
1.123 daniel 994: * a strdup for char's to xmlChar's
1.68 daniel 995: *
1.123 daniel 996: * Returns a new xmlChar * or NULL
1.45 daniel 997: */
998:
1.123 daniel 999: xmlChar *
1.55 daniel 1000: xmlCharStrdup(const char *cur) {
1.45 daniel 1001: const char *p = cur;
1002:
1.135 daniel 1003: if (cur == NULL) return(NULL);
1.222 veillard 1004: while (*p != '\0') p++; /* non input consuming */
1.45 daniel 1005: return(xmlCharStrndup(cur, p - cur));
1006: }
1007:
1.50 daniel 1008: /**
1009: * xmlStrcmp:
1.123 daniel 1010: * @str1: the first xmlChar *
1011: * @str2: the second xmlChar *
1.50 daniel 1012: *
1.123 daniel 1013: * a strcmp for xmlChar's
1.68 daniel 1014: *
1015: * Returns the integer result of the comparison
1.14 veillard 1016: */
1017:
1.55 daniel 1018: int
1.123 daniel 1019: xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1.14 veillard 1020: register int tmp;
1021:
1.229 veillard 1022: if (str1 == str2) return(0);
1.135 daniel 1023: if (str1 == NULL) return(-1);
1024: if (str2 == NULL) return(1);
1.14 veillard 1025: do {
1.232 veillard 1026: tmp = *str1++ - *str2;
1.14 veillard 1027: if (tmp != 0) return(tmp);
1.232 veillard 1028: } while (*str2++ != 0);
1029: return 0;
1.14 veillard 1030: }
1031:
1.50 daniel 1032: /**
1.236 veillard 1033: * xmlStrEqual:
1034: * @str1: the first xmlChar *
1035: * @str2: the second xmlChar *
1036: *
1037: * Check if both string are equal of have same content
1038: * Should be a bit more readable and faster than xmlStrEqual()
1039: *
1040: * Returns 1 if they are equal, 0 if they are different
1041: */
1042:
1043: int
1044: xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1045: if (str1 == str2) return(1);
1046: if (str1 == NULL) return(0);
1047: if (str2 == NULL) return(0);
1048: do {
1049: if (*str1++ != *str2) return(0);
1050: } while (*str2++);
1051: return(1);
1052: }
1053:
1054: /**
1.50 daniel 1055: * xmlStrncmp:
1.123 daniel 1056: * @str1: the first xmlChar *
1057: * @str2: the second xmlChar *
1.50 daniel 1058: * @len: the max comparison length
1059: *
1.123 daniel 1060: * a strncmp for xmlChar's
1.68 daniel 1061: *
1062: * Returns the integer result of the comparison
1.14 veillard 1063: */
1064:
1.55 daniel 1065: int
1.123 daniel 1066: xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1.14 veillard 1067: register int tmp;
1068:
1069: if (len <= 0) return(0);
1.232 veillard 1070: if (str1 == str2) return(0);
1.135 daniel 1071: if (str1 == NULL) return(-1);
1072: if (str2 == NULL) return(1);
1.14 veillard 1073: do {
1.232 veillard 1074: tmp = *str1++ - *str2;
1075: if (tmp != 0 || --len == 0) return(tmp);
1076: } while (*str2++ != 0);
1077: return 0;
1078: }
1079:
1080: static xmlChar casemap[256] = {
1081: 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1082: 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1083: 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1084: 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1085: 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1086: 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1087: 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1088: 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1089: 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1090: 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1091: 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1092: 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1093: 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1094: 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1095: 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1096: 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1097: 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1098: 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1099: 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1100: 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1101: 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1102: 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1103: 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1104: 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1105: 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1106: 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1107: 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1108: 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1109: 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1110: 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1111: 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1112: 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1113: };
1114:
1115: /**
1116: * xmlStrcasecmp:
1117: * @str1: the first xmlChar *
1118: * @str2: the second xmlChar *
1119: *
1120: * a strcasecmp for xmlChar's
1121: *
1122: * Returns the integer result of the comparison
1123: */
1124:
1125: int
1126: xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1127: register int tmp;
1128:
1129: if (str1 == str2) return(0);
1130: if (str1 == NULL) return(-1);
1131: if (str2 == NULL) return(1);
1132: do {
1133: tmp = casemap[*str1++] - casemap[*str2];
1.14 veillard 1134: if (tmp != 0) return(tmp);
1.232 veillard 1135: } while (*str2++ != 0);
1136: return 0;
1137: }
1138:
1139: /**
1140: * xmlStrncasecmp:
1141: * @str1: the first xmlChar *
1142: * @str2: the second xmlChar *
1143: * @len: the max comparison length
1144: *
1145: * a strncasecmp for xmlChar's
1146: *
1147: * Returns the integer result of the comparison
1148: */
1149:
1150: int
1151: xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1152: register int tmp;
1153:
1154: if (len <= 0) return(0);
1155: if (str1 == str2) return(0);
1156: if (str1 == NULL) return(-1);
1157: if (str2 == NULL) return(1);
1158: do {
1159: tmp = casemap[*str1++] - casemap[*str2];
1160: if (tmp != 0 || --len == 0) return(tmp);
1161: } while (*str2++ != 0);
1162: return 0;
1.14 veillard 1163: }
1164:
1.50 daniel 1165: /**
1166: * xmlStrchr:
1.123 daniel 1167: * @str: the xmlChar * array
1168: * @val: the xmlChar to search
1.50 daniel 1169: *
1.123 daniel 1170: * a strchr for xmlChar's
1.68 daniel 1171: *
1.123 daniel 1172: * Returns the xmlChar * for the first occurence or NULL.
1.14 veillard 1173: */
1174:
1.123 daniel 1175: const xmlChar *
1176: xmlStrchr(const xmlChar *str, xmlChar val) {
1.135 daniel 1177: if (str == NULL) return(NULL);
1.222 veillard 1178: while (*str != 0) { /* non input consuming */
1.123 daniel 1179: if (*str == val) return((xmlChar *) str);
1.14 veillard 1180: str++;
1181: }
1182: return(NULL);
1.89 daniel 1183: }
1184:
1185: /**
1186: * xmlStrstr:
1.123 daniel 1187: * @str: the xmlChar * array (haystack)
1188: * @val: the xmlChar to search (needle)
1.89 daniel 1189: *
1.123 daniel 1190: * a strstr for xmlChar's
1.89 daniel 1191: *
1.123 daniel 1192: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 1193: */
1194:
1.123 daniel 1195: const xmlChar *
1196: xmlStrstr(const xmlChar *str, xmlChar *val) {
1.89 daniel 1197: int n;
1198:
1199: if (str == NULL) return(NULL);
1200: if (val == NULL) return(NULL);
1201: n = xmlStrlen(val);
1202:
1203: if (n == 0) return(str);
1.222 veillard 1204: while (*str != 0) { /* non input consuming */
1.89 daniel 1205: if (*str == *val) {
1.123 daniel 1206: if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1.89 daniel 1207: }
1.232 veillard 1208: str++;
1209: }
1210: return(NULL);
1211: }
1212:
1213: /**
1214: * xmlStrcasestr:
1215: * @str: the xmlChar * array (haystack)
1216: * @val: the xmlChar to search (needle)
1217: *
1218: * a case-ignoring strstr for xmlChar's
1219: *
1220: * Returns the xmlChar * for the first occurence or NULL.
1221: */
1222:
1223: const xmlChar *
1224: xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1225: int n;
1226:
1227: if (str == NULL) return(NULL);
1228: if (val == NULL) return(NULL);
1229: n = xmlStrlen(val);
1230:
1231: if (n == 0) return(str);
1232: while (*str != 0) { /* non input consuming */
1233: if (casemap[*str] == casemap[*val])
1234: if (!xmlStrncasecmp(str, val, n)) return(str);
1.89 daniel 1235: str++;
1236: }
1237: return(NULL);
1238: }
1239:
1240: /**
1241: * xmlStrsub:
1.123 daniel 1242: * @str: the xmlChar * array (haystack)
1.89 daniel 1243: * @start: the index of the first char (zero based)
1244: * @len: the length of the substring
1245: *
1246: * Extract a substring of a given string
1247: *
1.123 daniel 1248: * Returns the xmlChar * for the first occurence or NULL.
1.89 daniel 1249: */
1250:
1.123 daniel 1251: xmlChar *
1252: xmlStrsub(const xmlChar *str, int start, int len) {
1.89 daniel 1253: int i;
1254:
1255: if (str == NULL) return(NULL);
1256: if (start < 0) return(NULL);
1.90 daniel 1257: if (len < 0) return(NULL);
1.89 daniel 1258:
1259: for (i = 0;i < start;i++) {
1260: if (*str == 0) return(NULL);
1261: str++;
1262: }
1263: if (*str == 0) return(NULL);
1264: return(xmlStrndup(str, len));
1.14 veillard 1265: }
1.28 daniel 1266:
1.50 daniel 1267: /**
1268: * xmlStrlen:
1.123 daniel 1269: * @str: the xmlChar * array
1.50 daniel 1270: *
1.127 daniel 1271: * length of a xmlChar's string
1.68 daniel 1272: *
1.123 daniel 1273: * Returns the number of xmlChar contained in the ARRAY.
1.45 daniel 1274: */
1275:
1.55 daniel 1276: int
1.123 daniel 1277: xmlStrlen(const xmlChar *str) {
1.45 daniel 1278: int len = 0;
1279:
1280: if (str == NULL) return(0);
1.222 veillard 1281: while (*str != 0) { /* non input consuming */
1.45 daniel 1282: str++;
1283: len++;
1284: }
1285: return(len);
1286: }
1287:
1.50 daniel 1288: /**
1289: * xmlStrncat:
1.123 daniel 1290: * @cur: the original xmlChar * array
1291: * @add: the xmlChar * array added
1.50 daniel 1292: * @len: the length of @add
1293: *
1.123 daniel 1294: * a strncat for array of xmlChar's
1.68 daniel 1295: *
1.123 daniel 1296: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 1297: */
1298:
1.123 daniel 1299: xmlChar *
1300: xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1.45 daniel 1301: int size;
1.123 daniel 1302: xmlChar *ret;
1.45 daniel 1303:
1304: if ((add == NULL) || (len == 0))
1305: return(cur);
1306: if (cur == NULL)
1307: return(xmlStrndup(add, len));
1308:
1309: size = xmlStrlen(cur);
1.204 veillard 1310: ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1.45 daniel 1311: if (ret == NULL) {
1.86 daniel 1312: fprintf(stderr, "xmlStrncat: realloc of %ld byte failed\n",
1.123 daniel 1313: (size + len + 1) * (long)sizeof(xmlChar));
1.45 daniel 1314: return(cur);
1315: }
1.123 daniel 1316: memcpy(&ret[size], add, len * sizeof(xmlChar));
1.45 daniel 1317: ret[size + len] = 0;
1318: return(ret);
1319: }
1320:
1.50 daniel 1321: /**
1322: * xmlStrcat:
1.123 daniel 1323: * @cur: the original xmlChar * array
1324: * @add: the xmlChar * array added
1.50 daniel 1325: *
1.152 daniel 1326: * a strcat for array of xmlChar's. Since they are supposed to be
1327: * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1328: * a termination mark of '0'.
1.68 daniel 1329: *
1.123 daniel 1330: * Returns a new xmlChar * containing the concatenated string.
1.45 daniel 1331: */
1.123 daniel 1332: xmlChar *
1333: xmlStrcat(xmlChar *cur, const xmlChar *add) {
1334: const xmlChar *p = add;
1.45 daniel 1335:
1336: if (add == NULL) return(cur);
1337: if (cur == NULL)
1338: return(xmlStrdup(add));
1339:
1.222 veillard 1340: while (*p != 0) p++; /* non input consuming */
1.45 daniel 1341: return(xmlStrncat(cur, add, p - add));
1342: }
1343:
1344: /************************************************************************
1345: * *
1346: * Commodity functions, cleanup needed ? *
1347: * *
1348: ************************************************************************/
1349:
1.50 daniel 1350: /**
1351: * areBlanks:
1352: * @ctxt: an XML parser context
1.123 daniel 1353: * @str: a xmlChar *
1.50 daniel 1354: * @len: the size of @str
1355: *
1.45 daniel 1356: * Is this a sequence of blank chars that one can ignore ?
1.50 daniel 1357: *
1.68 daniel 1358: * Returns 1 if ignorable 0 otherwise.
1.45 daniel 1359: */
1360:
1.123 daniel 1361: static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1.104 daniel 1362: int i, ret;
1.45 daniel 1363: xmlNodePtr lastChild;
1364:
1.176 daniel 1365: /*
1366: * Check for xml:space value.
1367: */
1368: if (*(ctxt->space) == 1)
1369: return(0);
1370:
1371: /*
1372: * Check that the string is made of blanks
1373: */
1.45 daniel 1374: for (i = 0;i < len;i++)
1375: if (!(IS_BLANK(str[i]))) return(0);
1376:
1.176 daniel 1377: /*
1378: * Look if the element is mixed content in the Dtd if available
1379: */
1.104 daniel 1380: if (ctxt->myDoc != NULL) {
1381: ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1382: if (ret == 0) return(1);
1383: if (ret == 1) return(0);
1384: }
1.176 daniel 1385:
1.104 daniel 1386: /*
1.176 daniel 1387: * Otherwise, heuristic :-\
1.104 daniel 1388: */
1.179 daniel 1389: if (ctxt->keepBlanks)
1390: return(0);
1391: if (RAW != '<') return(0);
1392: if (ctxt->node == NULL) return(0);
1393: if ((ctxt->node->children == NULL) &&
1394: (RAW == '<') && (NXT(1) == '/')) return(0);
1395:
1.45 daniel 1396: lastChild = xmlGetLastChild(ctxt->node);
1397: if (lastChild == NULL) {
1398: if (ctxt->node->content != NULL) return(0);
1399: } else if (xmlNodeIsText(lastChild))
1400: return(0);
1.157 daniel 1401: else if ((ctxt->node->children != NULL) &&
1402: (xmlNodeIsText(ctxt->node->children)))
1.104 daniel 1403: return(0);
1.45 daniel 1404: return(1);
1405: }
1406:
1407: /*
1408: * Forward definition for recursive behaviour.
1409: */
1.77 daniel 1410: void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1411: void xmlParseReference(xmlParserCtxtPtr ctxt);
1.45 daniel 1412:
1.28 daniel 1413: /************************************************************************
1414: * *
1415: * Extra stuff for namespace support *
1416: * Relates to http://www.w3.org/TR/WD-xml-names *
1417: * *
1418: ************************************************************************/
1419:
1.50 daniel 1420: /**
1.72 daniel 1421: * xmlSplitQName:
1.162 daniel 1422: * @ctxt: an XML parser context
1.72 daniel 1423: * @name: an XML parser context
1.123 daniel 1424: * @prefix: a xmlChar **
1.72 daniel 1425: *
1.206 veillard 1426: * parse an UTF8 encoded XML qualified name string
1.72 daniel 1427: *
1428: * [NS 5] QName ::= (Prefix ':')? LocalPart
1429: *
1430: * [NS 6] Prefix ::= NCName
1431: *
1432: * [NS 7] LocalPart ::= NCName
1433: *
1.127 daniel 1434: * Returns the local part, and prefix is updated
1.72 daniel 1435: * to get the Prefix if any.
1436: */
1437:
1.123 daniel 1438: xmlChar *
1.162 daniel 1439: xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1440: xmlChar buf[XML_MAX_NAMELEN + 5];
1.222 veillard 1441: xmlChar *buffer = NULL;
1.162 daniel 1442: int len = 0;
1.222 veillard 1443: int max = XML_MAX_NAMELEN;
1.123 daniel 1444: xmlChar *ret = NULL;
1445: const xmlChar *cur = name;
1.206 veillard 1446: int c;
1.72 daniel 1447:
1448: *prefix = NULL;
1.113 daniel 1449:
1450: /* xml: prefix is not really a namespace */
1451: if ((cur[0] == 'x') && (cur[1] == 'm') &&
1452: (cur[2] == 'l') && (cur[3] == ':'))
1453: return(xmlStrdup(name));
1454:
1.162 daniel 1455: /* nasty but valid */
1456: if (cur[0] == ':')
1457: return(xmlStrdup(name));
1458:
1.206 veillard 1459: c = *cur++;
1.222 veillard 1460: while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1.206 veillard 1461: buf[len++] = c;
1462: c = *cur++;
1.162 daniel 1463: }
1.222 veillard 1464: if (len >= max) {
1465: /*
1466: * Okay someone managed to make a huge name, so he's ready to pay
1467: * for the processing speed.
1468: */
1469: max = len * 2;
1470:
1471: buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1472: if (buffer == NULL) {
1473: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1474: ctxt->sax->error(ctxt->userData,
1475: "xmlSplitQName: out of memory\n");
1476: return(NULL);
1477: }
1478: memcpy(buffer, buf, len);
1479: while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1480: if (len + 10 > max) {
1481: max *= 2;
1482: buffer = (xmlChar *) xmlRealloc(buffer,
1483: max * sizeof(xmlChar));
1484: if (buffer == NULL) {
1485: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1486: ctxt->sax->error(ctxt->userData,
1487: "xmlSplitQName: out of memory\n");
1488: return(NULL);
1489: }
1490: }
1491: buffer[len++] = c;
1492: c = *cur++;
1493: }
1494: buffer[len] = 0;
1495: }
1.72 daniel 1496:
1.222 veillard 1497: if (buffer == NULL)
1498: ret = xmlStrndup(buf, len);
1499: else {
1500: ret = buffer;
1501: buffer = NULL;
1502: max = XML_MAX_NAMELEN;
1503: }
1504:
1.72 daniel 1505:
1.162 daniel 1506: if (c == ':') {
1.206 veillard 1507: c = *cur++;
1508: if (c == 0) return(ret);
1.72 daniel 1509: *prefix = ret;
1.162 daniel 1510: len = 0;
1.72 daniel 1511:
1.222 veillard 1512: while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1.206 veillard 1513: buf[len++] = c;
1514: c = *cur++;
1.162 daniel 1515: }
1.222 veillard 1516: if (len >= max) {
1517: /*
1518: * Okay someone managed to make a huge name, so he's ready to pay
1519: * for the processing speed.
1520: */
1.229 veillard 1521: max = len * 2;
1522:
1523: buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1524: if (buffer == NULL) {
1.55 daniel 1525: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 1526: ctxt->sax->error(ctxt->userData,
1.229 veillard 1527: "xmlSplitQName: out of memory\n");
1528: return(NULL);
1529: }
1530: memcpy(buffer, buf, len);
1531: while (c != 0) { /* tested bigname2.xml */
1532: if (len + 10 > max) {
1533: max *= 2;
1534: buffer = (xmlChar *) xmlRealloc(buffer,
1535: max * sizeof(xmlChar));
1536: if (buffer == NULL) {
1537: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1538: ctxt->sax->error(ctxt->userData,
1539: "xmlSplitQName: out of memory\n");
1540: return(NULL);
1541: }
1542: }
1543: buffer[len++] = c;
1544: c = *cur++;
1.122 daniel 1545: }
1.229 veillard 1546: buffer[len] = 0;
1547: }
1548:
1549: if (buffer == NULL)
1550: ret = xmlStrndup(buf, len);
1551: else {
1552: ret = buffer;
1553: }
1.45 daniel 1554: }
1555:
1.229 veillard 1556: return(ret);
1.45 daniel 1557: }
1558:
1.28 daniel 1559: /************************************************************************
1560: * *
1561: * The parser itself *
1562: * Relates to http://www.w3.org/TR/REC-xml *
1563: * *
1564: ************************************************************************/
1.14 veillard 1565:
1.50 daniel 1566: /**
1567: * xmlParseName:
1568: * @ctxt: an XML parser context
1569: *
1570: * parse an XML name.
1.22 daniel 1571: *
1572: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1573: * CombiningChar | Extender
1574: *
1575: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1576: *
1577: * [6] Names ::= Name (S Name)*
1.68 daniel 1578: *
1579: * Returns the Name parsed or NULL
1.1 veillard 1580: */
1581:
1.123 daniel 1582: xmlChar *
1.55 daniel 1583: xmlParseName(xmlParserCtxtPtr ctxt) {
1.160 daniel 1584: xmlChar buf[XML_MAX_NAMELEN + 5];
1585: int len = 0, l;
1586: int c;
1.222 veillard 1587: int count = 0;
1.1 veillard 1588:
1.91 daniel 1589: GROW;
1.160 daniel 1590: c = CUR_CHAR(l);
1.190 daniel 1591: if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1592: (!IS_LETTER(c) && (c != '_') &&
1593: (c != ':'))) {
1.91 daniel 1594: return(NULL);
1595: }
1.40 daniel 1596:
1.222 veillard 1597: while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1.190 daniel 1598: ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1599: (c == '.') || (c == '-') ||
1600: (c == '_') || (c == ':') ||
1601: (IS_COMBINING(c)) ||
1602: (IS_EXTENDER(c)))) {
1.222 veillard 1603: if (count++ > 100) {
1604: count = 0;
1605: GROW;
1606: }
1.160 daniel 1607: COPY_BUF(l,buf,len,c);
1608: NEXTL(l);
1609: c = CUR_CHAR(l);
1.91 daniel 1610: if (len >= XML_MAX_NAMELEN) {
1.222 veillard 1611: /*
1612: * Okay someone managed to make a huge name, so he's ready to pay
1613: * for the processing speed.
1614: */
1615: xmlChar *buffer;
1616: int max = len * 2;
1617:
1618: buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1619: if (buffer == NULL) {
1620: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1621: ctxt->sax->error(ctxt->userData,
1622: "xmlParseName: out of memory\n");
1623: return(NULL);
1624: }
1625: memcpy(buffer, buf, len);
1626: while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1.160 daniel 1627: (c == '.') || (c == '-') ||
1628: (c == '_') || (c == ':') ||
1629: (IS_COMBINING(c)) ||
1630: (IS_EXTENDER(c))) {
1.222 veillard 1631: if (count++ > 100) {
1632: count = 0;
1633: GROW;
1634: }
1635: if (len + 10 > max) {
1636: max *= 2;
1637: buffer = (xmlChar *) xmlRealloc(buffer,
1638: max * sizeof(xmlChar));
1639: if (buffer == NULL) {
1640: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1641: ctxt->sax->error(ctxt->userData,
1642: "xmlParseName: out of memory\n");
1643: return(NULL);
1644: }
1645: }
1646: COPY_BUF(l,buffer,len,c);
1.160 daniel 1647: NEXTL(l);
1648: c = CUR_CHAR(l);
1.97 daniel 1649: }
1.222 veillard 1650: buffer[len] = 0;
1651: return(buffer);
1.91 daniel 1652: }
1653: }
1654: return(xmlStrndup(buf, len));
1.22 daniel 1655: }
1656:
1.50 daniel 1657: /**
1.135 daniel 1658: * xmlParseStringName:
1659: * @ctxt: an XML parser context
1.229 veillard 1660: * @str: a pointer to the string pointer (IN/OUT)
1.135 daniel 1661: *
1662: * parse an XML name.
1663: *
1664: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1665: * CombiningChar | Extender
1666: *
1667: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1668: *
1669: * [6] Names ::= Name (S Name)*
1670: *
1671: * Returns the Name parsed or NULL. The str pointer
1672: * is updated to the current location in the string.
1673: */
1674:
1675: xmlChar *
1676: xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1.176 daniel 1677: xmlChar buf[XML_MAX_NAMELEN + 5];
1678: const xmlChar *cur = *str;
1679: int len = 0, l;
1680: int c;
1.135 daniel 1681:
1.176 daniel 1682: c = CUR_SCHAR(cur, l);
1683: if (!IS_LETTER(c) && (c != '_') &&
1684: (c != ':')) {
1.135 daniel 1685: return(NULL);
1686: }
1687:
1.222 veillard 1688: while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1.176 daniel 1689: (c == '.') || (c == '-') ||
1690: (c == '_') || (c == ':') ||
1691: (IS_COMBINING(c)) ||
1692: (IS_EXTENDER(c))) {
1693: COPY_BUF(l,buf,len,c);
1694: cur += l;
1695: c = CUR_SCHAR(cur, l);
1.222 veillard 1696: if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1697: /*
1698: * Okay someone managed to make a huge name, so he's ready to pay
1699: * for the processing speed.
1700: */
1701: xmlChar *buffer;
1702: int max = len * 2;
1703:
1704: buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1705: if (buffer == NULL) {
1706: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1707: ctxt->sax->error(ctxt->userData,
1708: "xmlParseStringName: out of memory\n");
1709: return(NULL);
1710: }
1711: memcpy(buffer, buf, len);
1712: while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1.176 daniel 1713: (c == '.') || (c == '-') ||
1714: (c == '_') || (c == ':') ||
1715: (IS_COMBINING(c)) ||
1716: (IS_EXTENDER(c))) {
1.222 veillard 1717: if (len + 10 > max) {
1718: max *= 2;
1719: buffer = (xmlChar *) xmlRealloc(buffer,
1720: max * sizeof(xmlChar));
1721: if (buffer == NULL) {
1722: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1723: ctxt->sax->error(ctxt->userData,
1724: "xmlParseStringName: out of memory\n");
1725: return(NULL);
1726: }
1727: }
1728: COPY_BUF(l,buffer,len,c);
1.176 daniel 1729: cur += l;
1730: c = CUR_SCHAR(cur, l);
1731: }
1.222 veillard 1732: buffer[len] = 0;
1733: *str = cur;
1734: return(buffer);
1.176 daniel 1735: }
1.135 daniel 1736: }
1.176 daniel 1737: *str = cur;
1738: return(xmlStrndup(buf, len));
1.135 daniel 1739: }
1740:
1741: /**
1.50 daniel 1742: * xmlParseNmtoken:
1743: * @ctxt: an XML parser context
1744: *
1745: * parse an XML Nmtoken.
1.22 daniel 1746: *
1747: * [7] Nmtoken ::= (NameChar)+
1748: *
1749: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1.68 daniel 1750: *
1751: * Returns the Nmtoken parsed or NULL
1.22 daniel 1752: */
1753:
1.123 daniel 1754: xmlChar *
1.55 daniel 1755: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1.222 veillard 1756: xmlChar buf[XML_MAX_NAMELEN + 5];
1757: int len = 0, l;
1758: int c;
1759: int count = 0;
1.22 daniel 1760:
1.91 daniel 1761: GROW;
1.160 daniel 1762: c = CUR_CHAR(l);
1.222 veillard 1763:
1764: while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1.160 daniel 1765: (c == '.') || (c == '-') ||
1766: (c == '_') || (c == ':') ||
1767: (IS_COMBINING(c)) ||
1768: (IS_EXTENDER(c))) {
1.222 veillard 1769: if (count++ > 100) {
1770: count = 0;
1771: GROW;
1772: }
1.160 daniel 1773: COPY_BUF(l,buf,len,c);
1774: NEXTL(l);
1775: c = CUR_CHAR(l);
1.91 daniel 1776: if (len >= XML_MAX_NAMELEN) {
1.222 veillard 1777: /*
1778: * Okay someone managed to make a huge token, so he's ready to pay
1779: * for the processing speed.
1780: */
1781: xmlChar *buffer;
1782: int max = len * 2;
1783:
1784: buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1785: if (buffer == NULL) {
1786: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1787: ctxt->sax->error(ctxt->userData,
1788: "xmlParseNmtoken: out of memory\n");
1789: return(NULL);
1790: }
1791: memcpy(buffer, buf, len);
1792: while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1.160 daniel 1793: (c == '.') || (c == '-') ||
1794: (c == '_') || (c == ':') ||
1795: (IS_COMBINING(c)) ||
1796: (IS_EXTENDER(c))) {
1.222 veillard 1797: if (count++ > 100) {
1798: count = 0;
1799: GROW;
1800: }
1801: if (len + 10 > max) {
1802: max *= 2;
1803: buffer = (xmlChar *) xmlRealloc(buffer,
1804: max * sizeof(xmlChar));
1805: if (buffer == NULL) {
1806: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1807: ctxt->sax->error(ctxt->userData,
1808: "xmlParseName: out of memory\n");
1809: return(NULL);
1810: }
1811: }
1812: COPY_BUF(l,buffer,len,c);
1.160 daniel 1813: NEXTL(l);
1814: c = CUR_CHAR(l);
1815: }
1.222 veillard 1816: buffer[len] = 0;
1817: return(buffer);
1.91 daniel 1818: }
1819: }
1.168 daniel 1820: if (len == 0)
1821: return(NULL);
1.91 daniel 1822: return(xmlStrndup(buf, len));
1.1 veillard 1823: }
1824:
1.50 daniel 1825: /**
1826: * xmlParseEntityValue:
1827: * @ctxt: an XML parser context
1.78 daniel 1828: * @orig: if non-NULL store a copy of the original entity value
1.50 daniel 1829: *
1.229 veillard 1830: * parse a value for ENTITY declarations
1.24 daniel 1831: *
1832: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1833: * "'" ([^%&'] | PEReference | Reference)* "'"
1.68 daniel 1834: *
1.78 daniel 1835: * Returns the EntityValue parsed with references substituted or NULL
1.24 daniel 1836: */
1837:
1.123 daniel 1838: xmlChar *
1839: xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1.135 daniel 1840: xmlChar *buf = NULL;
1841: int len = 0;
1.140 daniel 1842: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 1843: int c, l;
1.135 daniel 1844: xmlChar stop;
1.123 daniel 1845: xmlChar *ret = NULL;
1.176 daniel 1846: const xmlChar *cur = NULL;
1.98 daniel 1847: xmlParserInputPtr input;
1.24 daniel 1848:
1.152 daniel 1849: if (RAW == '"') stop = '"';
1850: else if (RAW == '\'') stop = '\'';
1.135 daniel 1851: else {
1852: ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
1853: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1854: ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
1855: ctxt->wellFormed = 0;
1.180 daniel 1856: ctxt->disableSAX = 1;
1.135 daniel 1857: return(NULL);
1858: }
1859: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
1860: if (buf == NULL) {
1861: fprintf(stderr, "malloc of %d byte failed\n", size);
1862: return(NULL);
1863: }
1.94 daniel 1864:
1.135 daniel 1865: /*
1866: * The content of the entity definition is copied in a buffer.
1867: */
1.94 daniel 1868:
1.135 daniel 1869: ctxt->instate = XML_PARSER_ENTITY_VALUE;
1870: input = ctxt->input;
1871: GROW;
1872: NEXT;
1.152 daniel 1873: c = CUR_CHAR(l);
1.135 daniel 1874: /*
1875: * NOTE: 4.4.5 Included in Literal
1876: * When a parameter entity reference appears in a literal entity
1877: * value, ... a single or double quote character in the replacement
1878: * text is always treated as a normal data character and will not
1879: * terminate the literal.
1880: * In practice it means we stop the loop only when back at parsing
1881: * the initial entity and the quote is found
1882: */
1.222 veillard 1883: while ((IS_CHAR(c)) && ((c != stop) || /* checked */
1884: (ctxt->input != input))) {
1.152 daniel 1885: if (len + 5 >= size) {
1.135 daniel 1886: size *= 2;
1.204 veillard 1887: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 1888: if (buf == NULL) {
1889: fprintf(stderr, "realloc of %d byte failed\n", size);
1890: return(NULL);
1.94 daniel 1891: }
1.79 daniel 1892: }
1.152 daniel 1893: COPY_BUF(l,buf,len,c);
1894: NEXTL(l);
1.98 daniel 1895: /*
1.135 daniel 1896: * Pop-up of finished entities.
1.98 daniel 1897: */
1.222 veillard 1898: while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
1.135 daniel 1899: xmlPopInput(ctxt);
1.152 daniel 1900:
1.221 veillard 1901: GROW;
1.152 daniel 1902: c = CUR_CHAR(l);
1.135 daniel 1903: if (c == 0) {
1.94 daniel 1904: GROW;
1.152 daniel 1905: c = CUR_CHAR(l);
1.79 daniel 1906: }
1.135 daniel 1907: }
1908: buf[len] = 0;
1909:
1910: /*
1.176 daniel 1911: * Raise problem w.r.t. '&' and '%' being used in non-entities
1912: * reference constructs. Note Charref will be handled in
1913: * xmlStringDecodeEntities()
1914: */
1915: cur = buf;
1.223 veillard 1916: while (*cur != 0) { /* non input consuming */
1.176 daniel 1917: if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
1918: xmlChar *name;
1919: xmlChar tmp = *cur;
1920:
1921: cur++;
1922: name = xmlParseStringName(ctxt, &cur);
1923: if ((name == NULL) || (*cur != ';')) {
1.230 veillard 1924: ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
1.176 daniel 1925: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1926: ctxt->sax->error(ctxt->userData,
1927: "EntityValue: '%c' forbidden except for entities references\n",
1928: tmp);
1929: ctxt->wellFormed = 0;
1.180 daniel 1930: ctxt->disableSAX = 1;
1.176 daniel 1931: }
1932: if ((ctxt->inSubset == 1) && (tmp == '%')) {
1.230 veillard 1933: ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
1.176 daniel 1934: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1935: ctxt->sax->error(ctxt->userData,
1936: "EntityValue: PEReferences forbidden in internal subset\n",
1937: tmp);
1938: ctxt->wellFormed = 0;
1.180 daniel 1939: ctxt->disableSAX = 1;
1.176 daniel 1940: }
1941: if (name != NULL)
1942: xmlFree(name);
1943: }
1944: cur++;
1945: }
1946:
1947: /*
1.135 daniel 1948: * Then PEReference entities are substituted.
1949: */
1950: if (c != stop) {
1951: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.55 daniel 1952: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.135 daniel 1953: ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
1.59 daniel 1954: ctxt->wellFormed = 0;
1.180 daniel 1955: ctxt->disableSAX = 1;
1.170 daniel 1956: xmlFree(buf);
1.135 daniel 1957: } else {
1958: NEXT;
1959: /*
1960: * NOTE: 4.4.7 Bypassed
1961: * When a general entity reference appears in the EntityValue in
1962: * an entity declaration, it is bypassed and left as is.
1.176 daniel 1963: * so XML_SUBSTITUTE_REF is not set here.
1.135 daniel 1964: */
1965: ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
1966: 0, 0, 0);
1967: if (orig != NULL)
1968: *orig = buf;
1969: else
1970: xmlFree(buf);
1.24 daniel 1971: }
1972:
1973: return(ret);
1974: }
1975:
1.50 daniel 1976: /**
1977: * xmlParseAttValue:
1978: * @ctxt: an XML parser context
1979: *
1980: * parse a value for an attribute
1.78 daniel 1981: * Note: the parser won't do substitution of entities here, this
1.113 daniel 1982: * will be handled later in xmlStringGetNodeList
1.29 daniel 1983: *
1984: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
1985: * "'" ([^<&'] | Reference)* "'"
1.68 daniel 1986: *
1.129 daniel 1987: * 3.3.3 Attribute-Value Normalization:
1988: * Before the value of an attribute is passed to the application or
1989: * checked for validity, the XML processor must normalize it as follows:
1990: * - a character reference is processed by appending the referenced
1991: * character to the attribute value
1992: * - an entity reference is processed by recursively processing the
1993: * replacement text of the entity
1994: * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
1995: * appending #x20 to the normalized value, except that only a single
1996: * #x20 is appended for a "#xD#xA" sequence that is part of an external
1997: * parsed entity or the literal entity value of an internal parsed entity
1998: * - other characters are processed by appending them to the normalized value
1.130 daniel 1999: * If the declared value is not CDATA, then the XML processor must further
2000: * process the normalized attribute value by discarding any leading and
2001: * trailing space (#x20) characters, and by replacing sequences of space
2002: * (#x20) characters by a single space (#x20) character.
2003: * All attributes for which no declaration has been read should be treated
2004: * by a non-validating parser as if declared CDATA.
1.129 daniel 2005: *
2006: * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
1.29 daniel 2007: */
2008:
1.123 daniel 2009: xmlChar *
1.55 daniel 2010: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.129 daniel 2011: xmlChar limit = 0;
1.198 daniel 2012: xmlChar *buf = NULL;
2013: int len = 0;
2014: int buf_size = 0;
2015: int c, l;
1.129 daniel 2016: xmlChar *current = NULL;
2017: xmlEntityPtr ent;
2018:
1.29 daniel 2019:
1.91 daniel 2020: SHRINK;
1.151 daniel 2021: if (NXT(0) == '"') {
1.96 daniel 2022: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.129 daniel 2023: limit = '"';
1.40 daniel 2024: NEXT;
1.151 daniel 2025: } else if (NXT(0) == '\'') {
1.129 daniel 2026: limit = '\'';
1.96 daniel 2027: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.40 daniel 2028: NEXT;
1.29 daniel 2029: } else {
1.123 daniel 2030: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
1.55 daniel 2031: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2032: ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
1.59 daniel 2033: ctxt->wellFormed = 0;
1.180 daniel 2034: ctxt->disableSAX = 1;
1.129 daniel 2035: return(NULL);
1.29 daniel 2036: }
2037:
1.129 daniel 2038: /*
2039: * allocate a translation buffer.
2040: */
1.198 daniel 2041: buf_size = XML_PARSER_BUFFER_SIZE;
2042: buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2043: if (buf == NULL) {
1.129 daniel 2044: perror("xmlParseAttValue: malloc failed");
2045: return(NULL);
2046: }
2047:
2048: /*
2049: * Ok loop until we reach one of the ending char or a size limit.
2050: */
1.198 daniel 2051: c = CUR_CHAR(l);
1.223 veillard 2052: while (((NXT(0) != limit) && /* checked */
2053: (c != '<')) || (ctxt->token != 0)) {
1.198 daniel 2054: if (c == 0) break;
1.205 veillard 2055: if (ctxt->token == '&') {
1.229 veillard 2056: /*
2057: * The reparsing will be done in xmlStringGetNodeList()
2058: * called by the attribute() function in SAX.c
2059: */
1.205 veillard 2060: static xmlChar buffer[6] = "&";
2061:
2062: if (len > buf_size - 10) {
2063: growBuffer(buf);
2064: }
2065: current = &buffer[0];
1.223 veillard 2066: while (*current != 0) { /* non input consuming */
1.205 veillard 2067: buf[len++] = *current++;
2068: }
2069: ctxt->token = 0;
2070: } else if ((c == '&') && (NXT(1) == '#')) {
1.129 daniel 2071: int val = xmlParseCharRef(ctxt);
1.229 veillard 2072: if (val == '&') {
2073: /*
2074: * The reparsing will be done in xmlStringGetNodeList()
2075: * called by the attribute() function in SAX.c
2076: */
2077: static xmlChar buffer[6] = "&";
2078:
2079: if (len > buf_size - 10) {
2080: growBuffer(buf);
2081: }
2082: current = &buffer[0];
2083: while (*current != 0) { /* non input consuming */
2084: buf[len++] = *current++;
2085: }
2086: } else {
1.239 veillard 2087: len += xmlCopyChar(0, &buf[len], val);
1.229 veillard 2088: }
1.198 daniel 2089: } else if (c == '&') {
1.129 daniel 2090: ent = xmlParseEntityRef(ctxt);
2091: if ((ent != NULL) &&
2092: (ctxt->replaceEntities != 0)) {
1.185 daniel 2093: xmlChar *rep;
2094:
1.186 daniel 2095: if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2096: rep = xmlStringDecodeEntities(ctxt, ent->content,
1.185 daniel 2097: XML_SUBSTITUTE_REF, 0, 0, 0);
1.186 daniel 2098: if (rep != NULL) {
2099: current = rep;
1.223 veillard 2100: while (*current != 0) { /* non input consuming */
1.198 daniel 2101: buf[len++] = *current++;
2102: if (len > buf_size - 10) {
2103: growBuffer(buf);
1.186 daniel 2104: }
1.185 daniel 2105: }
1.186 daniel 2106: xmlFree(rep);
1.129 daniel 2107: }
1.186 daniel 2108: } else {
2109: if (ent->content != NULL)
1.198 daniel 2110: buf[len++] = ent->content[0];
1.129 daniel 2111: }
2112: } else if (ent != NULL) {
2113: int i = xmlStrlen(ent->name);
2114: const xmlChar *cur = ent->name;
2115:
1.186 daniel 2116: /*
2117: * This may look absurd but is needed to detect
2118: * entities problems
2119: */
1.211 veillard 2120: if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2121: (ent->content != NULL)) {
1.186 daniel 2122: xmlChar *rep;
2123: rep = xmlStringDecodeEntities(ctxt, ent->content,
2124: XML_SUBSTITUTE_REF, 0, 0, 0);
2125: if (rep != NULL)
2126: xmlFree(rep);
2127: }
2128:
2129: /*
2130: * Just output the reference
2131: */
1.198 daniel 2132: buf[len++] = '&';
2133: if (len > buf_size - i - 10) {
2134: growBuffer(buf);
1.129 daniel 2135: }
2136: for (;i > 0;i--)
1.198 daniel 2137: buf[len++] = *cur++;
2138: buf[len++] = ';';
1.129 daniel 2139: }
2140: } else {
1.198 daniel 2141: if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2142: COPY_BUF(l,buf,len,0x20);
2143: if (len > buf_size - 10) {
2144: growBuffer(buf);
1.129 daniel 2145: }
2146: } else {
1.198 daniel 2147: COPY_BUF(l,buf,len,c);
2148: if (len > buf_size - 10) {
2149: growBuffer(buf);
1.129 daniel 2150: }
2151: }
1.198 daniel 2152: NEXTL(l);
1.129 daniel 2153: }
1.198 daniel 2154: GROW;
2155: c = CUR_CHAR(l);
1.129 daniel 2156: }
1.198 daniel 2157: buf[len++] = 0;
1.152 daniel 2158: if (RAW == '<') {
1.230 veillard 2159: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.129 daniel 2160: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2161: ctxt->sax->error(ctxt->userData,
2162: "Unescaped '<' not allowed in attributes values\n");
2163: ctxt->wellFormed = 0;
1.180 daniel 2164: ctxt->disableSAX = 1;
1.152 daniel 2165: } else if (RAW != limit) {
1.230 veillard 2166: ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
1.129 daniel 2167: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2168: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2169: ctxt->wellFormed = 0;
1.180 daniel 2170: ctxt->disableSAX = 1;
1.129 daniel 2171: } else
2172: NEXT;
1.198 daniel 2173: return(buf);
1.29 daniel 2174: }
2175:
1.50 daniel 2176: /**
2177: * xmlParseSystemLiteral:
2178: * @ctxt: an XML parser context
2179: *
2180: * parse an XML Literal
1.21 daniel 2181: *
1.22 daniel 2182: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.68 daniel 2183: *
2184: * Returns the SystemLiteral parsed or NULL
1.21 daniel 2185: */
2186:
1.123 daniel 2187: xmlChar *
1.55 daniel 2188: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 2189: xmlChar *buf = NULL;
2190: int len = 0;
1.140 daniel 2191: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 2192: int cur, l;
1.135 daniel 2193: xmlChar stop;
1.168 daniel 2194: int state = ctxt->instate;
1.223 veillard 2195: int count = 0;
1.21 daniel 2196:
1.91 daniel 2197: SHRINK;
1.152 daniel 2198: if (RAW == '"') {
1.40 daniel 2199: NEXT;
1.135 daniel 2200: stop = '"';
1.152 daniel 2201: } else if (RAW == '\'') {
1.40 daniel 2202: NEXT;
1.135 daniel 2203: stop = '\'';
1.21 daniel 2204: } else {
1.230 veillard 2205: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.55 daniel 2206: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2207: ctxt->sax->error(ctxt->userData,
2208: "SystemLiteral \" or ' expected\n");
1.59 daniel 2209: ctxt->wellFormed = 0;
1.180 daniel 2210: ctxt->disableSAX = 1;
1.135 daniel 2211: return(NULL);
1.21 daniel 2212: }
2213:
1.135 daniel 2214: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2215: if (buf == NULL) {
2216: fprintf(stderr, "malloc of %d byte failed\n", size);
2217: return(NULL);
2218: }
1.168 daniel 2219: ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
1.152 daniel 2220: cur = CUR_CHAR(l);
1.223 veillard 2221: while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
1.152 daniel 2222: if (len + 5 >= size) {
1.135 daniel 2223: size *= 2;
1.204 veillard 2224: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 2225: if (buf == NULL) {
2226: fprintf(stderr, "realloc of %d byte failed\n", size);
1.204 veillard 2227: ctxt->instate = (xmlParserInputState) state;
1.135 daniel 2228: return(NULL);
2229: }
2230: }
1.223 veillard 2231: count++;
2232: if (count > 50) {
2233: GROW;
2234: count = 0;
2235: }
1.152 daniel 2236: COPY_BUF(l,buf,len,cur);
2237: NEXTL(l);
2238: cur = CUR_CHAR(l);
1.135 daniel 2239: if (cur == 0) {
2240: GROW;
2241: SHRINK;
1.152 daniel 2242: cur = CUR_CHAR(l);
1.135 daniel 2243: }
2244: }
2245: buf[len] = 0;
1.204 veillard 2246: ctxt->instate = (xmlParserInputState) state;
1.135 daniel 2247: if (!IS_CHAR(cur)) {
1.230 veillard 2248: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
1.135 daniel 2249: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2250: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2251: ctxt->wellFormed = 0;
1.180 daniel 2252: ctxt->disableSAX = 1;
1.135 daniel 2253: } else {
2254: NEXT;
2255: }
2256: return(buf);
1.21 daniel 2257: }
2258:
1.50 daniel 2259: /**
2260: * xmlParsePubidLiteral:
2261: * @ctxt: an XML parser context
1.21 daniel 2262: *
1.50 daniel 2263: * parse an XML public literal
1.68 daniel 2264: *
2265: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2266: *
2267: * Returns the PubidLiteral parsed or NULL.
1.21 daniel 2268: */
2269:
1.123 daniel 2270: xmlChar *
1.55 daniel 2271: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
1.135 daniel 2272: xmlChar *buf = NULL;
2273: int len = 0;
1.140 daniel 2274: int size = XML_PARSER_BUFFER_SIZE;
1.135 daniel 2275: xmlChar cur;
2276: xmlChar stop;
1.223 veillard 2277: int count = 0;
1.125 daniel 2278:
1.91 daniel 2279: SHRINK;
1.152 daniel 2280: if (RAW == '"') {
1.40 daniel 2281: NEXT;
1.135 daniel 2282: stop = '"';
1.152 daniel 2283: } else if (RAW == '\'') {
1.40 daniel 2284: NEXT;
1.135 daniel 2285: stop = '\'';
1.21 daniel 2286: } else {
1.230 veillard 2287: ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
1.55 daniel 2288: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2289: ctxt->sax->error(ctxt->userData,
2290: "SystemLiteral \" or ' expected\n");
1.59 daniel 2291: ctxt->wellFormed = 0;
1.180 daniel 2292: ctxt->disableSAX = 1;
1.135 daniel 2293: return(NULL);
2294: }
2295: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2296: if (buf == NULL) {
2297: fprintf(stderr, "malloc of %d byte failed\n", size);
2298: return(NULL);
2299: }
2300: cur = CUR;
1.223 veillard 2301: while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
1.135 daniel 2302: if (len + 1 >= size) {
2303: size *= 2;
1.204 veillard 2304: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 2305: if (buf == NULL) {
2306: fprintf(stderr, "realloc of %d byte failed\n", size);
2307: return(NULL);
2308: }
2309: }
2310: buf[len++] = cur;
1.223 veillard 2311: count++;
2312: if (count > 50) {
2313: GROW;
2314: count = 0;
2315: }
1.135 daniel 2316: NEXT;
2317: cur = CUR;
2318: if (cur == 0) {
2319: GROW;
2320: SHRINK;
2321: cur = CUR;
2322: }
2323: }
2324: buf[len] = 0;
2325: if (cur != stop) {
1.230 veillard 2326: ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
1.135 daniel 2327: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2328: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2329: ctxt->wellFormed = 0;
1.180 daniel 2330: ctxt->disableSAX = 1;
1.135 daniel 2331: } else {
2332: NEXT;
1.21 daniel 2333: }
1.135 daniel 2334: return(buf);
1.21 daniel 2335: }
2336:
1.50 daniel 2337: /**
2338: * xmlParseCharData:
2339: * @ctxt: an XML parser context
2340: * @cdata: int indicating whether we are within a CDATA section
2341: *
2342: * parse a CharData section.
2343: * if we are within a CDATA section ']]>' marks an end of section.
1.27 daniel 2344: *
1.151 daniel 2345: * The right angle bracket (>) may be represented using the string ">",
2346: * and must, for compatibility, be escaped using ">" or a character
2347: * reference when it appears in the string "]]>" in content, when that
2348: * string is not marking the end of a CDATA section.
2349: *
1.27 daniel 2350: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2351: */
2352:
1.55 daniel 2353: void
2354: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1.152 daniel 2355: xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
1.91 daniel 2356: int nbchar = 0;
1.152 daniel 2357: int cur, l;
1.223 veillard 2358: int count = 0;
1.27 daniel 2359:
1.91 daniel 2360: SHRINK;
1.223 veillard 2361: GROW;
1.152 daniel 2362: cur = CUR_CHAR(l);
1.223 veillard 2363: while (((cur != '<') || (ctxt->token == '<')) && /* checked */
1.190 daniel 2364: ((cur != '&') || (ctxt->token == '&')) &&
1.229 veillard 2365: (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
1.97 daniel 2366: if ((cur == ']') && (NXT(1) == ']') &&
1.59 daniel 2367: (NXT(2) == '>')) {
2368: if (cdata) break;
2369: else {
1.230 veillard 2370: ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
1.59 daniel 2371: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.151 daniel 2372: ctxt->sax->error(ctxt->userData,
1.59 daniel 2373: "Sequence ']]>' not allowed in content\n");
1.151 daniel 2374: /* Should this be relaxed ??? I see a "must here */
2375: ctxt->wellFormed = 0;
1.180 daniel 2376: ctxt->disableSAX = 1;
1.59 daniel 2377: }
2378: }
1.152 daniel 2379: COPY_BUF(l,buf,nbchar,cur);
2380: if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
1.91 daniel 2381: /*
2382: * Ok the segment is to be consumed as chars.
2383: */
1.171 daniel 2384: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 2385: if (areBlanks(ctxt, buf, nbchar)) {
2386: if (ctxt->sax->ignorableWhitespace != NULL)
2387: ctxt->sax->ignorableWhitespace(ctxt->userData,
2388: buf, nbchar);
2389: } else {
2390: if (ctxt->sax->characters != NULL)
2391: ctxt->sax->characters(ctxt->userData, buf, nbchar);
2392: }
2393: }
2394: nbchar = 0;
2395: }
1.223 veillard 2396: count++;
2397: if (count > 50) {
2398: GROW;
2399: count = 0;
2400: }
1.152 daniel 2401: NEXTL(l);
2402: cur = CUR_CHAR(l);
1.27 daniel 2403: }
1.91 daniel 2404: if (nbchar != 0) {
2405: /*
2406: * Ok the segment is to be consumed as chars.
2407: */
1.171 daniel 2408: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.91 daniel 2409: if (areBlanks(ctxt, buf, nbchar)) {
2410: if (ctxt->sax->ignorableWhitespace != NULL)
2411: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2412: } else {
2413: if (ctxt->sax->characters != NULL)
2414: ctxt->sax->characters(ctxt->userData, buf, nbchar);
2415: }
2416: }
1.45 daniel 2417: }
1.27 daniel 2418: }
2419:
1.50 daniel 2420: /**
2421: * xmlParseExternalID:
2422: * @ctxt: an XML parser context
1.123 daniel 2423: * @publicID: a xmlChar** receiving PubidLiteral
1.67 daniel 2424: * @strict: indicate whether we should restrict parsing to only
2425: * production [75], see NOTE below
1.50 daniel 2426: *
1.67 daniel 2427: * Parse an External ID or a Public ID
2428: *
2429: * NOTE: Productions [75] and [83] interract badly since [75] can generate
2430: * 'PUBLIC' S PubidLiteral S SystemLiteral
1.22 daniel 2431: *
2432: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2433: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1.67 daniel 2434: *
2435: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2436: *
1.68 daniel 2437: * Returns the function returns SystemLiteral and in the second
1.67 daniel 2438: * case publicID receives PubidLiteral, is strict is off
2439: * it is possible to return NULL and have publicID set.
1.22 daniel 2440: */
2441:
1.123 daniel 2442: xmlChar *
2443: xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2444: xmlChar *URI = NULL;
1.22 daniel 2445:
1.91 daniel 2446: SHRINK;
1.152 daniel 2447: if ((RAW == 'S') && (NXT(1) == 'Y') &&
1.40 daniel 2448: (NXT(2) == 'S') && (NXT(3) == 'T') &&
2449: (NXT(4) == 'E') && (NXT(5) == 'M')) {
2450: SKIP(6);
1.59 daniel 2451: if (!IS_BLANK(CUR)) {
1.230 veillard 2452: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 2453: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2454: ctxt->sax->error(ctxt->userData,
1.59 daniel 2455: "Space required after 'SYSTEM'\n");
2456: ctxt->wellFormed = 0;
1.180 daniel 2457: ctxt->disableSAX = 1;
1.59 daniel 2458: }
1.42 daniel 2459: SKIP_BLANKS;
1.39 daniel 2460: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 2461: if (URI == NULL) {
1.230 veillard 2462: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.55 daniel 2463: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2464: ctxt->sax->error(ctxt->userData,
1.39 daniel 2465: "xmlParseExternalID: SYSTEM, no URI\n");
1.59 daniel 2466: ctxt->wellFormed = 0;
1.180 daniel 2467: ctxt->disableSAX = 1;
1.59 daniel 2468: }
1.152 daniel 2469: } else if ((RAW == 'P') && (NXT(1) == 'U') &&
1.40 daniel 2470: (NXT(2) == 'B') && (NXT(3) == 'L') &&
2471: (NXT(4) == 'I') && (NXT(5) == 'C')) {
2472: SKIP(6);
1.59 daniel 2473: if (!IS_BLANK(CUR)) {
1.230 veillard 2474: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 2475: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2476: ctxt->sax->error(ctxt->userData,
1.59 daniel 2477: "Space required after 'PUBLIC'\n");
2478: ctxt->wellFormed = 0;
1.180 daniel 2479: ctxt->disableSAX = 1;
1.59 daniel 2480: }
1.42 daniel 2481: SKIP_BLANKS;
1.39 daniel 2482: *publicID = xmlParsePubidLiteral(ctxt);
1.59 daniel 2483: if (*publicID == NULL) {
1.230 veillard 2484: ctxt->errNo = XML_ERR_PUBID_REQUIRED;
1.55 daniel 2485: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2486: ctxt->sax->error(ctxt->userData,
1.39 daniel 2487: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.59 daniel 2488: ctxt->wellFormed = 0;
1.180 daniel 2489: ctxt->disableSAX = 1;
1.59 daniel 2490: }
1.67 daniel 2491: if (strict) {
2492: /*
2493: * We don't handle [83] so "S SystemLiteral" is required.
2494: */
2495: if (!IS_BLANK(CUR)) {
1.230 veillard 2496: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 2497: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2498: ctxt->sax->error(ctxt->userData,
1.67 daniel 2499: "Space required after the Public Identifier\n");
2500: ctxt->wellFormed = 0;
1.180 daniel 2501: ctxt->disableSAX = 1;
1.67 daniel 2502: }
2503: } else {
2504: /*
2505: * We handle [83] so we return immediately, if
2506: * "S SystemLiteral" is not detected. From a purely parsing
2507: * point of view that's a nice mess.
2508: */
1.135 daniel 2509: const xmlChar *ptr;
2510: GROW;
2511:
2512: ptr = CUR_PTR;
1.67 daniel 2513: if (!IS_BLANK(*ptr)) return(NULL);
2514:
1.223 veillard 2515: while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
1.173 daniel 2516: if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
1.59 daniel 2517: }
1.42 daniel 2518: SKIP_BLANKS;
1.39 daniel 2519: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 2520: if (URI == NULL) {
1.230 veillard 2521: ctxt->errNo = XML_ERR_URI_REQUIRED;
1.55 daniel 2522: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2523: ctxt->sax->error(ctxt->userData,
1.39 daniel 2524: "xmlParseExternalID: PUBLIC, no URI\n");
1.59 daniel 2525: ctxt->wellFormed = 0;
1.180 daniel 2526: ctxt->disableSAX = 1;
1.59 daniel 2527: }
1.22 daniel 2528: }
1.39 daniel 2529: return(URI);
1.22 daniel 2530: }
2531:
1.50 daniel 2532: /**
2533: * xmlParseComment:
1.69 daniel 2534: * @ctxt: an XML parser context
1.50 daniel 2535: *
1.3 veillard 2536: * Skip an XML (SGML) comment <!-- .... -->
1.38 daniel 2537: * The spec says that "For compatibility, the string "--" (double-hyphen)
2538: * must not occur within comments. "
1.22 daniel 2539: *
2540: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 2541: */
1.72 daniel 2542: void
1.114 daniel 2543: xmlParseComment(xmlParserCtxtPtr ctxt) {
1.135 daniel 2544: xmlChar *buf = NULL;
1.195 daniel 2545: int len;
1.140 daniel 2546: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 2547: int q, ql;
2548: int r, rl;
2549: int cur, l;
1.140 daniel 2550: xmlParserInputState state;
1.187 daniel 2551: xmlParserInputPtr input = ctxt->input;
1.223 veillard 2552: int count = 0;
1.3 veillard 2553:
2554: /*
1.22 daniel 2555: * Check that there is a comment right here.
1.3 veillard 2556: */
1.152 daniel 2557: if ((RAW != '<') || (NXT(1) != '!') ||
1.72 daniel 2558: (NXT(2) != '-') || (NXT(3) != '-')) return;
1.3 veillard 2559:
1.140 daniel 2560: state = ctxt->instate;
1.97 daniel 2561: ctxt->instate = XML_PARSER_COMMENT;
1.91 daniel 2562: SHRINK;
1.40 daniel 2563: SKIP(4);
1.135 daniel 2564: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2565: if (buf == NULL) {
2566: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 2567: ctxt->instate = state;
1.135 daniel 2568: return;
2569: }
1.152 daniel 2570: q = CUR_CHAR(ql);
2571: NEXTL(ql);
2572: r = CUR_CHAR(rl);
2573: NEXTL(rl);
2574: cur = CUR_CHAR(l);
1.195 daniel 2575: len = 0;
1.223 veillard 2576: while (IS_CHAR(cur) && /* checked */
1.135 daniel 2577: ((cur != '>') ||
2578: (r != '-') || (q != '-'))) {
1.195 daniel 2579: if ((r == '-') && (q == '-') && (len > 1)) {
1.230 veillard 2580: ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
1.55 daniel 2581: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2582: ctxt->sax->error(ctxt->userData,
1.38 daniel 2583: "Comment must not contain '--' (double-hyphen)`\n");
1.59 daniel 2584: ctxt->wellFormed = 0;
1.180 daniel 2585: ctxt->disableSAX = 1;
1.59 daniel 2586: }
1.152 daniel 2587: if (len + 5 >= size) {
1.135 daniel 2588: size *= 2;
1.204 veillard 2589: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 2590: if (buf == NULL) {
2591: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 2592: ctxt->instate = state;
1.135 daniel 2593: return;
2594: }
2595: }
1.152 daniel 2596: COPY_BUF(ql,buf,len,q);
1.135 daniel 2597: q = r;
1.152 daniel 2598: ql = rl;
1.135 daniel 2599: r = cur;
1.152 daniel 2600: rl = l;
1.223 veillard 2601:
2602: count++;
2603: if (count > 50) {
2604: GROW;
2605: count = 0;
2606: }
1.152 daniel 2607: NEXTL(l);
2608: cur = CUR_CHAR(l);
1.135 daniel 2609: if (cur == 0) {
2610: SHRINK;
2611: GROW;
1.152 daniel 2612: cur = CUR_CHAR(l);
1.135 daniel 2613: }
1.3 veillard 2614: }
1.135 daniel 2615: buf[len] = 0;
2616: if (!IS_CHAR(cur)) {
1.230 veillard 2617: ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
1.55 daniel 2618: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2619: ctxt->sax->error(ctxt->userData,
1.135 daniel 2620: "Comment not terminated \n<!--%.50s\n", buf);
1.59 daniel 2621: ctxt->wellFormed = 0;
1.180 daniel 2622: ctxt->disableSAX = 1;
1.178 daniel 2623: xmlFree(buf);
1.3 veillard 2624: } else {
1.187 daniel 2625: if (input != ctxt->input) {
1.230 veillard 2626: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 2627: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2628: ctxt->sax->error(ctxt->userData,
2629: "Comment doesn't start and stop in the same entity\n");
2630: ctxt->wellFormed = 0;
2631: ctxt->disableSAX = 1;
2632: }
1.40 daniel 2633: NEXT;
1.171 daniel 2634: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2635: (!ctxt->disableSAX))
1.135 daniel 2636: ctxt->sax->comment(ctxt->userData, buf);
2637: xmlFree(buf);
1.3 veillard 2638: }
1.140 daniel 2639: ctxt->instate = state;
1.3 veillard 2640: }
2641:
1.50 daniel 2642: /**
2643: * xmlParsePITarget:
2644: * @ctxt: an XML parser context
2645: *
2646: * parse the name of a PI
1.22 daniel 2647: *
2648: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1.68 daniel 2649: *
2650: * Returns the PITarget name or NULL
1.22 daniel 2651: */
2652:
1.123 daniel 2653: xmlChar *
1.55 daniel 2654: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1.123 daniel 2655: xmlChar *name;
1.22 daniel 2656:
2657: name = xmlParseName(ctxt);
1.139 daniel 2658: if ((name != NULL) &&
1.22 daniel 2659: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 2660: ((name[1] == 'm') || (name[1] == 'M')) &&
2661: ((name[2] == 'l') || (name[2] == 'L'))) {
1.139 daniel 2662: int i;
1.177 daniel 2663: if ((name[0] == 'x') && (name[1] == 'm') &&
2664: (name[2] == 'l') && (name[3] == 0)) {
1.230 veillard 2665: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
1.151 daniel 2666: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2667: ctxt->sax->error(ctxt->userData,
2668: "XML declaration allowed only at the start of the document\n");
2669: ctxt->wellFormed = 0;
1.180 daniel 2670: ctxt->disableSAX = 1;
1.151 daniel 2671: return(name);
2672: } else if (name[3] == 0) {
1.230 veillard 2673: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
1.151 daniel 2674: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2675: ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2676: ctxt->wellFormed = 0;
1.180 daniel 2677: ctxt->disableSAX = 1;
1.151 daniel 2678: return(name);
2679: }
1.139 daniel 2680: for (i = 0;;i++) {
2681: if (xmlW3CPIs[i] == NULL) break;
1.236 veillard 2682: if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
1.139 daniel 2683: return(name);
2684: }
2685: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
1.230 veillard 2686: ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
1.139 daniel 2687: ctxt->sax->warning(ctxt->userData,
1.122 daniel 2688: "xmlParsePItarget: invalid name prefix 'xml'\n");
2689: }
1.22 daniel 2690: }
2691: return(name);
2692: }
2693:
1.50 daniel 2694: /**
2695: * xmlParsePI:
2696: * @ctxt: an XML parser context
2697: *
2698: * parse an XML Processing Instruction.
1.22 daniel 2699: *
2700: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.68 daniel 2701: *
1.69 daniel 2702: * The processing is transfered to SAX once parsed.
1.3 veillard 2703: */
2704:
1.55 daniel 2705: void
2706: xmlParsePI(xmlParserCtxtPtr ctxt) {
1.135 daniel 2707: xmlChar *buf = NULL;
2708: int len = 0;
1.140 daniel 2709: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 2710: int cur, l;
1.123 daniel 2711: xmlChar *target;
1.140 daniel 2712: xmlParserInputState state;
1.223 veillard 2713: int count = 0;
1.22 daniel 2714:
1.152 daniel 2715: if ((RAW == '<') && (NXT(1) == '?')) {
1.187 daniel 2716: xmlParserInputPtr input = ctxt->input;
1.140 daniel 2717: state = ctxt->instate;
2718: ctxt->instate = XML_PARSER_PI;
1.3 veillard 2719: /*
2720: * this is a Processing Instruction.
2721: */
1.40 daniel 2722: SKIP(2);
1.91 daniel 2723: SHRINK;
1.3 veillard 2724:
2725: /*
1.22 daniel 2726: * Parse the target name and check for special support like
2727: * namespace.
1.3 veillard 2728: */
1.22 daniel 2729: target = xmlParsePITarget(ctxt);
2730: if (target != NULL) {
1.156 daniel 2731: if ((RAW == '?') && (NXT(1) == '>')) {
1.187 daniel 2732: if (input != ctxt->input) {
1.230 veillard 2733: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 2734: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2735: ctxt->sax->error(ctxt->userData,
2736: "PI declaration doesn't start and stop in the same entity\n");
2737: ctxt->wellFormed = 0;
2738: ctxt->disableSAX = 1;
2739: }
1.156 daniel 2740: SKIP(2);
2741:
2742: /*
2743: * SAX: PI detected.
2744: */
1.171 daniel 2745: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.156 daniel 2746: (ctxt->sax->processingInstruction != NULL))
2747: ctxt->sax->processingInstruction(ctxt->userData,
2748: target, NULL);
2749: ctxt->instate = state;
1.170 daniel 2750: xmlFree(target);
1.156 daniel 2751: return;
2752: }
1.135 daniel 2753: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2754: if (buf == NULL) {
2755: fprintf(stderr, "malloc of %d byte failed\n", size);
1.140 daniel 2756: ctxt->instate = state;
1.135 daniel 2757: return;
2758: }
2759: cur = CUR;
2760: if (!IS_BLANK(cur)) {
1.230 veillard 2761: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.114 daniel 2762: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2763: ctxt->sax->error(ctxt->userData,
2764: "xmlParsePI: PI %s space expected\n", target);
2765: ctxt->wellFormed = 0;
1.180 daniel 2766: ctxt->disableSAX = 1;
1.114 daniel 2767: }
2768: SKIP_BLANKS;
1.152 daniel 2769: cur = CUR_CHAR(l);
1.223 veillard 2770: while (IS_CHAR(cur) && /* checked */
1.135 daniel 2771: ((cur != '?') || (NXT(1) != '>'))) {
1.152 daniel 2772: if (len + 5 >= size) {
1.135 daniel 2773: size *= 2;
1.204 veillard 2774: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 2775: if (buf == NULL) {
2776: fprintf(stderr, "realloc of %d byte failed\n", size);
1.140 daniel 2777: ctxt->instate = state;
1.135 daniel 2778: return;
2779: }
1.223 veillard 2780: }
2781: count++;
2782: if (count > 50) {
2783: GROW;
2784: count = 0;
1.135 daniel 2785: }
1.152 daniel 2786: COPY_BUF(l,buf,len,cur);
2787: NEXTL(l);
2788: cur = CUR_CHAR(l);
1.135 daniel 2789: if (cur == 0) {
2790: SHRINK;
2791: GROW;
1.152 daniel 2792: cur = CUR_CHAR(l);
1.135 daniel 2793: }
2794: }
2795: buf[len] = 0;
1.152 daniel 2796: if (cur != '?') {
1.230 veillard 2797: ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
1.72 daniel 2798: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2799: ctxt->sax->error(ctxt->userData,
1.72 daniel 2800: "xmlParsePI: PI %s never end ...\n", target);
2801: ctxt->wellFormed = 0;
1.180 daniel 2802: ctxt->disableSAX = 1;
1.22 daniel 2803: } else {
1.187 daniel 2804: if (input != ctxt->input) {
1.230 veillard 2805: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 2806: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2807: ctxt->sax->error(ctxt->userData,
2808: "PI declaration doesn't start and stop in the same entity\n");
2809: ctxt->wellFormed = 0;
2810: ctxt->disableSAX = 1;
2811: }
1.72 daniel 2812: SKIP(2);
1.44 daniel 2813:
1.72 daniel 2814: /*
2815: * SAX: PI detected.
2816: */
1.171 daniel 2817: if ((ctxt->sax) && (!ctxt->disableSAX) &&
1.72 daniel 2818: (ctxt->sax->processingInstruction != NULL))
1.99 daniel 2819: ctxt->sax->processingInstruction(ctxt->userData,
1.135 daniel 2820: target, buf);
1.22 daniel 2821: }
1.135 daniel 2822: xmlFree(buf);
1.119 daniel 2823: xmlFree(target);
1.3 veillard 2824: } else {
1.230 veillard 2825: ctxt->errNo = XML_ERR_PI_NOT_STARTED;
1.55 daniel 2826: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.99 daniel 2827: ctxt->sax->error(ctxt->userData,
2828: "xmlParsePI : no target name\n");
1.59 daniel 2829: ctxt->wellFormed = 0;
1.180 daniel 2830: ctxt->disableSAX = 1;
1.22 daniel 2831: }
1.140 daniel 2832: ctxt->instate = state;
1.22 daniel 2833: }
2834: }
2835:
1.50 daniel 2836: /**
2837: * xmlParseNotationDecl:
2838: * @ctxt: an XML parser context
2839: *
2840: * parse a notation declaration
1.22 daniel 2841: *
2842: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
2843: *
2844: * Hence there is actually 3 choices:
2845: * 'PUBLIC' S PubidLiteral
2846: * 'PUBLIC' S PubidLiteral S SystemLiteral
2847: * and 'SYSTEM' S SystemLiteral
1.50 daniel 2848: *
1.67 daniel 2849: * See the NOTE on xmlParseExternalID().
1.22 daniel 2850: */
2851:
1.55 daniel 2852: void
2853: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 2854: xmlChar *name;
2855: xmlChar *Pubid;
2856: xmlChar *Systemid;
1.22 daniel 2857:
1.152 daniel 2858: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 2859: (NXT(2) == 'N') && (NXT(3) == 'O') &&
2860: (NXT(4) == 'T') && (NXT(5) == 'A') &&
2861: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1.67 daniel 2862: (NXT(8) == 'O') && (NXT(9) == 'N')) {
1.187 daniel 2863: xmlParserInputPtr input = ctxt->input;
1.91 daniel 2864: SHRINK;
1.40 daniel 2865: SKIP(10);
1.67 daniel 2866: if (!IS_BLANK(CUR)) {
1.230 veillard 2867: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 2868: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2869: ctxt->sax->error(ctxt->userData,
2870: "Space required after '<!NOTATION'\n");
1.67 daniel 2871: ctxt->wellFormed = 0;
1.180 daniel 2872: ctxt->disableSAX = 1;
1.67 daniel 2873: return;
2874: }
2875: SKIP_BLANKS;
1.22 daniel 2876:
2877: name = xmlParseName(ctxt);
2878: if (name == NULL) {
1.230 veillard 2879: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.55 daniel 2880: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2881: ctxt->sax->error(ctxt->userData,
2882: "NOTATION: Name expected here\n");
1.67 daniel 2883: ctxt->wellFormed = 0;
1.180 daniel 2884: ctxt->disableSAX = 1;
1.67 daniel 2885: return;
2886: }
2887: if (!IS_BLANK(CUR)) {
1.230 veillard 2888: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.67 daniel 2889: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2890: ctxt->sax->error(ctxt->userData,
1.67 daniel 2891: "Space required after the NOTATION name'\n");
1.59 daniel 2892: ctxt->wellFormed = 0;
1.180 daniel 2893: ctxt->disableSAX = 1;
1.22 daniel 2894: return;
2895: }
1.42 daniel 2896: SKIP_BLANKS;
1.67 daniel 2897:
1.22 daniel 2898: /*
1.67 daniel 2899: * Parse the IDs.
1.22 daniel 2900: */
1.160 daniel 2901: Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
1.67 daniel 2902: SKIP_BLANKS;
2903:
1.152 daniel 2904: if (RAW == '>') {
1.187 daniel 2905: if (input != ctxt->input) {
1.230 veillard 2906: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 2907: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2908: ctxt->sax->error(ctxt->userData,
2909: "Notation declaration doesn't start and stop in the same entity\n");
2910: ctxt->wellFormed = 0;
2911: ctxt->disableSAX = 1;
2912: }
1.40 daniel 2913: NEXT;
1.171 daniel 2914: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
2915: (ctxt->sax->notationDecl != NULL))
1.74 daniel 2916: ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
1.67 daniel 2917: } else {
1.230 veillard 2918: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.67 daniel 2919: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2920: ctxt->sax->error(ctxt->userData,
1.67 daniel 2921: "'>' required to close NOTATION declaration\n");
2922: ctxt->wellFormed = 0;
1.180 daniel 2923: ctxt->disableSAX = 1;
1.67 daniel 2924: }
1.119 daniel 2925: xmlFree(name);
2926: if (Systemid != NULL) xmlFree(Systemid);
2927: if (Pubid != NULL) xmlFree(Pubid);
1.22 daniel 2928: }
2929: }
2930:
1.50 daniel 2931: /**
2932: * xmlParseEntityDecl:
2933: * @ctxt: an XML parser context
2934: *
2935: * parse <!ENTITY declarations
1.22 daniel 2936: *
2937: * [70] EntityDecl ::= GEDecl | PEDecl
2938: *
2939: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
2940: *
2941: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
2942: *
2943: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
2944: *
2945: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 2946: *
2947: * [76] NDataDecl ::= S 'NDATA' S Name
1.99 daniel 2948: *
2949: * [ VC: Notation Declared ]
1.116 daniel 2950: * The Name must match the declared name of a notation.
1.22 daniel 2951: */
2952:
1.55 daniel 2953: void
2954: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 2955: xmlChar *name = NULL;
2956: xmlChar *value = NULL;
2957: xmlChar *URI = NULL, *literal = NULL;
2958: xmlChar *ndata = NULL;
1.39 daniel 2959: int isParameter = 0;
1.123 daniel 2960: xmlChar *orig = NULL;
1.22 daniel 2961:
1.94 daniel 2962: GROW;
1.152 daniel 2963: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 2964: (NXT(2) == 'E') && (NXT(3) == 'N') &&
2965: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1.59 daniel 2966: (NXT(6) == 'T') && (NXT(7) == 'Y')) {
1.187 daniel 2967: xmlParserInputPtr input = ctxt->input;
1.96 daniel 2968: ctxt->instate = XML_PARSER_ENTITY_DECL;
1.91 daniel 2969: SHRINK;
1.40 daniel 2970: SKIP(8);
1.59 daniel 2971: if (!IS_BLANK(CUR)) {
1.230 veillard 2972: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 2973: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2974: ctxt->sax->error(ctxt->userData,
2975: "Space required after '<!ENTITY'\n");
1.59 daniel 2976: ctxt->wellFormed = 0;
1.180 daniel 2977: ctxt->disableSAX = 1;
1.59 daniel 2978: }
2979: SKIP_BLANKS;
1.40 daniel 2980:
1.152 daniel 2981: if (RAW == '%') {
1.40 daniel 2982: NEXT;
1.59 daniel 2983: if (!IS_BLANK(CUR)) {
1.230 veillard 2984: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 2985: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 2986: ctxt->sax->error(ctxt->userData,
2987: "Space required after '%'\n");
1.59 daniel 2988: ctxt->wellFormed = 0;
1.180 daniel 2989: ctxt->disableSAX = 1;
1.59 daniel 2990: }
1.42 daniel 2991: SKIP_BLANKS;
1.39 daniel 2992: isParameter = 1;
1.22 daniel 2993: }
2994:
2995: name = xmlParseName(ctxt);
1.24 daniel 2996: if (name == NULL) {
1.230 veillard 2997: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 2998: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2999: ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
1.59 daniel 3000: ctxt->wellFormed = 0;
1.180 daniel 3001: ctxt->disableSAX = 1;
1.24 daniel 3002: return;
3003: }
1.59 daniel 3004: if (!IS_BLANK(CUR)) {
1.230 veillard 3005: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3006: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3007: ctxt->sax->error(ctxt->userData,
1.59 daniel 3008: "Space required after the entity name\n");
3009: ctxt->wellFormed = 0;
1.180 daniel 3010: ctxt->disableSAX = 1;
1.59 daniel 3011: }
1.42 daniel 3012: SKIP_BLANKS;
1.24 daniel 3013:
1.22 daniel 3014: /*
1.68 daniel 3015: * handle the various case of definitions...
1.22 daniel 3016: */
1.39 daniel 3017: if (isParameter) {
1.225 veillard 3018: if ((RAW == '"') || (RAW == '\'')) {
1.78 daniel 3019: value = xmlParseEntityValue(ctxt, &orig);
1.39 daniel 3020: if (value) {
1.171 daniel 3021: if ((ctxt->sax != NULL) &&
3022: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3023: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3024: XML_INTERNAL_PARAMETER_ENTITY,
3025: NULL, NULL, value);
3026: }
1.225 veillard 3027: } else {
1.67 daniel 3028: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 3029: if ((URI == NULL) && (literal == NULL)) {
1.230 veillard 3030: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
1.169 daniel 3031: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3032: ctxt->sax->error(ctxt->userData,
3033: "Entity value required\n");
3034: ctxt->wellFormed = 0;
1.180 daniel 3035: ctxt->disableSAX = 1;
1.169 daniel 3036: }
1.39 daniel 3037: if (URI) {
1.193 daniel 3038: xmlURIPtr uri;
3039:
3040: uri = xmlParseURI((const char *) URI);
3041: if (uri == NULL) {
1.230 veillard 3042: ctxt->errNo = XML_ERR_INVALID_URI;
1.193 daniel 3043: if ((ctxt->sax != NULL) &&
3044: (!ctxt->disableSAX) &&
3045: (ctxt->sax->error != NULL))
3046: ctxt->sax->error(ctxt->userData,
3047: "Invalid URI: %s\n", URI);
3048: ctxt->wellFormed = 0;
3049: } else {
3050: if (uri->fragment != NULL) {
1.230 veillard 3051: ctxt->errNo = XML_ERR_URI_FRAGMENT;
1.193 daniel 3052: if ((ctxt->sax != NULL) &&
3053: (!ctxt->disableSAX) &&
3054: (ctxt->sax->error != NULL))
3055: ctxt->sax->error(ctxt->userData,
3056: "Fragment not allowed: %s\n", URI);
3057: ctxt->wellFormed = 0;
3058: } else {
3059: if ((ctxt->sax != NULL) &&
3060: (!ctxt->disableSAX) &&
3061: (ctxt->sax->entityDecl != NULL))
3062: ctxt->sax->entityDecl(ctxt->userData, name,
3063: XML_EXTERNAL_PARAMETER_ENTITY,
3064: literal, URI, NULL);
3065: }
3066: xmlFreeURI(uri);
3067: }
1.39 daniel 3068: }
1.24 daniel 3069: }
3070: } else {
1.152 daniel 3071: if ((RAW == '"') || (RAW == '\'')) {
1.78 daniel 3072: value = xmlParseEntityValue(ctxt, &orig);
1.171 daniel 3073: if ((ctxt->sax != NULL) &&
3074: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3075: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3076: XML_INTERNAL_GENERAL_ENTITY,
3077: NULL, NULL, value);
3078: } else {
1.67 daniel 3079: URI = xmlParseExternalID(ctxt, &literal, 1);
1.169 daniel 3080: if ((URI == NULL) && (literal == NULL)) {
1.230 veillard 3081: ctxt->errNo = XML_ERR_VALUE_REQUIRED;
1.169 daniel 3082: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3083: ctxt->sax->error(ctxt->userData,
3084: "Entity value required\n");
3085: ctxt->wellFormed = 0;
1.180 daniel 3086: ctxt->disableSAX = 1;
1.169 daniel 3087: }
1.193 daniel 3088: if (URI) {
3089: xmlURIPtr uri;
3090:
3091: uri = xmlParseURI((const char *)URI);
3092: if (uri == NULL) {
1.230 veillard 3093: ctxt->errNo = XML_ERR_INVALID_URI;
1.193 daniel 3094: if ((ctxt->sax != NULL) &&
3095: (!ctxt->disableSAX) &&
3096: (ctxt->sax->error != NULL))
3097: ctxt->sax->error(ctxt->userData,
3098: "Invalid URI: %s\n", URI);
3099: ctxt->wellFormed = 0;
3100: } else {
3101: if (uri->fragment != NULL) {
1.230 veillard 3102: ctxt->errNo = XML_ERR_URI_FRAGMENT;
1.193 daniel 3103: if ((ctxt->sax != NULL) &&
3104: (!ctxt->disableSAX) &&
3105: (ctxt->sax->error != NULL))
3106: ctxt->sax->error(ctxt->userData,
3107: "Fragment not allowed: %s\n", URI);
3108: ctxt->wellFormed = 0;
3109: }
3110: xmlFreeURI(uri);
3111: }
3112: }
1.152 daniel 3113: if ((RAW != '>') && (!IS_BLANK(CUR))) {
1.230 veillard 3114: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3115: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3116: ctxt->sax->error(ctxt->userData,
1.59 daniel 3117: "Space required before 'NDATA'\n");
3118: ctxt->wellFormed = 0;
1.180 daniel 3119: ctxt->disableSAX = 1;
1.59 daniel 3120: }
1.42 daniel 3121: SKIP_BLANKS;
1.152 daniel 3122: if ((RAW == 'N') && (NXT(1) == 'D') &&
1.40 daniel 3123: (NXT(2) == 'A') && (NXT(3) == 'T') &&
3124: (NXT(4) == 'A')) {
3125: SKIP(5);
1.59 daniel 3126: if (!IS_BLANK(CUR)) {
1.230 veillard 3127: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3128: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3129: ctxt->sax->error(ctxt->userData,
1.59 daniel 3130: "Space required after 'NDATA'\n");
3131: ctxt->wellFormed = 0;
1.180 daniel 3132: ctxt->disableSAX = 1;
1.59 daniel 3133: }
1.42 daniel 3134: SKIP_BLANKS;
1.24 daniel 3135: ndata = xmlParseName(ctxt);
1.171 daniel 3136: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.116 daniel 3137: (ctxt->sax->unparsedEntityDecl != NULL))
3138: ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
1.39 daniel 3139: literal, URI, ndata);
3140: } else {
1.171 daniel 3141: if ((ctxt->sax != NULL) &&
3142: (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3143: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3144: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3145: literal, URI, NULL);
1.24 daniel 3146: }
3147: }
3148: }
1.42 daniel 3149: SKIP_BLANKS;
1.152 daniel 3150: if (RAW != '>') {
1.230 veillard 3151: ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1.55 daniel 3152: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3153: ctxt->sax->error(ctxt->userData,
1.31 daniel 3154: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.59 daniel 3155: ctxt->wellFormed = 0;
1.180 daniel 3156: ctxt->disableSAX = 1;
1.187 daniel 3157: } else {
3158: if (input != ctxt->input) {
1.230 veillard 3159: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 3160: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3161: ctxt->sax->error(ctxt->userData,
3162: "Entity declaration doesn't start and stop in the same entity\n");
3163: ctxt->wellFormed = 0;
3164: ctxt->disableSAX = 1;
3165: }
1.40 daniel 3166: NEXT;
1.187 daniel 3167: }
1.78 daniel 3168: if (orig != NULL) {
3169: /*
1.98 daniel 3170: * Ugly mechanism to save the raw entity value.
1.78 daniel 3171: */
3172: xmlEntityPtr cur = NULL;
3173:
1.98 daniel 3174: if (isParameter) {
3175: if ((ctxt->sax != NULL) &&
3176: (ctxt->sax->getParameterEntity != NULL))
1.120 daniel 3177: cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.98 daniel 3178: } else {
3179: if ((ctxt->sax != NULL) &&
3180: (ctxt->sax->getEntity != NULL))
1.120 daniel 3181: cur = ctxt->sax->getEntity(ctxt->userData, name);
1.98 daniel 3182: }
3183: if (cur != NULL) {
3184: if (cur->orig != NULL)
1.119 daniel 3185: xmlFree(orig);
1.98 daniel 3186: else
3187: cur->orig = orig;
3188: } else
1.119 daniel 3189: xmlFree(orig);
1.78 daniel 3190: }
1.119 daniel 3191: if (name != NULL) xmlFree(name);
3192: if (value != NULL) xmlFree(value);
3193: if (URI != NULL) xmlFree(URI);
3194: if (literal != NULL) xmlFree(literal);
3195: if (ndata != NULL) xmlFree(ndata);
1.22 daniel 3196: }
3197: }
3198:
1.50 daniel 3199: /**
1.59 daniel 3200: * xmlParseDefaultDecl:
3201: * @ctxt: an XML parser context
3202: * @value: Receive a possible fixed default value for the attribute
3203: *
3204: * Parse an attribute default declaration
3205: *
3206: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3207: *
1.99 daniel 3208: * [ VC: Required Attribute ]
1.117 daniel 3209: * if the default declaration is the keyword #REQUIRED, then the
3210: * attribute must be specified for all elements of the type in the
3211: * attribute-list declaration.
1.99 daniel 3212: *
3213: * [ VC: Attribute Default Legal ]
1.102 daniel 3214: * The declared default value must meet the lexical constraints of
3215: * the declared attribute type c.f. xmlValidateAttributeDecl()
1.99 daniel 3216: *
3217: * [ VC: Fixed Attribute Default ]
1.117 daniel 3218: * if an attribute has a default value declared with the #FIXED
3219: * keyword, instances of that attribute must match the default value.
1.99 daniel 3220: *
3221: * [ WFC: No < in Attribute Values ]
3222: * handled in xmlParseAttValue()
3223: *
1.59 daniel 3224: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3225: * or XML_ATTRIBUTE_FIXED.
3226: */
3227:
3228: int
1.123 daniel 3229: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
1.59 daniel 3230: int val;
1.123 daniel 3231: xmlChar *ret;
1.59 daniel 3232:
3233: *value = NULL;
1.152 daniel 3234: if ((RAW == '#') && (NXT(1) == 'R') &&
1.59 daniel 3235: (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3236: (NXT(4) == 'U') && (NXT(5) == 'I') &&
3237: (NXT(6) == 'R') && (NXT(7) == 'E') &&
3238: (NXT(8) == 'D')) {
3239: SKIP(9);
3240: return(XML_ATTRIBUTE_REQUIRED);
3241: }
1.152 daniel 3242: if ((RAW == '#') && (NXT(1) == 'I') &&
1.59 daniel 3243: (NXT(2) == 'M') && (NXT(3) == 'P') &&
3244: (NXT(4) == 'L') && (NXT(5) == 'I') &&
3245: (NXT(6) == 'E') && (NXT(7) == 'D')) {
3246: SKIP(8);
3247: return(XML_ATTRIBUTE_IMPLIED);
3248: }
3249: val = XML_ATTRIBUTE_NONE;
1.152 daniel 3250: if ((RAW == '#') && (NXT(1) == 'F') &&
1.59 daniel 3251: (NXT(2) == 'I') && (NXT(3) == 'X') &&
3252: (NXT(4) == 'E') && (NXT(5) == 'D')) {
3253: SKIP(6);
3254: val = XML_ATTRIBUTE_FIXED;
3255: if (!IS_BLANK(CUR)) {
1.230 veillard 3256: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3257: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3258: ctxt->sax->error(ctxt->userData,
3259: "Space required after '#FIXED'\n");
1.59 daniel 3260: ctxt->wellFormed = 0;
1.180 daniel 3261: ctxt->disableSAX = 1;
1.59 daniel 3262: }
3263: SKIP_BLANKS;
3264: }
3265: ret = xmlParseAttValue(ctxt);
1.96 daniel 3266: ctxt->instate = XML_PARSER_DTD;
1.59 daniel 3267: if (ret == NULL) {
3268: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3269: ctxt->sax->error(ctxt->userData,
1.59 daniel 3270: "Attribute default value declaration error\n");
3271: ctxt->wellFormed = 0;
1.180 daniel 3272: ctxt->disableSAX = 1;
1.59 daniel 3273: } else
3274: *value = ret;
3275: return(val);
3276: }
3277:
3278: /**
1.66 daniel 3279: * xmlParseNotationType:
3280: * @ctxt: an XML parser context
3281: *
3282: * parse an Notation attribute type.
3283: *
1.99 daniel 3284: * Note: the leading 'NOTATION' S part has already being parsed...
3285: *
1.66 daniel 3286: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3287: *
1.99 daniel 3288: * [ VC: Notation Attributes ]
1.117 daniel 3289: * Values of this type must match one of the notation names included
1.99 daniel 3290: * in the declaration; all notation names in the declaration must be declared.
1.66 daniel 3291: *
3292: * Returns: the notation attribute tree built while parsing
3293: */
3294:
3295: xmlEnumerationPtr
3296: xmlParseNotationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 3297: xmlChar *name;
1.66 daniel 3298: xmlEnumerationPtr ret = NULL, last = NULL, cur;
3299:
1.152 daniel 3300: if (RAW != '(') {
1.230 veillard 3301: ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
1.66 daniel 3302: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3303: ctxt->sax->error(ctxt->userData,
3304: "'(' required to start 'NOTATION'\n");
1.66 daniel 3305: ctxt->wellFormed = 0;
1.180 daniel 3306: ctxt->disableSAX = 1;
1.66 daniel 3307: return(NULL);
3308: }
1.91 daniel 3309: SHRINK;
1.66 daniel 3310: do {
3311: NEXT;
3312: SKIP_BLANKS;
3313: name = xmlParseName(ctxt);
3314: if (name == NULL) {
1.230 veillard 3315: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.66 daniel 3316: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3317: ctxt->sax->error(ctxt->userData,
1.66 daniel 3318: "Name expected in NOTATION declaration\n");
3319: ctxt->wellFormed = 0;
1.180 daniel 3320: ctxt->disableSAX = 1;
1.66 daniel 3321: return(ret);
3322: }
3323: cur = xmlCreateEnumeration(name);
1.119 daniel 3324: xmlFree(name);
1.66 daniel 3325: if (cur == NULL) return(ret);
3326: if (last == NULL) ret = last = cur;
3327: else {
3328: last->next = cur;
3329: last = cur;
3330: }
3331: SKIP_BLANKS;
1.152 daniel 3332: } while (RAW == '|');
3333: if (RAW != ')') {
1.230 veillard 3334: ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
1.66 daniel 3335: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3336: ctxt->sax->error(ctxt->userData,
1.66 daniel 3337: "')' required to finish NOTATION declaration\n");
3338: ctxt->wellFormed = 0;
1.180 daniel 3339: ctxt->disableSAX = 1;
1.170 daniel 3340: if ((last != NULL) && (last != ret))
3341: xmlFreeEnumeration(last);
1.66 daniel 3342: return(ret);
3343: }
3344: NEXT;
3345: return(ret);
3346: }
3347:
3348: /**
3349: * xmlParseEnumerationType:
3350: * @ctxt: an XML parser context
3351: *
3352: * parse an Enumeration attribute type.
3353: *
3354: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3355: *
1.99 daniel 3356: * [ VC: Enumeration ]
1.117 daniel 3357: * Values of this type must match one of the Nmtoken tokens in
1.99 daniel 3358: * the declaration
3359: *
1.66 daniel 3360: * Returns: the enumeration attribute tree built while parsing
3361: */
3362:
3363: xmlEnumerationPtr
3364: xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
1.123 daniel 3365: xmlChar *name;
1.66 daniel 3366: xmlEnumerationPtr ret = NULL, last = NULL, cur;
3367:
1.152 daniel 3368: if (RAW != '(') {
1.230 veillard 3369: ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
1.66 daniel 3370: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3371: ctxt->sax->error(ctxt->userData,
1.66 daniel 3372: "'(' required to start ATTLIST enumeration\n");
3373: ctxt->wellFormed = 0;
1.180 daniel 3374: ctxt->disableSAX = 1;
1.66 daniel 3375: return(NULL);
3376: }
1.91 daniel 3377: SHRINK;
1.66 daniel 3378: do {
3379: NEXT;
3380: SKIP_BLANKS;
3381: name = xmlParseNmtoken(ctxt);
3382: if (name == NULL) {
1.230 veillard 3383: ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
1.66 daniel 3384: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3385: ctxt->sax->error(ctxt->userData,
1.66 daniel 3386: "NmToken expected in ATTLIST enumeration\n");
3387: ctxt->wellFormed = 0;
1.180 daniel 3388: ctxt->disableSAX = 1;
1.66 daniel 3389: return(ret);
3390: }
3391: cur = xmlCreateEnumeration(name);
1.119 daniel 3392: xmlFree(name);
1.66 daniel 3393: if (cur == NULL) return(ret);
3394: if (last == NULL) ret = last = cur;
3395: else {
3396: last->next = cur;
3397: last = cur;
3398: }
3399: SKIP_BLANKS;
1.152 daniel 3400: } while (RAW == '|');
3401: if (RAW != ')') {
1.230 veillard 3402: ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
1.66 daniel 3403: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3404: ctxt->sax->error(ctxt->userData,
1.66 daniel 3405: "')' required to finish ATTLIST enumeration\n");
3406: ctxt->wellFormed = 0;
1.180 daniel 3407: ctxt->disableSAX = 1;
1.66 daniel 3408: return(ret);
3409: }
3410: NEXT;
3411: return(ret);
3412: }
3413:
3414: /**
1.50 daniel 3415: * xmlParseEnumeratedType:
3416: * @ctxt: an XML parser context
1.66 daniel 3417: * @tree: the enumeration tree built while parsing
1.50 daniel 3418: *
1.66 daniel 3419: * parse an Enumerated attribute type.
1.22 daniel 3420: *
3421: * [57] EnumeratedType ::= NotationType | Enumeration
3422: *
3423: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3424: *
1.50 daniel 3425: *
1.66 daniel 3426: * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
1.22 daniel 3427: */
3428:
1.66 daniel 3429: int
3430: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.152 daniel 3431: if ((RAW == 'N') && (NXT(1) == 'O') &&
1.66 daniel 3432: (NXT(2) == 'T') && (NXT(3) == 'A') &&
3433: (NXT(4) == 'T') && (NXT(5) == 'I') &&
3434: (NXT(6) == 'O') && (NXT(7) == 'N')) {
3435: SKIP(8);
3436: if (!IS_BLANK(CUR)) {
1.230 veillard 3437: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.66 daniel 3438: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3439: ctxt->sax->error(ctxt->userData,
3440: "Space required after 'NOTATION'\n");
1.66 daniel 3441: ctxt->wellFormed = 0;
1.180 daniel 3442: ctxt->disableSAX = 1;
1.66 daniel 3443: return(0);
3444: }
3445: SKIP_BLANKS;
3446: *tree = xmlParseNotationType(ctxt);
3447: if (*tree == NULL) return(0);
3448: return(XML_ATTRIBUTE_NOTATION);
3449: }
3450: *tree = xmlParseEnumerationType(ctxt);
3451: if (*tree == NULL) return(0);
3452: return(XML_ATTRIBUTE_ENUMERATION);
1.22 daniel 3453: }
3454:
1.50 daniel 3455: /**
3456: * xmlParseAttributeType:
3457: * @ctxt: an XML parser context
1.66 daniel 3458: * @tree: the enumeration tree built while parsing
1.50 daniel 3459: *
1.59 daniel 3460: * parse the Attribute list def for an element
1.22 daniel 3461: *
3462: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3463: *
3464: * [55] StringType ::= 'CDATA'
3465: *
3466: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3467: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1.50 daniel 3468: *
1.102 daniel 3469: * Validity constraints for attribute values syntax are checked in
3470: * xmlValidateAttributeValue()
3471: *
1.99 daniel 3472: * [ VC: ID ]
1.117 daniel 3473: * Values of type ID must match the Name production. A name must not
1.99 daniel 3474: * appear more than once in an XML document as a value of this type;
3475: * i.e., ID values must uniquely identify the elements which bear them.
3476: *
3477: * [ VC: One ID per Element Type ]
1.117 daniel 3478: * No element type may have more than one ID attribute specified.
1.99 daniel 3479: *
3480: * [ VC: ID Attribute Default ]
1.117 daniel 3481: * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
1.99 daniel 3482: *
3483: * [ VC: IDREF ]
1.102 daniel 3484: * Values of type IDREF must match the Name production, and values
1.140 daniel 3485: * of type IDREFS must match Names; each IDREF Name must match the value
1.117 daniel 3486: * of an ID attribute on some element in the XML document; i.e. IDREF
1.99 daniel 3487: * values must match the value of some ID attribute.
3488: *
3489: * [ VC: Entity Name ]
1.102 daniel 3490: * Values of type ENTITY must match the Name production, values
1.140 daniel 3491: * of type ENTITIES must match Names; each Entity Name must match the
1.117 daniel 3492: * name of an unparsed entity declared in the DTD.
1.99 daniel 3493: *
3494: * [ VC: Name Token ]
1.102 daniel 3495: * Values of type NMTOKEN must match the Nmtoken production; values
1.99 daniel 3496: * of type NMTOKENS must match Nmtokens.
3497: *
1.69 daniel 3498: * Returns the attribute type
1.22 daniel 3499: */
1.59 daniel 3500: int
1.66 daniel 3501: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.91 daniel 3502: SHRINK;
1.152 daniel 3503: if ((RAW == 'C') && (NXT(1) == 'D') &&
1.40 daniel 3504: (NXT(2) == 'A') && (NXT(3) == 'T') &&
3505: (NXT(4) == 'A')) {
3506: SKIP(5);
1.66 daniel 3507: return(XML_ATTRIBUTE_CDATA);
1.152 daniel 3508: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.40 daniel 3509: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.97 daniel 3510: (NXT(4) == 'F') && (NXT(5) == 'S')) {
3511: SKIP(6);
3512: return(XML_ATTRIBUTE_IDREFS);
1.152 daniel 3513: } else if ((RAW == 'I') && (NXT(1) == 'D') &&
1.97 daniel 3514: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.40 daniel 3515: (NXT(4) == 'F')) {
3516: SKIP(5);
1.59 daniel 3517: return(XML_ATTRIBUTE_IDREF);
1.152 daniel 3518: } else if ((RAW == 'I') && (NXT(1) == 'D')) {
1.66 daniel 3519: SKIP(2);
3520: return(XML_ATTRIBUTE_ID);
1.152 daniel 3521: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 3522: (NXT(2) == 'T') && (NXT(3) == 'I') &&
3523: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3524: SKIP(6);
1.59 daniel 3525: return(XML_ATTRIBUTE_ENTITY);
1.152 daniel 3526: } else if ((RAW == 'E') && (NXT(1) == 'N') &&
1.40 daniel 3527: (NXT(2) == 'T') && (NXT(3) == 'I') &&
3528: (NXT(4) == 'T') && (NXT(5) == 'I') &&
3529: (NXT(6) == 'E') && (NXT(7) == 'S')) {
3530: SKIP(8);
1.59 daniel 3531: return(XML_ATTRIBUTE_ENTITIES);
1.152 daniel 3532: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.40 daniel 3533: (NXT(2) == 'T') && (NXT(3) == 'O') &&
3534: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.66 daniel 3535: (NXT(6) == 'N') && (NXT(7) == 'S')) {
3536: SKIP(8);
3537: return(XML_ATTRIBUTE_NMTOKENS);
1.152 daniel 3538: } else if ((RAW == 'N') && (NXT(1) == 'M') &&
1.66 daniel 3539: (NXT(2) == 'T') && (NXT(3) == 'O') &&
3540: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.40 daniel 3541: (NXT(6) == 'N')) {
3542: SKIP(7);
1.59 daniel 3543: return(XML_ATTRIBUTE_NMTOKEN);
1.22 daniel 3544: }
1.66 daniel 3545: return(xmlParseEnumeratedType(ctxt, tree));
1.22 daniel 3546: }
3547:
1.50 daniel 3548: /**
3549: * xmlParseAttributeListDecl:
3550: * @ctxt: an XML parser context
3551: *
3552: * : parse the Attribute list def for an element
1.22 daniel 3553: *
3554: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3555: *
3556: * [53] AttDef ::= S Name S AttType S DefaultDecl
1.50 daniel 3557: *
1.22 daniel 3558: */
1.55 daniel 3559: void
3560: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 3561: xmlChar *elemName;
3562: xmlChar *attrName;
1.103 daniel 3563: xmlEnumerationPtr tree;
1.22 daniel 3564:
1.152 daniel 3565: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 3566: (NXT(2) == 'A') && (NXT(3) == 'T') &&
3567: (NXT(4) == 'T') && (NXT(5) == 'L') &&
3568: (NXT(6) == 'I') && (NXT(7) == 'S') &&
1.59 daniel 3569: (NXT(8) == 'T')) {
1.187 daniel 3570: xmlParserInputPtr input = ctxt->input;
3571:
1.40 daniel 3572: SKIP(9);
1.59 daniel 3573: if (!IS_BLANK(CUR)) {
1.230 veillard 3574: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3575: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3576: ctxt->sax->error(ctxt->userData,
3577: "Space required after '<!ATTLIST'\n");
1.59 daniel 3578: ctxt->wellFormed = 0;
1.180 daniel 3579: ctxt->disableSAX = 1;
1.59 daniel 3580: }
1.42 daniel 3581: SKIP_BLANKS;
1.59 daniel 3582: elemName = xmlParseName(ctxt);
3583: if (elemName == NULL) {
1.230 veillard 3584: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 3585: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3586: ctxt->sax->error(ctxt->userData,
3587: "ATTLIST: no name for Element\n");
1.59 daniel 3588: ctxt->wellFormed = 0;
1.180 daniel 3589: ctxt->disableSAX = 1;
1.22 daniel 3590: return;
3591: }
1.42 daniel 3592: SKIP_BLANKS;
1.220 veillard 3593: GROW;
1.152 daniel 3594: while (RAW != '>') {
1.123 daniel 3595: const xmlChar *check = CUR_PTR;
1.59 daniel 3596: int type;
3597: int def;
1.123 daniel 3598: xmlChar *defaultValue = NULL;
1.59 daniel 3599:
1.220 veillard 3600: GROW;
1.103 daniel 3601: tree = NULL;
1.59 daniel 3602: attrName = xmlParseName(ctxt);
3603: if (attrName == NULL) {
1.230 veillard 3604: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 3605: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 3606: ctxt->sax->error(ctxt->userData,
3607: "ATTLIST: no name for Attribute\n");
1.59 daniel 3608: ctxt->wellFormed = 0;
1.180 daniel 3609: ctxt->disableSAX = 1;
1.59 daniel 3610: break;
3611: }
1.97 daniel 3612: GROW;
1.59 daniel 3613: if (!IS_BLANK(CUR)) {
1.230 veillard 3614: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3615: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3616: ctxt->sax->error(ctxt->userData,
1.59 daniel 3617: "Space required after the attribute name\n");
3618: ctxt->wellFormed = 0;
1.180 daniel 3619: ctxt->disableSAX = 1;
1.170 daniel 3620: if (attrName != NULL)
3621: xmlFree(attrName);
3622: if (defaultValue != NULL)
3623: xmlFree(defaultValue);
1.59 daniel 3624: break;
3625: }
3626: SKIP_BLANKS;
3627:
1.66 daniel 3628: type = xmlParseAttributeType(ctxt, &tree);
1.170 daniel 3629: if (type <= 0) {
3630: if (attrName != NULL)
3631: xmlFree(attrName);
3632: if (defaultValue != NULL)
3633: xmlFree(defaultValue);
3634: break;
3635: }
1.22 daniel 3636:
1.97 daniel 3637: GROW;
1.59 daniel 3638: if (!IS_BLANK(CUR)) {
1.230 veillard 3639: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3640: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3641: ctxt->sax->error(ctxt->userData,
1.59 daniel 3642: "Space required after the attribute type\n");
3643: ctxt->wellFormed = 0;
1.180 daniel 3644: ctxt->disableSAX = 1;
1.170 daniel 3645: if (attrName != NULL)
3646: xmlFree(attrName);
3647: if (defaultValue != NULL)
3648: xmlFree(defaultValue);
3649: if (tree != NULL)
3650: xmlFreeEnumeration(tree);
1.59 daniel 3651: break;
3652: }
1.42 daniel 3653: SKIP_BLANKS;
1.59 daniel 3654:
3655: def = xmlParseDefaultDecl(ctxt, &defaultValue);
1.170 daniel 3656: if (def <= 0) {
3657: if (attrName != NULL)
3658: xmlFree(attrName);
3659: if (defaultValue != NULL)
3660: xmlFree(defaultValue);
3661: if (tree != NULL)
3662: xmlFreeEnumeration(tree);
3663: break;
3664: }
1.59 daniel 3665:
1.97 daniel 3666: GROW;
1.152 daniel 3667: if (RAW != '>') {
1.59 daniel 3668: if (!IS_BLANK(CUR)) {
1.230 veillard 3669: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 3670: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3671: ctxt->sax->error(ctxt->userData,
1.59 daniel 3672: "Space required after the attribute default value\n");
3673: ctxt->wellFormed = 0;
1.180 daniel 3674: ctxt->disableSAX = 1;
1.170 daniel 3675: if (attrName != NULL)
3676: xmlFree(attrName);
3677: if (defaultValue != NULL)
3678: xmlFree(defaultValue);
3679: if (tree != NULL)
3680: xmlFreeEnumeration(tree);
1.59 daniel 3681: break;
3682: }
3683: SKIP_BLANKS;
3684: }
1.40 daniel 3685: if (check == CUR_PTR) {
1.230 veillard 3686: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 3687: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3688: ctxt->sax->error(ctxt->userData,
1.59 daniel 3689: "xmlParseAttributeListDecl: detected internal error\n");
1.170 daniel 3690: if (attrName != NULL)
3691: xmlFree(attrName);
3692: if (defaultValue != NULL)
3693: xmlFree(defaultValue);
3694: if (tree != NULL)
3695: xmlFreeEnumeration(tree);
1.22 daniel 3696: break;
3697: }
1.171 daniel 3698: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3699: (ctxt->sax->attributeDecl != NULL))
1.74 daniel 3700: ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
1.66 daniel 3701: type, def, defaultValue, tree);
1.59 daniel 3702: if (attrName != NULL)
1.119 daniel 3703: xmlFree(attrName);
1.59 daniel 3704: if (defaultValue != NULL)
1.119 daniel 3705: xmlFree(defaultValue);
1.97 daniel 3706: GROW;
1.22 daniel 3707: }
1.187 daniel 3708: if (RAW == '>') {
3709: if (input != ctxt->input) {
1.230 veillard 3710: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 3711: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3712: ctxt->sax->error(ctxt->userData,
3713: "Attribute list declaration doesn't start and stop in the same entity\n");
3714: ctxt->wellFormed = 0;
3715: ctxt->disableSAX = 1;
3716: }
1.40 daniel 3717: NEXT;
1.187 daniel 3718: }
1.22 daniel 3719:
1.119 daniel 3720: xmlFree(elemName);
1.22 daniel 3721: }
3722: }
3723:
1.50 daniel 3724: /**
1.61 daniel 3725: * xmlParseElementMixedContentDecl:
3726: * @ctxt: an XML parser context
3727: *
3728: * parse the declaration for a Mixed Element content
3729: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3730: *
3731: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
3732: * '(' S? '#PCDATA' S? ')'
3733: *
1.99 daniel 3734: * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
3735: *
3736: * [ VC: No Duplicate Types ]
1.117 daniel 3737: * The same name must not appear more than once in a single
3738: * mixed-content declaration.
1.99 daniel 3739: *
1.61 daniel 3740: * returns: the list of the xmlElementContentPtr describing the element choices
3741: */
3742: xmlElementContentPtr
1.62 daniel 3743: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
1.64 daniel 3744: xmlElementContentPtr ret = NULL, cur = NULL, n;
1.123 daniel 3745: xmlChar *elem = NULL;
1.61 daniel 3746:
1.97 daniel 3747: GROW;
1.152 daniel 3748: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 3749: (NXT(2) == 'C') && (NXT(3) == 'D') &&
3750: (NXT(4) == 'A') && (NXT(5) == 'T') &&
3751: (NXT(6) == 'A')) {
3752: SKIP(7);
3753: SKIP_BLANKS;
1.91 daniel 3754: SHRINK;
1.152 daniel 3755: if (RAW == ')') {
1.187 daniel 3756: ctxt->entity = ctxt->input;
1.63 daniel 3757: NEXT;
3758: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
1.152 daniel 3759: if (RAW == '*') {
1.136 daniel 3760: ret->ocur = XML_ELEMENT_CONTENT_MULT;
3761: NEXT;
3762: }
1.63 daniel 3763: return(ret);
3764: }
1.152 daniel 3765: if ((RAW == '(') || (RAW == '|')) {
1.61 daniel 3766: ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3767: if (ret == NULL) return(NULL);
1.99 daniel 3768: }
1.152 daniel 3769: while (RAW == '|') {
1.64 daniel 3770: NEXT;
1.61 daniel 3771: if (elem == NULL) {
3772: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3773: if (ret == NULL) return(NULL);
3774: ret->c1 = cur;
1.64 daniel 3775: cur = ret;
1.61 daniel 3776: } else {
1.64 daniel 3777: n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3778: if (n == NULL) return(NULL);
3779: n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
3780: cur->c2 = n;
3781: cur = n;
1.119 daniel 3782: xmlFree(elem);
1.61 daniel 3783: }
3784: SKIP_BLANKS;
3785: elem = xmlParseName(ctxt);
3786: if (elem == NULL) {
1.230 veillard 3787: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.61 daniel 3788: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3789: ctxt->sax->error(ctxt->userData,
1.61 daniel 3790: "xmlParseElementMixedContentDecl : Name expected\n");
3791: ctxt->wellFormed = 0;
1.180 daniel 3792: ctxt->disableSAX = 1;
1.61 daniel 3793: xmlFreeElementContent(cur);
3794: return(NULL);
3795: }
3796: SKIP_BLANKS;
1.97 daniel 3797: GROW;
1.61 daniel 3798: }
1.152 daniel 3799: if ((RAW == ')') && (NXT(1) == '*')) {
1.66 daniel 3800: if (elem != NULL) {
1.61 daniel 3801: cur->c2 = xmlNewElementContent(elem,
3802: XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 3803: xmlFree(elem);
1.66 daniel 3804: }
1.65 daniel 3805: ret->ocur = XML_ELEMENT_CONTENT_MULT;
1.187 daniel 3806: ctxt->entity = ctxt->input;
1.64 daniel 3807: SKIP(2);
1.61 daniel 3808: } else {
1.119 daniel 3809: if (elem != NULL) xmlFree(elem);
1.230 veillard 3810: xmlFreeElementContent(ret);
3811: ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
1.61 daniel 3812: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3813: ctxt->sax->error(ctxt->userData,
1.63 daniel 3814: "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
1.61 daniel 3815: ctxt->wellFormed = 0;
1.180 daniel 3816: ctxt->disableSAX = 1;
1.61 daniel 3817: return(NULL);
3818: }
3819:
3820: } else {
1.230 veillard 3821: ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
1.61 daniel 3822: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3823: ctxt->sax->error(ctxt->userData,
1.61 daniel 3824: "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
3825: ctxt->wellFormed = 0;
1.180 daniel 3826: ctxt->disableSAX = 1;
1.61 daniel 3827: }
3828: return(ret);
3829: }
3830:
3831: /**
3832: * xmlParseElementChildrenContentDecl:
1.50 daniel 3833: * @ctxt: an XML parser context
3834: *
1.61 daniel 3835: * parse the declaration for a Mixed Element content
3836: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
1.22 daniel 3837: *
1.61 daniel 3838: *
1.22 daniel 3839: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
3840: *
3841: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
3842: *
3843: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
3844: *
3845: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
3846: *
1.99 daniel 3847: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
3848: * TODO Parameter-entity replacement text must be properly nested
3849: * with parenthetized groups. That is to say, if either of the
3850: * opening or closing parentheses in a choice, seq, or Mixed
3851: * construct is contained in the replacement text for a parameter
3852: * entity, both must be contained in the same replacement text. For
3853: * interoperability, if a parameter-entity reference appears in a
3854: * choice, seq, or Mixed construct, its replacement text should not
3855: * be empty, and neither the first nor last non-blank character of
3856: * the replacement text should be a connector (| or ,).
3857: *
1.62 daniel 3858: * returns: the tree of xmlElementContentPtr describing the element
1.61 daniel 3859: * hierarchy.
3860: */
3861: xmlElementContentPtr
1.62 daniel 3862: xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
1.63 daniel 3863: xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
1.123 daniel 3864: xmlChar *elem;
3865: xmlChar type = 0;
1.62 daniel 3866:
3867: SKIP_BLANKS;
1.94 daniel 3868: GROW;
1.152 daniel 3869: if (RAW == '(') {
1.63 daniel 3870: /* Recurse on first child */
1.62 daniel 3871: NEXT;
3872: SKIP_BLANKS;
3873: cur = ret = xmlParseElementChildrenContentDecl(ctxt);
3874: SKIP_BLANKS;
1.101 daniel 3875: GROW;
1.62 daniel 3876: } else {
3877: elem = xmlParseName(ctxt);
3878: if (elem == NULL) {
1.230 veillard 3879: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 3880: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3881: ctxt->sax->error(ctxt->userData,
1.62 daniel 3882: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
3883: ctxt->wellFormed = 0;
1.180 daniel 3884: ctxt->disableSAX = 1;
1.62 daniel 3885: return(NULL);
3886: }
3887: cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.101 daniel 3888: GROW;
1.152 daniel 3889: if (RAW == '?') {
1.104 daniel 3890: cur->ocur = XML_ELEMENT_CONTENT_OPT;
1.62 daniel 3891: NEXT;
1.152 daniel 3892: } else if (RAW == '*') {
1.104 daniel 3893: cur->ocur = XML_ELEMENT_CONTENT_MULT;
1.62 daniel 3894: NEXT;
1.152 daniel 3895: } else if (RAW == '+') {
1.104 daniel 3896: cur->ocur = XML_ELEMENT_CONTENT_PLUS;
1.62 daniel 3897: NEXT;
3898: } else {
1.104 daniel 3899: cur->ocur = XML_ELEMENT_CONTENT_ONCE;
1.62 daniel 3900: }
1.119 daniel 3901: xmlFree(elem);
1.101 daniel 3902: GROW;
1.62 daniel 3903: }
3904: SKIP_BLANKS;
1.91 daniel 3905: SHRINK;
1.152 daniel 3906: while (RAW != ')') {
1.63 daniel 3907: /*
3908: * Each loop we parse one separator and one element.
3909: */
1.152 daniel 3910: if (RAW == ',') {
1.62 daniel 3911: if (type == 0) type = CUR;
3912:
3913: /*
3914: * Detect "Name | Name , Name" error
3915: */
3916: else if (type != CUR) {
1.230 veillard 3917: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 3918: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3919: ctxt->sax->error(ctxt->userData,
1.62 daniel 3920: "xmlParseElementChildrenContentDecl : '%c' expected\n",
3921: type);
3922: ctxt->wellFormed = 0;
1.180 daniel 3923: ctxt->disableSAX = 1;
1.170 daniel 3924: if ((op != NULL) && (op != ret))
3925: xmlFreeElementContent(op);
1.211 veillard 3926: if ((last != NULL) && (last != ret) &&
3927: (last != ret->c1) && (last != ret->c2))
1.170 daniel 3928: xmlFreeElementContent(last);
3929: if (ret != NULL)
3930: xmlFreeElementContent(ret);
1.62 daniel 3931: return(NULL);
3932: }
1.64 daniel 3933: NEXT;
1.62 daniel 3934:
1.63 daniel 3935: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
3936: if (op == NULL) {
3937: xmlFreeElementContent(ret);
3938: return(NULL);
3939: }
3940: if (last == NULL) {
3941: op->c1 = ret;
1.65 daniel 3942: ret = cur = op;
1.63 daniel 3943: } else {
3944: cur->c2 = op;
3945: op->c1 = last;
3946: cur =op;
1.65 daniel 3947: last = NULL;
1.63 daniel 3948: }
1.152 daniel 3949: } else if (RAW == '|') {
1.62 daniel 3950: if (type == 0) type = CUR;
3951:
3952: /*
1.63 daniel 3953: * Detect "Name , Name | Name" error
1.62 daniel 3954: */
3955: else if (type != CUR) {
1.230 veillard 3956: ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
1.62 daniel 3957: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3958: ctxt->sax->error(ctxt->userData,
1.62 daniel 3959: "xmlParseElementChildrenContentDecl : '%c' expected\n",
3960: type);
3961: ctxt->wellFormed = 0;
1.180 daniel 3962: ctxt->disableSAX = 1;
1.211 veillard 3963: if ((op != NULL) && (op != ret) && (op != last))
1.170 daniel 3964: xmlFreeElementContent(op);
1.211 veillard 3965: if ((last != NULL) && (last != ret) &&
3966: (last != ret->c1) && (last != ret->c2))
1.170 daniel 3967: xmlFreeElementContent(last);
3968: if (ret != NULL)
3969: xmlFreeElementContent(ret);
1.62 daniel 3970: return(NULL);
3971: }
1.64 daniel 3972: NEXT;
1.62 daniel 3973:
1.63 daniel 3974: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3975: if (op == NULL) {
1.170 daniel 3976: if ((op != NULL) && (op != ret))
3977: xmlFreeElementContent(op);
1.211 veillard 3978: if ((last != NULL) && (last != ret) &&
3979: (last != ret->c1) && (last != ret->c2))
1.170 daniel 3980: xmlFreeElementContent(last);
3981: if (ret != NULL)
3982: xmlFreeElementContent(ret);
1.63 daniel 3983: return(NULL);
3984: }
3985: if (last == NULL) {
3986: op->c1 = ret;
1.65 daniel 3987: ret = cur = op;
1.63 daniel 3988: } else {
3989: cur->c2 = op;
3990: op->c1 = last;
3991: cur =op;
1.65 daniel 3992: last = NULL;
1.63 daniel 3993: }
1.62 daniel 3994: } else {
1.230 veillard 3995: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
1.62 daniel 3996: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3997: ctxt->sax->error(ctxt->userData,
1.62 daniel 3998: "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
3999: ctxt->wellFormed = 0;
1.180 daniel 4000: ctxt->disableSAX = 1;
1.170 daniel 4001: if ((op != NULL) && (op != ret))
4002: xmlFreeElementContent(op);
1.211 veillard 4003: if ((last != NULL) && (last != ret) &&
4004: (last != ret->c1) && (last != ret->c2))
1.170 daniel 4005: xmlFreeElementContent(last);
4006: if (ret != NULL)
4007: xmlFreeElementContent(ret);
1.62 daniel 4008: return(NULL);
4009: }
1.101 daniel 4010: GROW;
1.62 daniel 4011: SKIP_BLANKS;
1.101 daniel 4012: GROW;
1.152 daniel 4013: if (RAW == '(') {
1.63 daniel 4014: /* Recurse on second child */
1.62 daniel 4015: NEXT;
4016: SKIP_BLANKS;
1.65 daniel 4017: last = xmlParseElementChildrenContentDecl(ctxt);
1.62 daniel 4018: SKIP_BLANKS;
4019: } else {
4020: elem = xmlParseName(ctxt);
4021: if (elem == NULL) {
1.230 veillard 4022: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.62 daniel 4023: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4024: ctxt->sax->error(ctxt->userData,
1.122 daniel 4025: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
1.62 daniel 4026: ctxt->wellFormed = 0;
1.180 daniel 4027: ctxt->disableSAX = 1;
1.170 daniel 4028: if ((op != NULL) && (op != ret))
4029: xmlFreeElementContent(op);
1.211 veillard 4030: if ((last != NULL) && (last != ret) &&
4031: (last != ret->c1) && (last != ret->c2))
1.170 daniel 4032: xmlFreeElementContent(last);
4033: if (ret != NULL)
4034: xmlFreeElementContent(ret);
1.62 daniel 4035: return(NULL);
4036: }
1.65 daniel 4037: last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.119 daniel 4038: xmlFree(elem);
1.152 daniel 4039: if (RAW == '?') {
1.105 daniel 4040: last->ocur = XML_ELEMENT_CONTENT_OPT;
4041: NEXT;
1.152 daniel 4042: } else if (RAW == '*') {
1.105 daniel 4043: last->ocur = XML_ELEMENT_CONTENT_MULT;
4044: NEXT;
1.152 daniel 4045: } else if (RAW == '+') {
1.105 daniel 4046: last->ocur = XML_ELEMENT_CONTENT_PLUS;
4047: NEXT;
4048: } else {
4049: last->ocur = XML_ELEMENT_CONTENT_ONCE;
4050: }
1.63 daniel 4051: }
4052: SKIP_BLANKS;
1.97 daniel 4053: GROW;
1.64 daniel 4054: }
1.65 daniel 4055: if ((cur != NULL) && (last != NULL)) {
4056: cur->c2 = last;
1.62 daniel 4057: }
1.187 daniel 4058: ctxt->entity = ctxt->input;
1.62 daniel 4059: NEXT;
1.152 daniel 4060: if (RAW == '?') {
1.62 daniel 4061: ret->ocur = XML_ELEMENT_CONTENT_OPT;
4062: NEXT;
1.152 daniel 4063: } else if (RAW == '*') {
1.62 daniel 4064: ret->ocur = XML_ELEMENT_CONTENT_MULT;
4065: NEXT;
1.152 daniel 4066: } else if (RAW == '+') {
1.62 daniel 4067: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
4068: NEXT;
4069: }
4070: return(ret);
1.61 daniel 4071: }
4072:
4073: /**
4074: * xmlParseElementContentDecl:
4075: * @ctxt: an XML parser context
4076: * @name: the name of the element being defined.
4077: * @result: the Element Content pointer will be stored here if any
1.22 daniel 4078: *
1.61 daniel 4079: * parse the declaration for an Element content either Mixed or Children,
4080: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4081: *
4082: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1.50 daniel 4083: *
1.61 daniel 4084: * returns: the type of element content XML_ELEMENT_TYPE_xxx
1.22 daniel 4085: */
4086:
1.61 daniel 4087: int
1.123 daniel 4088: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
1.61 daniel 4089: xmlElementContentPtr *result) {
4090:
4091: xmlElementContentPtr tree = NULL;
1.187 daniel 4092: xmlParserInputPtr input = ctxt->input;
1.61 daniel 4093: int res;
4094:
4095: *result = NULL;
4096:
1.152 daniel 4097: if (RAW != '(') {
1.230 veillard 4098: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.61 daniel 4099: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4100: ctxt->sax->error(ctxt->userData,
1.61 daniel 4101: "xmlParseElementContentDecl : '(' expected\n");
4102: ctxt->wellFormed = 0;
1.180 daniel 4103: ctxt->disableSAX = 1;
1.61 daniel 4104: return(-1);
4105: }
4106: NEXT;
1.97 daniel 4107: GROW;
1.61 daniel 4108: SKIP_BLANKS;
1.152 daniel 4109: if ((RAW == '#') && (NXT(1) == 'P') &&
1.61 daniel 4110: (NXT(2) == 'C') && (NXT(3) == 'D') &&
4111: (NXT(4) == 'A') && (NXT(5) == 'T') &&
4112: (NXT(6) == 'A')) {
1.62 daniel 4113: tree = xmlParseElementMixedContentDecl(ctxt);
1.61 daniel 4114: res = XML_ELEMENT_TYPE_MIXED;
4115: } else {
1.62 daniel 4116: tree = xmlParseElementChildrenContentDecl(ctxt);
1.61 daniel 4117: res = XML_ELEMENT_TYPE_ELEMENT;
4118: }
1.187 daniel 4119: if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
1.230 veillard 4120: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 4121: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4122: ctxt->sax->error(ctxt->userData,
4123: "Element content declaration doesn't start and stop in the same entity\n");
4124: ctxt->wellFormed = 0;
4125: ctxt->disableSAX = 1;
4126: }
1.61 daniel 4127: SKIP_BLANKS;
1.63 daniel 4128: *result = tree;
1.61 daniel 4129: return(res);
1.22 daniel 4130: }
4131:
1.50 daniel 4132: /**
4133: * xmlParseElementDecl:
4134: * @ctxt: an XML parser context
4135: *
4136: * parse an Element declaration.
1.22 daniel 4137: *
4138: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4139: *
1.99 daniel 4140: * [ VC: Unique Element Type Declaration ]
1.117 daniel 4141: * No element type may be declared more than once
1.69 daniel 4142: *
4143: * Returns the type of the element, or -1 in case of error
1.22 daniel 4144: */
1.59 daniel 4145: int
1.55 daniel 4146: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4147: xmlChar *name;
1.59 daniel 4148: int ret = -1;
1.61 daniel 4149: xmlElementContentPtr content = NULL;
1.22 daniel 4150:
1.97 daniel 4151: GROW;
1.152 daniel 4152: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 4153: (NXT(2) == 'E') && (NXT(3) == 'L') &&
4154: (NXT(4) == 'E') && (NXT(5) == 'M') &&
4155: (NXT(6) == 'E') && (NXT(7) == 'N') &&
1.59 daniel 4156: (NXT(8) == 'T')) {
1.187 daniel 4157: xmlParserInputPtr input = ctxt->input;
4158:
1.40 daniel 4159: SKIP(9);
1.59 daniel 4160: if (!IS_BLANK(CUR)) {
1.230 veillard 4161: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4162: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4163: ctxt->sax->error(ctxt->userData,
1.59 daniel 4164: "Space required after 'ELEMENT'\n");
4165: ctxt->wellFormed = 0;
1.180 daniel 4166: ctxt->disableSAX = 1;
1.59 daniel 4167: }
1.42 daniel 4168: SKIP_BLANKS;
1.22 daniel 4169: name = xmlParseName(ctxt);
4170: if (name == NULL) {
1.230 veillard 4171: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 4172: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4173: ctxt->sax->error(ctxt->userData,
1.59 daniel 4174: "xmlParseElementDecl: no name for Element\n");
4175: ctxt->wellFormed = 0;
1.180 daniel 4176: ctxt->disableSAX = 1;
1.59 daniel 4177: return(-1);
4178: }
4179: if (!IS_BLANK(CUR)) {
1.230 veillard 4180: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 4181: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4182: ctxt->sax->error(ctxt->userData,
1.59 daniel 4183: "Space required after the element name\n");
4184: ctxt->wellFormed = 0;
1.180 daniel 4185: ctxt->disableSAX = 1;
1.22 daniel 4186: }
1.42 daniel 4187: SKIP_BLANKS;
1.152 daniel 4188: if ((RAW == 'E') && (NXT(1) == 'M') &&
1.40 daniel 4189: (NXT(2) == 'P') && (NXT(3) == 'T') &&
4190: (NXT(4) == 'Y')) {
4191: SKIP(5);
1.22 daniel 4192: /*
4193: * Element must always be empty.
4194: */
1.59 daniel 4195: ret = XML_ELEMENT_TYPE_EMPTY;
1.152 daniel 4196: } else if ((RAW == 'A') && (NXT(1) == 'N') &&
1.40 daniel 4197: (NXT(2) == 'Y')) {
4198: SKIP(3);
1.22 daniel 4199: /*
4200: * Element is a generic container.
4201: */
1.59 daniel 4202: ret = XML_ELEMENT_TYPE_ANY;
1.152 daniel 4203: } else if (RAW == '(') {
1.61 daniel 4204: ret = xmlParseElementContentDecl(ctxt, name, &content);
1.22 daniel 4205: } else {
1.98 daniel 4206: /*
4207: * [ WFC: PEs in Internal Subset ] error handling.
4208: */
1.152 daniel 4209: if ((RAW == '%') && (ctxt->external == 0) &&
1.98 daniel 4210: (ctxt->inputNr == 1)) {
1.230 veillard 4211: ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
1.98 daniel 4212: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4213: ctxt->sax->error(ctxt->userData,
4214: "PEReference: forbidden within markup decl in internal subset\n");
4215: } else {
1.230 veillard 4216: ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
1.98 daniel 4217: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4218: ctxt->sax->error(ctxt->userData,
4219: "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4220: }
1.61 daniel 4221: ctxt->wellFormed = 0;
1.180 daniel 4222: ctxt->disableSAX = 1;
1.119 daniel 4223: if (name != NULL) xmlFree(name);
1.61 daniel 4224: return(-1);
1.22 daniel 4225: }
1.142 daniel 4226:
4227: SKIP_BLANKS;
4228: /*
4229: * Pop-up of finished entities.
4230: */
1.152 daniel 4231: while ((RAW == 0) && (ctxt->inputNr > 1))
1.142 daniel 4232: xmlPopInput(ctxt);
1.42 daniel 4233: SKIP_BLANKS;
1.142 daniel 4234:
1.152 daniel 4235: if (RAW != '>') {
1.230 veillard 4236: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.55 daniel 4237: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4238: ctxt->sax->error(ctxt->userData,
1.31 daniel 4239: "xmlParseElementDecl: expected '>' at the end\n");
1.59 daniel 4240: ctxt->wellFormed = 0;
1.180 daniel 4241: ctxt->disableSAX = 1;
1.61 daniel 4242: } else {
1.187 daniel 4243: if (input != ctxt->input) {
1.230 veillard 4244: ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
1.187 daniel 4245: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4246: ctxt->sax->error(ctxt->userData,
4247: "Element declaration doesn't start and stop in the same entity\n");
4248: ctxt->wellFormed = 0;
4249: ctxt->disableSAX = 1;
4250: }
4251:
1.40 daniel 4252: NEXT;
1.171 daniel 4253: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4254: (ctxt->sax->elementDecl != NULL))
1.76 daniel 4255: ctxt->sax->elementDecl(ctxt->userData, name, ret,
4256: content);
1.61 daniel 4257: }
1.84 daniel 4258: if (content != NULL) {
4259: xmlFreeElementContent(content);
4260: }
1.61 daniel 4261: if (name != NULL) {
1.119 daniel 4262: xmlFree(name);
1.61 daniel 4263: }
1.22 daniel 4264: }
1.59 daniel 4265: return(ret);
1.22 daniel 4266: }
4267:
1.50 daniel 4268: /**
4269: * xmlParseMarkupDecl:
4270: * @ctxt: an XML parser context
4271: *
4272: * parse Markup declarations
1.22 daniel 4273: *
4274: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4275: * NotationDecl | PI | Comment
4276: *
1.98 daniel 4277: * [ VC: Proper Declaration/PE Nesting ]
1.229 veillard 4278: * Parameter-entity replacement text must be properly nested with
1.98 daniel 4279: * markup declarations. That is to say, if either the first character
4280: * or the last character of a markup declaration (markupdecl above) is
4281: * contained in the replacement text for a parameter-entity reference,
4282: * both must be contained in the same replacement text.
4283: *
4284: * [ WFC: PEs in Internal Subset ]
4285: * In the internal DTD subset, parameter-entity references can occur
4286: * only where markup declarations can occur, not within markup declarations.
4287: * (This does not apply to references that occur in external parameter
4288: * entities or to the external subset.)
1.22 daniel 4289: */
1.55 daniel 4290: void
4291: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1.94 daniel 4292: GROW;
1.22 daniel 4293: xmlParseElementDecl(ctxt);
4294: xmlParseAttributeListDecl(ctxt);
4295: xmlParseEntityDecl(ctxt);
4296: xmlParseNotationDecl(ctxt);
4297: xmlParsePI(ctxt);
1.114 daniel 4298: xmlParseComment(ctxt);
1.98 daniel 4299: /*
4300: * This is only for internal subset. On external entities,
4301: * the replacement is done before parsing stage
4302: */
4303: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4304: xmlParsePEReference(ctxt);
1.97 daniel 4305: ctxt->instate = XML_PARSER_DTD;
1.22 daniel 4306: }
4307:
1.50 daniel 4308: /**
1.76 daniel 4309: * xmlParseTextDecl:
4310: * @ctxt: an XML parser context
4311: *
4312: * parse an XML declaration header for external entities
4313: *
4314: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
1.176 daniel 4315: *
4316: * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
1.76 daniel 4317: */
4318:
1.172 daniel 4319: void
1.76 daniel 4320: xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 4321: xmlChar *version;
1.76 daniel 4322:
4323: /*
4324: * We know that '<?xml' is here.
4325: */
1.193 daniel 4326: if ((RAW == '<') && (NXT(1) == '?') &&
4327: (NXT(2) == 'x') && (NXT(3) == 'm') &&
4328: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4329: SKIP(5);
4330: } else {
1.230 veillard 4331: ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
1.193 daniel 4332: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4333: ctxt->sax->error(ctxt->userData,
4334: "Text declaration '<?xml' required\n");
4335: ctxt->wellFormed = 0;
4336: ctxt->disableSAX = 1;
4337:
4338: return;
4339: }
1.76 daniel 4340:
4341: if (!IS_BLANK(CUR)) {
1.230 veillard 4342: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 4343: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4344: ctxt->sax->error(ctxt->userData,
4345: "Space needed after '<?xml'\n");
1.76 daniel 4346: ctxt->wellFormed = 0;
1.180 daniel 4347: ctxt->disableSAX = 1;
1.76 daniel 4348: }
4349: SKIP_BLANKS;
4350:
4351: /*
4352: * We may have the VersionInfo here.
4353: */
4354: version = xmlParseVersionInfo(ctxt);
4355: if (version == NULL)
4356: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.172 daniel 4357: ctxt->input->version = version;
1.76 daniel 4358:
4359: /*
4360: * We must have the encoding declaration
4361: */
4362: if (!IS_BLANK(CUR)) {
1.230 veillard 4363: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.76 daniel 4364: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4365: ctxt->sax->error(ctxt->userData, "Space needed here\n");
1.76 daniel 4366: ctxt->wellFormed = 0;
1.180 daniel 4367: ctxt->disableSAX = 1;
1.76 daniel 4368: }
1.195 daniel 4369: xmlParseEncodingDecl(ctxt);
1.193 daniel 4370: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4371: /*
4372: * The XML REC instructs us to stop parsing right here
4373: */
4374: return;
4375: }
1.76 daniel 4376:
4377: SKIP_BLANKS;
1.152 daniel 4378: if ((RAW == '?') && (NXT(1) == '>')) {
1.76 daniel 4379: SKIP(2);
1.152 daniel 4380: } else if (RAW == '>') {
1.76 daniel 4381: /* Deprecated old WD ... */
1.230 veillard 4382: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 4383: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4384: ctxt->sax->error(ctxt->userData,
4385: "XML declaration must end-up with '?>'\n");
1.76 daniel 4386: ctxt->wellFormed = 0;
1.180 daniel 4387: ctxt->disableSAX = 1;
1.76 daniel 4388: NEXT;
4389: } else {
1.230 veillard 4390: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.76 daniel 4391: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 4392: ctxt->sax->error(ctxt->userData,
4393: "parsing XML declaration: '?>' expected\n");
1.76 daniel 4394: ctxt->wellFormed = 0;
1.180 daniel 4395: ctxt->disableSAX = 1;
1.76 daniel 4396: MOVETO_ENDTAG(CUR_PTR);
4397: NEXT;
4398: }
4399: }
4400:
4401: /*
4402: * xmlParseConditionalSections
4403: * @ctxt: an XML parser context
4404: *
 * Parse one conditional section. SKIP(3) is applied first without any
 * check (the callers visible in this file only call the function when
 * the input is at '<!['), then the keyword selects the handling:
 *  - INCLUDE: the content is dispatched like external subset content
 *    (nested conditional sections, blanks, PE references, markup
 *    declarations) until ']]>' is seen or RAW becomes 0.
 *  - IGNORE: the same loop is run with ctxt->disableSAX forced to 1 and
 *    the previous value is restored once the loop ends.
 * Every error sets ctxt->errNo, is reported through ctxt->sax->error when
 * that callback is set, clears ctxt->wellFormed and sets ctxt->disableSAX.
 *
 * NOTE(review): in the IGNORE branch, restoring the saved disableSAX after
 * the loop also overwrites the disableSAX = 1 set by the content error
 * branch inside that loop -- confirm this is intended.
 * NOTE(review): the closing SKIP(3) runs whenever RAW is not 0, including
 * after the error paths where the input is not known to be at ']]>'.
 *
4405: * [61] conditionalSect ::= includeSect | ignoreSect
4406: * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4407: * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4408: * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4409: * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4410: */
4411:
4412: void
4413: xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
    /* presumably steps over the '<![' already matched by the caller */
1.165 daniel 4414: SKIP(3);
4415: SKIP_BLANKS;
1.168 daniel 4416: if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4417: (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4418: (NXT(6) == 'E')) {
1.165 daniel 4419: SKIP(7);
1.168 daniel 4420: SKIP_BLANKS;
    /* A missing '[' is reported but parsing of the section continues */
4421: if (RAW != '[') {
1.230 veillard 4422: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
1.168 daniel 4423: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4424: ctxt->sax->error(ctxt->userData,
4425: "XML conditional section '[' expected\n");
4426: ctxt->wellFormed = 0;
1.180 daniel 4427: ctxt->disableSAX = 1;
1.168 daniel 4428: } else {
4429: NEXT;
4430: }
    /* Optional trace on stderr, prefixed with file name and line if known */
1.220 veillard 4431: if (xmlParserDebugEntities) {
4432: if ((ctxt->input != NULL) && (ctxt->input->filename))
4433: fprintf(stderr, "%s(%d): ", ctxt->input->filename,
4434: ctxt->input->line);
4435: fprintf(stderr, "Entering INCLUDE Conditional Section\n");
4436: }
4437:
1.165 daniel 4438: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4439: (NXT(2) != '>'))) {
    /*
     * Snapshot of the input position, compared below to detect an
     * iteration that consumed nothing.
     */
4440: const xmlChar *check = CUR_PTR;
4441: int cons = ctxt->input->consumed;
4442: int tok = ctxt->token;
4443:
4444: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4445: xmlParseConditionalSections(ctxt);
4446: } else if (IS_BLANK(CUR)) {
4447: NEXT;
4448: } else if (RAW == '%') {
4449: xmlParsePEReference(ctxt);
4450: } else
4451: xmlParseMarkupDecl(ctxt);
4452:
4453: /*
4454: * Pop-up of finished entities.
4455: */
4456: while ((RAW == 0) && (ctxt->inputNr > 1))
4457: xmlPopInput(ctxt);
4458:
    /* No progress was made: report it and leave the loop */
4459: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4460: (tok == ctxt->token)) {
1.230 veillard 4461: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.165 daniel 4462: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4463: ctxt->sax->error(ctxt->userData,
4464: "Content error in the external subset\n");
4465: ctxt->wellFormed = 0;
1.180 daniel 4466: ctxt->disableSAX = 1;
1.165 daniel 4467: break;
4468: }
4469: }
1.220 veillard 4470: if (xmlParserDebugEntities) {
4471: if ((ctxt->input != NULL) && (ctxt->input->filename))
4472: fprintf(stderr, "%s(%d): ", ctxt->input->filename,
4473: ctxt->input->line);
4474: fprintf(stderr, "Leaving INCLUDE Conditional Section\n");
4475: }
4476:
1.168 daniel 4477: } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4478: (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
    /* Holds the value of ctxt->disableSAX while the section is parsed */
1.171 daniel 4479: int state;
4480:
1.168 daniel 4481: SKIP(6);
4482: SKIP_BLANKS;
4483: if (RAW != '[') {
1.230 veillard 4484: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
1.168 daniel 4485: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4486: ctxt->sax->error(ctxt->userData,
4487: "XML conditional section '[' expected\n");
4488: ctxt->wellFormed = 0;
1.180 daniel 4489: ctxt->disableSAX = 1;
1.168 daniel 4490: } else {
4491: NEXT;
4492: }
1.220 veillard 4493: if (xmlParserDebugEntities) {
4494: if ((ctxt->input != NULL) && (ctxt->input->filename))
4495: fprintf(stderr, "%s(%d): ", ctxt->input->filename,
4496: ctxt->input->line);
4497: fprintf(stderr, "Entering IGNORE Conditional Section\n");
4498: }
1.171 daniel 4499:
1.143 daniel 4500: /*
1.171 daniel 4501: * Parse up to the end of the conditional section
4502: * But disable SAX event generating DTD building in the meantime
1.143 daniel 4503: */
1.171 daniel 4504: state = ctxt->disableSAX;
1.220 veillard 4505: ctxt->disableSAX = 1;
1.165 daniel 4506: while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4507: (NXT(2) != '>'))) {
    /* Same progress snapshot as in the INCLUDE loop */
1.171 daniel 4508: const xmlChar *check = CUR_PTR;
4509: int cons = ctxt->input->consumed;
4510: int tok = ctxt->token;
4511:
4512: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4513: xmlParseConditionalSections(ctxt);
4514: } else if (IS_BLANK(CUR)) {
4515: NEXT;
4516: } else if (RAW == '%') {
4517: xmlParsePEReference(ctxt);
4518: } else
4519: xmlParseMarkupDecl(ctxt);
4520:
1.165 daniel 4521: /*
4522: * Pop-up of finished entities.
4523: */
4524: while ((RAW == 0) && (ctxt->inputNr > 1))
4525: xmlPopInput(ctxt);
1.143 daniel 4526:
1.171 daniel 4527: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4528: (tok == ctxt->token)) {
1.230 veillard 4529: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.171 daniel 4530: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4531: ctxt->sax->error(ctxt->userData,
4532: "Content error in the external subset\n");
4533: ctxt->wellFormed = 0;
1.180 daniel 4534: ctxt->disableSAX = 1;
1.171 daniel 4535: break;
4536: }
1.165 daniel 4537: }
    /*
     * Put back the value saved before the loop.
     * NOTE(review): this also discards the disableSAX = 1 set by the
     * error branch just above -- confirm.
     */
1.171 daniel 4538: ctxt->disableSAX = state;
1.220 veillard 4539: if (xmlParserDebugEntities) {
4540: if ((ctxt->input != NULL) && (ctxt->input->filename))
4541: fprintf(stderr, "%s(%d): ", ctxt->input->filename,
4542: ctxt->input->line);
4543: fprintf(stderr, "Leaving IGNORE Conditional Section\n");
4544: }
4545:
1.168 daniel 4546: } else {
    /* Neither keyword matched */
1.230 veillard 4547: ctxt->errNo = XML_ERR_CONDSEC_INVALID;
1.168 daniel 4548: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4549: ctxt->sax->error(ctxt->userData,
4550: "XML conditional section INCLUDE or IGNORE keyword expected\n");
4551: ctxt->wellFormed = 0;
1.180 daniel 4552: ctxt->disableSAX = 1;
1.143 daniel 4553: }
4554:
1.152 daniel 4555: if (RAW == 0)
1.143 daniel 4556: SHRINK;
4557:
    /* RAW is tested again after SHRINK before deciding the outcome */
1.152 daniel 4558: if (RAW == 0) {
1.230 veillard 4559: ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
1.76 daniel 4560: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4561: ctxt->sax->error(ctxt->userData,
4562: "XML conditional section not closed\n");
4563: ctxt->wellFormed = 0;
1.180 daniel 4564: ctxt->disableSAX = 1;
1.143 daniel 4565: } else {
    /* presumably steps over the closing ']]>' -- see NOTE(review) above */
4566: SKIP(3);
1.76 daniel 4567: }
4568: }
4569:
4570: /**
1.124 daniel 4571: * xmlParseExternalSubset:
1.76 daniel 4572: * @ctxt: an XML parser context
1.124 daniel 4573: * @ExternalID: the external identifier
4574: * @SystemID: the system identifier (or URL)
1.76 daniel 4575: *
4576: * parse Markup declarations from an external subset
 *
 * When the input starts with "<?xml" a text declaration is parsed first;
 * if ctxt->errNo is XML_ERR_UNSUPPORTED_ENCODING afterwards, ctxt->instate
 * is set to XML_PARSER_EOF and the function returns immediately.
 * ctxt->myDoc is created when it is NULL, and an internal subset is created
 * from @ExternalID and @SystemID when the document has none.
 * Declarations are then parsed while the input starts with '<?', '<!' or a
 * blank; any input left after that loop is reported as an error.
 * Errors set ctxt->errNo, are reported through ctxt->sax->error when set,
 * clear ctxt->wellFormed and set ctxt->disableSAX.
4577: *
4578: * [30] extSubset ::= textDecl? extSubsetDecl
4579: *
4580: * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
4581: */
4582: void
1.123 daniel 4583: xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
4584: const xmlChar *SystemID) {
1.132 daniel 4585: GROW;
1.152 daniel 4586: if ((RAW == '<') && (NXT(1) == '?') &&
1.76 daniel 4587: (NXT(2) == 'x') && (NXT(3) == 'm') &&
4588: (NXT(4) == 'l')) {
1.172 daniel 4589: xmlParseTextDecl(ctxt);
1.193 daniel 4590: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4591: /*
4592: * The XML REC instructs us to stop parsing right here
4593: */
4594: ctxt->instate = XML_PARSER_EOF;
4595: return;
4596: }
1.76 daniel 4597: }
    /* Make sure there is a document, then an internal subset node on it */
1.79 daniel 4598: if (ctxt->myDoc == NULL) {
1.116 daniel 4599: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
1.79 daniel 4600: }
4601: if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4602: xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4603:
1.96 daniel 4604: ctxt->instate = XML_PARSER_DTD;
1.101 daniel 4605: ctxt->external = 1;
    /*
     * NOTE(review): the loop condition does not test for '%', so the
     * '%' branch in the body looks reachable only if the current
     * character changes between the loop test and the dispatch (for
     * instance through GROW) -- confirm.
     */
1.152 daniel 4606: while (((RAW == '<') && (NXT(1) == '?')) ||
4607: ((RAW == '<') && (NXT(1) == '!')) ||
1.164 daniel 4608: IS_BLANK(CUR)) {
    /* Position snapshot used by the no-progress test below */
1.123 daniel 4609: const xmlChar *check = CUR_PTR;
1.115 daniel 4610: int cons = ctxt->input->consumed;
1.164 daniel 4611: int tok = ctxt->token;
1.115 daniel 4612:
1.221 veillard 4613: GROW;
1.152 daniel 4614: if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
1.76 daniel 4615: xmlParseConditionalSections(ctxt);
4616: } else if (IS_BLANK(CUR)) {
4617: NEXT;
1.152 daniel 4618: } else if (RAW == '%') {
1.76 daniel 4619: xmlParsePEReference(ctxt);
4620: } else
4621: xmlParseMarkupDecl(ctxt);
1.77 daniel 4622:
4623: /*
4624: * Pop-up of finished entities.
4625: */
1.166 daniel 4626: while ((RAW == 0) && (ctxt->inputNr > 1))
1.77 daniel 4627: xmlPopInput(ctxt);
4628:
    /* Nothing was consumed by this iteration: report it and stop */
1.164 daniel 4629: if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4630: (tok == ctxt->token)) {
1.230 veillard 4631: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.115 daniel 4632: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4633: ctxt->sax->error(ctxt->userData,
4634: "Content error in the external subset\n");
4635: ctxt->wellFormed = 0;
1.180 daniel 4636: ctxt->disableSAX = 1;
1.115 daniel 4637: break;
4638: }
1.76 daniel 4639: }
4640:
    /* Anything still unread at this point is an error */
1.152 daniel 4641: if (RAW != 0) {
1.230 veillard 4642: ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
1.76 daniel 4643: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4644: ctxt->sax->error(ctxt->userData,
4645: "Extra content at the end of the document\n");
4646: ctxt->wellFormed = 0;
1.180 daniel 4647: ctxt->disableSAX = 1;
1.76 daniel 4648: }
4649:
4650: }
4651:
4652: /**
1.77 daniel 4653: * xmlParseReference:
4654: * @ctxt: an XML parser context
4655: *
4656: * parse and handle entity references in content, depending on the SAX
4657: * interface, this may end-up in a call to characters() if this is a
1.79 daniel 4658: * CharRef, a predefined entity, if there is no reference() callback.
4659: * or if the parser was asked to switch to that mode.
 *
 * Nothing is done unless the current character is '&'.
 * - CharRef ("&#..."): the value from xmlParseCharRef() is delivered
 *   through characters(), or through reference() as "#x..." / "#..."
 *   text when the context charset is not UTF-8 and the value is above
 *   0xFF.
 * - EntityRef: the entity comes from xmlParseEntityRef(). For a named,
 *   non predefined entity whose children list is still empty, the content
 *   is parsed once and attached to the entity (internal general entities
 *   only). Then either reference() is called, or, when
 *   ctxt->replaceEntities is set, the children are copied under ctxt->node
 *   or the entity is pushed as a new input. For other entities the raw
 *   content is sent to characters().
 * SAX callbacks are only invoked while ctxt->disableSAX is 0.
1.77 daniel 4660: *
4661: * [67] Reference ::= EntityRef | CharRef
4662: */
4663: void
4664: xmlParseReference(xmlParserCtxtPtr ctxt) {
4665: xmlEntityPtr ent;
1.123 daniel 4666: xmlChar *val;
1.152 daniel 4667: if (RAW != '&') return;
1.77 daniel 4668:
4669: if (NXT(1) == '#') {
1.152 daniel 4670: int i = 0;
    /*
     * NOTE(review): 10 bytes hold the sprintf() output below only while
     * the formatted text is at most 9 characters long -- confirm that
     * xmlParseCharRef() bounds its result accordingly.
     */
1.153 daniel 4671: xmlChar out[10];
    /* Third character of the reference, tested against 'x' / 'X' below */
4672: int hex = NXT(2);
    /*
     * This int shadows the xmlChar *val declared at function scope.
     * NOTE(review): the result is used without any check -- confirm
     * what xmlParseCharRef() returns on error.
     */
1.77 daniel 4673: int val = xmlParseCharRef(ctxt);
1.152 daniel 4674:
1.198 daniel 4675: if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
1.153 daniel 4676: /*
4677: * So we are using non-UTF-8 buffers
4678: * Check that the char fit on 8bits, if not
4679: * generate a CharRef.
4680: */
4681: if (val <= 0xFF) {
4682: out[0] = val;
4683: out[1] = 0;
1.171 daniel 4684: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4685: (!ctxt->disableSAX))
1.153 daniel 4686: ctxt->sax->characters(ctxt->userData, out, 1);
4687: } else {
    /* Rebuild the reference text in the notation used by the input */
4688: if ((hex == 'x') || (hex == 'X'))
4689: sprintf((char *)out, "#x%X", val);
4690: else
4691: sprintf((char *)out, "#%d", val);
1.171 daniel 4692: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4693: (!ctxt->disableSAX))
1.153 daniel 4694: ctxt->sax->reference(ctxt->userData, out);
4695: }
4696: } else {
4697: /*
4698: * Just encode the value in UTF-8
4699: */
4700: COPY_BUF(0 ,out, i, val);
4701: out[i] = 0;
1.171 daniel 4702: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4703: (!ctxt->disableSAX))
1.153 daniel 4704: ctxt->sax->characters(ctxt->userData, out, i);
4705: }
1.77 daniel 4706: } else {
4707: ent = xmlParseEntityRef(ctxt);
4708: if (ent == NULL) return;
4709: if ((ent->name != NULL) &&
1.159 daniel 4710: (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
1.180 daniel 4711: xmlNodePtr list = NULL;
    /* Only assigned and read in the generic parsing branch below */
4712: int ret;
4713:
4714:
4715: /*
4716: * The first reference to the entity trigger a parsing phase
4717: * where the ent->children is filled with the result from
4718: * the parsing.
4719: */
4720: if (ent->children == NULL) {
4721: xmlChar *value;
4722: value = ent->content;
4723:
4724: /*
4725: * Check that this entity is well formed
4726: */
4727: if ((value != NULL) &&
4728: (value[1] == 0) && (value[0] == '<') &&
1.236 veillard 4729: (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
1.180 daniel 4730: /*
1.222 veillard 4731: * DONE: get definite answer on this !!!
1.180 daniel 4732: * Lots of entity decls are used to declare a single
4733: * char
4734: * <!ENTITY lt "<">
4735: * Which seems to be valid since
4736: * 2.4: The ampersand character (&) and the left angle
4737: * bracket (<) may appear in their literal form only
4738: * when used ... They are also legal within the literal
4739: * entity value of an internal entity declaration;
4740: * see "4.3.2 Well-Formed Parsed Entities".
4741: * IMHO 2.4 and 4.3.2 are directly in contradiction.
4742: * Looking at the OASIS test suite and James Clark
4743: * tests, this is broken. However the XML REC uses
4744: * it. Is the XML REC not well-formed ????
4745: * This is a hack to avoid this problem
1.222 veillard 4746: *
4747: * ANSWER: since lt gt amp .. are already defined,
4748: * this is a redefinition and hence the fact that the
4749: * content is not well balanced is not a Wf error, this
4750: * is lousy but acceptable.
1.180 daniel 4751: */
4752: list = xmlNewDocText(ctxt->myDoc, value);
4753: if (list != NULL) {
4754: if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4755: (ent->children == NULL)) {
4756: ent->children = list;
4757: ent->last = list;
4758: list->parent = (xmlNodePtr) ent;
4759: } else {
4760: xmlFreeNodeList(list);
4761: }
    /*
     * NOTE(review): this branch repeats the condition of the if it
     * belongs to, so it can never be taken.
     */
4762: } else if (list != NULL) {
4763: xmlFreeNodeList(list);
4764: }
1.181 daniel 4765: } else {
1.180 daniel 4766: /*
4767: * 4.3.2: An internal general parsed entity is well-formed
4768: * if its replacement text matches the production labeled
4769: * content.
4770: */
    /* ctxt->depth is raised only for the duration of the nested parse */
1.185 daniel 4771: if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
4772: ctxt->depth++;
1.180 daniel 4773: ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
1.185 daniel 4774: ctxt->sax, NULL, ctxt->depth,
4775: value, &list);
4776: ctxt->depth--;
4777: } else if (ent->etype ==
4778: XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
4779: ctxt->depth++;
1.180 daniel 4780: ret = xmlParseExternalEntity(ctxt->myDoc,
1.185 daniel 4781: ctxt->sax, NULL, ctxt->depth,
1.228 veillard 4782: ent->URI, ent->ExternalID, &list);
1.185 daniel 4783: ctxt->depth--;
4784: } else {
1.180 daniel 4785: ret = -1;
4786: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4787: ctxt->sax->error(ctxt->userData,
4788: "Internal: invalid entity type\n");
4789: }
1.185 daniel 4790: if (ret == XML_ERR_ENTITY_LOOP) {
1.230 veillard 4791: ctxt->errNo = XML_ERR_ENTITY_LOOP;
1.185 daniel 4792: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4793: ctxt->sax->error(ctxt->userData,
4794: "Detected entity reference loop\n");
4795: ctxt->wellFormed = 0;
4796: ctxt->disableSAX = 1;
4797: } else if ((ret == 0) && (list != NULL)) {
    /*
     * Only internal general entities keep the parsed nodes: every
     * node is reparented to the entity and ent->last ends on the
     * final node. Otherwise the list is freed.
     */
1.180 daniel 4798: if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4799: (ent->children == NULL)) {
4800: ent->children = list;
4801: while (list != NULL) {
4802: list->parent = (xmlNodePtr) ent;
4803: if (list->next == NULL)
4804: ent->last = list;
4805: list = list->next;
4806: }
4807: } else {
4808: xmlFreeNodeList(list);
4809: }
4810: } else if (ret > 0) {
    /* Any other positive return value is stored as the error number */
1.230 veillard 4811: ctxt->errNo = ret;
1.180 daniel 4812: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4813: ctxt->sax->error(ctxt->userData,
4814: "Entity value required\n");
4815: ctxt->wellFormed = 0;
4816: ctxt->disableSAX = 1;
4817: } else if (list != NULL) {
4818: xmlFreeNodeList(list);
4819: }
4820: }
4821: }
1.113 daniel 4822: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
1.171 daniel 4823: (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
1.113 daniel 4824: /*
4825: * Create a node.
4826: */
4827: ctxt->sax->reference(ctxt->userData, ent->name);
4828: return;
4829: } else if (ctxt->replaceEntities) {
1.222 veillard 4830: if ((ctxt->node != NULL) && (ent->children != NULL)) {
4831: /*
4832: * Seems we are generating the DOM content, do
4833: * a simple tree copy
4834: */
4835: xmlNodePtr new;
4836: new = xmlCopyNodeList(ent->children);
4837:
4838: xmlAddChildList(ctxt->node, new);
4839: /*
4840: * This is to avoid a nasty side effect, see
4841: * characters() in SAX.c
4842: */
4843: ctxt->nodemem = 0;
4844: ctxt->nodelen = 0;
4845: return;
4846: } else {
4847: /*
4848: * Probably running in SAX mode
4849: */
4850: xmlParserInputPtr input;
1.79 daniel 4851:
    /*
     * NOTE(review): input is not compared with NULL before
     * input->standalone is read further down -- confirm that
     * xmlNewEntityInputStream() cannot fail on this path.
     */
1.222 veillard 4852: input = xmlNewEntityInputStream(ctxt, ent);
4853: xmlPushInput(ctxt, input);
4854: if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
4855: (RAW == '<') && (NXT(1) == '?') &&
4856: (NXT(2) == 'x') && (NXT(3) == 'm') &&
4857: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4858: xmlParseTextDecl(ctxt);
4859: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4860: /*
4861: * The XML REC instructs us to stop parsing right here
4862: */
4863: ctxt->instate = XML_PARSER_EOF;
4864: return;
4865: }
4866: if (input->standalone == 1) {
1.230 veillard 4867: ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
1.222 veillard 4868: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4869: ctxt->sax->error(ctxt->userData,
4870: "external parsed entities cannot be standalone\n");
4871: ctxt->wellFormed = 0;
4872: ctxt->disableSAX = 1;
4873: }
1.167 daniel 4874: }
1.222 veillard 4875: return;
1.167 daniel 4876: }
1.113 daniel 4877: }
1.222 veillard 4878: } else {
    /* Entity without a name, or predefined entity */
4879: val = ent->content;
4880: if (val == NULL) return;
4881: /*
4882: * inline the entity.
4883: */
4884: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4885: (!ctxt->disableSAX))
4886: ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
1.77 daniel 4887: }
4888: }
1.24 daniel 4889: }
4890:
1.50 daniel 4891: /**
4892: * xmlParseEntityRef:
4893: * @ctxt: an XML parser context
4894: *
4895: * parse ENTITY references declarations
 *
 * When the current character is '&', it is consumed, a Name is parsed
 * and a ';' is expected after it. The entity is then requested from the
 * SAX getEntity callback and, failing that, from the predefined set.
 * The parsed name is released with xmlFree() before returning.
 * Errors set ctxt->errNo, are reported through ctxt->sax->error when it
 * is set, clear ctxt->wellFormed and set ctxt->disableSAX; an undeclared
 * entity outside the cases listed below is only reported as a warning.
 *
 * NOTE(review): the predefined set is only consulted when ctxt->sax is
 * not NULL -- confirm this is intended.
1.24 daniel 4896: *
4897: * [68] EntityRef ::= '&' Name ';'
1.68 daniel 4898: *
1.98 daniel 4899: * [ WFC: Entity Declared ]
4900: * In a document without any DTD, a document with only an internal DTD
4901: * subset which contains no parameter entity references, or a document
4902: * with "standalone='yes'", the Name given in the entity reference
4903: * must match that in an entity declaration, except that well-formed
4904: * documents need not declare any of the following entities: amp, lt,
4905: * gt, apos, quot. The declaration of a parameter entity must precede
4906: * any reference to it. Similarly, the declaration of a general entity
4907: * must precede any reference to it which appears in a default value in an
4908: * attribute-list declaration. Note that if entities are declared in the
4909: * external subset or in external parameter entities, a non-validating
4910: * processor is not obligated to read and process their declarations;
4911: * for such documents, the rule that an entity must be declared is a
4912: * well-formedness constraint only if standalone='yes'.
4913: *
4914: * [ WFC: Parsed Entity ]
4915: * An entity reference must not contain the name of an unparsed entity
4916: *
1.77 daniel 4917: * Returns the xmlEntityPtr if found, or NULL otherwise.
 * The entity pointer is still returned when one of the well-formedness
 * checks below reported an error for it.
1.24 daniel 4918: */
1.77 daniel 4919: xmlEntityPtr
1.55 daniel 4920: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1.123 daniel 4921: xmlChar *name;
1.72 daniel 4922: xmlEntityPtr ent = NULL;
1.24 daniel 4923:
1.91 daniel 4924: GROW;
1.111 daniel 4925:
1.152 daniel 4926: if (RAW == '&') {
1.40 daniel 4927: NEXT;
1.24 daniel 4928: name = xmlParseName(ctxt);
4929: if (name == NULL) {
1.230 veillard 4930: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 4931: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 4932: ctxt->sax->error(ctxt->userData,
4933: "xmlParseEntityRef: no name\n");
1.59 daniel 4934: ctxt->wellFormed = 0;
1.180 daniel 4935: ctxt->disableSAX = 1;
1.24 daniel 4936: } else {
1.152 daniel 4937: if (RAW == ';') {
1.40 daniel 4938: NEXT;
1.24 daniel 4939: /*
1.77 daniel 4940: * Ask first SAX for entity resolution, otherwise try the
4941: * predefined set.
4942: */
4943: if (ctxt->sax != NULL) {
4944: if (ctxt->sax->getEntity != NULL)
4945: ent = ctxt->sax->getEntity(ctxt->userData, name);
4946: if (ent == NULL)
4947: ent = xmlGetPredefinedEntity(name);
4948: }
4949: /*
1.98 daniel 4950: * [ WFC: Entity Declared ]
4951: * In a document without any DTD, a document with only an
4952: * internal DTD subset which contains no parameter entity
4953: * references, or a document with "standalone='yes'", the
4954: * Name given in the entity reference must match that in an
4955: * entity declaration, except that well-formed documents
4956: * need not declare any of the following entities: amp, lt,
4957: * gt, apos, quot.
4958: * The declaration of a parameter entity must precede any
4959: * reference to it.
4960: * Similarly, the declaration of a general entity must
4961: * precede any reference to it which appears in a default
4962: * value in an attribute-list declaration. Note that if
4963: * entities are declared in the external subset or in
4964: * external parameter entities, a non-validating processor
4965: * is not obligated to read and process their declarations;
4966: * for such documents, the rule that an entity must be
4967: * declared is a well-formedness constraint only if
4968: * standalone='yes'.
1.59 daniel 4969: */
1.77 daniel 4970: if (ent == NULL) {
    /*
     * Error when standalone, or when there is neither an external
     * subset nor any PE reference; only a warning otherwise.
     */
1.98 daniel 4971: if ((ctxt->standalone == 1) ||
4972: ((ctxt->hasExternalSubset == 0) &&
4973: (ctxt->hasPErefs == 0))) {
1.230 veillard 4974: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.98 daniel 4975: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 4976: ctxt->sax->error(ctxt->userData,
4977: "Entity '%s' not defined\n", name);
4978: ctxt->wellFormed = 0;
1.180 daniel 4979: ctxt->disableSAX = 1;
1.77 daniel 4980: } else {
1.230 veillard 4981: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
1.98 daniel 4982: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
4983: ctxt->sax->warning(ctxt->userData,
4984: "Entity '%s' not defined\n", name);
1.59 daniel 4985: }
1.77 daniel 4986: }
1.59 daniel 4987:
4988: /*
1.98 daniel 4989: * [ WFC: Parsed Entity ]
4990: * An entity reference must not contain the name of an
4991: * unparsed entity
4992: */
1.159 daniel 4993: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.230 veillard 4994: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.98 daniel 4995: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4996: ctxt->sax->error(ctxt->userData,
4997: "Entity reference to unparsed entity %s\n", name);
4998: ctxt->wellFormed = 0;
1.180 daniel 4999: ctxt->disableSAX = 1;
1.98 daniel 5000: }
5001:
5002: /*
5003: * [ WFC: No External Entity References ]
5004: * Attribute values cannot contain direct or indirect
5005: * entity references to external entities.
5006: */
5007: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 5008: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.230 veillard 5009: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
1.98 daniel 5010: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5011: ctxt->sax->error(ctxt->userData,
5012: "Attribute references external entity '%s'\n", name);
5013: ctxt->wellFormed = 0;
1.180 daniel 5014: ctxt->disableSAX = 1;
1.98 daniel 5015: }
5016: /*
5017: * [ WFC: No < in Attribute Values ]
5018: * The replacement text of any entity referred to directly or
5019: * indirectly in an attribute value (other than "<") must
5020: * not contain a <.
 * The (ent != NULL) test below is redundant: this branch of
 * the chain is only reached when ent is not NULL.
1.59 daniel 5021: */
1.98 daniel 5022: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.116 daniel 5023: (ent != NULL) &&
1.236 veillard 5024: (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
1.98 daniel 5025: (ent->content != NULL) &&
5026: (xmlStrchr(ent->content, '<'))) {
1.230 veillard 5027: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.98 daniel 5028: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5029: ctxt->sax->error(ctxt->userData,
5030: "'<' in entity '%s' is not allowed in attributes values\n", name);
5031: ctxt->wellFormed = 0;
1.180 daniel 5032: ctxt->disableSAX = 1;
1.98 daniel 5033: }
5034:
5035: /*
5036: * Internal check, no parameter entities here ...
5037: */
5038: else {
1.159 daniel 5039: switch (ent->etype) {
1.59 daniel 5040: case XML_INTERNAL_PARAMETER_ENTITY:
5041: case XML_EXTERNAL_PARAMETER_ENTITY:
1.230 veillard 5042: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
1.59 daniel 5043: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5044: ctxt->sax->error(ctxt->userData,
1.59 daniel 5045: "Attempt to reference the parameter entity '%s'\n", name);
5046: ctxt->wellFormed = 0;
1.180 daniel 5047: ctxt->disableSAX = 1;
5048: break;
5049: default:
1.59 daniel 5050: break;
5051: }
5052: }
5053:
5054: /*
1.98 daniel 5055: * [ WFC: No Recursion ]
1.229 veillard 5056: * A parsed entity must not contain a recursive reference
1.117 daniel 5057: * to itself, either directly or indirectly.
1.229 veillard 5058: * Done somewhere else
1.59 daniel 5059: */
1.77 daniel 5060:
1.24 daniel 5061: } else {
1.230 veillard 5062: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.55 daniel 5063: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5064: ctxt->sax->error(ctxt->userData,
1.59 daniel 5065: "xmlParseEntityRef: expecting ';'\n");
5066: ctxt->wellFormed = 0;
1.180 daniel 5067: ctxt->disableSAX = 1;
1.24 daniel 5068: }
    /* The name was only needed for the lookup and the messages */
1.119 daniel 5069: xmlFree(name);
1.24 daniel 5070: }
5071: }
1.77 daniel 5072: return(ent);
1.24 daniel 5073: }
1.229 veillard 5074:
1.135 daniel 5075: /**
5076: * xmlParseStringEntityRef:
5077: * @ctxt: an XML parser context
5078: * @str: a pointer to an index in the string
5079: *
5080: * parse ENTITY references declarations, but this version parses it from
5081: * a string value.
 *
 * NULL is returned at once, without touching @str, when @str or *@str is
 * NULL. Otherwise, when the string starts with '&', a Name is parsed with
 * xmlParseStringName() and a ';' is expected after it. The entity is then
 * requested from the SAX getEntity callback and, failing that, from the
 * predefined set. The checks and error reporting mirror
 * xmlParseEntityRef(); the parsed name is released with xmlFree().
 *
 * NOTE(review): the predefined set is only consulted when ctxt->sax is
 * not NULL -- confirm this is intended.
5082: *
5083: * [68] EntityRef ::= '&' Name ';'
5084: *
5085: * [ WFC: Entity Declared ]
5086: * In a document without any DTD, a document with only an internal DTD
5087: * subset which contains no parameter entity references, or a document
5088: * with "standalone='yes'", the Name given in the entity reference
5089: * must match that in an entity declaration, except that well-formed
5090: * documents need not declare any of the following entities: amp, lt,
5091: * gt, apos, quot. The declaration of a parameter entity must precede
5092: * any reference to it. Similarly, the declaration of a general entity
5093: * must precede any reference to it which appears in a default value in an
5094: * attribute-list declaration. Note that if entities are declared in the
5095: * external subset or in external parameter entities, a non-validating
5096: * processor is not obligated to read and process their declarations;
5097: * for such documents, the rule that an entity must be declared is a
5098: * well-formedness constraint only if standalone='yes'.
5099: *
5100: * [ WFC: Parsed Entity ]
5101: * An entity reference must not contain the name of an unparsed entity
5102: *
5103: * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5104: * is updated to the current location in the string.
 * The entity pointer is still returned when one of the well-formedness
 * checks below reported an error for it.
5105: */
5106: xmlEntityPtr
5107: xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5108: xmlChar *name;
5109: const xmlChar *ptr;
5110: xmlChar cur;
5111: xmlEntityPtr ent = NULL;
5112:
1.156 daniel 5113: if ((str == NULL) || (*str == NULL))
5114: return(NULL);
1.135 daniel 5115: ptr = *str;
5116: cur = *ptr;
5117: if (cur == '&') {
5118: ptr++;
    /* This value of cur is never read again in the function */
5119: cur = *ptr;
5120: name = xmlParseStringName(ctxt, &ptr);
5121: if (name == NULL) {
1.230 veillard 5122: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.135 daniel 5123: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5124: ctxt->sax->error(ctxt->userData,
5125: "xmlParseEntityRef: no name\n");
5126: ctxt->wellFormed = 0;
1.180 daniel 5127: ctxt->disableSAX = 1;
1.135 daniel 5128: } else {
1.185 daniel 5129: if (*ptr == ';') {
5130: ptr++;
1.135 daniel 5131: /*
5132: * Ask first SAX for entity resolution, otherwise try the
5133: * predefined set.
5134: */
5135: if (ctxt->sax != NULL) {
5136: if (ctxt->sax->getEntity != NULL)
5137: ent = ctxt->sax->getEntity(ctxt->userData, name);
5138: if (ent == NULL)
5139: ent = xmlGetPredefinedEntity(name);
5140: }
5141: /*
5142: * [ WFC: Entity Declared ]
5143: * In a document without any DTD, a document with only an
5144: * internal DTD subset which contains no parameter entity
5145: * references, or a document with "standalone='yes'", the
5146: * Name given in the entity reference must match that in an
5147: * entity declaration, except that well-formed documents
5148: * need not declare any of the following entities: amp, lt,
5149: * gt, apos, quot.
5150: * The declaration of a parameter entity must precede any
5151: * reference to it.
5152: * Similarly, the declaration of a general entity must
5153: * precede any reference to it which appears in a default
5154: * value in an attribute-list declaration. Note that if
5155: * entities are declared in the external subset or in
5156: * external parameter entities, a non-validating processor
5157: * is not obligated to read and process their declarations;
5158: * for such documents, the rule that an entity must be
5159: * declared is a well-formedness constraint only if
5160: * standalone='yes'.
5161: */
5162: if (ent == NULL) {
    /*
     * Error when standalone, or when there is neither an external
     * subset nor any PE reference; only a warning otherwise.
     */
5163: if ((ctxt->standalone == 1) ||
5164: ((ctxt->hasExternalSubset == 0) &&
5165: (ctxt->hasPErefs == 0))) {
1.230 veillard 5166: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.135 daniel 5167: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5168: ctxt->sax->error(ctxt->userData,
5169: "Entity '%s' not defined\n", name);
5170: ctxt->wellFormed = 0;
1.180 daniel 5171: ctxt->disableSAX = 1;
1.135 daniel 5172: } else {
1.230 veillard 5173: ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
1.135 daniel 5174: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5175: ctxt->sax->warning(ctxt->userData,
5176: "Entity '%s' not defined\n", name);
5177: }
5178: }
5179:
5180: /*
5181: * [ WFC: Parsed Entity ]
5182: * An entity reference must not contain the name of an
5183: * unparsed entity
5184: */
1.159 daniel 5185: else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
1.230 veillard 5186: ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
1.135 daniel 5187: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5188: ctxt->sax->error(ctxt->userData,
5189: "Entity reference to unparsed entity %s\n", name);
5190: ctxt->wellFormed = 0;
1.180 daniel 5191: ctxt->disableSAX = 1;
1.135 daniel 5192: }
5193:
5194: /*
5195: * [ WFC: No External Entity References ]
5196: * Attribute values cannot contain direct or indirect
5197: * entity references to external entities.
5198: */
5199: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
1.159 daniel 5200: (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
1.230 veillard 5201: ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
1.135 daniel 5202: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5203: ctxt->sax->error(ctxt->userData,
5204: "Attribute references external entity '%s'\n", name);
5205: ctxt->wellFormed = 0;
1.180 daniel 5206: ctxt->disableSAX = 1;
1.135 daniel 5207: }
5208: /*
5209: * [ WFC: No < in Attribute Values ]
5210: * The replacement text of any entity referred to directly or
5211: * indirectly in an attribute value (other than "<") must
5212: * not contain a <.
 * The (ent != NULL) test below is redundant: this branch of
 * the chain is only reached when ent is not NULL.
5213: */
5214: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5215: (ent != NULL) &&
1.236 veillard 5216: (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
1.135 daniel 5217: (ent->content != NULL) &&
5218: (xmlStrchr(ent->content, '<'))) {
1.230 veillard 5219: ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
1.135 daniel 5220: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5221: ctxt->sax->error(ctxt->userData,
5222: "'<' in entity '%s' is not allowed in attributes values\n", name);
5223: ctxt->wellFormed = 0;
1.180 daniel 5224: ctxt->disableSAX = 1;
1.135 daniel 5225: }
5226:
5227: /*
5228: * Internal check, no parameter entities here ...
5229: */
5230: else {
1.159 daniel 5231: switch (ent->etype) {
1.135 daniel 5232: case XML_INTERNAL_PARAMETER_ENTITY:
5233: case XML_EXTERNAL_PARAMETER_ENTITY:
1.230 veillard 5234: ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
1.135 daniel 5235: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5236: ctxt->sax->error(ctxt->userData,
5237: "Attempt to reference the parameter entity '%s'\n", name);
5238: ctxt->wellFormed = 0;
1.180 daniel 5239: ctxt->disableSAX = 1;
5240: break;
5241: default:
1.135 daniel 5242: break;
5243: }
5244: }
5245:
5246: /*
5247: * [ WFC: No Recursion ]
1.229 veillard 5248: * A parsed entity must not contain a recursive reference
1.135 daniel 5249: * to itself, either directly or indirectly.
1.229 veillard 5250: * Done somewhere else
1.135 daniel 5251: */
5252:
5253: } else {
1.230 veillard 5254: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.135 daniel 5255: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5256: ctxt->sax->error(ctxt->userData,
5257: "xmlParseEntityRef: expecting ';'\n");
5258: ctxt->wellFormed = 0;
1.180 daniel 5259: ctxt->disableSAX = 1;
1.135 daniel 5260: }
    /* The name was only needed for the lookup and the messages */
5261: xmlFree(name);
5262: }
5263: }
    /* Hand the position reached back to the caller, on error paths too */
1.185 daniel 5264: *str = ptr;
1.135 daniel 5265: return(ent);
5266: }
1.24 daniel 5267:
1.50 daniel 5268: /**
5269: * xmlParsePEReference:
5270: * @ctxt: an XML parser context
5271: *
5272: * parse PEReference declarations
1.77 daniel 5273: * The entity content is handled directly by pushing its content as
5274: * a new input stream.
1.22 daniel 5275: *
5276: * [69] PEReference ::= '%' Name ';'
1.68 daniel 5277: *
1.98 daniel 5278: * [ WFC: No Recursion ]
1.229 veillard 5279: * A parsed entity must not contain a recursive
1.98 daniel 5280: * reference to itself, either directly or indirectly.
5281: *
5282: * [ WFC: Entity Declared ]
5283: * In a document without any DTD, a document with only an internal DTD
5284: * subset which contains no parameter entity references, or a document
5285: * with "standalone='yes'", ... ... The declaration of a parameter
5286: * entity must precede any reference to it...
5287: *
5288: * [ VC: Entity Declared ]
5289: * In a document with an external subset or external parameter entities
5290: * with "standalone='no'", ... ... The declaration of a parameter entity
5291: * must precede any reference to it...
5292: *
5293: * [ WFC: In DTD ]
5294: * Parameter-entity references may only appear in the DTD.
5295: * NOTE: misleading but this is handled.
1.22 daniel 5296: */
1.77 daniel 5297: void
1.55 daniel 5298: xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.123 daniel 5299: xmlChar *name;
1.72 daniel 5300: xmlEntityPtr entity = NULL;
1.50 daniel 5301: xmlParserInputPtr input;
1.22 daniel 5302:
1.152 daniel 5303: if (RAW == '%') {
1.40 daniel 5304: NEXT;
1.22 daniel 5305: name = xmlParseName(ctxt);
5306: if (name == NULL) {
1.230 veillard 5307: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 5308: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5309: ctxt->sax->error(ctxt->userData,
5310: "xmlParsePEReference: no name\n");
1.59 daniel 5311: ctxt->wellFormed = 0;
1.180 daniel 5312: ctxt->disableSAX = 1;
1.22 daniel 5313: } else {
1.152 daniel 5314: if (RAW == ';') {
1.40 daniel 5315: NEXT;
1.98 daniel 5316: if ((ctxt->sax != NULL) &&
5317: (ctxt->sax->getParameterEntity != NULL))
5318: entity = ctxt->sax->getParameterEntity(ctxt->userData,
5319: name);
1.45 daniel 5320: if (entity == NULL) {
1.98 daniel 5321: /*
5322: * [ WFC: Entity Declared ]
5323: * In a document without any DTD, a document with only an
5324: * internal DTD subset which contains no parameter entity
5325: * references, or a document with "standalone='yes'", ...
5326: * ... The declaration of a parameter entity must precede
5327: * any reference to it...
5328: */
5329: if ((ctxt->standalone == 1) ||
5330: ((ctxt->hasExternalSubset == 0) &&
5331: (ctxt->hasPErefs == 0))) {
1.230 veillard 5332: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.220 veillard 5333: if ((!ctxt->disableSAX) &&
5334: (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 5335: ctxt->sax->error(ctxt->userData,
5336: "PEReference: %%%s; not found\n", name);
5337: ctxt->wellFormed = 0;
1.180 daniel 5338: ctxt->disableSAX = 1;
1.98 daniel 5339: } else {
5340: /*
5341: * [ VC: Entity Declared ]
5342: * In a document with an external subset or external
5343: * parameter entities with "standalone='no'", ...
5344: * ... The declaration of a parameter entity must precede
5345: * any reference to it...
5346: */
1.220 veillard 5347: if ((!ctxt->disableSAX) &&
5348: (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1.98 daniel 5349: ctxt->sax->warning(ctxt->userData,
5350: "PEReference: %%%s; not found\n", name);
5351: ctxt->valid = 0;
5352: }
1.50 daniel 5353: } else {
1.98 daniel 5354: /*
5355: * Internal checking in case the entity quest barfed
5356: */
1.159 daniel 5357: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5358: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.98 daniel 5359: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5360: ctxt->sax->warning(ctxt->userData,
5361: "Internal: %%%s; is not a parameter entity\n", name);
5362: } else {
1.164 daniel 5363: /*
5364: * TODO !!!
5365: * handle the extra spaces added before and after
5366: * c.f. http://www.w3.org/TR/REC-xml#as-PE
5367: */
1.98 daniel 5368: input = xmlNewEntityInputStream(ctxt, entity);
5369: xmlPushInput(ctxt, input);
1.164 daniel 5370: if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5371: (RAW == '<') && (NXT(1) == '?') &&
5372: (NXT(2) == 'x') && (NXT(3) == 'm') &&
5373: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.172 daniel 5374: xmlParseTextDecl(ctxt);
1.193 daniel 5375: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5376: /*
5377: * The XML REC instructs us to stop parsing
5378: * right here
5379: */
5380: ctxt->instate = XML_PARSER_EOF;
5381: xmlFree(name);
5382: return;
5383: }
1.164 daniel 5384: }
5385: if (ctxt->token == 0)
5386: ctxt->token = ' ';
1.98 daniel 5387: }
1.45 daniel 5388: }
1.98 daniel 5389: ctxt->hasPErefs = 1;
1.22 daniel 5390: } else {
1.230 veillard 5391: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.55 daniel 5392: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5393: ctxt->sax->error(ctxt->userData,
1.59 daniel 5394: "xmlParsePEReference: expecting ';'\n");
5395: ctxt->wellFormed = 0;
1.180 daniel 5396: ctxt->disableSAX = 1;
1.22 daniel 5397: }
1.119 daniel 5398: xmlFree(name);
1.3 veillard 5399: }
5400: }
5401: }
5402:
1.50 daniel 5403: /**
1.135 daniel 5404: * xmlParseStringPEReference:
5405: * @ctxt: an XML parser context
5406: * @str: a pointer to an index in the string
5407: *
5408: * parse PEReference declarations
5409: *
5410: * [69] PEReference ::= '%' Name ';'
5411: *
5412: * [ WFC: No Recursion ]
1.229 veillard 5413: * A parsed entity must not contain a recursive
1.135 daniel 5414: * reference to itself, either directly or indirectly.
5415: *
5416: * [ WFC: Entity Declared ]
5417: * In a document without any DTD, a document with only an internal DTD
5418: * subset which contains no parameter entity references, or a document
5419: * with "standalone='yes'", ... ... The declaration of a parameter
5420: * entity must precede any reference to it...
5421: *
5422: * [ VC: Entity Declared ]
5423: * In a document with an external subset or external parameter entities
5424: * with "standalone='no'", ... ... The declaration of a parameter entity
5425: * must precede any reference to it...
5426: *
5427: * [ WFC: In DTD ]
5428: * Parameter-entity references may only appear in the DTD.
5429: * NOTE: misleading but this is handled.
5430: *
5431: * Returns the string of the entity content.
5432: * str is updated to the current value of the index
5433: */
5434: xmlEntityPtr
5435: xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5436: const xmlChar *ptr;
5437: xmlChar cur;
5438: xmlChar *name;
5439: xmlEntityPtr entity = NULL;
5440:
5441: if ((str == NULL) || (*str == NULL)) return(NULL);
5442: ptr = *str;
5443: cur = *ptr;
5444: if (cur == '%') {
5445: ptr++;
5446: cur = *ptr;
5447: name = xmlParseStringName(ctxt, &ptr);
5448: if (name == NULL) {
1.230 veillard 5449: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.135 daniel 5450: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5451: ctxt->sax->error(ctxt->userData,
5452: "xmlParseStringPEReference: no name\n");
5453: ctxt->wellFormed = 0;
1.180 daniel 5454: ctxt->disableSAX = 1;
1.135 daniel 5455: } else {
5456: cur = *ptr;
5457: if (cur == ';') {
5458: ptr++;
5459: cur = *ptr;
5460: if ((ctxt->sax != NULL) &&
5461: (ctxt->sax->getParameterEntity != NULL))
5462: entity = ctxt->sax->getParameterEntity(ctxt->userData,
5463: name);
5464: if (entity == NULL) {
5465: /*
5466: * [ WFC: Entity Declared ]
5467: * In a document without any DTD, a document with only an
5468: * internal DTD subset which contains no parameter entity
5469: * references, or a document with "standalone='yes'", ...
5470: * ... The declaration of a parameter entity must precede
5471: * any reference to it...
5472: */
5473: if ((ctxt->standalone == 1) ||
5474: ((ctxt->hasExternalSubset == 0) &&
5475: (ctxt->hasPErefs == 0))) {
1.230 veillard 5476: ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
1.135 daniel 5477: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5478: ctxt->sax->error(ctxt->userData,
5479: "PEReference: %%%s; not found\n", name);
5480: ctxt->wellFormed = 0;
1.180 daniel 5481: ctxt->disableSAX = 1;
1.135 daniel 5482: } else {
5483: /*
5484: * [ VC: Entity Declared ]
5485: * In a document with an external subset or external
5486: * parameter entities with "standalone='no'", ...
5487: * ... The declaration of a parameter entity must
5488: * precede any reference to it...
5489: */
5490: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5491: ctxt->sax->warning(ctxt->userData,
5492: "PEReference: %%%s; not found\n", name);
5493: ctxt->valid = 0;
5494: }
5495: } else {
5496: /*
5497: * Internal checking in case the entity quest barfed
5498: */
1.159 daniel 5499: if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5500: (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
1.135 daniel 5501: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5502: ctxt->sax->warning(ctxt->userData,
5503: "Internal: %%%s; is not a parameter entity\n", name);
5504: }
5505: }
5506: ctxt->hasPErefs = 1;
5507: } else {
1.230 veillard 5508: ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
1.135 daniel 5509: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5510: ctxt->sax->error(ctxt->userData,
5511: "xmlParseStringPEReference: expecting ';'\n");
5512: ctxt->wellFormed = 0;
1.180 daniel 5513: ctxt->disableSAX = 1;
1.135 daniel 5514: }
5515: xmlFree(name);
5516: }
5517: }
5518: *str = ptr;
5519: return(entity);
5520: }
5521:
5522: /**
1.181 daniel 5523: * xmlParseDocTypeDecl:
1.50 daniel 5524: * @ctxt: an XML parser context
5525: *
5526: * parse a DOCTYPE declaration
1.21 daniel 5527: *
1.22 daniel 5528: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
5529: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.98 daniel 5530: *
5531: * [ VC: Root Element Type ]
1.99 daniel 5532: * The Name in the document type declaration must match the element
1.98 daniel 5533: * type of the root element.
1.21 daniel 5534: */
5535:
1.55 daniel 5536: void
5537: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.165 daniel 5538: xmlChar *name = NULL;
1.123 daniel 5539: xmlChar *ExternalID = NULL;
5540: xmlChar *URI = NULL;
1.21 daniel 5541:
5542: /*
5543: * We know that '<!DOCTYPE' has been detected.
5544: */
1.40 daniel 5545: SKIP(9);
1.21 daniel 5546:
1.42 daniel 5547: SKIP_BLANKS;
1.21 daniel 5548:
5549: /*
5550: * Parse the DOCTYPE name.
5551: */
5552: name = xmlParseName(ctxt);
5553: if (name == NULL) {
1.230 veillard 5554: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 5555: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 5556: ctxt->sax->error(ctxt->userData,
5557: "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.59 daniel 5558: ctxt->wellFormed = 0;
1.180 daniel 5559: ctxt->disableSAX = 1;
1.21 daniel 5560: }
1.165 daniel 5561: ctxt->intSubName = name;
1.21 daniel 5562:
1.42 daniel 5563: SKIP_BLANKS;
1.21 daniel 5564:
5565: /*
1.22 daniel 5566: * Check for SystemID and ExternalID
5567: */
1.67 daniel 5568: URI = xmlParseExternalID(ctxt, &ExternalID, 1);
1.98 daniel 5569:
5570: if ((URI != NULL) || (ExternalID != NULL)) {
5571: ctxt->hasExternalSubset = 1;
5572: }
1.165 daniel 5573: ctxt->extSubURI = URI;
5574: ctxt->extSubSystem = ExternalID;
1.98 daniel 5575:
1.42 daniel 5576: SKIP_BLANKS;
1.36 daniel 5577:
1.76 daniel 5578: /*
1.165 daniel 5579: * Create and update the internal subset.
1.76 daniel 5580: */
1.171 daniel 5581: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
5582: (!ctxt->disableSAX))
1.74 daniel 5583: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
1.22 daniel 5584:
5585: /*
1.140 daniel 5586: * Is there any internal subset declarations ?
5587: * they are handled separately in xmlParseInternalSubset()
5588: */
1.152 daniel 5589: if (RAW == '[')
1.140 daniel 5590: return;
5591:
5592: /*
5593: * We should be at the end of the DOCTYPE declaration.
5594: */
1.152 daniel 5595: if (RAW != '>') {
1.230 veillard 5596: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
1.140 daniel 5597: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5598: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5599: ctxt->wellFormed = 0;
1.180 daniel 5600: ctxt->disableSAX = 1;
1.140 daniel 5601: }
5602: NEXT;
5603: }
5604:
5605: /**
1.181 daniel 5606: * xmlParseInternalsubset:
1.140 daniel 5607: * @ctxt: an XML parser context
5608: *
5609: * parse the internal subset declaration
5610: *
5611: * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5612: */
5613:
5614: void
5615: xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
5616: /*
1.22 daniel 5617: * Is there any DTD definition ?
5618: */
1.152 daniel 5619: if (RAW == '[') {
1.96 daniel 5620: ctxt->instate = XML_PARSER_DTD;
1.40 daniel 5621: NEXT;
1.22 daniel 5622: /*
5623: * Parse the succession of Markup declarations and
5624: * PEReferences.
5625: * Subsequence (markupdecl | PEReference | S)*
5626: */
1.152 daniel 5627: while (RAW != ']') {
1.123 daniel 5628: const xmlChar *check = CUR_PTR;
1.115 daniel 5629: int cons = ctxt->input->consumed;
1.22 daniel 5630:
1.42 daniel 5631: SKIP_BLANKS;
1.22 daniel 5632: xmlParseMarkupDecl(ctxt);
1.50 daniel 5633: xmlParsePEReference(ctxt);
1.22 daniel 5634:
1.115 daniel 5635: /*
5636: * Pop-up of finished entities.
5637: */
1.152 daniel 5638: while ((RAW == 0) && (ctxt->inputNr > 1))
1.115 daniel 5639: xmlPopInput(ctxt);
5640:
1.118 daniel 5641: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
1.230 veillard 5642: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 5643: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5644: ctxt->sax->error(ctxt->userData,
1.140 daniel 5645: "xmlParseInternalSubset: error detected in Markup declaration\n");
1.59 daniel 5646: ctxt->wellFormed = 0;
1.180 daniel 5647: ctxt->disableSAX = 1;
1.22 daniel 5648: break;
5649: }
5650: }
1.209 veillard 5651: if (RAW == ']') {
5652: NEXT;
5653: SKIP_BLANKS;
5654: }
1.22 daniel 5655: }
5656:
5657: /*
5658: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 5659: */
1.152 daniel 5660: if (RAW != '>') {
1.230 veillard 5661: ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
1.55 daniel 5662: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5663: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
1.59 daniel 5664: ctxt->wellFormed = 0;
1.180 daniel 5665: ctxt->disableSAX = 1;
1.21 daniel 5666: }
1.40 daniel 5667: NEXT;
1.21 daniel 5668: }
5669:
1.50 daniel 5670: /**
5671: * xmlParseAttribute:
5672: * @ctxt: an XML parser context
1.123 daniel 5673: * @value: a xmlChar ** used to store the value of the attribute
1.50 daniel 5674: *
5675: * parse an attribute
1.3 veillard 5676: *
1.22 daniel 5677: * [41] Attribute ::= Name Eq AttValue
5678: *
1.98 daniel 5679: * [ WFC: No External Entity References ]
5680: * Attribute values cannot contain direct or indirect entity references
5681: * to external entities.
5682: *
5683: * [ WFC: No < in Attribute Values ]
5684: * The replacement text of any entity referred to directly or indirectly in
5685: * an attribute value (other than "<") must not contain a <.
5686: *
5687: * [ VC: Attribute Value Type ]
1.117 daniel 5688: * The attribute must have been declared; the value must be of the type
1.99 daniel 5689: * declared for it.
1.98 daniel 5690: *
1.22 daniel 5691: * [25] Eq ::= S? '=' S?
5692: *
1.29 daniel 5693: * With namespace:
5694: *
5695: * [NS 11] Attribute ::= QName Eq AttValue
1.43 daniel 5696: *
5697: * Also the case QName == xmlns:??? is handled independently as a namespace
5698: * definition.
1.69 daniel 5699: *
1.72 daniel 5700: * Returns the attribute name, and the value in *value.
1.3 veillard 5701: */
5702:
1.123 daniel 5703: xmlChar *
5704: xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
5705: xmlChar *name, *val;
1.3 veillard 5706:
1.72 daniel 5707: *value = NULL;
5708: name = xmlParseName(ctxt);
1.22 daniel 5709: if (name == NULL) {
1.230 veillard 5710: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.55 daniel 5711: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5712: ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
1.59 daniel 5713: ctxt->wellFormed = 0;
1.180 daniel 5714: ctxt->disableSAX = 1;
1.52 daniel 5715: return(NULL);
1.3 veillard 5716: }
5717:
5718: /*
1.29 daniel 5719: * read the value
1.3 veillard 5720: */
1.42 daniel 5721: SKIP_BLANKS;
1.152 daniel 5722: if (RAW == '=') {
1.40 daniel 5723: NEXT;
1.42 daniel 5724: SKIP_BLANKS;
1.72 daniel 5725: val = xmlParseAttValue(ctxt);
1.96 daniel 5726: ctxt->instate = XML_PARSER_CONTENT;
1.29 daniel 5727: } else {
1.230 veillard 5728: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
1.55 daniel 5729: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5730: ctxt->sax->error(ctxt->userData,
1.59 daniel 5731: "Specification mandate value for attribute %s\n", name);
5732: ctxt->wellFormed = 0;
1.180 daniel 5733: ctxt->disableSAX = 1;
1.170 daniel 5734: xmlFree(name);
1.52 daniel 5735: return(NULL);
1.43 daniel 5736: }
5737:
1.172 daniel 5738: /*
5739: * Check that xml:lang conforms to the specification
1.222 veillard 5740: * No more registered as an error, just generate a warning now
5741: * since this was deprecated in XML second edition
1.172 daniel 5742: */
1.236 veillard 5743: if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
1.172 daniel 5744: if (!xmlCheckLanguageID(val)) {
1.222 veillard 5745: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5746: ctxt->sax->warning(ctxt->userData,
5747: "Malformed value for xml:lang : %s\n", val);
1.172 daniel 5748: }
5749: }
5750:
1.176 daniel 5751: /*
5752: * Check that xml:space conforms to the specification
5753: */
1.236 veillard 5754: if (xmlStrEqual(name, BAD_CAST "xml:space")) {
5755: if (xmlStrEqual(val, BAD_CAST "default"))
1.176 daniel 5756: *(ctxt->space) = 0;
1.236 veillard 5757: else if (xmlStrEqual(val, BAD_CAST "preserve"))
1.176 daniel 5758: *(ctxt->space) = 1;
5759: else {
1.230 veillard 5760: ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
1.176 daniel 5761: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5762: ctxt->sax->error(ctxt->userData,
5763: "Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
5764: val);
5765: ctxt->wellFormed = 0;
1.180 daniel 5766: ctxt->disableSAX = 1;
1.176 daniel 5767: }
5768: }
5769:
1.72 daniel 5770: *value = val;
5771: return(name);
1.3 veillard 5772: }
5773:
1.50 daniel 5774: /**
5775: * xmlParseStartTag:
5776: * @ctxt: an XML parser context
5777: *
5778: * parse a start of tag either for rule element or
5779: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 5780: *
5781: * [40] STag ::= '<' Name (S Attribute)* S? '>'
5782: *
1.98 daniel 5783: * [ WFC: Unique Att Spec ]
5784: * No attribute name may appear more than once in the same start-tag or
5785: * empty-element tag.
5786: *
1.29 daniel 5787: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
5788: *
1.98 daniel 5789: * [ WFC: Unique Att Spec ]
5790: * No attribute name may appear more than once in the same start-tag or
5791: * empty-element tag.
5792: *
1.29 daniel 5793: * With namespace:
5794: *
5795: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
5796: *
5797: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.83 daniel 5798: *
1.192 daniel 5799: * Returns the element name parsed
1.2 veillard 5800: */
5801:
1.123 daniel 5802: xmlChar *
1.69 daniel 5803: xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 5804: xmlChar *name;
5805: xmlChar *attname;
5806: xmlChar *attvalue;
5807: const xmlChar **atts = NULL;
1.72 daniel 5808: int nbatts = 0;
5809: int maxatts = 0;
5810: int i;
1.2 veillard 5811:
1.152 daniel 5812: if (RAW != '<') return(NULL);
1.40 daniel 5813: NEXT;
1.3 veillard 5814:
1.72 daniel 5815: name = xmlParseName(ctxt);
1.59 daniel 5816: if (name == NULL) {
1.230 veillard 5817: ctxt->errNo = XML_ERR_NAME_REQUIRED;
1.59 daniel 5818: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5819: ctxt->sax->error(ctxt->userData,
1.59 daniel 5820: "xmlParseStartTag: invalid element name\n");
5821: ctxt->wellFormed = 0;
1.180 daniel 5822: ctxt->disableSAX = 1;
1.83 daniel 5823: return(NULL);
1.50 daniel 5824: }
5825:
5826: /*
1.3 veillard 5827: * Now parse the attributes, it ends up with the ending
5828: *
5829: * (S Attribute)* S?
5830: */
1.42 daniel 5831: SKIP_BLANKS;
1.91 daniel 5832: GROW;
1.168 daniel 5833:
1.153 daniel 5834: while ((IS_CHAR(RAW)) &&
1.152 daniel 5835: (RAW != '>') &&
5836: ((RAW != '/') || (NXT(1) != '>'))) {
1.123 daniel 5837: const xmlChar *q = CUR_PTR;
1.91 daniel 5838: int cons = ctxt->input->consumed;
1.29 daniel 5839:
1.72 daniel 5840: attname = xmlParseAttribute(ctxt, &attvalue);
5841: if ((attname != NULL) && (attvalue != NULL)) {
5842: /*
1.98 daniel 5843: * [ WFC: Unique Att Spec ]
5844: * No attribute name may appear more than once in the same
5845: * start-tag or empty-element tag.
1.72 daniel 5846: */
5847: for (i = 0; i < nbatts;i += 2) {
1.236 veillard 5848: if (xmlStrEqual(atts[i], attname)) {
1.230 veillard 5849: ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
1.72 daniel 5850: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 5851: ctxt->sax->error(ctxt->userData,
5852: "Attribute %s redefined\n",
5853: attname);
1.72 daniel 5854: ctxt->wellFormed = 0;
1.180 daniel 5855: ctxt->disableSAX = 1;
1.119 daniel 5856: xmlFree(attname);
5857: xmlFree(attvalue);
1.98 daniel 5858: goto failed;
1.72 daniel 5859: }
5860: }
5861:
5862: /*
5863: * Add the pair to atts
5864: */
5865: if (atts == NULL) {
5866: maxatts = 10;
1.123 daniel 5867: atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
1.72 daniel 5868: if (atts == NULL) {
1.86 daniel 5869: fprintf(stderr, "malloc of %ld byte failed\n",
1.123 daniel 5870: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 5871: return(NULL);
1.72 daniel 5872: }
1.127 daniel 5873: } else if (nbatts + 4 > maxatts) {
1.72 daniel 5874: maxatts *= 2;
1.233 veillard 5875: atts = (const xmlChar **) xmlRealloc((void *) atts,
5876: maxatts * sizeof(xmlChar *));
1.72 daniel 5877: if (atts == NULL) {
1.86 daniel 5878: fprintf(stderr, "realloc of %ld byte failed\n",
1.123 daniel 5879: maxatts * (long)sizeof(xmlChar *));
1.83 daniel 5880: return(NULL);
1.72 daniel 5881: }
5882: }
5883: atts[nbatts++] = attname;
5884: atts[nbatts++] = attvalue;
5885: atts[nbatts] = NULL;
5886: atts[nbatts + 1] = NULL;
1.176 daniel 5887: } else {
5888: if (attname != NULL)
5889: xmlFree(attname);
5890: if (attvalue != NULL)
5891: xmlFree(attvalue);
1.72 daniel 5892: }
5893:
1.116 daniel 5894: failed:
1.168 daniel 5895:
5896: if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
5897: break;
5898: if (!IS_BLANK(RAW)) {
1.230 veillard 5899: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.168 daniel 5900: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5901: ctxt->sax->error(ctxt->userData,
5902: "attributes construct error\n");
5903: ctxt->wellFormed = 0;
1.180 daniel 5904: ctxt->disableSAX = 1;
1.168 daniel 5905: }
1.42 daniel 5906: SKIP_BLANKS;
1.91 daniel 5907: if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
1.230 veillard 5908: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 5909: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5910: ctxt->sax->error(ctxt->userData,
1.31 daniel 5911: "xmlParseStartTag: problem parsing attributes\n");
1.59 daniel 5912: ctxt->wellFormed = 0;
1.180 daniel 5913: ctxt->disableSAX = 1;
1.29 daniel 5914: break;
1.3 veillard 5915: }
1.91 daniel 5916: GROW;
1.3 veillard 5917: }
5918:
1.43 daniel 5919: /*
1.72 daniel 5920: * SAX: Start of Element !
1.43 daniel 5921: */
1.171 daniel 5922: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
5923: (!ctxt->disableSAX))
1.74 daniel 5924: ctxt->sax->startElement(ctxt->userData, name, atts);
1.43 daniel 5925:
1.72 daniel 5926: if (atts != NULL) {
1.123 daniel 5927: for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
1.233 veillard 5928: xmlFree((void *) atts);
1.72 daniel 5929: }
1.83 daniel 5930: return(name);
1.3 veillard 5931: }
5932:
1.50 daniel 5933: /**
5934: * xmlParseEndTag:
5935: * @ctxt: an XML parser context
5936: *
5937: * parse an end of tag
1.27 daniel 5938: *
5939: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 5940: *
5941: * With namespace
5942: *
1.72 daniel 5943: * [NS 9] ETag ::= '</' QName S? '>'
1.7 veillard 5944: */
5945:
1.55 daniel 5946: void
1.140 daniel 5947: xmlParseEndTag(xmlParserCtxtPtr ctxt) {
1.123 daniel 5948: xmlChar *name;
1.140 daniel 5949: xmlChar *oldname;
1.7 veillard 5950:
1.91 daniel 5951: GROW;
1.152 daniel 5952: if ((RAW != '<') || (NXT(1) != '/')) {
1.230 veillard 5953: ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
1.55 daniel 5954: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5955: ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
1.59 daniel 5956: ctxt->wellFormed = 0;
1.180 daniel 5957: ctxt->disableSAX = 1;
1.27 daniel 5958: return;
5959: }
1.40 daniel 5960: SKIP(2);
1.7 veillard 5961:
1.72 daniel 5962: name = xmlParseName(ctxt);
1.7 veillard 5963:
5964: /*
5965: * We should definitely be at the ending "S? '>'" part
5966: */
1.91 daniel 5967: GROW;
1.42 daniel 5968: SKIP_BLANKS;
1.153 daniel 5969: if ((!IS_CHAR(RAW)) || (RAW != '>')) {
1.230 veillard 5970: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.55 daniel 5971: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5972: ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
1.59 daniel 5973: ctxt->wellFormed = 0;
1.180 daniel 5974: ctxt->disableSAX = 1;
1.7 veillard 5975: } else
1.40 daniel 5976: NEXT;
1.7 veillard 5977:
1.72 daniel 5978: /*
1.98 daniel 5979: * [ WFC: Element Type Match ]
5980: * The Name in an element's end-tag must match the element type in the
5981: * start-tag.
5982: *
1.83 daniel 5983: */
1.147 daniel 5984: if ((name == NULL) || (ctxt->name == NULL) ||
1.236 veillard 5985: (!xmlStrEqual(name, ctxt->name))) {
1.230 veillard 5986: ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
1.147 daniel 5987: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
5988: if ((name != NULL) && (ctxt->name != NULL)) {
5989: ctxt->sax->error(ctxt->userData,
5990: "Opening and ending tag mismatch: %s and %s\n",
5991: ctxt->name, name);
5992: } else if (ctxt->name != NULL) {
5993: ctxt->sax->error(ctxt->userData,
5994: "Ending tag eror for: %s\n", ctxt->name);
5995: } else {
5996: ctxt->sax->error(ctxt->userData,
5997: "Ending tag error: internal error ???\n");
5998: }
1.122 daniel 5999:
1.147 daniel 6000: }
1.83 daniel 6001: ctxt->wellFormed = 0;
1.180 daniel 6002: ctxt->disableSAX = 1;
1.83 daniel 6003: }
6004:
6005: /*
1.72 daniel 6006: * SAX: End of Tag
6007: */
1.171 daniel 6008: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6009: (!ctxt->disableSAX))
1.74 daniel 6010: ctxt->sax->endElement(ctxt->userData, name);
1.72 daniel 6011:
6012: if (name != NULL)
1.119 daniel 6013: xmlFree(name);
1.140 daniel 6014: oldname = namePop(ctxt);
1.176 daniel 6015: spacePop(ctxt);
1.140 daniel 6016: if (oldname != NULL) {
6017: #ifdef DEBUG_STACK
6018: fprintf(stderr,"Close: popped %s\n", oldname);
6019: #endif
6020: xmlFree(oldname);
6021: }
1.7 veillard 6022: return;
6023: }
6024:
1.50 daniel 6025: /**
6026: * xmlParseCDSect:
6027: * @ctxt: an XML parser context
6028: *
6029: * Parse escaped pure raw content.
1.29 daniel 6030: *
6031: * [18] CDSect ::= CDStart CData CDEnd
6032: *
6033: * [19] CDStart ::= '<![CDATA['
6034: *
6035: * [20] Data ::= (Char* - (Char* ']]>' Char*))
6036: *
6037: * [21] CDEnd ::= ']]>'
1.3 veillard 6038: */
1.55 daniel 6039: void
6040: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.135 daniel 6041: xmlChar *buf = NULL;
6042: int len = 0;
1.140 daniel 6043: int size = XML_PARSER_BUFFER_SIZE;
1.152 daniel 6044: int r, rl;
6045: int s, sl;
6046: int cur, l;
1.234 veillard 6047: int count = 0;
1.3 veillard 6048:
1.106 daniel 6049: if ((NXT(0) == '<') && (NXT(1) == '!') &&
1.40 daniel 6050: (NXT(2) == '[') && (NXT(3) == 'C') &&
6051: (NXT(4) == 'D') && (NXT(5) == 'A') &&
6052: (NXT(6) == 'T') && (NXT(7) == 'A') &&
6053: (NXT(8) == '[')) {
6054: SKIP(9);
1.29 daniel 6055: } else
1.45 daniel 6056: return;
1.109 daniel 6057:
6058: ctxt->instate = XML_PARSER_CDATA_SECTION;
1.152 daniel 6059: r = CUR_CHAR(rl);
6060: if (!IS_CHAR(r)) {
1.230 veillard 6061: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.55 daniel 6062: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6063: ctxt->sax->error(ctxt->userData,
1.135 daniel 6064: "CData section not finished\n");
1.59 daniel 6065: ctxt->wellFormed = 0;
1.180 daniel 6066: ctxt->disableSAX = 1;
1.109 daniel 6067: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 6068: return;
1.3 veillard 6069: }
1.152 daniel 6070: NEXTL(rl);
6071: s = CUR_CHAR(sl);
6072: if (!IS_CHAR(s)) {
1.230 veillard 6073: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.55 daniel 6074: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6075: ctxt->sax->error(ctxt->userData,
1.135 daniel 6076: "CData section not finished\n");
1.59 daniel 6077: ctxt->wellFormed = 0;
1.180 daniel 6078: ctxt->disableSAX = 1;
1.109 daniel 6079: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 6080: return;
1.3 veillard 6081: }
1.152 daniel 6082: NEXTL(sl);
6083: cur = CUR_CHAR(l);
1.135 daniel 6084: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6085: if (buf == NULL) {
6086: fprintf(stderr, "malloc of %d byte failed\n", size);
6087: return;
6088: }
1.108 veillard 6089: while (IS_CHAR(cur) &&
1.110 daniel 6090: ((r != ']') || (s != ']') || (cur != '>'))) {
1.152 daniel 6091: if (len + 5 >= size) {
1.135 daniel 6092: size *= 2;
1.204 veillard 6093: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 6094: if (buf == NULL) {
6095: fprintf(stderr, "realloc of %d byte failed\n", size);
6096: return;
6097: }
6098: }
1.152 daniel 6099: COPY_BUF(rl,buf,len,r);
1.110 daniel 6100: r = s;
1.152 daniel 6101: rl = sl;
1.110 daniel 6102: s = cur;
1.152 daniel 6103: sl = l;
1.234 veillard 6104: count++;
6105: if (count > 50) {
6106: GROW;
6107: count = 0;
6108: }
1.152 daniel 6109: NEXTL(l);
6110: cur = CUR_CHAR(l);
1.3 veillard 6111: }
1.135 daniel 6112: buf[len] = 0;
1.109 daniel 6113: ctxt->instate = XML_PARSER_CONTENT;
1.152 daniel 6114: if (cur != '>') {
1.230 veillard 6115: ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
1.55 daniel 6116: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6117: ctxt->sax->error(ctxt->userData,
1.135 daniel 6118: "CData section not finished\n%.50s\n", buf);
1.59 daniel 6119: ctxt->wellFormed = 0;
1.180 daniel 6120: ctxt->disableSAX = 1;
1.135 daniel 6121: xmlFree(buf);
1.45 daniel 6122: return;
1.3 veillard 6123: }
1.152 daniel 6124: NEXTL(l);
1.16 daniel 6125:
1.45 daniel 6126: /*
1.135 daniel 6127: * Ok the buffer is to be consumed as cdata.
1.45 daniel 6128: */
1.171 daniel 6129: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.107 daniel 6130: if (ctxt->sax->cdataBlock != NULL)
1.135 daniel 6131: ctxt->sax->cdataBlock(ctxt->userData, buf, len);
1.45 daniel 6132: }
1.135 daniel 6133: xmlFree(buf);
1.2 veillard 6134: }
6135:
1.50 daniel 6136: /**
6137: * xmlParseContent:
6138: * @ctxt: an XML parser context
6139: *
6140: * Parse a content:
1.2 veillard 6141: *
1.27 daniel 6142: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 6143: */
6144:
1.55 daniel 6145: void
6146: xmlParseContent(xmlParserCtxtPtr ctxt) {
1.97 daniel 6147: GROW;
1.176 daniel 6148: while (((RAW != 0) || (ctxt->token != 0)) &&
6149: ((RAW != '<') || (NXT(1) != '/'))) {
1.123 daniel 6150: const xmlChar *test = CUR_PTR;
1.91 daniel 6151: int cons = ctxt->input->consumed;
1.123 daniel 6152: xmlChar tok = ctxt->token;
1.27 daniel 6153:
6154: /*
1.152 daniel 6155: * Handle possible processed charrefs.
6156: */
6157: if (ctxt->token != 0) {
6158: xmlParseCharData(ctxt, 0);
6159: }
6160: /*
1.27 daniel 6161: * First case : a Processing Instruction.
6162: */
1.152 daniel 6163: else if ((RAW == '<') && (NXT(1) == '?')) {
1.27 daniel 6164: xmlParsePI(ctxt);
6165: }
1.72 daniel 6166:
1.27 daniel 6167: /*
6168: * Second case : a CDSection
6169: */
1.152 daniel 6170: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 6171: (NXT(2) == '[') && (NXT(3) == 'C') &&
6172: (NXT(4) == 'D') && (NXT(5) == 'A') &&
6173: (NXT(6) == 'T') && (NXT(7) == 'A') &&
6174: (NXT(8) == '[')) {
1.45 daniel 6175: xmlParseCDSect(ctxt);
1.27 daniel 6176: }
1.72 daniel 6177:
1.27 daniel 6178: /*
6179: * Third case : a comment
6180: */
1.152 daniel 6181: else if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 6182: (NXT(2) == '-') && (NXT(3) == '-')) {
1.114 daniel 6183: xmlParseComment(ctxt);
1.97 daniel 6184: ctxt->instate = XML_PARSER_CONTENT;
1.27 daniel 6185: }
1.72 daniel 6186:
1.27 daniel 6187: /*
6188: * Fourth case : a sub-element.
6189: */
1.152 daniel 6190: else if (RAW == '<') {
1.72 daniel 6191: xmlParseElement(ctxt);
1.45 daniel 6192: }
1.72 daniel 6193:
1.45 daniel 6194: /*
1.50 daniel 6195: * Fifth case : a reference. If if has not been resolved,
6196: * parsing returns it's Name, create the node
1.45 daniel 6197: */
1.97 daniel 6198:
1.152 daniel 6199: else if (RAW == '&') {
1.77 daniel 6200: xmlParseReference(ctxt);
1.27 daniel 6201: }
1.72 daniel 6202:
1.27 daniel 6203: /*
6204: * Last case, text. Note that References are handled directly.
6205: */
6206: else {
1.45 daniel 6207: xmlParseCharData(ctxt, 0);
1.3 veillard 6208: }
1.14 veillard 6209:
1.91 daniel 6210: GROW;
1.14 veillard 6211: /*
1.45 daniel 6212: * Pop-up of finished entities.
1.14 veillard 6213: */
1.152 daniel 6214: while ((RAW == 0) && (ctxt->inputNr > 1))
1.69 daniel 6215: xmlPopInput(ctxt);
1.135 daniel 6216: SHRINK;
1.45 daniel 6217:
1.113 daniel 6218: if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6219: (tok == ctxt->token)) {
1.230 veillard 6220: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.55 daniel 6221: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6222: ctxt->sax->error(ctxt->userData,
1.59 daniel 6223: "detected an error in element content\n");
6224: ctxt->wellFormed = 0;
1.180 daniel 6225: ctxt->disableSAX = 1;
1.224 veillard 6226: ctxt->instate = XML_PARSER_EOF;
1.29 daniel 6227: break;
6228: }
1.3 veillard 6229: }
1.2 veillard 6230: }
6231:
1.50 daniel 6232: /**
6233: * xmlParseElement:
6234: * @ctxt: an XML parser context
6235: *
6236: * parse an XML element, this is highly recursive
1.26 daniel 6237: *
6238: * [39] element ::= EmptyElemTag | STag content ETag
6239: *
1.98 daniel 6240: * [ WFC: Element Type Match ]
6241: * The Name in an element's end-tag must match the element type in the
6242: * start-tag.
6243: *
6244: * [ VC: Element Valid ]
1.117 daniel 6245: * An element is valid if there is a declaration matching elementdecl
1.99 daniel 6246: * where the Name matches the element type and one of the following holds:
6247: * - The declaration matches EMPTY and the element has no content.
6248: * - The declaration matches children and the sequence of child elements
6249: * belongs to the language generated by the regular expression in the
6250: * content model, with optional white space (characters matching the
6251: * nonterminal S) between each pair of child elements.
6252: * - The declaration matches Mixed and the content consists of character
6253: * data and child elements whose types match names in the content model.
6254: * - The declaration matches ANY, and the types of any child elements have
6255: * been declared.
1.2 veillard 6256: */
1.26 daniel 6257:
1.72 daniel 6258: void
1.69 daniel 6259: xmlParseElement(xmlParserCtxtPtr ctxt) {
1.123 daniel 6260: const xmlChar *openTag = CUR_PTR;
6261: xmlChar *name;
1.140 daniel 6262: xmlChar *oldname;
1.32 daniel 6263: xmlParserNodeInfo node_info;
1.118 daniel 6264: xmlNodePtr ret;
1.2 veillard 6265:
1.32 daniel 6266: /* Capture start position */
1.118 daniel 6267: if (ctxt->record_info) {
6268: node_info.begin_pos = ctxt->input->consumed +
6269: (CUR_PTR - ctxt->input->base);
6270: node_info.begin_line = ctxt->input->line;
6271: }
1.32 daniel 6272:
1.176 daniel 6273: if (ctxt->spaceNr == 0)
6274: spacePush(ctxt, -1);
6275: else
6276: spacePush(ctxt, *ctxt->space);
6277:
1.83 daniel 6278: name = xmlParseStartTag(ctxt);
6279: if (name == NULL) {
1.176 daniel 6280: spacePop(ctxt);
1.83 daniel 6281: return;
6282: }
1.140 daniel 6283: namePush(ctxt, name);
1.118 daniel 6284: ret = ctxt->node;
1.2 veillard 6285:
6286: /*
1.99 daniel 6287: * [ VC: Root Element Type ]
6288: * The Name in the document type declaration must match the element
6289: * type of the root element.
6290: */
1.105 daniel 6291: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 6292: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.102 daniel 6293: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
1.99 daniel 6294:
6295: /*
1.2 veillard 6296: * Check for an Empty Element.
6297: */
1.152 daniel 6298: if ((RAW == '/') && (NXT(1) == '>')) {
1.40 daniel 6299: SKIP(2);
1.171 daniel 6300: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6301: (!ctxt->disableSAX))
1.83 daniel 6302: ctxt->sax->endElement(ctxt->userData, name);
1.140 daniel 6303: oldname = namePop(ctxt);
1.176 daniel 6304: spacePop(ctxt);
1.140 daniel 6305: if (oldname != NULL) {
6306: #ifdef DEBUG_STACK
6307: fprintf(stderr,"Close: popped %s\n", oldname);
6308: #endif
6309: xmlFree(oldname);
1.211 veillard 6310: }
6311: if ( ret != NULL && ctxt->record_info ) {
6312: node_info.end_pos = ctxt->input->consumed +
6313: (CUR_PTR - ctxt->input->base);
6314: node_info.end_line = ctxt->input->line;
6315: node_info.node = ret;
6316: xmlParserAddNodeInfo(ctxt, &node_info);
1.140 daniel 6317: }
1.72 daniel 6318: return;
1.2 veillard 6319: }
1.152 daniel 6320: if (RAW == '>') {
1.91 daniel 6321: NEXT;
6322: } else {
1.230 veillard 6323: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.55 daniel 6324: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6325: ctxt->sax->error(ctxt->userData,
6326: "Couldn't find end of Start Tag\n%.30s\n",
1.57 daniel 6327: openTag);
1.59 daniel 6328: ctxt->wellFormed = 0;
1.180 daniel 6329: ctxt->disableSAX = 1;
1.45 daniel 6330:
6331: /*
6332: * end of parsing of this node.
6333: */
6334: nodePop(ctxt);
1.140 daniel 6335: oldname = namePop(ctxt);
1.176 daniel 6336: spacePop(ctxt);
1.140 daniel 6337: if (oldname != NULL) {
6338: #ifdef DEBUG_STACK
6339: fprintf(stderr,"Close: popped %s\n", oldname);
6340: #endif
6341: xmlFree(oldname);
6342: }
1.118 daniel 6343:
6344: /*
6345: * Capture end position and add node
6346: */
6347: if ( ret != NULL && ctxt->record_info ) {
6348: node_info.end_pos = ctxt->input->consumed +
6349: (CUR_PTR - ctxt->input->base);
6350: node_info.end_line = ctxt->input->line;
6351: node_info.node = ret;
6352: xmlParserAddNodeInfo(ctxt, &node_info);
6353: }
1.72 daniel 6354: return;
1.2 veillard 6355: }
6356:
6357: /*
6358: * Parse the content of the element:
6359: */
1.45 daniel 6360: xmlParseContent(ctxt);
1.153 daniel 6361: if (!IS_CHAR(RAW)) {
1.230 veillard 6362: ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
1.55 daniel 6363: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6364: ctxt->sax->error(ctxt->userData,
1.57 daniel 6365: "Premature end of data in tag %.30s\n", openTag);
1.59 daniel 6366: ctxt->wellFormed = 0;
1.180 daniel 6367: ctxt->disableSAX = 1;
1.45 daniel 6368:
6369: /*
6370: * end of parsing of this node.
6371: */
6372: nodePop(ctxt);
1.140 daniel 6373: oldname = namePop(ctxt);
1.176 daniel 6374: spacePop(ctxt);
1.140 daniel 6375: if (oldname != NULL) {
6376: #ifdef DEBUG_STACK
6377: fprintf(stderr,"Close: popped %s\n", oldname);
6378: #endif
6379: xmlFree(oldname);
6380: }
1.72 daniel 6381: return;
1.2 veillard 6382: }
6383:
6384: /*
1.27 daniel 6385: * parse the end of tag: '</' should be here.
1.2 veillard 6386: */
1.140 daniel 6387: xmlParseEndTag(ctxt);
1.118 daniel 6388:
6389: /*
6390: * Capture end position and add node
6391: */
6392: if ( ret != NULL && ctxt->record_info ) {
6393: node_info.end_pos = ctxt->input->consumed +
6394: (CUR_PTR - ctxt->input->base);
6395: node_info.end_line = ctxt->input->line;
6396: node_info.node = ret;
6397: xmlParserAddNodeInfo(ctxt, &node_info);
6398: }
1.2 veillard 6399: }
6400:
1.50 daniel 6401: /**
6402: * xmlParseVersionNum:
6403: * @ctxt: an XML parser context
6404: *
6405: * parse the XML version value.
1.29 daniel 6406: *
6407: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
1.68 daniel 6408: *
6409: * Returns the string giving the XML version number, or NULL
1.29 daniel 6410: */
1.123 daniel 6411: xmlChar *
1.55 daniel 6412: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.135 daniel 6413: xmlChar *buf = NULL;
6414: int len = 0;
6415: int size = 10;
6416: xmlChar cur;
1.29 daniel 6417:
1.135 daniel 6418: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6419: if (buf == NULL) {
6420: fprintf(stderr, "malloc of %d byte failed\n", size);
6421: return(NULL);
6422: }
6423: cur = CUR;
1.152 daniel 6424: while (((cur >= 'a') && (cur <= 'z')) ||
6425: ((cur >= 'A') && (cur <= 'Z')) ||
6426: ((cur >= '0') && (cur <= '9')) ||
6427: (cur == '_') || (cur == '.') ||
6428: (cur == ':') || (cur == '-')) {
1.135 daniel 6429: if (len + 1 >= size) {
6430: size *= 2;
1.204 veillard 6431: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 6432: if (buf == NULL) {
6433: fprintf(stderr, "realloc of %d byte failed\n", size);
6434: return(NULL);
6435: }
6436: }
6437: buf[len++] = cur;
6438: NEXT;
6439: cur=CUR;
6440: }
6441: buf[len] = 0;
6442: return(buf);
1.29 daniel 6443: }
6444:
1.50 daniel 6445: /**
6446: * xmlParseVersionInfo:
6447: * @ctxt: an XML parser context
6448: *
6449: * parse the XML version.
1.29 daniel 6450: *
6451: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6452: *
6453: * [25] Eq ::= S? '=' S?
1.50 daniel 6454: *
1.68 daniel 6455: * Returns the version string, e.g. "1.0"
1.29 daniel 6456: */
6457:
1.123 daniel 6458: xmlChar *
1.55 daniel 6459: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
1.123 daniel 6460: xmlChar *version = NULL;
6461: const xmlChar *q;
1.29 daniel 6462:
1.152 daniel 6463: if ((RAW == 'v') && (NXT(1) == 'e') &&
1.40 daniel 6464: (NXT(2) == 'r') && (NXT(3) == 's') &&
6465: (NXT(4) == 'i') && (NXT(5) == 'o') &&
6466: (NXT(6) == 'n')) {
6467: SKIP(7);
1.42 daniel 6468: SKIP_BLANKS;
1.152 daniel 6469: if (RAW != '=') {
1.230 veillard 6470: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.55 daniel 6471: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6472: ctxt->sax->error(ctxt->userData,
6473: "xmlParseVersionInfo : expected '='\n");
1.59 daniel 6474: ctxt->wellFormed = 0;
1.180 daniel 6475: ctxt->disableSAX = 1;
1.31 daniel 6476: return(NULL);
6477: }
1.40 daniel 6478: NEXT;
1.42 daniel 6479: SKIP_BLANKS;
1.152 daniel 6480: if (RAW == '"') {
1.40 daniel 6481: NEXT;
6482: q = CUR_PTR;
1.29 daniel 6483: version = xmlParseVersionNum(ctxt);
1.152 daniel 6484: if (RAW != '"') {
1.230 veillard 6485: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 6486: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6487: ctxt->sax->error(ctxt->userData,
6488: "String not closed\n%.50s\n", q);
1.59 daniel 6489: ctxt->wellFormed = 0;
1.180 daniel 6490: ctxt->disableSAX = 1;
1.55 daniel 6491: } else
1.40 daniel 6492: NEXT;
1.152 daniel 6493: } else if (RAW == '\''){
1.40 daniel 6494: NEXT;
6495: q = CUR_PTR;
1.29 daniel 6496: version = xmlParseVersionNum(ctxt);
1.152 daniel 6497: if (RAW != '\'') {
1.230 veillard 6498: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 6499: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6500: ctxt->sax->error(ctxt->userData,
6501: "String not closed\n%.50s\n", q);
1.59 daniel 6502: ctxt->wellFormed = 0;
1.180 daniel 6503: ctxt->disableSAX = 1;
1.55 daniel 6504: } else
1.40 daniel 6505: NEXT;
1.31 daniel 6506: } else {
1.230 veillard 6507: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.55 daniel 6508: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6509: ctxt->sax->error(ctxt->userData,
1.59 daniel 6510: "xmlParseVersionInfo : expected ' or \"\n");
1.122 daniel 6511: ctxt->wellFormed = 0;
1.180 daniel 6512: ctxt->disableSAX = 1;
1.29 daniel 6513: }
6514: }
6515: return(version);
6516: }
6517:
1.50 daniel 6518: /**
6519: * xmlParseEncName:
6520: * @ctxt: an XML parser context
6521: *
6522: * parse the XML encoding name
1.29 daniel 6523: *
6524: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1.50 daniel 6525: *
1.68 daniel 6526: * Returns the encoding name value or NULL
1.29 daniel 6527: */
1.123 daniel 6528: xmlChar *
1.55 daniel 6529: xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.135 daniel 6530: xmlChar *buf = NULL;
6531: int len = 0;
6532: int size = 10;
6533: xmlChar cur;
1.29 daniel 6534:
1.135 daniel 6535: cur = CUR;
6536: if (((cur >= 'a') && (cur <= 'z')) ||
6537: ((cur >= 'A') && (cur <= 'Z'))) {
6538: buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6539: if (buf == NULL) {
6540: fprintf(stderr, "malloc of %d byte failed\n", size);
6541: return(NULL);
6542: }
6543:
6544: buf[len++] = cur;
1.40 daniel 6545: NEXT;
1.135 daniel 6546: cur = CUR;
1.152 daniel 6547: while (((cur >= 'a') && (cur <= 'z')) ||
6548: ((cur >= 'A') && (cur <= 'Z')) ||
6549: ((cur >= '0') && (cur <= '9')) ||
6550: (cur == '.') || (cur == '_') ||
6551: (cur == '-')) {
1.135 daniel 6552: if (len + 1 >= size) {
6553: size *= 2;
1.204 veillard 6554: buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1.135 daniel 6555: if (buf == NULL) {
6556: fprintf(stderr, "realloc of %d byte failed\n", size);
6557: return(NULL);
6558: }
6559: }
6560: buf[len++] = cur;
6561: NEXT;
6562: cur = CUR;
6563: if (cur == 0) {
6564: SHRINK;
6565: GROW;
6566: cur = CUR;
6567: }
6568: }
6569: buf[len] = 0;
1.29 daniel 6570: } else {
1.230 veillard 6571: ctxt->errNo = XML_ERR_ENCODING_NAME;
1.55 daniel 6572: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6573: ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
1.59 daniel 6574: ctxt->wellFormed = 0;
1.180 daniel 6575: ctxt->disableSAX = 1;
1.29 daniel 6576: }
1.135 daniel 6577: return(buf);
1.29 daniel 6578: }
6579:
1.50 daniel 6580: /**
6581: * xmlParseEncodingDecl:
6582: * @ctxt: an XML parser context
6583: *
6584: * parse the XML encoding declaration
1.29 daniel 6585: *
6586: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
1.50 daniel 6587: *
1.229 veillard 6588: * this setups the conversion filters.
1.50 daniel 6589: *
1.68 daniel 6590: * Returns the encoding value or NULL
1.29 daniel 6591: */
6592:
1.123 daniel 6593: xmlChar *
1.55 daniel 6594: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 6595: xmlChar *encoding = NULL;
6596: const xmlChar *q;
1.29 daniel 6597:
1.42 daniel 6598: SKIP_BLANKS;
1.152 daniel 6599: if ((RAW == 'e') && (NXT(1) == 'n') &&
1.40 daniel 6600: (NXT(2) == 'c') && (NXT(3) == 'o') &&
6601: (NXT(4) == 'd') && (NXT(5) == 'i') &&
6602: (NXT(6) == 'n') && (NXT(7) == 'g')) {
6603: SKIP(8);
1.42 daniel 6604: SKIP_BLANKS;
1.152 daniel 6605: if (RAW != '=') {
1.230 veillard 6606: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.55 daniel 6607: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6608: ctxt->sax->error(ctxt->userData,
6609: "xmlParseEncodingDecl : expected '='\n");
1.59 daniel 6610: ctxt->wellFormed = 0;
1.180 daniel 6611: ctxt->disableSAX = 1;
1.31 daniel 6612: return(NULL);
6613: }
1.40 daniel 6614: NEXT;
1.42 daniel 6615: SKIP_BLANKS;
1.152 daniel 6616: if (RAW == '"') {
1.40 daniel 6617: NEXT;
6618: q = CUR_PTR;
1.29 daniel 6619: encoding = xmlParseEncName(ctxt);
1.152 daniel 6620: if (RAW != '"') {
1.230 veillard 6621: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 6622: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6623: ctxt->sax->error(ctxt->userData,
6624: "String not closed\n%.50s\n", q);
1.59 daniel 6625: ctxt->wellFormed = 0;
1.180 daniel 6626: ctxt->disableSAX = 1;
1.55 daniel 6627: } else
1.40 daniel 6628: NEXT;
1.152 daniel 6629: } else if (RAW == '\''){
1.40 daniel 6630: NEXT;
6631: q = CUR_PTR;
1.29 daniel 6632: encoding = xmlParseEncName(ctxt);
1.152 daniel 6633: if (RAW != '\'') {
1.230 veillard 6634: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 6635: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6636: ctxt->sax->error(ctxt->userData,
6637: "String not closed\n%.50s\n", q);
1.59 daniel 6638: ctxt->wellFormed = 0;
1.180 daniel 6639: ctxt->disableSAX = 1;
1.55 daniel 6640: } else
1.40 daniel 6641: NEXT;
1.152 daniel 6642: } else if (RAW == '"'){
1.230 veillard 6643: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.55 daniel 6644: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6645: ctxt->sax->error(ctxt->userData,
1.59 daniel 6646: "xmlParseEncodingDecl : expected ' or \"\n");
6647: ctxt->wellFormed = 0;
1.180 daniel 6648: ctxt->disableSAX = 1;
1.29 daniel 6649: }
1.193 daniel 6650: if (encoding != NULL) {
6651: xmlCharEncoding enc;
6652: xmlCharEncodingHandlerPtr handler;
6653:
1.195 daniel 6654: if (ctxt->input->encoding != NULL)
6655: xmlFree((xmlChar *) ctxt->input->encoding);
6656: ctxt->input->encoding = encoding;
6657:
1.193 daniel 6658: enc = xmlParseCharEncoding((const char *) encoding);
6659: /*
6660: * registered set of known encodings
6661: */
6662: if (enc != XML_CHAR_ENCODING_ERROR) {
6663: xmlSwitchEncoding(ctxt, enc);
6664: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6665: xmlFree(encoding);
6666: return(NULL);
6667: }
6668: } else {
6669: /*
6670: * fallback for unknown encodings
6671: */
6672: handler = xmlFindCharEncodingHandler((const char *) encoding);
6673: if (handler != NULL) {
6674: xmlSwitchToEncoding(ctxt, handler);
6675: } else {
6676: ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
1.208 veillard 6677: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6678: ctxt->sax->error(ctxt->userData,
6679: "Unsupported encoding %s\n", encoding);
1.193 daniel 6680: return(NULL);
6681: }
6682: }
6683: }
1.29 daniel 6684: }
6685: return(encoding);
6686: }
6687:
1.50 daniel 6688: /**
6689: * xmlParseSDDecl:
6690: * @ctxt: an XML parser context
6691: *
6692: * parse the XML standalone declaration
1.29 daniel 6693: *
6694: * [32] SDDecl ::= S 'standalone' Eq
6695: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
1.99 daniel 6696: *
6697: * [ VC: Standalone Document Declaration ]
6698: * TODO The standalone document declaration must have the value "no"
6699: * if any external markup declarations contain declarations of:
6700: * - attributes with default values, if elements to which these
6701: * attributes apply appear in the document without specifications
6702: * of values for these attributes, or
6703: * - entities (other than amp, lt, gt, apos, quot), if references
6704: * to those entities appear in the document, or
6705: * - attributes with values subject to normalization, where the
6706: * attribute appears in the document with a value which will change
6707: * as a result of normalization, or
6708: * - element types with element content, if white space occurs directly
6709: * within any instance of those types.
1.68 daniel 6710: *
6711: * Returns 1 if standalone, 0 otherwise
1.29 daniel 6712: */
6713:
1.55 daniel 6714: int
6715: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 6716: int standalone = -1;
6717:
1.42 daniel 6718: SKIP_BLANKS;
1.152 daniel 6719: if ((RAW == 's') && (NXT(1) == 't') &&
1.40 daniel 6720: (NXT(2) == 'a') && (NXT(3) == 'n') &&
6721: (NXT(4) == 'd') && (NXT(5) == 'a') &&
6722: (NXT(6) == 'l') && (NXT(7) == 'o') &&
6723: (NXT(8) == 'n') && (NXT(9) == 'e')) {
6724: SKIP(10);
1.81 daniel 6725: SKIP_BLANKS;
1.152 daniel 6726: if (RAW != '=') {
1.230 veillard 6727: ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
1.55 daniel 6728: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6729: ctxt->sax->error(ctxt->userData,
1.59 daniel 6730: "XML standalone declaration : expected '='\n");
6731: ctxt->wellFormed = 0;
1.180 daniel 6732: ctxt->disableSAX = 1;
1.32 daniel 6733: return(standalone);
6734: }
1.40 daniel 6735: NEXT;
1.42 daniel 6736: SKIP_BLANKS;
1.152 daniel 6737: if (RAW == '\''){
1.40 daniel 6738: NEXT;
1.152 daniel 6739: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 6740: standalone = 0;
1.40 daniel 6741: SKIP(2);
1.152 daniel 6742: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 6743: (NXT(2) == 's')) {
1.29 daniel 6744: standalone = 1;
1.40 daniel 6745: SKIP(3);
1.29 daniel 6746: } else {
1.230 veillard 6747: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.55 daniel 6748: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6749: ctxt->sax->error(ctxt->userData,
6750: "standalone accepts only 'yes' or 'no'\n");
1.59 daniel 6751: ctxt->wellFormed = 0;
1.180 daniel 6752: ctxt->disableSAX = 1;
1.29 daniel 6753: }
1.152 daniel 6754: if (RAW != '\'') {
1.230 veillard 6755: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 6756: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6757: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 6758: ctxt->wellFormed = 0;
1.180 daniel 6759: ctxt->disableSAX = 1;
1.55 daniel 6760: } else
1.40 daniel 6761: NEXT;
1.152 daniel 6762: } else if (RAW == '"'){
1.40 daniel 6763: NEXT;
1.152 daniel 6764: if ((RAW == 'n') && (NXT(1) == 'o')) {
1.29 daniel 6765: standalone = 0;
1.40 daniel 6766: SKIP(2);
1.152 daniel 6767: } else if ((RAW == 'y') && (NXT(1) == 'e') &&
1.40 daniel 6768: (NXT(2) == 's')) {
1.29 daniel 6769: standalone = 1;
1.40 daniel 6770: SKIP(3);
1.29 daniel 6771: } else {
1.230 veillard 6772: ctxt->errNo = XML_ERR_STANDALONE_VALUE;
1.55 daniel 6773: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6774: ctxt->sax->error(ctxt->userData,
1.59 daniel 6775: "standalone accepts only 'yes' or 'no'\n");
6776: ctxt->wellFormed = 0;
1.180 daniel 6777: ctxt->disableSAX = 1;
1.29 daniel 6778: }
1.152 daniel 6779: if (RAW != '"') {
1.230 veillard 6780: ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
1.55 daniel 6781: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6782: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 6783: ctxt->wellFormed = 0;
1.180 daniel 6784: ctxt->disableSAX = 1;
1.55 daniel 6785: } else
1.40 daniel 6786: NEXT;
1.37 daniel 6787: } else {
1.230 veillard 6788: ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
1.55 daniel 6789: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6790: ctxt->sax->error(ctxt->userData,
6791: "Standalone value not found\n");
1.59 daniel 6792: ctxt->wellFormed = 0;
1.180 daniel 6793: ctxt->disableSAX = 1;
1.37 daniel 6794: }
1.29 daniel 6795: }
6796: return(standalone);
6797: }
6798:
1.50 daniel 6799: /**
6800: * xmlParseXMLDecl:
6801: * @ctxt: an XML parser context
6802: *
6803: * parse an XML declaration header
1.29 daniel 6804: *
6805: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 6806: */
6807:
1.55 daniel 6808: void
6809: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.123 daniel 6810: xmlChar *version;
1.1 veillard 6811:
6812: /*
1.19 daniel 6813: * We know that '<?xml' is here.
1.1 veillard 6814: */
1.40 daniel 6815: SKIP(5);
1.1 veillard 6816:
1.153 daniel 6817: if (!IS_BLANK(RAW)) {
1.230 veillard 6818: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 6819: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6820: ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
1.59 daniel 6821: ctxt->wellFormed = 0;
1.180 daniel 6822: ctxt->disableSAX = 1;
1.59 daniel 6823: }
1.42 daniel 6824: SKIP_BLANKS;
1.1 veillard 6825:
6826: /*
1.29 daniel 6827: * We should have the VersionInfo here.
1.1 veillard 6828: */
1.29 daniel 6829: version = xmlParseVersionInfo(ctxt);
6830: if (version == NULL)
1.45 daniel 6831: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.72 daniel 6832: ctxt->version = xmlStrdup(version);
1.119 daniel 6833: xmlFree(version);
1.29 daniel 6834:
6835: /*
6836: * We may have the encoding declaration
6837: */
1.153 daniel 6838: if (!IS_BLANK(RAW)) {
1.152 daniel 6839: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 6840: SKIP(2);
6841: return;
6842: }
1.230 veillard 6843: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 6844: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6845: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 6846: ctxt->wellFormed = 0;
1.180 daniel 6847: ctxt->disableSAX = 1;
1.59 daniel 6848: }
1.195 daniel 6849: xmlParseEncodingDecl(ctxt);
1.193 daniel 6850: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6851: /*
6852: * The XML REC instructs us to stop parsing right here
6853: */
6854: return;
6855: }
1.1 veillard 6856:
6857: /*
1.29 daniel 6858: * We may have the standalone status.
1.1 veillard 6859: */
1.164 daniel 6860: if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
1.152 daniel 6861: if ((RAW == '?') && (NXT(1) == '>')) {
1.59 daniel 6862: SKIP(2);
6863: return;
6864: }
1.230 veillard 6865: ctxt->errNo = XML_ERR_SPACE_REQUIRED;
1.59 daniel 6866: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6867: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 6868: ctxt->wellFormed = 0;
1.180 daniel 6869: ctxt->disableSAX = 1;
1.59 daniel 6870: }
6871: SKIP_BLANKS;
1.167 daniel 6872: ctxt->input->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 6873:
1.42 daniel 6874: SKIP_BLANKS;
1.152 daniel 6875: if ((RAW == '?') && (NXT(1) == '>')) {
1.40 daniel 6876: SKIP(2);
1.152 daniel 6877: } else if (RAW == '>') {
1.31 daniel 6878: /* Deprecated old WD ... */
1.230 veillard 6879: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.55 daniel 6880: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6881: ctxt->sax->error(ctxt->userData,
6882: "XML declaration must end-up with '?>'\n");
1.59 daniel 6883: ctxt->wellFormed = 0;
1.180 daniel 6884: ctxt->disableSAX = 1;
1.40 daniel 6885: NEXT;
1.29 daniel 6886: } else {
1.230 veillard 6887: ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
1.55 daniel 6888: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.122 daniel 6889: ctxt->sax->error(ctxt->userData,
6890: "parsing XML declaration: '?>' expected\n");
1.59 daniel 6891: ctxt->wellFormed = 0;
1.180 daniel 6892: ctxt->disableSAX = 1;
1.40 daniel 6893: MOVETO_ENDTAG(CUR_PTR);
6894: NEXT;
1.29 daniel 6895: }
1.1 veillard 6896: }
6897:
1.50 daniel 6898: /**
6899: * xmlParseMisc:
6900: * @ctxt: an XML parser context
6901: *
6902: * parse an XML Misc* optionnal field.
1.21 daniel 6903: *
1.22 daniel 6904: * [27] Misc ::= Comment | PI | S
1.1 veillard 6905: */
6906:
1.55 daniel 6907: void
6908: xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.152 daniel 6909: while (((RAW == '<') && (NXT(1) == '?')) ||
6910: ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 6911: (NXT(2) == '-') && (NXT(3) == '-')) ||
6912: IS_BLANK(CUR)) {
1.152 daniel 6913: if ((RAW == '<') && (NXT(1) == '?')) {
1.16 daniel 6914: xmlParsePI(ctxt);
1.40 daniel 6915: } else if (IS_BLANK(CUR)) {
6916: NEXT;
1.1 veillard 6917: } else
1.114 daniel 6918: xmlParseComment(ctxt);
1.1 veillard 6919: }
6920: }
6921:
1.50 daniel 6922: /**
1.181 daniel 6923: * xmlParseDocument:
1.50 daniel 6924: * @ctxt: an XML parser context
6925: *
6926: * parse an XML document (and build a tree if using the standard SAX
6927: * interface).
1.21 daniel 6928: *
1.22 daniel 6929: * [1] document ::= prolog element Misc*
1.29 daniel 6930: *
6931: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.50 daniel 6932: *
1.68 daniel 6933: * Returns 0, -1 in case of error. the parser context is augmented
1.50 daniel 6934: * as a result of the parsing.
1.1 veillard 6935: */
6936:
1.55 daniel 6937: int
6938: xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.156 daniel 6939: xmlChar start[4];
6940: xmlCharEncoding enc;
6941:
1.235 veillard 6942: xmlInitParser();
1.45 daniel 6943:
1.91 daniel 6944: GROW;
6945:
1.14 veillard 6946: /*
1.44 daniel 6947: * SAX: beginning of the document processing.
6948: */
1.72 daniel 6949: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
1.74 daniel 6950: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
1.44 daniel 6951:
1.156 daniel 6952: /*
6953: * Get the 4 first bytes and decode the charset
6954: * if enc != XML_CHAR_ENCODING_NONE
6955: * plug some encoding conversion routines.
6956: */
6957: start[0] = RAW;
6958: start[1] = NXT(1);
6959: start[2] = NXT(2);
6960: start[3] = NXT(3);
6961: enc = xmlDetectCharEncoding(start, 4);
6962: if (enc != XML_CHAR_ENCODING_NONE) {
6963: xmlSwitchEncoding(ctxt, enc);
6964: }
6965:
1.1 veillard 6966:
1.59 daniel 6967: if (CUR == 0) {
1.230 veillard 6968: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 6969: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6970: ctxt->sax->error(ctxt->userData, "Document is empty\n");
1.59 daniel 6971: ctxt->wellFormed = 0;
1.180 daniel 6972: ctxt->disableSAX = 1;
1.59 daniel 6973: }
1.1 veillard 6974:
6975: /*
6976: * Check for the XMLDecl in the Prolog.
6977: */
1.91 daniel 6978: GROW;
1.152 daniel 6979: if ((RAW == '<') && (NXT(1) == '?') &&
1.40 daniel 6980: (NXT(2) == 'x') && (NXT(3) == 'm') &&
1.142 daniel 6981: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
1.196 daniel 6982:
6983: /*
6984: * Note that we will switch encoding on the fly.
6985: */
1.19 daniel 6986: xmlParseXMLDecl(ctxt);
1.193 daniel 6987: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6988: /*
6989: * The XML REC instructs us to stop parsing right here
6990: */
6991: return(-1);
6992: }
1.167 daniel 6993: ctxt->standalone = ctxt->input->standalone;
1.42 daniel 6994: SKIP_BLANKS;
1.1 veillard 6995: } else {
1.72 daniel 6996: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.1 veillard 6997: }
1.171 daniel 6998: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
1.74 daniel 6999: ctxt->sax->startDocument(ctxt->userData);
1.1 veillard 7000:
7001: /*
7002: * The Misc part of the Prolog
7003: */
1.91 daniel 7004: GROW;
1.16 daniel 7005: xmlParseMisc(ctxt);
1.1 veillard 7006:
7007: /*
1.29 daniel 7008: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 7009: * (doctypedecl Misc*)?
7010: */
1.91 daniel 7011: GROW;
1.152 daniel 7012: if ((RAW == '<') && (NXT(1) == '!') &&
1.40 daniel 7013: (NXT(2) == 'D') && (NXT(3) == 'O') &&
7014: (NXT(4) == 'C') && (NXT(5) == 'T') &&
7015: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7016: (NXT(8) == 'E')) {
1.165 daniel 7017:
1.166 daniel 7018: ctxt->inSubset = 1;
1.22 daniel 7019: xmlParseDocTypeDecl(ctxt);
1.152 daniel 7020: if (RAW == '[') {
1.140 daniel 7021: ctxt->instate = XML_PARSER_DTD;
7022: xmlParseInternalSubset(ctxt);
7023: }
1.165 daniel 7024:
7025: /*
7026: * Create and update the external subset.
7027: */
1.166 daniel 7028: ctxt->inSubset = 2;
1.171 daniel 7029: if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7030: (!ctxt->disableSAX))
1.165 daniel 7031: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7032: ctxt->extSubSystem, ctxt->extSubURI);
1.166 daniel 7033: ctxt->inSubset = 0;
1.165 daniel 7034:
7035:
1.96 daniel 7036: ctxt->instate = XML_PARSER_PROLOG;
1.22 daniel 7037: xmlParseMisc(ctxt);
1.21 daniel 7038: }
7039:
7040: /*
7041: * Time to start parsing the tree itself
1.1 veillard 7042: */
1.91 daniel 7043: GROW;
1.152 daniel 7044: if (RAW != '<') {
1.230 veillard 7045: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.59 daniel 7046: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 7047: ctxt->sax->error(ctxt->userData,
1.151 daniel 7048: "Start tag expected, '<' not found\n");
1.59 daniel 7049: ctxt->wellFormed = 0;
1.180 daniel 7050: ctxt->disableSAX = 1;
1.140 daniel 7051: ctxt->instate = XML_PARSER_EOF;
7052: } else {
7053: ctxt->instate = XML_PARSER_CONTENT;
7054: xmlParseElement(ctxt);
7055: ctxt->instate = XML_PARSER_EPILOG;
7056:
7057:
7058: /*
7059: * The Misc part at the end
7060: */
7061: xmlParseMisc(ctxt);
7062:
1.152 daniel 7063: if (RAW != 0) {
1.230 veillard 7064: ctxt->errNo = XML_ERR_DOCUMENT_END;
1.140 daniel 7065: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7066: ctxt->sax->error(ctxt->userData,
7067: "Extra content at the end of the document\n");
7068: ctxt->wellFormed = 0;
1.180 daniel 7069: ctxt->disableSAX = 1;
1.140 daniel 7070: }
7071: ctxt->instate = XML_PARSER_EOF;
1.59 daniel 7072: }
7073:
1.44 daniel 7074: /*
7075: * SAX: end of the document processing.
7076: */
1.171 daniel 7077: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7078: (!ctxt->disableSAX))
1.74 daniel 7079: ctxt->sax->endDocument(ctxt->userData);
1.151 daniel 7080:
1.59 daniel 7081: if (! ctxt->wellFormed) return(-1);
1.16 daniel 7082: return(0);
7083: }
7084:
1.229 veillard 7085: /**
7086: * xmlParseExtParsedEnt:
7087: * @ctxt: an XML parser context
7088: *
7089: * parse a genreral parsed entity
7090: * An external general parsed entity is well-formed if it matches the
7091: * production labeled extParsedEnt.
7092: *
7093: * [78] extParsedEnt ::= TextDecl? content
7094: *
7095: * Returns 0, -1 in case of error. the parser context is augmented
7096: * as a result of the parsing.
7097: */
7098:
7099: int
7100: xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7101: xmlChar start[4];
7102: xmlCharEncoding enc;
7103:
7104: xmlDefaultSAXHandlerInit();
7105:
7106: GROW;
7107:
7108: /*
7109: * SAX: beginning of the document processing.
7110: */
7111: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7112: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7113:
7114: /*
7115: * Get the 4 first bytes and decode the charset
7116: * if enc != XML_CHAR_ENCODING_NONE
7117: * plug some encoding conversion routines.
7118: */
7119: start[0] = RAW;
7120: start[1] = NXT(1);
7121: start[2] = NXT(2);
7122: start[3] = NXT(3);
7123: enc = xmlDetectCharEncoding(start, 4);
7124: if (enc != XML_CHAR_ENCODING_NONE) {
7125: xmlSwitchEncoding(ctxt, enc);
7126: }
7127:
7128:
7129: if (CUR == 0) {
1.230 veillard 7130: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.229 veillard 7131: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7132: ctxt->sax->error(ctxt->userData, "Document is empty\n");
7133: ctxt->wellFormed = 0;
7134: ctxt->disableSAX = 1;
7135: }
7136:
7137: /*
7138: * Check for the XMLDecl in the Prolog.
7139: */
7140: GROW;
7141: if ((RAW == '<') && (NXT(1) == '?') &&
7142: (NXT(2) == 'x') && (NXT(3) == 'm') &&
7143: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7144:
7145: /*
7146: * Note that we will switch encoding on the fly.
7147: */
7148: xmlParseXMLDecl(ctxt);
7149: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7150: /*
7151: * The XML REC instructs us to stop parsing right here
7152: */
7153: return(-1);
7154: }
7155: SKIP_BLANKS;
7156: } else {
7157: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7158: }
7159: if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7160: ctxt->sax->startDocument(ctxt->userData);
7161:
7162: /*
7163: * Doing validity checking on chunk doesn't make sense
7164: */
7165: ctxt->instate = XML_PARSER_CONTENT;
7166: ctxt->validate = 0;
7167: ctxt->depth = 0;
7168:
7169: xmlParseContent(ctxt);
7170:
7171: if ((RAW == '<') && (NXT(1) == '/')) {
1.230 veillard 7172: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.229 veillard 7173: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7174: ctxt->sax->error(ctxt->userData,
7175: "chunk is not well balanced\n");
7176: ctxt->wellFormed = 0;
7177: ctxt->disableSAX = 1;
7178: } else if (RAW != 0) {
1.230 veillard 7179: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
1.229 veillard 7180: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7181: ctxt->sax->error(ctxt->userData,
7182: "extra content at the end of well balanced chunk\n");
7183: ctxt->wellFormed = 0;
7184: ctxt->disableSAX = 1;
7185: }
7186:
7187: /*
7188: * SAX: end of the document processing.
7189: */
7190: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7191: (!ctxt->disableSAX))
7192: ctxt->sax->endDocument(ctxt->userData);
7193:
7194: if (! ctxt->wellFormed) return(-1);
7195: return(0);
7196: }
7197:
1.98 daniel 7198: /************************************************************************
7199: * *
1.128 daniel 7200: * Progressive parsing interfaces *
7201: * *
7202: ************************************************************************/
7203:
7204: /**
7205: * xmlParseLookupSequence:
7206: * @ctxt: an XML parser context
7207: * @first: the first char to lookup
1.140 daniel 7208: * @next: the next char to lookup or zero
7209: * @third: the next char to lookup or zero
1.128 daniel 7210: *
1.140 daniel 7211: * Try to find if a sequence (first, next, third) or just (first next) or
7212: * (first) is available in the input stream.
7213: * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7214: * to avoid rescanning sequences of bytes, it DOES change the state of the
7215: * parser, do not use liberally.
1.128 daniel 7216: *
1.140 daniel 7217: * Returns the index to the current parsing point if the full sequence
7218: * is available, -1 otherwise.
1.128 daniel 7219: */
7220: int
1.140 daniel 7221: xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7222: xmlChar next, xmlChar third) {
7223: int base, len;
7224: xmlParserInputPtr in;
7225: const xmlChar *buf;
7226:
7227: in = ctxt->input;
7228: if (in == NULL) return(-1);
7229: base = in->cur - in->base;
7230: if (base < 0) return(-1);
7231: if (ctxt->checkIndex > base)
7232: base = ctxt->checkIndex;
7233: if (in->buf == NULL) {
7234: buf = in->base;
7235: len = in->length;
7236: } else {
7237: buf = in->buf->buffer->content;
7238: len = in->buf->buffer->use;
7239: }
7240: /* take into account the sequence length */
7241: if (third) len -= 2;
7242: else if (next) len --;
7243: for (;base < len;base++) {
7244: if (buf[base] == first) {
7245: if (third != 0) {
7246: if ((buf[base + 1] != next) ||
7247: (buf[base + 2] != third)) continue;
7248: } else if (next != 0) {
7249: if (buf[base + 1] != next) continue;
7250: }
7251: ctxt->checkIndex = 0;
7252: #ifdef DEBUG_PUSH
7253: if (next == 0)
7254: fprintf(stderr, "PP: lookup '%c' found at %d\n",
7255: first, base);
7256: else if (third == 0)
7257: fprintf(stderr, "PP: lookup '%c%c' found at %d\n",
7258: first, next, base);
7259: else
7260: fprintf(stderr, "PP: lookup '%c%c%c' found at %d\n",
7261: first, next, third, base);
7262: #endif
7263: return(base - (in->cur - in->base));
7264: }
7265: }
7266: ctxt->checkIndex = base;
7267: #ifdef DEBUG_PUSH
7268: if (next == 0)
7269: fprintf(stderr, "PP: lookup '%c' failed\n", first);
7270: else if (third == 0)
7271: fprintf(stderr, "PP: lookup '%c%c' failed\n", first, next);
7272: else
7273: fprintf(stderr, "PP: lookup '%c%c%c' failed\n", first, next, third);
7274: #endif
7275: return(-1);
1.128 daniel 7276: }
7277:
7278: /**
1.143 daniel 7279: * xmlParseTryOrFinish:
1.128 daniel 7280: * @ctxt: an XML parser context
1.143 daniel 7281: * @terminate: last chunk indicator
1.128 daniel 7282: *
7283: * Try to progress on parsing
 *
 * Runs the push-parser state machine selected by ctxt->instate for as
 * long as bytes are available in the current input. Unless @terminate
 * is set, several states first call xmlParseLookupSequence() to check
 * that the closing delimiter of the next construct is already buffered
 * and stop, waiting for more data, when it is not.
 *
 * NOTE: the returned counter is only modified in the XML_PARSER_START
 * state (leading blanks skipped, XML declaration seen); a few early
 * exits return 0 directly.
7284: *
7285: * Returns zero if no parsing was possible
7286: */
7287: int
1.143 daniel 7288: xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
1.128 daniel 7289: int ret = 0;
1.140 daniel 7290: int avail;
7291: xmlChar cur, next;
7292:
7293: #ifdef DEBUG_PUSH
7294: switch (ctxt->instate) {
7295: case XML_PARSER_EOF:
7296: fprintf(stderr, "PP: try EOF\n"); break;
7297: case XML_PARSER_START:
7298: fprintf(stderr, "PP: try START\n"); break;
7299: case XML_PARSER_MISC:
7300: fprintf(stderr, "PP: try MISC\n");break;
7301: case XML_PARSER_COMMENT:
7302: fprintf(stderr, "PP: try COMMENT\n");break;
7303: case XML_PARSER_PROLOG:
7304: fprintf(stderr, "PP: try PROLOG\n");break;
7305: case XML_PARSER_START_TAG:
7306: fprintf(stderr, "PP: try START_TAG\n");break;
7307: case XML_PARSER_CONTENT:
7308: fprintf(stderr, "PP: try CONTENT\n");break;
7309: case XML_PARSER_CDATA_SECTION:
7310: fprintf(stderr, "PP: try CDATA_SECTION\n");break;
7311: case XML_PARSER_END_TAG:
7312: fprintf(stderr, "PP: try END_TAG\n");break;
7313: case XML_PARSER_ENTITY_DECL:
7314: fprintf(stderr, "PP: try ENTITY_DECL\n");break;
7315: case XML_PARSER_ENTITY_VALUE:
7316: fprintf(stderr, "PP: try ENTITY_VALUE\n");break;
7317: case XML_PARSER_ATTRIBUTE_VALUE:
7318: fprintf(stderr, "PP: try ATTRIBUTE_VALUE\n");break;
7319: case XML_PARSER_DTD:
7320: fprintf(stderr, "PP: try DTD\n");break;
7321: case XML_PARSER_EPILOG:
7322: fprintf(stderr, "PP: try EPILOG\n");break;
7323: case XML_PARSER_PI:
7324: fprintf(stderr, "PP: try PI\n");break;
7325: }
7326: #endif
1.128 daniel 7327:
7328: while (1) {
1.140 daniel 7329: /*
7330: * Pop-up of finished entities.
7331: */
1.152 daniel 7332: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 7333: xmlPopInput(ctxt);
7334:
/*
 * Compute the number of bytes not yet consumed in the current input;
 * input->length is used when there is no input->buf.
 * NOTE(review): RAW above presumably reads through ctxt->input before
 * this NULL check - confirm against the macro definition.
 */
1.184 daniel 7335: if (ctxt->input ==NULL) break;
7336: if (ctxt->input->buf == NULL)
7337: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7338: else
1.184 daniel 7339: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7340: if (avail < 1)
7341: goto done;
/* Dispatch on the current state of the push parser. */
1.128 daniel 7342: switch (ctxt->instate) {
7343: case XML_PARSER_EOF:
1.140 daniel 7344: /*
7345: * Document parsing is done !
7346: */
7347: goto done;
7348: case XML_PARSER_START:
7349: /*
7350: * Very first chars read from the document flow.
7351: */
1.184 daniel 7352: cur = ctxt->input->cur[0];
1.140 daniel 7353: if (IS_BLANK(cur)) {
7354: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7355: ctxt->sax->setDocumentLocator(ctxt->userData,
7356: &xmlDefaultSAXLocator);
1.230 veillard 7357: ctxt->errNo = XML_ERR_DOCUMENT_START;
1.140 daniel 7358: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7359: ctxt->sax->error(ctxt->userData,
7360: "Extra spaces at the beginning of the document are not allowed\n");
7361: ctxt->wellFormed = 0;
1.180 daniel 7362: ctxt->disableSAX = 1;
1.140 daniel 7363: SKIP_BLANKS;
7364: ret++;
/* The blanks were consumed, so recompute what is left. */
1.184 daniel 7365: if (ctxt->input->buf == NULL)
7366: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7367: else
1.184 daniel 7368: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7369: }
7370: if (avail < 2)
7371: goto done;
7372:
1.184 daniel 7373: cur = ctxt->input->cur[0];
7374: next = ctxt->input->cur[1];
1.140 daniel 7375: if (cur == 0) {
7376: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7377: ctxt->sax->setDocumentLocator(ctxt->userData,
7378: &xmlDefaultSAXLocator);
1.230 veillard 7379: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.140 daniel 7380: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7381: ctxt->sax->error(ctxt->userData, "Document is empty\n");
7382: ctxt->wellFormed = 0;
1.180 daniel 7383: ctxt->disableSAX = 1;
1.140 daniel 7384: ctxt->instate = XML_PARSER_EOF;
7385: #ifdef DEBUG_PUSH
7386: fprintf(stderr, "PP: entering EOF\n");
7387: #endif
/*
 * NOTE(review): unlike the other endDocument call sites in this
 * function, this one does not test ctxt->disableSAX.
 */
7388: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7389: ctxt->sax->endDocument(ctxt->userData);
7390: goto done;
7391: }
7392: if ((cur == '<') && (next == '?')) {
7393: /* PI or XML decl */
7394: if (avail < 5) return(ret);
/* Not the last chunk: wait until the closing "?>" is buffered. */
1.143 daniel 7395: if ((!terminate) &&
7396: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 7397: return(ret);
7398: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7399: ctxt->sax->setDocumentLocator(ctxt->userData,
7400: &xmlDefaultSAXLocator);
1.184 daniel 7401: if ((ctxt->input->cur[2] == 'x') &&
7402: (ctxt->input->cur[3] == 'm') &&
7403: (ctxt->input->cur[4] == 'l') &&
7404: (IS_BLANK(ctxt->input->cur[5]))) {
1.140 daniel 7405: ret += 5;
7406: #ifdef DEBUG_PUSH
7407: fprintf(stderr, "PP: Parsing XML Decl\n");
7408: #endif
7409: xmlParseXMLDecl(ctxt);
1.193 daniel 7410: if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7411: /*
7412: * The XML REC instructs us to stop parsing right
7413: * here
7414: */
7415: ctxt->instate = XML_PARSER_EOF;
7416: return(0);
7417: }
1.167 daniel 7418: ctxt->standalone = ctxt->input->standalone;
1.164 daniel 7419: if ((ctxt->encoding == NULL) &&
7420: (ctxt->input->encoding != NULL))
7421: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1.171 daniel 7422: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7423: (!ctxt->disableSAX))
1.140 daniel 7424: ctxt->sax->startDocument(ctxt->userData);
7425: ctxt->instate = XML_PARSER_MISC;
7426: #ifdef DEBUG_PUSH
7427: fprintf(stderr, "PP: entering MISC\n");
7428: #endif
7429: } else {
/* "<?" that is not an XML declaration: use the default version. */
7430: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 daniel 7431: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7432: (!ctxt->disableSAX))
1.140 daniel 7433: ctxt->sax->startDocument(ctxt->userData);
7434: ctxt->instate = XML_PARSER_MISC;
7435: #ifdef DEBUG_PUSH
7436: fprintf(stderr, "PP: entering MISC\n");
7437: #endif
7438: }
7439: } else {
7440: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7441: ctxt->sax->setDocumentLocator(ctxt->userData,
7442: &xmlDefaultSAXLocator);
7443: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.171 daniel 7444: if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7445: (!ctxt->disableSAX))
1.140 daniel 7446: ctxt->sax->startDocument(ctxt->userData);
7447: ctxt->instate = XML_PARSER_MISC;
7448: #ifdef DEBUG_PUSH
7449: fprintf(stderr, "PP: entering MISC\n");
7450: #endif
7451: }
7452: break;
/*
 * MISC: PIs, comments or the DOCTYPE declaration; anything else
 * switches to START_TAG.
 */
7453: case XML_PARSER_MISC:
7454: SKIP_BLANKS;
1.184 daniel 7455: if (ctxt->input->buf == NULL)
7456: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7457: else
1.184 daniel 7458: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7459: if (avail < 2)
7460: goto done;
1.184 daniel 7461: cur = ctxt->input->cur[0];
7462: next = ctxt->input->cur[1];
1.140 daniel 7463: if ((cur == '<') && (next == '?')) {
1.143 daniel 7464: if ((!terminate) &&
7465: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 7466: goto done;
7467: #ifdef DEBUG_PUSH
7468: fprintf(stderr, "PP: Parsing PI\n");
7469: #endif
7470: xmlParsePI(ctxt);
7471: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 7472: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 7473: if ((!terminate) &&
7474: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 7475: goto done;
7476: #ifdef DEBUG_PUSH
7477: fprintf(stderr, "PP: Parsing Comment\n");
7478: #endif
7479: xmlParseComment(ctxt);
7480: ctxt->instate = XML_PARSER_MISC;
7481: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 7482: (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
7483: (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
7484: (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
7485: (ctxt->input->cur[8] == 'E')) {
1.143 daniel 7486: if ((!terminate) &&
7487: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 7488: goto done;
7489: #ifdef DEBUG_PUSH
7490: fprintf(stderr, "PP: Parsing internal subset\n");
7491: #endif
1.166 daniel 7492: ctxt->inSubset = 1;
1.140 daniel 7493: xmlParseDocTypeDecl(ctxt);
/* A '[' opens an internal subset, which the DTD state handles. */
1.152 daniel 7494: if (RAW == '[') {
1.140 daniel 7495: ctxt->instate = XML_PARSER_DTD;
7496: #ifdef DEBUG_PUSH
7497: fprintf(stderr, "PP: entering DTD\n");
7498: #endif
7499: } else {
1.166 daniel 7500: /*
7501: * Create and update the external subset.
7502: */
7503: ctxt->inSubset = 2;
1.171 daniel 7504: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 7505: (ctxt->sax->externalSubset != NULL))
7506: ctxt->sax->externalSubset(ctxt->userData,
7507: ctxt->intSubName, ctxt->extSubSystem,
7508: ctxt->extSubURI);
7509: ctxt->inSubset = 0;
1.140 daniel 7510: ctxt->instate = XML_PARSER_PROLOG;
7511: #ifdef DEBUG_PUSH
7512: fprintf(stderr, "PP: entering PROLOG\n");
7513: #endif
7514: }
7515: } else if ((cur == '<') && (next == '!') &&
7516: (avail < 9)) {
/* "<!" seen but fewer than 9 bytes available: wait for more data. */
7517: goto done;
7518: } else {
7519: ctxt->instate = XML_PARSER_START_TAG;
7520: #ifdef DEBUG_PUSH
7521: fprintf(stderr, "PP: entering START_TAG\n");
7522: #endif
7523: }
7524: break;
/*
 * PROLOG: PIs and comments only; anything else switches to START_TAG.
 */
1.128 daniel 7525: case XML_PARSER_PROLOG:
1.140 daniel 7526: SKIP_BLANKS;
1.184 daniel 7527: if (ctxt->input->buf == NULL)
7528: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7529: else
1.184 daniel 7530: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7531: if (avail < 2)
7532: goto done;
1.184 daniel 7533: cur = ctxt->input->cur[0];
7534: next = ctxt->input->cur[1];
1.140 daniel 7535: if ((cur == '<') && (next == '?')) {
1.143 daniel 7536: if ((!terminate) &&
7537: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 7538: goto done;
7539: #ifdef DEBUG_PUSH
7540: fprintf(stderr, "PP: Parsing PI\n");
7541: #endif
7542: xmlParsePI(ctxt);
7543: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 7544: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 7545: if ((!terminate) &&
7546: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 7547: goto done;
7548: #ifdef DEBUG_PUSH
7549: fprintf(stderr, "PP: Parsing Comment\n");
7550: #endif
7551: xmlParseComment(ctxt);
7552: ctxt->instate = XML_PARSER_PROLOG;
7553: } else if ((cur == '<') && (next == '!') &&
7554: (avail < 4)) {
7555: goto done;
7556: } else {
7557: ctxt->instate = XML_PARSER_START_TAG;
7558: #ifdef DEBUG_PUSH
7559: fprintf(stderr, "PP: entering START_TAG\n");
7560: #endif
7561: }
7562: break;
/*
 * EPILOG: PIs and comments only; anything else is reported as extra
 * content and ends the parse.
 */
7563: case XML_PARSER_EPILOG:
7564: SKIP_BLANKS;
1.184 daniel 7565: if (ctxt->input->buf == NULL)
7566: avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7567: else
1.184 daniel 7568: avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
1.140 daniel 7569: if (avail < 2)
7570: goto done;
1.184 daniel 7571: cur = ctxt->input->cur[0];
7572: next = ctxt->input->cur[1];
1.140 daniel 7573: if ((cur == '<') && (next == '?')) {
1.143 daniel 7574: if ((!terminate) &&
7575: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 7576: goto done;
7577: #ifdef DEBUG_PUSH
7578: fprintf(stderr, "PP: Parsing PI\n");
7579: #endif
7580: xmlParsePI(ctxt);
7581: ctxt->instate = XML_PARSER_EPILOG;
7582: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 7583: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 7584: if ((!terminate) &&
7585: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 7586: goto done;
7587: #ifdef DEBUG_PUSH
7588: fprintf(stderr, "PP: Parsing Comment\n");
7589: #endif
7590: xmlParseComment(ctxt);
7591: ctxt->instate = XML_PARSER_EPILOG;
7592: } else if ((cur == '<') && (next == '!') &&
7593: (avail < 4)) {
7594: goto done;
7595: } else {
1.230 veillard 7596: ctxt->errNo = XML_ERR_DOCUMENT_END;
1.140 daniel 7597: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7598: ctxt->sax->error(ctxt->userData,
7599: "Extra content at the end of the document\n");
7600: ctxt->wellFormed = 0;
1.180 daniel 7601: ctxt->disableSAX = 1;
1.140 daniel 7602: ctxt->instate = XML_PARSER_EOF;
7603: #ifdef DEBUG_PUSH
7604: fprintf(stderr, "PP: entering EOF\n");
7605: #endif
/* disableSAX was just set to 1, so this endDocument call cannot fire. */
1.171 daniel 7606: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7607: (!ctxt->disableSAX))
1.140 daniel 7608: ctxt->sax->endDocument(ctxt->userData);
7609: goto done;
7610: }
7611: break;
7612: case XML_PARSER_START_TAG: {
7613: xmlChar *name, *oldname;
7614:
1.184 daniel 7615: if ((avail < 2) && (ctxt->inputNr == 1))
1.140 daniel 7616: goto done;
1.184 daniel 7617: cur = ctxt->input->cur[0];
1.140 daniel 7618: if (cur != '<') {
1.230 veillard 7619: ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
1.140 daniel 7620: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7621: ctxt->sax->error(ctxt->userData,
7622: "Start tag expect, '<' not found\n");
7623: ctxt->wellFormed = 0;
1.180 daniel 7624: ctxt->disableSAX = 1;
1.140 daniel 7625: ctxt->instate = XML_PARSER_EOF;
7626: #ifdef DEBUG_PUSH
7627: fprintf(stderr, "PP: entering EOF\n");
7628: #endif
1.171 daniel 7629: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7630: (!ctxt->disableSAX))
1.140 daniel 7631: ctxt->sax->endDocument(ctxt->userData);
7632: goto done;
7633: }
/* Not the last chunk: wait until a '>' is buffered. */
1.143 daniel 7634: if ((!terminate) &&
7635: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 7636: goto done;
/* Push a space-stack entry: -1 for the first one, else a copy of the top. */
1.176 daniel 7637: if (ctxt->spaceNr == 0)
7638: spacePush(ctxt, -1);
7639: else
7640: spacePush(ctxt, *ctxt->space);
1.140 daniel 7641: name = xmlParseStartTag(ctxt);
7642: if (name == NULL) {
1.176 daniel 7643: spacePop(ctxt);
1.140 daniel 7644: ctxt->instate = XML_PARSER_EOF;
7645: #ifdef DEBUG_PUSH
7646: fprintf(stderr, "PP: entering EOF\n");
7647: #endif
1.171 daniel 7648: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7649: (!ctxt->disableSAX))
1.140 daniel 7650: ctxt->sax->endDocument(ctxt->userData);
7651: goto done;
7652: }
/* The name stack gets its own copy; name itself is freed below. */
7653: namePush(ctxt, xmlStrdup(name));
7654:
7655: /*
7656: * [ VC: Root Element Type ]
7657: * The Name in the document type declaration must match
7658: * the element type of the root element.
7659: */
7660: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
1.157 daniel 7661: ctxt->node && (ctxt->node == ctxt->myDoc->children))
1.140 daniel 7662: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7663:
7664: /*
7665: * Check for an Empty Element.
7666: */
1.152 daniel 7667: if ((RAW == '/') && (NXT(1) == '>')) {
1.140 daniel 7668: SKIP(2);
1.171 daniel 7669: if ((ctxt->sax != NULL) &&
7670: (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
1.140 daniel 7671: ctxt->sax->endElement(ctxt->userData, name);
7672: xmlFree(name);
7673: oldname = namePop(ctxt);
1.176 daniel 7674: spacePop(ctxt);
1.140 daniel 7675: if (oldname != NULL) {
7676: #ifdef DEBUG_STACK
7677: fprintf(stderr,"Close: popped %s\n", oldname);
7678: #endif
7679: xmlFree(oldname);
7680: }
/* ctxt->name NULL after the pop: go to EPILOG, otherwise back to CONTENT. */
7681: if (ctxt->name == NULL) {
7682: ctxt->instate = XML_PARSER_EPILOG;
7683: #ifdef DEBUG_PUSH
7684: fprintf(stderr, "PP: entering EPILOG\n");
7685: #endif
7686: } else {
7687: ctxt->instate = XML_PARSER_CONTENT;
7688: #ifdef DEBUG_PUSH
7689: fprintf(stderr, "PP: entering CONTENT\n");
7690: #endif
7691: }
7692: break;
7693: }
1.152 daniel 7694: if (RAW == '>') {
1.140 daniel 7695: NEXT;
7696: } else {
1.230 veillard 7697: ctxt->errNo = XML_ERR_GT_REQUIRED;
1.140 daniel 7698: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7699: ctxt->sax->error(ctxt->userData,
7700: "Couldn't find end of Start Tag %s\n",
7701: name);
7702: ctxt->wellFormed = 0;
1.180 daniel 7703: ctxt->disableSAX = 1;
1.140 daniel 7704:
7705: /*
7706: * end of parsing of this node.
7707: */
7708: nodePop(ctxt);
7709: oldname = namePop(ctxt);
1.176 daniel 7710: spacePop(ctxt);
1.140 daniel 7711: if (oldname != NULL) {
7712: #ifdef DEBUG_STACK
7713: fprintf(stderr,"Close: popped %s\n", oldname);
7714: #endif
7715: xmlFree(oldname);
7716: }
7717: }
7718: xmlFree(name);
7719: ctxt->instate = XML_PARSER_CONTENT;
7720: #ifdef DEBUG_PUSH
7721: fprintf(stderr, "PP: entering CONTENT\n");
7722: #endif
7723: break;
7724: }
1.224 veillard 7725: case XML_PARSER_CONTENT: {
7726: const xmlChar *test;
7727: int cons;
7728: xmlChar tok;
7729:
1.140 daniel 7730: /*
7731: * Handle preparsed entities and charRef
7732: */
7733: if (ctxt->token != 0) {
/* This local array shadows the function-level variable cur. */
7734: xmlChar cur[2] = { 0 , 0 } ;
7735:
7736: cur[0] = (xmlChar) ctxt->token;
1.171 daniel 7737: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
7738: (ctxt->sax->characters != NULL))
1.140 daniel 7739: ctxt->sax->characters(ctxt->userData, cur, 1);
7740: ctxt->token = 0;
7741: }
1.184 daniel 7742: if ((avail < 2) && (ctxt->inputNr == 1))
1.140 daniel 7743: goto done;
1.184 daniel 7744: cur = ctxt->input->cur[0];
7745: next = ctxt->input->cur[1];
1.224 veillard 7746:
/*
 * Remember the current position and pending token so that a lack of
 * progress can be detected once the construct below was handled.
 */
7747: test = CUR_PTR;
7748: cons = ctxt->input->consumed;
7749: tok = ctxt->token;
1.140 daniel 7750: if ((cur == '<') && (next == '?')) {
1.143 daniel 7751: if ((!terminate) &&
7752: (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
1.140 daniel 7753: goto done;
7754: #ifdef DEBUG_PUSH
7755: fprintf(stderr, "PP: Parsing PI\n");
7756: #endif
7757: xmlParsePI(ctxt);
7758: } else if ((cur == '<') && (next == '!') &&
1.184 daniel 7759: (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
1.143 daniel 7760: if ((!terminate) &&
7761: (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
1.140 daniel 7762: goto done;
7763: #ifdef DEBUG_PUSH
7764: fprintf(stderr, "PP: Parsing Comment\n");
7765: #endif
7766: xmlParseComment(ctxt);
7767: ctxt->instate = XML_PARSER_CONTENT;
1.184 daniel 7768: } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
7769: (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
7770: (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
7771: (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
7772: (ctxt->input->cur[8] == '[')) {
/* Skip the 9 chars of "<![CDATA["; the data is handled by the next state. */
1.140 daniel 7773: SKIP(9);
7774: ctxt->instate = XML_PARSER_CDATA_SECTION;
7775: #ifdef DEBUG_PUSH
7776: fprintf(stderr, "PP: entering CDATA_SECTION\n");
7777: #endif
7778: break;
7779: } else if ((cur == '<') && (next == '!') &&
7780: (avail < 9)) {
7781: goto done;
7782: } else if ((cur == '<') && (next == '/')) {
7783: ctxt->instate = XML_PARSER_END_TAG;
7784: #ifdef DEBUG_PUSH
7785: fprintf(stderr, "PP: entering END_TAG\n");
7786: #endif
7787: break;
7788: } else if (cur == '<') {
7789: ctxt->instate = XML_PARSER_START_TAG;
7790: #ifdef DEBUG_PUSH
7791: fprintf(stderr, "PP: entering START_TAG\n");
7792: #endif
7793: break;
7794: } else if (cur == '&') {
1.143 daniel 7795: if ((!terminate) &&
7796: (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
1.140 daniel 7797: goto done;
7798: #ifdef DEBUG_PUSH
7799: fprintf(stderr, "PP: Parsing Reference\n");
7800: #endif
7801: xmlParseReference(ctxt);
7802: } else {
1.156 daniel 7803: /* TODO Avoid the extra copy, handle directly !!! */
1.140 daniel 7804: /*
1.181 daniel 7805: * Goal of the following test is:
1.140 daniel 7806: * - minimize calls to the SAX 'character' callback
7807: * when they are mergeable
7808: * - handle an problem for isBlank when we only parse
7809: * a sequence of blank chars and the next one is
7810: * not available to check against '<' presence.
7811: * - tries to homogenize the differences in SAX
7812: * callbacks beween the push and pull versions
7813: * of the parser.
7814: */
7815: if ((ctxt->inputNr == 1) &&
7816: (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
1.143 daniel 7817: if ((!terminate) &&
7818: (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
1.140 daniel 7819: goto done;
7820: }
7821: ctxt->checkIndex = 0;
7822: #ifdef DEBUG_PUSH
7823: fprintf(stderr, "PP: Parsing char data\n");
7824: #endif
7825: xmlParseCharData(ctxt, 0);
7826: }
7827: /*
7828: * Pop-up of finished entities.
7829: */
1.152 daniel 7830: while ((RAW == 0) && (ctxt->inputNr > 1))
1.140 daniel 7831: xmlPopInput(ctxt);
/*
 * Nothing was consumed and the token is unchanged: stop here with an
 * internal error rather than iterating again on the same data.
 */
1.224 veillard 7832: if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
7833: (tok == ctxt->token)) {
1.230 veillard 7834: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
1.224 veillard 7835: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7836: ctxt->sax->error(ctxt->userData,
7837: "detected an error in element content\n");
7838: ctxt->wellFormed = 0;
7839: ctxt->disableSAX = 1;
7840: ctxt->instate = XML_PARSER_EOF;
7841: break;
7842: }
1.140 daniel 7843: break;
1.224 veillard 7844: }
1.140 daniel 7845: case XML_PARSER_CDATA_SECTION: {
7846: /*
7847: * The Push mode need to have the SAX callback for
7848: * cdataBlock merge back contiguous callbacks.
7849: */
7850: int base;
7851:
/* Note: this lookup is done even when terminate is set. */
7852: base = xmlParseLookupSequence(ctxt, ']', ']', '>');
7853: if (base < 0) {
/*
 * End marker not buffered yet: if enough data is pending, hand a
 * block of XML_PARSER_BIG_BUFFER_SIZE bytes to cdataBlock, then wait
 * for more input.
 */
7854: if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
1.171 daniel 7855: if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
1.140 daniel 7856: if (ctxt->sax->cdataBlock != NULL)
1.184 daniel 7857: ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
1.140 daniel 7858: XML_PARSER_BIG_BUFFER_SIZE);
7859: }
7860: SKIP(XML_PARSER_BIG_BUFFER_SIZE);
7861: ctxt->checkIndex = 0;
7862: }
7863: goto done;
7864: } else {
/* base is the number of data bytes located before the "]]>" marker. */
1.171 daniel 7865: if ((ctxt->sax != NULL) && (base > 0) &&
7866: (!ctxt->disableSAX)) {
1.140 daniel 7867: if (ctxt->sax->cdataBlock != NULL)
7868: ctxt->sax->cdataBlock(ctxt->userData,
1.184 daniel 7869: ctxt->input->cur, base);
1.140 daniel 7870: }
7871: SKIP(base + 3);
7872: ctxt->checkIndex = 0;
7873: ctxt->instate = XML_PARSER_CONTENT;
7874: #ifdef DEBUG_PUSH
7875: fprintf(stderr, "PP: entering CONTENT\n");
7876: #endif
7877: }
7878: break;
7879: }
1.141 daniel 7880: case XML_PARSER_END_TAG:
1.140 daniel 7881: if (avail < 2)
7882: goto done;
1.143 daniel 7883: if ((!terminate) &&
7884: (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
1.140 daniel 7885: goto done;
7886: xmlParseEndTag(ctxt);
7887: if (ctxt->name == NULL) {
7888: ctxt->instate = XML_PARSER_EPILOG;
7889: #ifdef DEBUG_PUSH
7890: fprintf(stderr, "PP: entering EPILOG\n");
7891: #endif
7892: } else {
7893: ctxt->instate = XML_PARSER_CONTENT;
7894: #ifdef DEBUG_PUSH
7895: fprintf(stderr, "PP: entering CONTENT\n");
7896: #endif
7897: }
7898: break;
7899: case XML_PARSER_DTD: {
7900: /*
7901: * Sorry but progressive parsing of the internal subset
7902: * is not expected to be supported. We first check that
7903: * the full content of the internal subset is available and
7904: * the parsing is launched only at that point.
7905: * Internal subset ends up with "']' S? '>'" in an unescaped
7906: * section and not in a ']]>' sequence which are conditional
7907: * sections (whoever argued to keep that crap in XML deserve
7908: * a place in hell !).
7909: */
7910: int base, i;
7911: xmlChar *buf;
7912: xmlChar quote = 0;
7913:
1.184 daniel 7914: base = ctxt->input->cur - ctxt->input->base;
1.140 daniel 7915: if (base < 0) return(0);
7916: if (ctxt->checkIndex > base)
7917: base = ctxt->checkIndex;
/*
 * NOTE(review): ctxt->input->buf is dereferenced here without the
 * NULL test used when computing avail - confirm it is always set
 * when this state is reached.
 */
1.184 daniel 7918: buf = ctxt->input->buf->buffer->content;
1.202 daniel 7919: for (;(unsigned int) base < ctxt->input->buf->buffer->use;
7920: base++) {
/* Inside a quoted literal only the matching quote is significant. */
1.140 daniel 7921: if (quote != 0) {
7922: if (buf[base] == quote)
7923: quote = 0;
7924: continue;
7925: }
7926: if (buf[base] == '"') {
7927: quote = '"';
7928: continue;
7929: }
7930: if (buf[base] == '\'') {
7931: quote = '\'';
7932: continue;
7933: }
7934: if (buf[base] == ']') {
1.202 daniel 7935: if ((unsigned int) base +1 >=
7936: ctxt->input->buf->buffer->use)
1.140 daniel 7937: break;
7938: if (buf[base + 1] == ']') {
7939: /* conditional crap, skip both ']' ! */
7940: base++;
7941: continue;
7942: }
/*
 * Accept any '>' found in the rest of the buffer after this ']'.
 * NOTE(review): the loop does not check that only blanks separate
 * the ']' from the '>', although the comment above says "']' S? '>'".
 */
1.202 daniel 7943: for (i = 0;
7944: (unsigned int) base + i < ctxt->input->buf->buffer->use;
7945: i++) {
1.140 daniel 7946: if (buf[base + i] == '>')
7947: goto found_end_int_subset;
7948: }
7949: break;
7950: }
7951: }
7952: /*
7953: * We didn't found the end of the Internal subset
7954: */
/* The scan position is only saved when not stopped inside a literal. */
7955: if (quote == 0)
7956: ctxt->checkIndex = base;
7957: #ifdef DEBUG_PUSH
7958: if (next == 0)
7959: fprintf(stderr, "PP: lookup of int subset end filed\n");
7960: #endif
7961: goto done;
7962:
7963: found_end_int_subset:
7964: xmlParseInternalSubset(ctxt);
1.166 daniel 7965: ctxt->inSubset = 2;
1.171 daniel 7966: if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
1.166 daniel 7967: (ctxt->sax->externalSubset != NULL))
7968: ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7969: ctxt->extSubSystem, ctxt->extSubURI);
7970: ctxt->inSubset = 0;
1.140 daniel 7971: ctxt->instate = XML_PARSER_PROLOG;
7972: ctxt->checkIndex = 0;
7973: #ifdef DEBUG_PUSH
7974: fprintf(stderr, "PP: entering PROLOG\n");
7975: #endif
7976: break;
7977: }
/*
 * The remaining states are not handled by this loop: each one reports
 * an internal error on stderr and falls back to another state.
 */
7978: case XML_PARSER_COMMENT:
7979: fprintf(stderr, "PP: internal error, state == COMMENT\n");
7980: ctxt->instate = XML_PARSER_CONTENT;
7981: #ifdef DEBUG_PUSH
7982: fprintf(stderr, "PP: entering CONTENT\n");
7983: #endif
7984: break;
7985: case XML_PARSER_PI:
7986: fprintf(stderr, "PP: internal error, state == PI\n");
7987: ctxt->instate = XML_PARSER_CONTENT;
7988: #ifdef DEBUG_PUSH
7989: fprintf(stderr, "PP: entering CONTENT\n");
7990: #endif
7991: break;
1.128 daniel 7992: case XML_PARSER_ENTITY_DECL:
1.140 daniel 7993: fprintf(stderr, "PP: internal error, state == ENTITY_DECL\n");
7994: ctxt->instate = XML_PARSER_DTD;
7995: #ifdef DEBUG_PUSH
7996: fprintf(stderr, "PP: entering DTD\n");
7997: #endif
7998: break;
1.128 daniel 7999: case XML_PARSER_ENTITY_VALUE:
1.140 daniel 8000: fprintf(stderr, "PP: internal error, state == ENTITY_VALUE\n");
/*
 * NOTE(review): the DEBUG_PUSH trace below says "entering DTD" while
 * the state set here is XML_PARSER_CONTENT.
 */
8001: ctxt->instate = XML_PARSER_CONTENT;
8002: #ifdef DEBUG_PUSH
8003: fprintf(stderr, "PP: entering DTD\n");
8004: #endif
8005: break;
1.128 daniel 8006: case XML_PARSER_ATTRIBUTE_VALUE:
1.140 daniel 8007: fprintf(stderr, "PP: internal error, state == ATTRIBUTE_VALUE\n");
1.168 daniel 8008: ctxt->instate = XML_PARSER_START_TAG;
8009: #ifdef DEBUG_PUSH
8010: fprintf(stderr, "PP: entering START_TAG\n");
8011: #endif
8012: break;
8013: case XML_PARSER_SYSTEM_LITERAL:
8014: fprintf(stderr, "PP: internal error, state == SYSTEM_LITERAL\n");
1.140 daniel 8015: ctxt->instate = XML_PARSER_START_TAG;
8016: #ifdef DEBUG_PUSH
8017: fprintf(stderr, "PP: entering START_TAG\n");
8018: #endif
8019: break;
1.128 daniel 8020: }
8021: }
1.140 daniel 8022: done:
8023: #ifdef DEBUG_PUSH
8024: fprintf(stderr, "PP: done %d\n", ret);
8025: #endif
1.128 daniel 8026: return(ret);
8027: }
8028:
8029: /**
1.143 daniel 8030: * xmlParseTry:
8031: * @ctxt: an XML parser context
8032: *
8033: * Try to progress on parsing
8034: *
8035: * Returns zero if no parsing was possible
8036: */
8037: int
8038: xmlParseTry(xmlParserCtxtPtr ctxt) {
8039: return(xmlParseTryOrFinish(ctxt, 0));
8040: }
8041:
8042: /**
1.128 daniel 8043: * xmlParseChunk:
8044: * @ctxt: an XML parser context
8045: * @chunk: an char array
8046: * @size: the size in byte of the chunk
8047: * @terminate: last chunk indicator
8048: *
8049: * Parse a Chunk of memory
8050: *
8051: * Returns zero if no error, the xmlParserErrors otherwise.
8052: */
1.140 daniel 8053: int
1.128 daniel 8054: xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8055: int terminate) {
1.132 daniel 8056: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
1.140 daniel 8057: (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8058: int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8059: int cur = ctxt->input->cur - ctxt->input->base;
8060:
1.132 daniel 8061: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
1.140 daniel 8062: ctxt->input->base = ctxt->input->buf->buffer->content + base;
8063: ctxt->input->cur = ctxt->input->base + cur;
8064: #ifdef DEBUG_PUSH
8065: fprintf(stderr, "PP: pushed %d\n", size);
8066: #endif
8067:
1.150 daniel 8068: if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8069: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 8070: } else if (ctxt->instate != XML_PARSER_EOF)
1.143 daniel 8071: xmlParseTryOrFinish(ctxt, terminate);
1.140 daniel 8072: if (terminate) {
1.151 daniel 8073: /*
8074: * Check for termination
8075: */
1.140 daniel 8076: if ((ctxt->instate != XML_PARSER_EOF) &&
8077: (ctxt->instate != XML_PARSER_EPILOG)) {
1.230 veillard 8078: ctxt->errNo = XML_ERR_DOCUMENT_END;
1.140 daniel 8079: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8080: ctxt->sax->error(ctxt->userData,
8081: "Extra content at the end of the document\n");
8082: ctxt->wellFormed = 0;
1.180 daniel 8083: ctxt->disableSAX = 1;
1.140 daniel 8084: }
8085: if (ctxt->instate != XML_PARSER_EOF) {
1.171 daniel 8086: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8087: (!ctxt->disableSAX))
1.140 daniel 8088: ctxt->sax->endDocument(ctxt->userData);
8089: }
8090: ctxt->instate = XML_PARSER_EOF;
1.128 daniel 8091: }
8092: return((xmlParserErrors) ctxt->errNo);
8093: }
8094:
8095: /************************************************************************
8096: * *
1.98 daniel 8097: * I/O front end functions to the parser *
8098: * *
8099: ************************************************************************/
1.201 daniel 8100:
8101: /**
1.229 veillard 8102: * xmlStopParser:
1.201 daniel 8103: * @ctxt: an XML parser context
8104: *
8105: * Blocks further parser processing
8106: */
8107: void
8108: xmlStopParser(xmlParserCtxtPtr ctxt) {
8109: ctxt->instate = XML_PARSER_EOF;
8110: if (ctxt->input != NULL)
8111: ctxt->input->cur = BAD_CAST"";
8112: }
1.98 daniel 8113:
1.50 daniel 8114: /**
1.181 daniel 8115: * xmlCreatePushParserCtxt:
1.140 daniel 8116: * @sax: a SAX handler
8117: * @user_data: The user data returned on SAX callbacks
8118: * @chunk: a pointer to an array of chars
8119: * @size: number of chars in the array
8120: * @filename: an optional file name or URI
8121: *
8122: * Create a parser context for using the XML parser in push mode
8123: * To allow content encoding detection, @size should be >= 4
8124: * The value of @filename is used for fetching external entities
8125: * and error/warning reports.
8126: *
8127: * Returns the new parser context or NULL
8128: */
8129: xmlParserCtxtPtr
8130: xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8131: const char *chunk, int size, const char *filename) {
8132: xmlParserCtxtPtr ctxt;
8133: xmlParserInputPtr inputStream;
8134: xmlParserInputBufferPtr buf;
8135: xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8136:
8137: /*
1.156 daniel 8138: * plug some encoding conversion routines
1.140 daniel 8139: */
8140: if ((chunk != NULL) && (size >= 4))
1.156 daniel 8141: enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
1.140 daniel 8142:
8143: buf = xmlAllocParserInputBuffer(enc);
8144: if (buf == NULL) return(NULL);
8145:
8146: ctxt = xmlNewParserCtxt();
8147: if (ctxt == NULL) {
8148: xmlFree(buf);
8149: return(NULL);
8150: }
8151: if (sax != NULL) {
8152: if (ctxt->sax != &xmlDefaultSAXHandler)
8153: xmlFree(ctxt->sax);
8154: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8155: if (ctxt->sax == NULL) {
8156: xmlFree(buf);
8157: xmlFree(ctxt);
8158: return(NULL);
8159: }
8160: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8161: if (user_data != NULL)
8162: ctxt->userData = user_data;
8163: }
8164: if (filename == NULL) {
8165: ctxt->directory = NULL;
8166: } else {
8167: ctxt->directory = xmlParserGetDirectory(filename);
8168: }
8169:
8170: inputStream = xmlNewInputStream(ctxt);
8171: if (inputStream == NULL) {
8172: xmlFreeParserCtxt(ctxt);
8173: return(NULL);
8174: }
8175:
8176: if (filename == NULL)
8177: inputStream->filename = NULL;
8178: else
8179: inputStream->filename = xmlMemStrdup(filename);
8180: inputStream->buf = buf;
8181: inputStream->base = inputStream->buf->buffer->content;
8182: inputStream->cur = inputStream->buf->buffer->content;
1.156 daniel 8183: if (enc != XML_CHAR_ENCODING_NONE) {
8184: xmlSwitchEncoding(ctxt, enc);
8185: }
1.140 daniel 8186:
8187: inputPush(ctxt, inputStream);
8188:
8189: if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8190: (ctxt->input->buf != NULL)) {
8191: xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8192: #ifdef DEBUG_PUSH
8193: fprintf(stderr, "PP: pushed %d\n", size);
8194: #endif
8195: }
1.190 daniel 8196:
8197: return(ctxt);
8198: }
8199:
8200: /**
8201: * xmlCreateIOParserCtxt:
8202: * @sax: a SAX handler
8203: * @user_data: The user data returned on SAX callbacks
8204: * @ioread: an I/O read function
8205: * @ioclose: an I/O close function
8206: * @ioctx: an I/O handler
8207: * @enc: the charset encoding if known
8208: *
8209: * Create a parser context for using the XML parser with an existing
8210: * I/O stream
8211: *
8212: * Returns the new parser context or NULL
8213: */
8214: xmlParserCtxtPtr
8215: xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8216: xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8217: void *ioctx, xmlCharEncoding enc) {
8218: xmlParserCtxtPtr ctxt;
8219: xmlParserInputPtr inputStream;
8220: xmlParserInputBufferPtr buf;
8221:
8222: buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8223: if (buf == NULL) return(NULL);
8224:
8225: ctxt = xmlNewParserCtxt();
8226: if (ctxt == NULL) {
8227: xmlFree(buf);
8228: return(NULL);
8229: }
8230: if (sax != NULL) {
8231: if (ctxt->sax != &xmlDefaultSAXHandler)
8232: xmlFree(ctxt->sax);
8233: ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8234: if (ctxt->sax == NULL) {
8235: xmlFree(buf);
8236: xmlFree(ctxt);
8237: return(NULL);
8238: }
8239: memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8240: if (user_data != NULL)
8241: ctxt->userData = user_data;
8242: }
8243:
1.229 veillard 8244: inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8245: if (inputStream == NULL) {
8246: xmlFreeParserCtxt(ctxt);
8247: return(NULL);
1.74 daniel 8248: }
1.229 veillard 8249: inputPush(ctxt, inputStream);
1.69 daniel 8250:
1.229 veillard 8251: return(ctxt);
1.1 veillard 8252: }
8253:
1.229 veillard 8254: /************************************************************************
8255: * *
8256: * Front ends when parsing a Dtd *
8257: * *
8258: ************************************************************************/
1.76 daniel 8259:
8260: /**
1.181 daniel 8261: * xmlSAXParseDTD:
1.76 daniel 8262: * @sax: the SAX handler block
8263: * @ExternalID: a NAME* containing the External ID of the DTD
8264: * @SystemID: a NAME* containing the URL to the DTD
8265: *
8266: * Load and parse an external subset.
8267: *
8268: * Returns the resulting xmlDtdPtr or NULL in case of error.
8269: */
8270:
8271: xmlDtdPtr
1.123 daniel 8272: xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8273: const xmlChar *SystemID) {
1.76 daniel 8274: xmlDtdPtr ret = NULL;
8275: xmlParserCtxtPtr ctxt;
1.83 daniel 8276: xmlParserInputPtr input = NULL;
1.76 daniel 8277: xmlCharEncoding enc;
8278:
8279: if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
8280:
1.97 daniel 8281: ctxt = xmlNewParserCtxt();
1.76 daniel 8282: if (ctxt == NULL) {
8283: return(NULL);
8284: }
8285:
8286: /*
8287: * Set-up the SAX context
8288: */
8289: if (sax != NULL) {
1.93 veillard 8290: if (ctxt->sax != NULL)
1.119 daniel 8291: xmlFree(ctxt->sax);
1.76 daniel 8292: ctxt->sax = sax;
8293: ctxt->userData = NULL;
8294: }
8295:
8296: /*
8297: * Ask the Entity resolver to load the damn thing
8298: */
8299:
8300: if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
8301: input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
8302: if (input == NULL) {
1.86 daniel 8303: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 8304: xmlFreeParserCtxt(ctxt);
8305: return(NULL);
8306: }
8307:
8308: /*
1.156 daniel 8309: * plug some encoding conversion routines here.
1.76 daniel 8310: */
8311: xmlPushInput(ctxt, input);
1.156 daniel 8312: enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
1.76 daniel 8313: xmlSwitchEncoding(ctxt, enc);
8314:
1.95 veillard 8315: if (input->filename == NULL)
1.156 daniel 8316: input->filename = (char *) xmlStrdup(SystemID);
1.76 daniel 8317: input->line = 1;
8318: input->col = 1;
8319: input->base = ctxt->input->cur;
8320: input->cur = ctxt->input->cur;
8321: input->free = NULL;
8322:
8323: /*
8324: * let's parse that entity knowing it's an external subset.
8325: */
1.191 daniel 8326: ctxt->inSubset = 2;
8327: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8328: ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8329: ExternalID, SystemID);
1.79 daniel 8330: xmlParseExternalSubset(ctxt, ExternalID, SystemID);
1.76 daniel 8331:
8332: if (ctxt->myDoc != NULL) {
8333: if (ctxt->wellFormed) {
1.191 daniel 8334: ret = ctxt->myDoc->extSubset;
8335: ctxt->myDoc->extSubset = NULL;
1.76 daniel 8336: } else {
8337: ret = NULL;
8338: }
8339: xmlFreeDoc(ctxt->myDoc);
8340: ctxt->myDoc = NULL;
8341: }
1.86 daniel 8342: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 8343: xmlFreeParserCtxt(ctxt);
8344:
8345: return(ret);
8346: }
8347:
8348: /**
1.181 daniel 8349: * xmlParseDTD:
1.76 daniel 8350: * @ExternalID: a NAME* containing the External ID of the DTD
8351: * @SystemID: a NAME* containing the URL to the DTD
8352: *
8353: * Load and parse an external subset.
8354: *
8355: * Returns the resulting xmlDtdPtr or NULL in case of error.
8356: */
8357:
8358: xmlDtdPtr
1.123 daniel 8359: xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
1.76 daniel 8360: return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
1.59 daniel 8361: }
8362:
1.229 veillard 8363: /************************************************************************
8364: * *
8365: * Front ends when parsing an Entity *
8366: * *
8367: ************************************************************************/
8368:
1.59 daniel 8369: /**
1.181 daniel 8370: * xmlSAXParseBalancedChunk:
1.144 daniel 8371: * @ctx: an XML parser context (possibly NULL)
8372: * @sax: the SAX handler bloc (possibly NULL)
8373: * @user_data: The user data returned on SAX callbacks (possibly NULL)
8374: * @input: a parser input stream
8375: * @enc: the encoding
8376: *
8377: * Parse a well-balanced chunk of an XML document
8378: * The user has to provide SAX callback block whose routines will be
8379: * called by the parser
8380: * The allowed sequence for the Well Balanced Chunk is the one defined by
8381: * the content production in the XML grammar:
8382: *
8383: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8384: *
1.176 daniel 8385: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
1.144 daniel 8386: * the error code otherwise
8387: */
8388:
8389: int
8390: xmlSAXParseBalancedChunk(xmlParserCtxtPtr ctx, xmlSAXHandlerPtr sax,
8391: void *user_data, xmlParserInputPtr input,
8392: xmlCharEncoding enc) {
8393: xmlParserCtxtPtr ctxt;
8394: int ret;
8395:
8396: if (input == NULL) return(-1);
8397:
8398: if (ctx != NULL)
8399: ctxt = ctx;
8400: else {
8401: ctxt = xmlNewParserCtxt();
8402: if (ctxt == NULL)
8403: return(-1);
8404: if (sax == NULL)
8405: ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8406: }
8407:
8408: /*
8409: * Set-up the SAX context
8410: */
8411: if (sax != NULL) {
8412: if (ctxt->sax != NULL)
8413: xmlFree(ctxt->sax);
8414: ctxt->sax = sax;
8415: ctxt->userData = user_data;
8416: }
8417:
8418: /*
8419: * plug some encoding conversion routines here.
8420: */
8421: xmlPushInput(ctxt, input);
8422: if (enc != XML_CHAR_ENCODING_NONE)
8423: xmlSwitchEncoding(ctxt, enc);
8424:
8425: /*
8426: * let's parse that entity knowing it's an external subset.
8427: */
8428: xmlParseContent(ctxt);
8429: ret = ctxt->errNo;
8430:
8431: if (ctx == NULL) {
8432: if (sax != NULL)
8433: ctxt->sax = NULL;
8434: else
8435: xmlFreeDoc(ctxt->myDoc);
8436: xmlFreeParserCtxt(ctxt);
8437: }
8438: return(ret);
8439: }
8440:
8441: /**
1.213 veillard 8442: * xmlParseCtxtExternalEntity:
8443: * @ctx: the existing parsing context
8444: * @URL: the URL for the entity to load
8445: * @ID: the System ID for the entity to load
8446: * @list: the return value for the set of parsed nodes
8447: *
8448: * Parse an external general entity within an existing parsing context
8449: * An external general parsed entity is well-formed if it matches the
8450: * production labeled extParsedEnt.
8451: *
8452: * [78] extParsedEnt ::= TextDecl? content
8453: *
8454: * Returns 0 if the entity is well formed, -1 in case of args problem and
8455: * the parser error code otherwise
8456: */
8457:
8458: int
8459: xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
8460: const xmlChar *ID, xmlNodePtr *list) {
8461: xmlParserCtxtPtr ctxt;
8462: xmlDocPtr newDoc;
8463: xmlSAXHandlerPtr oldsax = NULL;
8464: int ret = 0;
8465:
8466: if (ctx->depth > 40) {
8467: return(XML_ERR_ENTITY_LOOP);
8468: }
8469:
8470: if (list != NULL)
8471: *list = NULL;
8472: if ((URL == NULL) && (ID == NULL))
8473: return(-1);
8474: if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
8475: return(-1);
8476:
8477:
1.228 veillard 8478: ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
1.213 veillard 8479: if (ctxt == NULL) return(-1);
8480: ctxt->userData = ctxt;
8481: oldsax = ctxt->sax;
8482: ctxt->sax = ctx->sax;
8483: newDoc = xmlNewDoc(BAD_CAST "1.0");
8484: if (newDoc == NULL) {
8485: xmlFreeParserCtxt(ctxt);
8486: return(-1);
8487: }
8488: if (ctx->myDoc != NULL) {
8489: newDoc->intSubset = ctx->myDoc->intSubset;
8490: newDoc->extSubset = ctx->myDoc->extSubset;
8491: }
8492: if (ctx->myDoc->URL != NULL) {
8493: newDoc->URL = xmlStrdup(ctx->myDoc->URL);
8494: }
8495: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8496: if (newDoc->children == NULL) {
8497: ctxt->sax = oldsax;
8498: xmlFreeParserCtxt(ctxt);
8499: newDoc->intSubset = NULL;
8500: newDoc->extSubset = NULL;
8501: xmlFreeDoc(newDoc);
8502: return(-1);
8503: }
8504: nodePush(ctxt, newDoc->children);
8505: if (ctx->myDoc == NULL) {
8506: ctxt->myDoc = newDoc;
8507: } else {
8508: ctxt->myDoc = ctx->myDoc;
8509: newDoc->children->doc = ctx->myDoc;
8510: }
8511:
8512: /*
8513: * Parse a possible text declaration first
8514: */
8515: GROW;
8516: if ((RAW == '<') && (NXT(1) == '?') &&
8517: (NXT(2) == 'x') && (NXT(3) == 'm') &&
8518: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8519: xmlParseTextDecl(ctxt);
8520: }
8521:
8522: /*
8523: * Doing validity checking on chunk doesn't make sense
8524: */
8525: ctxt->instate = XML_PARSER_CONTENT;
8526: ctxt->validate = ctx->validate;
8527: ctxt->depth = ctx->depth + 1;
8528: ctxt->replaceEntities = ctx->replaceEntities;
8529: if (ctxt->validate) {
8530: ctxt->vctxt.error = ctx->vctxt.error;
8531: ctxt->vctxt.warning = ctx->vctxt.warning;
8532: /* Allocate the Node stack */
8533: ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
1.228 veillard 8534: if (ctxt->vctxt.nodeTab == NULL) {
8535: fprintf(stderr, "xmlParseCtxtExternalEntity: out of memory\n");
8536: ctxt->validate = 0;
8537: ctxt->vctxt.error = NULL;
8538: ctxt->vctxt.warning = NULL;
8539: } else {
8540: ctxt->vctxt.nodeNr = 0;
8541: ctxt->vctxt.nodeMax = 4;
8542: ctxt->vctxt.node = NULL;
8543: }
1.213 veillard 8544: } else {
8545: ctxt->vctxt.error = NULL;
8546: ctxt->vctxt.warning = NULL;
8547: }
8548:
8549: xmlParseContent(ctxt);
8550:
8551: if ((RAW == '<') && (NXT(1) == '/')) {
1.230 veillard 8552: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.213 veillard 8553: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8554: ctxt->sax->error(ctxt->userData,
8555: "chunk is not well balanced\n");
8556: ctxt->wellFormed = 0;
8557: ctxt->disableSAX = 1;
8558: } else if (RAW != 0) {
1.230 veillard 8559: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
1.213 veillard 8560: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8561: ctxt->sax->error(ctxt->userData,
8562: "extra content at the end of well balanced chunk\n");
8563: ctxt->wellFormed = 0;
8564: ctxt->disableSAX = 1;
8565: }
8566: if (ctxt->node != newDoc->children) {
1.230 veillard 8567: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.213 veillard 8568: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8569: ctxt->sax->error(ctxt->userData,
8570: "chunk is not well balanced\n");
8571: ctxt->wellFormed = 0;
8572: ctxt->disableSAX = 1;
8573: }
8574:
8575: if (!ctxt->wellFormed) {
8576: if (ctxt->errNo == 0)
8577: ret = 1;
8578: else
8579: ret = ctxt->errNo;
8580: } else {
8581: if (list != NULL) {
8582: xmlNodePtr cur;
8583:
8584: /*
8585: * Return the newly created nodeset after unlinking it from
8586: * they pseudo parent.
8587: */
8588: cur = newDoc->children->children;
8589: *list = cur;
8590: while (cur != NULL) {
8591: cur->parent = NULL;
8592: cur = cur->next;
8593: }
8594: newDoc->children->children = NULL;
8595: }
8596: ret = 0;
8597: }
8598: ctxt->sax = oldsax;
8599: xmlFreeParserCtxt(ctxt);
8600: newDoc->intSubset = NULL;
8601: newDoc->extSubset = NULL;
8602: xmlFreeDoc(newDoc);
8603:
8604: return(ret);
8605: }
8606:
8607: /**
1.181 daniel 8608: * xmlParseExternalEntity:
8609: * @doc: the document the chunk pertains to
8610: * @sax: the SAX handler bloc (possibly NULL)
8611: * @user_data: The user data returned on SAX callbacks (possibly NULL)
1.185 daniel 8612: * @depth: Used for loop detection, use 0
1.181 daniel 8613: * @URL: the URL for the entity to load
8614: * @ID: the System ID for the entity to load
8615: * @list: the return value for the set of parsed nodes
8616: *
8617: * Parse an external general entity
8618: * An external general parsed entity is well-formed if it matches the
8619: * production labeled extParsedEnt.
8620: *
8621: * [78] extParsedEnt ::= TextDecl? content
8622: *
8623: * Returns 0 if the entity is well formed, -1 in case of args problem and
8624: * the parser error code otherwise
8625: */
8626:
8627: int
8628: xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
1.185 daniel 8629: int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
1.181 daniel 8630: xmlParserCtxtPtr ctxt;
8631: xmlDocPtr newDoc;
8632: xmlSAXHandlerPtr oldsax = NULL;
8633: int ret = 0;
8634:
1.185 daniel 8635: if (depth > 40) {
8636: return(XML_ERR_ENTITY_LOOP);
8637: }
8638:
8639:
1.181 daniel 8640:
8641: if (list != NULL)
8642: *list = NULL;
8643: if ((URL == NULL) && (ID == NULL))
1.213 veillard 8644: return(-1);
8645: if (doc == NULL) /* @@ relax but check for dereferences */
1.181 daniel 8646: return(-1);
8647:
8648:
1.228 veillard 8649: ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
1.181 daniel 8650: if (ctxt == NULL) return(-1);
8651: ctxt->userData = ctxt;
8652: if (sax != NULL) {
8653: oldsax = ctxt->sax;
8654: ctxt->sax = sax;
8655: if (user_data != NULL)
8656: ctxt->userData = user_data;
8657: }
8658: newDoc = xmlNewDoc(BAD_CAST "1.0");
8659: if (newDoc == NULL) {
8660: xmlFreeParserCtxt(ctxt);
8661: return(-1);
8662: }
8663: if (doc != NULL) {
8664: newDoc->intSubset = doc->intSubset;
8665: newDoc->extSubset = doc->extSubset;
8666: }
8667: if (doc->URL != NULL) {
8668: newDoc->URL = xmlStrdup(doc->URL);
8669: }
8670: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8671: if (newDoc->children == NULL) {
8672: if (sax != NULL)
8673: ctxt->sax = oldsax;
8674: xmlFreeParserCtxt(ctxt);
8675: newDoc->intSubset = NULL;
8676: newDoc->extSubset = NULL;
8677: xmlFreeDoc(newDoc);
8678: return(-1);
8679: }
8680: nodePush(ctxt, newDoc->children);
8681: if (doc == NULL) {
8682: ctxt->myDoc = newDoc;
8683: } else {
8684: ctxt->myDoc = doc;
8685: newDoc->children->doc = doc;
8686: }
8687:
8688: /*
8689: * Parse a possible text declaration first
8690: */
8691: GROW;
8692: if ((RAW == '<') && (NXT(1) == '?') &&
8693: (NXT(2) == 'x') && (NXT(3) == 'm') &&
8694: (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8695: xmlParseTextDecl(ctxt);
8696: }
8697:
8698: /*
8699: * Doing validity checking on chunk doesn't make sense
8700: */
8701: ctxt->instate = XML_PARSER_CONTENT;
8702: ctxt->validate = 0;
1.185 daniel 8703: ctxt->depth = depth;
1.181 daniel 8704:
8705: xmlParseContent(ctxt);
8706:
8707: if ((RAW == '<') && (NXT(1) == '/')) {
1.230 veillard 8708: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.181 daniel 8709: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8710: ctxt->sax->error(ctxt->userData,
8711: "chunk is not well balanced\n");
8712: ctxt->wellFormed = 0;
8713: ctxt->disableSAX = 1;
8714: } else if (RAW != 0) {
1.230 veillard 8715: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
1.181 daniel 8716: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8717: ctxt->sax->error(ctxt->userData,
8718: "extra content at the end of well balanced chunk\n");
8719: ctxt->wellFormed = 0;
8720: ctxt->disableSAX = 1;
8721: }
8722: if (ctxt->node != newDoc->children) {
1.230 veillard 8723: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.181 daniel 8724: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8725: ctxt->sax->error(ctxt->userData,
8726: "chunk is not well balanced\n");
8727: ctxt->wellFormed = 0;
8728: ctxt->disableSAX = 1;
8729: }
8730:
8731: if (!ctxt->wellFormed) {
8732: if (ctxt->errNo == 0)
8733: ret = 1;
8734: else
8735: ret = ctxt->errNo;
8736: } else {
8737: if (list != NULL) {
8738: xmlNodePtr cur;
8739:
8740: /*
8741: * Return the newly created nodeset after unlinking it from
8742: * they pseudo parent.
8743: */
8744: cur = newDoc->children->children;
8745: *list = cur;
8746: while (cur != NULL) {
8747: cur->parent = NULL;
8748: cur = cur->next;
8749: }
8750: newDoc->children->children = NULL;
8751: }
8752: ret = 0;
8753: }
8754: if (sax != NULL)
8755: ctxt->sax = oldsax;
8756: xmlFreeParserCtxt(ctxt);
8757: newDoc->intSubset = NULL;
8758: newDoc->extSubset = NULL;
8759: xmlFreeDoc(newDoc);
8760:
8761: return(ret);
8762: }
8763:
8764: /**
8765: * xmlParseBalancedChunk:
1.176 daniel 8766: * @doc: the document the chunk pertains to
8767: * @sax: the SAX handler bloc (possibly NULL)
8768: * @user_data: The user data returned on SAX callbacks (possibly NULL)
1.185 daniel 8769: * @depth: Used for loop detection, use 0
1.176 daniel 8770: * @string: the input string in UTF8 or ISO-Latin (zero terminated)
8771: * @list: the return value for the set of parsed nodes
8772: *
8773: * Parse a well-balanced chunk of an XML document
8774: * called by the parser
8775: * The allowed sequence for the Well Balanced Chunk is the one defined by
8776: * the content production in the XML grammar:
1.144 daniel 8777: *
1.175 daniel 8778: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8779: *
1.176 daniel 8780: * Returns 0 if the chunk is well balanced, -1 in case of args problem and
8781: * the parser error code otherwise
1.144 daniel 8782: */
8783:
1.175 daniel 8784: int
8785: xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
1.185 daniel 8786: void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
1.176 daniel 8787: xmlParserCtxtPtr ctxt;
1.175 daniel 8788: xmlDocPtr newDoc;
1.181 daniel 8789: xmlSAXHandlerPtr oldsax = NULL;
1.175 daniel 8790: int size;
1.176 daniel 8791: int ret = 0;
1.175 daniel 8792:
1.185 daniel 8793: if (depth > 40) {
8794: return(XML_ERR_ENTITY_LOOP);
8795: }
8796:
1.175 daniel 8797:
1.176 daniel 8798: if (list != NULL)
8799: *list = NULL;
8800: if (string == NULL)
8801: return(-1);
8802:
8803: size = xmlStrlen(string);
8804:
1.183 daniel 8805: ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
1.176 daniel 8806: if (ctxt == NULL) return(-1);
8807: ctxt->userData = ctxt;
1.175 daniel 8808: if (sax != NULL) {
1.176 daniel 8809: oldsax = ctxt->sax;
8810: ctxt->sax = sax;
8811: if (user_data != NULL)
8812: ctxt->userData = user_data;
1.175 daniel 8813: }
8814: newDoc = xmlNewDoc(BAD_CAST "1.0");
1.176 daniel 8815: if (newDoc == NULL) {
8816: xmlFreeParserCtxt(ctxt);
8817: return(-1);
8818: }
1.175 daniel 8819: if (doc != NULL) {
8820: newDoc->intSubset = doc->intSubset;
8821: newDoc->extSubset = doc->extSubset;
8822: }
1.176 daniel 8823: newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8824: if (newDoc->children == NULL) {
8825: if (sax != NULL)
8826: ctxt->sax = oldsax;
8827: xmlFreeParserCtxt(ctxt);
8828: newDoc->intSubset = NULL;
8829: newDoc->extSubset = NULL;
8830: xmlFreeDoc(newDoc);
8831: return(-1);
8832: }
8833: nodePush(ctxt, newDoc->children);
8834: if (doc == NULL) {
8835: ctxt->myDoc = newDoc;
8836: } else {
8837: ctxt->myDoc = doc;
8838: newDoc->children->doc = doc;
8839: }
8840: ctxt->instate = XML_PARSER_CONTENT;
1.185 daniel 8841: ctxt->depth = depth;
1.176 daniel 8842:
8843: /*
8844: * Doing validity checking on chunk doesn't make sense
8845: */
8846: ctxt->validate = 0;
8847:
1.175 daniel 8848: xmlParseContent(ctxt);
1.176 daniel 8849:
8850: if ((RAW == '<') && (NXT(1) == '/')) {
1.230 veillard 8851: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.176 daniel 8852: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8853: ctxt->sax->error(ctxt->userData,
8854: "chunk is not well balanced\n");
8855: ctxt->wellFormed = 0;
1.180 daniel 8856: ctxt->disableSAX = 1;
1.176 daniel 8857: } else if (RAW != 0) {
1.230 veillard 8858: ctxt->errNo = XML_ERR_EXTRA_CONTENT;
1.176 daniel 8859: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8860: ctxt->sax->error(ctxt->userData,
8861: "extra content at the end of well balanced chunk\n");
8862: ctxt->wellFormed = 0;
1.180 daniel 8863: ctxt->disableSAX = 1;
1.176 daniel 8864: }
8865: if (ctxt->node != newDoc->children) {
1.230 veillard 8866: ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
1.176 daniel 8867: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8868: ctxt->sax->error(ctxt->userData,
8869: "chunk is not well balanced\n");
8870: ctxt->wellFormed = 0;
1.180 daniel 8871: ctxt->disableSAX = 1;
1.176 daniel 8872: }
1.175 daniel 8873:
1.176 daniel 8874: if (!ctxt->wellFormed) {
8875: if (ctxt->errNo == 0)
8876: ret = 1;
8877: else
8878: ret = ctxt->errNo;
8879: } else {
8880: if (list != NULL) {
8881: xmlNodePtr cur;
1.175 daniel 8882:
1.176 daniel 8883: /*
8884: * Return the newly created nodeset after unlinking it from
8885: * they pseudo parent.
8886: */
8887: cur = newDoc->children->children;
8888: *list = cur;
8889: while (cur != NULL) {
8890: cur->parent = NULL;
8891: cur = cur->next;
8892: }
8893: newDoc->children->children = NULL;
8894: }
8895: ret = 0;
1.175 daniel 8896: }
1.176 daniel 8897: if (sax != NULL)
8898: ctxt->sax = oldsax;
1.175 daniel 8899: xmlFreeParserCtxt(ctxt);
8900: newDoc->intSubset = NULL;
8901: newDoc->extSubset = NULL;
1.176 daniel 8902: xmlFreeDoc(newDoc);
1.175 daniel 8903:
1.176 daniel 8904: return(ret);
1.144 daniel 8905: }
8906:
8907: /**
1.229 veillard 8908: * xmlSAXParseEntity:
8909: * @sax: the SAX handler block
8910: * @filename: the filename
8911: *
8912: * parse an XML external entity out of context and build a tree.
8913: * It use the given SAX function block to handle the parsing callback.
8914: * If sax is NULL, fallback to the default DOM tree building routines.
8915: *
8916: * [78] extParsedEnt ::= TextDecl? content
8917: *
8918: * This correspond to a "Well Balanced" chunk
1.144 daniel 8919: *
1.229 veillard 8920: * Returns the resulting document tree
1.144 daniel 8921: */
8922:
1.229 veillard 8923: xmlDocPtr
8924: xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
8925: xmlDocPtr ret;
8926: xmlParserCtxtPtr ctxt;
8927: char *directory = NULL;
8928:
8929: ctxt = xmlCreateFileParserCtxt(filename);
8930: if (ctxt == NULL) {
8931: return(NULL);
8932: }
8933: if (sax != NULL) {
8934: if (ctxt->sax != NULL)
8935: xmlFree(ctxt->sax);
8936: ctxt->sax = sax;
8937: ctxt->userData = NULL;
8938: }
8939:
8940: if ((ctxt->directory == NULL) && (directory == NULL))
8941: directory = xmlParserGetDirectory(filename);
8942:
8943: xmlParseExtParsedEnt(ctxt);
8944:
8945: if (ctxt->wellFormed)
8946: ret = ctxt->myDoc;
8947: else {
8948: ret = NULL;
8949: xmlFreeDoc(ctxt->myDoc);
8950: ctxt->myDoc = NULL;
8951: }
8952: if (sax != NULL)
8953: ctxt->sax = NULL;
8954: xmlFreeParserCtxt(ctxt);
8955:
8956: return(ret);
1.144 daniel 8957: }
8958:
8959: /**
1.229 veillard 8960: * xmlParseEntity:
8961: * @filename: the filename
8962: *
8963: * parse an XML external entity out of context and build a tree.
8964: *
8965: * [78] extParsedEnt ::= TextDecl? content
8966: *
8967: * This correspond to a "Well Balanced" chunk
1.59 daniel 8968: *
1.68 daniel 8969: * Returns the resulting document tree
1.59 daniel 8970: */
8971:
1.69 daniel 8972: xmlDocPtr
1.229 veillard 8973: xmlParseEntity(const char *filename) {
8974: return(xmlSAXParseEntity(NULL, filename));
1.55 daniel 8975: }
8976:
8977: /**
1.181 daniel 8978: * xmlCreateEntityParserCtxt:
8979: * @URL: the entity URL
8980: * @ID: the entity PUBLIC ID
8981: * @base: a posible base for the target URI
8982: *
8983: * Create a parser context for an external entity
8984: * Automatic support for ZLIB/Compress compressed document is provided
8985: * by default if found at compile-time.
8986: *
8987: * Returns the new parser context or NULL
8988: */
8989: xmlParserCtxtPtr
8990: xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
8991: const xmlChar *base) {
8992: xmlParserCtxtPtr ctxt;
8993: xmlParserInputPtr inputStream;
8994: char *directory = NULL;
1.210 veillard 8995: xmlChar *uri;
8996:
1.181 daniel 8997: ctxt = xmlNewParserCtxt();
8998: if (ctxt == NULL) {
8999: return(NULL);
9000: }
9001:
1.210 veillard 9002: uri = xmlBuildURI(URL, base);
9003:
9004: if (uri == NULL) {
9005: inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9006: if (inputStream == NULL) {
9007: xmlFreeParserCtxt(ctxt);
9008: return(NULL);
9009: }
9010:
9011: inputPush(ctxt, inputStream);
9012:
9013: if ((ctxt->directory == NULL) && (directory == NULL))
9014: directory = xmlParserGetDirectory((char *)URL);
9015: if ((ctxt->directory == NULL) && (directory != NULL))
9016: ctxt->directory = directory;
9017: } else {
9018: inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9019: if (inputStream == NULL) {
9020: xmlFreeParserCtxt(ctxt);
9021: return(NULL);
9022: }
1.181 daniel 9023:
1.210 veillard 9024: inputPush(ctxt, inputStream);
1.181 daniel 9025:
1.210 veillard 9026: if ((ctxt->directory == NULL) && (directory == NULL))
9027: directory = xmlParserGetDirectory((char *)uri);
9028: if ((ctxt->directory == NULL) && (directory != NULL))
9029: ctxt->directory = directory;
9030: xmlFree(uri);
9031: }
1.181 daniel 9032:
9033: return(ctxt);
9034: }
9035:
1.229 veillard 9036: /************************************************************************
9037: * *
9038: * Front ends when parsing from a file *
9039: * *
9040: ************************************************************************/
9041:
1.181 daniel 9042: /**
9043: * xmlCreateFileParserCtxt:
1.50 daniel 9044: * @filename: the filename
9045: *
1.69 daniel 9046: * Create a parser context for a file content.
9047: * Automatic support for ZLIB/Compress compressed document is provided
9048: * by default if found at compile-time.
1.50 daniel 9049: *
1.69 daniel 9050: * Returns the new parser context or NULL
1.9 httpng 9051: */
1.69 daniel 9052: xmlParserCtxtPtr
9053: xmlCreateFileParserCtxt(const char *filename)
9054: {
9055: xmlParserCtxtPtr ctxt;
1.40 daniel 9056: xmlParserInputPtr inputStream;
1.91 daniel 9057: xmlParserInputBufferPtr buf;
1.111 daniel 9058: char *directory = NULL;
1.9 httpng 9059:
1.91 daniel 9060: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1.228 veillard 9061: if (buf == NULL) {
9062: return(NULL);
9063: }
1.9 httpng 9064:
1.97 daniel 9065: ctxt = xmlNewParserCtxt();
1.16 daniel 9066: if (ctxt == NULL) {
1.228 veillard 9067: if (xmlDefaultSAXHandler.error != NULL) {
9068: xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9069: }
1.16 daniel 9070: return(NULL);
9071: }
1.97 daniel 9072:
1.96 daniel 9073: inputStream = xmlNewInputStream(ctxt);
1.40 daniel 9074: if (inputStream == NULL) {
1.97 daniel 9075: xmlFreeParserCtxt(ctxt);
1.40 daniel 9076: return(NULL);
9077: }
9078:
1.119 daniel 9079: inputStream->filename = xmlMemStrdup(filename);
1.91 daniel 9080: inputStream->buf = buf;
9081: inputStream->base = inputStream->buf->buffer->content;
9082: inputStream->cur = inputStream->buf->buffer->content;
1.16 daniel 9083:
1.40 daniel 9084: inputPush(ctxt, inputStream);
1.110 daniel 9085: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 9086: directory = xmlParserGetDirectory(filename);
9087: if ((ctxt->directory == NULL) && (directory != NULL))
1.110 daniel 9088: ctxt->directory = directory;
1.106 daniel 9089:
1.69 daniel 9090: return(ctxt);
9091: }
9092:
9093: /**
1.181 daniel 9094: * xmlSAXParseFile:
1.69 daniel 9095: * @sax: the SAX handler block
9096: * @filename: the filename
9097: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9098: * documents
9099: *
9100: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9101: * compressed document is provided by default if found at compile-time.
9102: * It use the given SAX function block to handle the parsing callback.
9103: * If sax is NULL, fallback to the default DOM tree building routines.
9104: *
9105: * Returns the resulting document tree
9106: */
9107:
1.79 daniel 9108: xmlDocPtr
9109: xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
1.69 daniel 9110: int recovery) {
9111: xmlDocPtr ret;
1.229 veillard 9112: xmlParserCtxtPtr ctxt;
9113: char *directory = NULL;
9114:
9115: ctxt = xmlCreateFileParserCtxt(filename);
9116: if (ctxt == NULL) {
9117: return(NULL);
9118: }
9119: if (sax != NULL) {
9120: if (ctxt->sax != NULL)
9121: xmlFree(ctxt->sax);
9122: ctxt->sax = sax;
9123: ctxt->userData = NULL;
9124: }
9125:
9126: if ((ctxt->directory == NULL) && (directory == NULL))
9127: directory = xmlParserGetDirectory(filename);
9128: if ((ctxt->directory == NULL) && (directory != NULL))
9129: ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9130:
9131: xmlParseDocument(ctxt);
9132:
9133: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9134: else {
9135: ret = NULL;
9136: xmlFreeDoc(ctxt->myDoc);
9137: ctxt->myDoc = NULL;
9138: }
9139: if (sax != NULL)
9140: ctxt->sax = NULL;
9141: xmlFreeParserCtxt(ctxt);
9142:
9143: return(ret);
9144: }
9145:
9146: /**
9147: * xmlRecoverDoc:
9148: * @cur: a pointer to an array of xmlChar
9149: *
9150: * parse an XML in-memory document and build a tree.
9151: * In the case the document is not Well Formed, a tree is built anyway
9152: *
9153: * Returns the resulting document tree
9154: */
9155:
9156: xmlDocPtr
9157: xmlRecoverDoc(xmlChar *cur) {
9158: return(xmlSAXParseDoc(NULL, cur, 1));
9159: }
9160:
9161: /**
9162: * xmlParseFile:
9163: * @filename: the filename
9164: *
9165: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9166: * compressed document is provided by default if found at compile-time.
9167: *
9168: * Returns the resulting document tree
9169: */
9170:
9171: xmlDocPtr
9172: xmlParseFile(const char *filename) {
9173: return(xmlSAXParseFile(NULL, filename, 0));
9174: }
9175:
9176: /**
9177: * xmlRecoverFile:
9178: * @filename: the filename
9179: *
9180: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9181: * compressed document is provided by default if found at compile-time.
9182: * In the case the document is not Well Formed, a tree is built anyway
9183: *
9184: * Returns the resulting document tree
9185: */
9186:
9187: xmlDocPtr
9188: xmlRecoverFile(const char *filename) {
9189: return(xmlSAXParseFile(NULL, filename, 1));
9190: }
9191:
9192:
9193: /**
9194: * xmlSetupParserForBuffer:
9195: * @ctxt: an XML parser context
9196: * @buffer: a xmlChar * buffer
9197: * @filename: a file name
9198: *
9199: * Setup the parser context to parse a new buffer; Clears any prior
9200: * contents from the parser context. The buffer parameter must not be
9201: * NULL, but the filename parameter can be
9202: */
9203: void
9204: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9205: const char* filename)
9206: {
9207: xmlParserInputPtr input;
9208:
9209: input = xmlNewInputStream(ctxt);
9210: if (input == NULL) {
9211: perror("malloc");
9212: xmlFree(ctxt);
9213: return;
9214: }
9215:
9216: xmlClearParserCtxt(ctxt);
9217: if (filename != NULL)
9218: input->filename = xmlMemStrdup(filename);
9219: input->base = buffer;
9220: input->cur = buffer;
9221: inputPush(ctxt, input);
9222: }
9223:
9224: /**
9225: * xmlSAXUserParseFile:
9226: * @sax: a SAX handler
9227: * @user_data: The user data returned on SAX callbacks
9228: * @filename: a file name
9229: *
9230: * parse an XML file and call the given SAX handler routines.
9231: * Automatic support for ZLIB/Compress compressed document is provided
9232: *
9233: * Returns 0 in case of success or a error number otherwise
9234: */
9235: int
9236: xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9237: const char *filename) {
9238: int ret = 0;
9239: xmlParserCtxtPtr ctxt;
9240:
9241: ctxt = xmlCreateFileParserCtxt(filename);
9242: if (ctxt == NULL) return -1;
9243: if (ctxt->sax != &xmlDefaultSAXHandler)
9244: xmlFree(ctxt->sax);
9245: ctxt->sax = sax;
9246: if (user_data != NULL)
9247: ctxt->userData = user_data;
9248:
1.16 daniel 9249: xmlParseDocument(ctxt);
1.229 veillard 9250:
9251: if (ctxt->wellFormed)
9252: ret = 0;
1.59 daniel 9253: else {
1.229 veillard 9254: if (ctxt->errNo != 0)
9255: ret = ctxt->errNo;
9256: else
9257: ret = -1;
1.59 daniel 9258: }
1.86 daniel 9259: if (sax != NULL)
1.229 veillard 9260: ctxt->sax = NULL;
1.69 daniel 9261: xmlFreeParserCtxt(ctxt);
1.20 daniel 9262:
1.229 veillard 9263: return ret;
1.20 daniel 9264: }
9265:
1.229 veillard 9266: /************************************************************************
9267: * *
9268: * Front ends when parsing from memory *
9269: * *
9270: ************************************************************************/
1.32 daniel 9271:
1.50 daniel 9272: /**
1.181 daniel 9273: * xmlCreateMemoryParserCtxt:
1.229 veillard 9274: * @buffer: a pointer to a char array
9275: * @size: the size of the array
1.50 daniel 9276: *
1.69 daniel 9277: * Create a parser context for an XML in-memory document.
1.50 daniel 9278: *
1.69 daniel 9279: * Returns the new parser context or NULL
1.20 daniel 9280: */
1.69 daniel 9281: xmlParserCtxtPtr
9282: xmlCreateMemoryParserCtxt(char *buffer, int size) {
1.20 daniel 9283: xmlParserCtxtPtr ctxt;
1.40 daniel 9284: xmlParserInputPtr input;
1.209 veillard 9285: xmlParserInputBufferPtr buf;
1.40 daniel 9286:
1.229 veillard 9287: if (buffer == NULL)
9288: return(NULL);
9289: if (size <= 0)
1.181 daniel 9290: return(NULL);
1.40 daniel 9291:
1.97 daniel 9292: ctxt = xmlNewParserCtxt();
1.181 daniel 9293: if (ctxt == NULL)
1.20 daniel 9294: return(NULL);
1.97 daniel 9295:
1.209 veillard 9296: buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
9297: if (buf == NULL) return(NULL);
9298:
1.96 daniel 9299: input = xmlNewInputStream(ctxt);
1.40 daniel 9300: if (input == NULL) {
1.97 daniel 9301: xmlFreeParserCtxt(ctxt);
1.40 daniel 9302: return(NULL);
9303: }
1.20 daniel 9304:
1.40 daniel 9305: input->filename = NULL;
1.209 veillard 9306: input->buf = buf;
9307: input->base = input->buf->buffer->content;
9308: input->cur = input->buf->buffer->content;
1.20 daniel 9309:
1.40 daniel 9310: inputPush(ctxt, input);
1.69 daniel 9311: return(ctxt);
9312: }
9313:
9314: /**
1.181 daniel 9315: * xmlSAXParseMemory:
1.69 daniel 9316: * @sax: the SAX handler block
9317: * @buffer: an pointer to a char array
1.127 daniel 9318: * @size: the size of the array
9319: * @recovery: work in recovery mode, i.e. tries to read not Well Formed
1.69 daniel 9320: * documents
9321: *
9322: * parse an XML in-memory block and use the given SAX function block
9323: * to handle the parsing callback. If sax is NULL, fallback to the default
9324: * DOM tree building routines.
9325: *
9326: * Returns the resulting document tree
9327: */
9328: xmlDocPtr
9329: xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
9330: xmlDocPtr ret;
9331: xmlParserCtxtPtr ctxt;
9332:
9333: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9334: if (ctxt == NULL) return(NULL);
1.74 daniel 9335: if (sax != NULL) {
9336: ctxt->sax = sax;
9337: ctxt->userData = NULL;
9338: }
1.20 daniel 9339:
9340: xmlParseDocument(ctxt);
1.40 daniel 9341:
1.72 daniel 9342: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 9343: else {
9344: ret = NULL;
1.72 daniel 9345: xmlFreeDoc(ctxt->myDoc);
9346: ctxt->myDoc = NULL;
1.59 daniel 9347: }
1.86 daniel 9348: if (sax != NULL)
9349: ctxt->sax = NULL;
1.69 daniel 9350: xmlFreeParserCtxt(ctxt);
1.16 daniel 9351:
1.9 httpng 9352: return(ret);
1.17 daniel 9353: }
9354:
1.55 daniel 9355: /**
1.181 daniel 9356: * xmlParseMemory:
1.68 daniel 9357: * @buffer: an pointer to a char array
1.55 daniel 9358: * @size: the size of the array
9359: *
9360: * parse an XML in-memory block and build a tree.
9361: *
1.68 daniel 9362: * Returns the resulting document tree
1.55 daniel 9363: */
9364:
9365: xmlDocPtr xmlParseMemory(char *buffer, int size) {
1.59 daniel 9366: return(xmlSAXParseMemory(NULL, buffer, size, 0));
9367: }
9368:
9369: /**
1.181 daniel 9370: * xmlRecoverMemory:
1.68 daniel 9371: * @buffer: an pointer to a char array
1.59 daniel 9372: * @size: the size of the array
9373: *
9374: * parse an XML in-memory block and build a tree.
9375: * In the case the document is not Well Formed, a tree is built anyway
9376: *
1.68 daniel 9377: * Returns the resulting document tree
1.59 daniel 9378: */
9379:
9380: xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
9381: return(xmlSAXParseMemory(NULL, buffer, size, 1));
1.17 daniel 9382: }
9383:
1.123 daniel 9384: /**
9385: * xmlSAXUserParseMemory:
9386: * @sax: a SAX handler
9387: * @user_data: The user data returned on SAX callbacks
9388: * @buffer: an in-memory XML document input
1.127 daniel 9389: * @size: the length of the XML document in bytes
1.123 daniel 9390: *
9391: * A better SAX parsing routine.
9392: * parse an XML in-memory buffer and call the given SAX handler routines.
9393: *
9394: * Returns 0 in case of success or a error number otherwise
9395: */
9396: int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
9397: char *buffer, int size) {
9398: int ret = 0;
9399: xmlParserCtxtPtr ctxt;
1.218 veillard 9400: xmlSAXHandlerPtr oldsax = NULL;
1.123 daniel 9401:
9402: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9403: if (ctxt == NULL) return -1;
1.216 veillard 9404: if (sax != NULL) {
9405: oldsax = ctxt->sax;
9406: ctxt->sax = sax;
9407: }
1.123 daniel 9408: ctxt->userData = user_data;
9409:
9410: xmlParseDocument(ctxt);
9411:
9412: if (ctxt->wellFormed)
9413: ret = 0;
9414: else {
9415: if (ctxt->errNo != 0)
9416: ret = ctxt->errNo;
9417: else
9418: ret = -1;
9419: }
1.216 veillard 9420: if (sax != NULL) {
9421: ctxt->sax = oldsax;
9422: }
1.123 daniel 9423: xmlFreeParserCtxt(ctxt);
9424:
9425: return ret;
9426: }
9427:
1.132 daniel 9428: /**
1.229 veillard 9429: * xmlCreateDocParserCtxt:
9430: * @cur: a pointer to an array of xmlChar
9431: *
9432: * Creates a parser context for an XML in-memory document.
1.132 daniel 9433: *
1.229 veillard 9434: * Returns the new parser context or NULL
1.132 daniel 9435: */
1.229 veillard 9436: xmlParserCtxtPtr
9437: xmlCreateDocParserCtxt(xmlChar *cur) {
9438: int len;
1.132 daniel 9439:
1.229 veillard 9440: if (cur == NULL)
9441: return(NULL);
9442: len = xmlStrlen(cur);
9443: return(xmlCreateMemoryParserCtxt((char *)cur, len));
1.132 daniel 9444: }
1.98 daniel 9445:
1.50 daniel 9446: /**
1.229 veillard 9447: * xmlSAXParseDoc:
9448: * @sax: the SAX handler block
9449: * @cur: a pointer to an array of xmlChar
9450: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9451: * documents
1.50 daniel 9452: *
1.229 veillard 9453: * parse an XML in-memory document and build a tree.
9454: * It use the given SAX function block to handle the parsing callback.
9455: * If sax is NULL, fallback to the default DOM tree building routines.
1.50 daniel 9456: *
1.229 veillard 9457: * Returns the resulting document tree
1.32 daniel 9458: */
9459:
1.229 veillard 9460: xmlDocPtr
9461: xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
9462: xmlDocPtr ret;
9463: xmlParserCtxtPtr ctxt;
9464:
9465: if (cur == NULL) return(NULL);
1.32 daniel 9466:
9467:
1.229 veillard 9468: ctxt = xmlCreateDocParserCtxt(cur);
9469: if (ctxt == NULL) return(NULL);
9470: if (sax != NULL) {
9471: ctxt->sax = sax;
9472: ctxt->userData = NULL;
9473: }
1.32 daniel 9474:
1.229 veillard 9475: xmlParseDocument(ctxt);
9476: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9477: else {
9478: ret = NULL;
9479: xmlFreeDoc(ctxt->myDoc);
9480: ctxt->myDoc = NULL;
9481: }
9482: if (sax != NULL)
9483: ctxt->sax = NULL;
9484: xmlFreeParserCtxt(ctxt);
9485:
9486: return(ret);
1.32 daniel 9487: }
9488:
1.50 daniel 9489: /**
1.229 veillard 9490: * xmlParseDoc:
9491: * @cur: a pointer to an array of xmlChar
1.50 daniel 9492: *
1.229 veillard 9493: * parse an XML in-memory document and build a tree.
1.50 daniel 9494: *
1.229 veillard 9495: * Returns the resulting document tree
1.32 daniel 9496: */
9497:
1.229 veillard 9498: xmlDocPtr
9499: xmlParseDoc(xmlChar *cur) {
9500: return(xmlSAXParseDoc(NULL, cur, 0));
9501: }
1.32 daniel 9502:
9503:
1.229 veillard 9504: /************************************************************************
9505: * *
9506: * Miscellaneous *
9507: * *
9508: ************************************************************************/
1.32 daniel 9509:
1.237 veillard 9510: #ifdef LIBXML_XPATH_ENABLED
9511: #include <libxml/xpath.h>
9512: #endif
9513:
/* Set once xmlInitParser() has run, reset by xmlCleanupParser(). */
static int xmlParserInitialized = 0;

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 *
 * The initialization routines are run only on the first call; later
 * calls do nothing until xmlCleanupParser() resets the flag.
 */

void
xmlInitParser(void) {
    if (!xmlParserInitialized) {
        xmlInitCharEncodingHandlers();
        xmlInitializePredefinedEntities();
        xmlDefaultSAXHandlerInit();
        xmlRegisterDefaultInputCallbacks();
        xmlRegisterDefaultOutputCallbacks();
#ifdef LIBXML_HTML_ENABLED
        htmlInitAutoClose();
        htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
        xmlXPathInit();
#endif
        xmlParserInitialized = 1;
    }
}
9542:
1.50 daniel 9543: /**
1.229 veillard 9544: * xmlCleanupParser:
1.50 daniel 9545: *
1.229 veillard 9546: * Cleanup function for the XML parser. It tries to reclaim all
9547: * parsing related global memory allocated for the parser processing.
9548: * It doesn't deallocate any document related memory. Calling this
9549: * function should not prevent reusing the parser.
1.32 daniel 9550: */
1.229 veillard 9551:
1.55 daniel 9552: void
1.229 veillard 9553: xmlCleanupParser(void) {
1.235 veillard 9554: xmlParserInitialized = 0;
1.229 veillard 9555: xmlCleanupCharEncodingHandlers();
9556: xmlCleanupPredefinedEntities();
1.32 daniel 9557: }
1.220 veillard 9558:
9559: /**
9560: * xmlPedanticParserDefault:
9561: * @val: int 0 or 1
9562: *
9563: * Set and return the previous value for enabling pedantic warnings.
9564: *
9565: * Returns the last value for 0 for no substitution, 1 for substitution.
9566: */
9567:
9568: int
9569: xmlPedanticParserDefault(int val) {
9570: int old = xmlPedanticParserDefaultValue;
9571:
9572: xmlPedanticParserDefaultValue = val;
9573: return(old);
9574: }
1.98 daniel 9575:
9576: /**
1.181 daniel 9577: * xmlSubstituteEntitiesDefault:
1.98 daniel 9578: * @val: int 0 or 1
9579: *
9580: * Set and return the previous value for default entity support.
9581: * Initially the parser always keep entity references instead of substituting
9582: * entity values in the output. This function has to be used to change the
9583: * default parser behaviour
9584: * SAX::subtituteEntities() has to be used for changing that on a file by
9585: * file basis.
9586: *
9587: * Returns the last value for 0 for no substitution, 1 for substitution.
9588: */
9589:
9590: int
9591: xmlSubstituteEntitiesDefault(int val) {
9592: int old = xmlSubstituteEntitiesDefaultValue;
9593:
9594: xmlSubstituteEntitiesDefaultValue = val;
1.180 daniel 9595: return(old);
9596: }
9597:
9598: /**
9599: * xmlKeepBlanksDefault:
9600: * @val: int 0 or 1
9601: *
9602: * Set and return the previous value for default blanks text nodes support.
9603: * The 1.x version of the parser used an heuristic to try to detect
9604: * ignorable white spaces. As a result the SAX callback was generating
9605: * ignorableWhitespace() callbacks instead of characters() one, and when
9606: * using the DOM output text nodes containing those blanks were not generated.
9607: * The 2.x and later version will switch to the XML standard way and
9608: * ignorableWhitespace() are only generated when running the parser in
9609: * validating mode and when the current element doesn't allow CDATA or
9610: * mixed content.
9611: * This function is provided as a way to force the standard behaviour
9612: * on 1.X libs and to switch back to the old mode for compatibility when
9613: * running 1.X client code on 2.X . Upgrade of 1.X code should be done
9614: * by using xmlIsBlankNode() commodity function to detect the "empty"
9615: * nodes generated.
9616: * This value also affect autogeneration of indentation when saving code
9617: * if blanks sections are kept, indentation is not generated.
9618: *
9619: * Returns the last value for 0 for no substitution, 1 for substitution.
9620: */
9621:
9622: int
9623: xmlKeepBlanksDefault(int val) {
9624: int old = xmlKeepBlanksDefaultValue;
9625:
9626: xmlKeepBlanksDefaultValue = val;
9627: xmlIndentTreeOutput = !val;
1.98 daniel 9628: return(old);
9629: }
1.77 daniel 9630:
Webmaster