Annotation of XML/parser.c, revision 1.97
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.15 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.60 daniel 6: * Daniel.Veillard@w3.org
1.1 veillard 7: */
8:
1.26 daniel 9: #ifdef WIN32
10: #define HAVE_FCNTL_H
11: #include <io.h>
12: #else
1.9 httpng 13: #include <config.h>
1.26 daniel 14: #endif
1.1 veillard 15: #include <stdio.h>
16: #include <ctype.h>
1.14 veillard 17: #include <string.h> /* for memset() only */
1.50 daniel 18: #include <stdlib.h>
1.9 httpng 19: #include <sys/stat.h>
20: #ifdef HAVE_FCNTL_H
21: #include <fcntl.h>
22: #endif
1.10 httpng 23: #ifdef HAVE_UNISTD_H
24: #include <unistd.h>
25: #endif
1.20 daniel 26: #ifdef HAVE_ZLIB_H
27: #include <zlib.h>
28: #endif
1.1 veillard 29:
1.14 veillard 30: #include "tree.h"
1.1 veillard 31: #include "parser.h"
1.14 veillard 32: #include "entities.h"
1.75 daniel 33: #include "encoding.h"
1.61 daniel 34: #include "valid.h"
1.69 daniel 35: #include "parserInternals.h"
1.91 daniel 36: #include "xmlIO.h"
1.1 veillard 37:
1.86 daniel 38: const char *xmlParserVersion = LIBXML_VERSION;
39:
1.91 daniel 40: #define XML_MAX_NAMELEN 1000
41:
42: /************************************************************************
43: * *
44: * Input handling functions for progressive parsing *
45: * *
46: ************************************************************************/
47:
48: /* #define DEBUG_INPUT */
49:
50: #define INPUT_CHUNK 50
51:
52: #ifdef DEBUG_INPUT
53: #define CHECK_BUFFER(in) check_buffer(in)
54: #else
55: #define CHECK_BUFFER(in)
56: #endif
57:
58: void check_buffer(xmlParserInputPtr in) {
59: if (in->base != in->buf->buffer->content) {
60: fprintf(stderr, "xmlParserInput: base mismatch problem\n");
61: }
62: if (in->cur < in->base) {
63: fprintf(stderr, "xmlParserInput: cur < base problem\n");
64: }
65: if (in->cur > in->base + in->buf->buffer->use) {
66: fprintf(stderr, "xmlParserInput: cur > base + use problem\n");
67: }
68: fprintf(stderr,"buffer %x : content %x, cur %d, use %d, size %d\n",
69: (int) in, (int) in->buf->buffer->content, in->cur - in->base,
70: in->buf->buffer->use, in->buf->buffer->size);
71: }
72:
73:
74: /**
75: * xmlParserInputRead:
76: * @in: an XML parser input
77: * @len: an indicative size for the lookahead
78: *
79: * This function refresh the input for the parser. It doesn't try to
80: * preserve pointers to the input buffer, and discard already read data
81: *
82: * Returns the number of CHARs read, or -1 in case of error, 0 indicate the
83: * end of this entity
84: */
85: int
86: xmlParserInputRead(xmlParserInputPtr in, int len) {
87: int ret;
88: int used;
89: int index;
90:
91: #ifdef DEBUG_INPUT
92: fprintf(stderr, "Read\n");
93: #endif
94: if (in->buf == NULL) return(-1);
95: if (in->base == NULL) return(-1);
96: if (in->cur == NULL) return(-1);
97: if (in->buf->buffer == NULL) return(-1);
98:
99: CHECK_BUFFER(in);
100:
101: used = in->cur - in->buf->buffer->content;
102: ret = xmlBufferShrink(in->buf->buffer, used);
103: if (ret > 0) {
104: in->cur -= ret;
105: in->consumed += ret;
106: }
107: ret = xmlParserInputBufferRead(in->buf, len);
108: if (in->base != in->buf->buffer->content) {
109: /*
110: * the buffer has been realloced
111: */
112: index = in->cur - in->base;
113: in->base = in->buf->buffer->content;
114: in->cur = &in->buf->buffer->content[index];
115: }
116:
117: CHECK_BUFFER(in);
118:
119: return(ret);
120: }
121:
122: /**
123: * xmlParserInputGrow:
124: * @in: an XML parser input
125: * @len: an indicative size for the lookahead
126: *
127: * This function increase the input for the parser. It tries to
128: * preserve pointers to the input buffer, and keep already read data
129: *
130: * Returns the number of CHARs read, or -1 in case of error, 0 indicate the
131: * end of this entity
132: */
133: int
134: xmlParserInputGrow(xmlParserInputPtr in, int len) {
135: int ret;
136: int index;
137:
138: #ifdef DEBUG_INPUT
139: fprintf(stderr, "Grow\n");
140: #endif
141: if (in->buf == NULL) return(-1);
142: if (in->base == NULL) return(-1);
143: if (in->cur == NULL) return(-1);
144: if (in->buf->buffer == NULL) return(-1);
145:
146: CHECK_BUFFER(in);
147:
148: index = in->cur - in->base;
149: if (in->buf->buffer->use > index + INPUT_CHUNK) {
150:
151: CHECK_BUFFER(in);
152:
153: return(0);
154: }
155: ret = xmlParserInputBufferGrow(in->buf, len);
156: if (in->base != in->buf->buffer->content) {
157: /*
158: * the buffer has been realloced
159: */
160: index = in->cur - in->base;
161: in->base = in->buf->buffer->content;
162: in->cur = &in->buf->buffer->content[index];
163: }
164:
165: CHECK_BUFFER(in);
166:
167: return(ret);
168: }
169:
170: /**
171: * xmlParserInputShrink:
172: * @in: an XML parser input
173: *
174: * This function removes used input for the parser.
175: */
176: void
177: xmlParserInputShrink(xmlParserInputPtr in) {
178: int used;
179: int ret;
180: int index;
181:
182: #ifdef DEBUG_INPUT
183: fprintf(stderr, "Shrink\n");
184: #endif
185: if (in->buf == NULL) return;
186: if (in->base == NULL) return;
187: if (in->cur == NULL) return;
188: if (in->buf->buffer == NULL) return;
189:
190: CHECK_BUFFER(in);
191:
192: used = in->cur - in->buf->buffer->content;
193: if (used > INPUT_CHUNK) {
194: ret = xmlBufferShrink(in->buf->buffer, used);
195: if (ret > 0) {
196: in->cur -= ret;
197: in->consumed += ret;
198: }
199: }
200:
201: CHECK_BUFFER(in);
202:
203: if (in->buf->buffer->use > INPUT_CHUNK) {
204: return;
205: }
206: xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
207: if (in->base != in->buf->buffer->content) {
208: /*
209: * the buffer has been realloced
210: */
211: index = in->cur - in->base;
212: in->base = in->buf->buffer->content;
213: in->cur = &in->buf->buffer->content[index];
214: }
215:
216: CHECK_BUFFER(in);
217: }
218:
1.45 daniel 219: /************************************************************************
220: * *
221: * Parser stacks related functions and macros *
222: * *
223: ************************************************************************/
1.79 daniel 224:
225: int xmlSubstituteEntitiesDefaultValue = 0;
226:
1.1 veillard 227: /*
1.40 daniel 228: * Generic function for accessing stacks in the Parser Context
1.1 veillard 229: */
230:
1.31 daniel 231: #define PUSH_AND_POP(type, name) \
1.72 daniel 232: extern int name##Push(xmlParserCtxtPtr ctxt, type value) { \
1.31 daniel 233: if (ctxt->name##Nr >= ctxt->name##Max) { \
234: ctxt->name##Max *= 2; \
1.40 daniel 235: ctxt->name##Tab = (void *) realloc(ctxt->name##Tab, \
236: ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
237: if (ctxt->name##Tab == NULL) { \
1.31 daniel 238: fprintf(stderr, "realloc failed !\n"); \
239: exit(1); \
240: } \
241: } \
1.40 daniel 242: ctxt->name##Tab[ctxt->name##Nr] = value; \
243: ctxt->name = value; \
244: return(ctxt->name##Nr++); \
1.31 daniel 245: } \
1.72 daniel 246: extern type name##Pop(xmlParserCtxtPtr ctxt) { \
1.69 daniel 247: type ret; \
1.40 daniel 248: if (ctxt->name##Nr <= 0) return(0); \
249: ctxt->name##Nr--; \
1.50 daniel 250: if (ctxt->name##Nr > 0) \
251: ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
252: else \
253: ctxt->name = NULL; \
1.69 daniel 254: ret = ctxt->name##Tab[ctxt->name##Nr]; \
255: ctxt->name##Tab[ctxt->name##Nr] = 0; \
256: return(ret); \
1.31 daniel 257: } \
258:
1.40 daniel 259: PUSH_AND_POP(xmlParserInputPtr, input)
1.41 daniel 260: PUSH_AND_POP(xmlNodePtr, node)
1.40 daniel 261:
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. All of them expect a variable named ctxt (the parser
 * context) to be in scope.
 *
 * Dirty macros, i.e. one need to make assumption on the context to use them
 *
 *   CUR_PTR return the current pointer to the CHAR to be parsed.
 *   CUR     returns the pending ctxt->token if there is one, otherwise the
 *           CHAR at the current position. It should be used internally by
 *           the parser only to compare to ASCII values, otherwise it would
 *           break when running with UTF-8 encoding.
 *   NXT(n)  returns the n'th next CHAR in the current input (the pending
 *           token is not taken into account). Same as CUR it should be
 *           used only to compare on ASCII based substring.
 *   SKIP(n) Skip n CHAR, and must also be used only to skip ASCII defined
 *           strings within the parser. No line/column accounting is done.
 *   SHRINK  drops already parsed data from the current input, popping the
 *           input if it is exhausted and cannot be refilled.
 *   GROW    asks for more data on the current input, popping the input if
 *           it is exhausted and cannot be refilled.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   CURRENT Returns the CHAR at the current position of the input.
 *   NEXT    Skip to the next character: a pending token is simply dropped,
 *           otherwise the position advances (keeping line and col up to
 *           date), exhausted inputs are popped on the fly, and a '%' or
 *           '&' found at the new position is handed to the reference
 *           handlers.
 *   SKIP_BLANKS applies NEXT while CUR is a blank.
 *
 * SHRINK and GROW are wrapped in do { } while (0) so that each behaves as a
 * single statement, e.g. as the body of an unbraced if. NEXT and
 * SKIP_BLANKS keep their historical expansion.
 */

#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define SKIP(val) (ctxt->input->cur += (val))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

#define SHRINK do {							\
    xmlParserInputShrink(ctxt->input);					\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
} while (0)

#define GROW do {							\
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
} while (0)

#define SKIP_BLANKS 							\
    while (IS_BLANK(CUR)) NEXT

#ifndef USE_UTF_8
#define CURRENT (*ctxt->input->cur)
#define NEXT {								\
    if (ctxt->token != 0) ctxt->token = 0;				\
    else {								\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {		\
            xmlPopInput(ctxt);						\
    } else {								\
        if (*(ctxt->input->cur) == '\n') {				\
            ctxt->input->line++; ctxt->input->col = 1;			\
        } else ctxt->input->col++;					\
        ctxt->input->cur++;						\
        if (*ctxt->input->cur == 0)					\
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);	\
}}

#else
#endif
1.42 daniel 327:
1.97 ! daniel 328: /************************************************************************
! 329: * *
! 330: * Commodity functions to handle entities processing *
! 331: * *
! 332: ************************************************************************/
1.40 daniel 333:
1.50 daniel 334: /**
335: * xmlPopInput:
336: * @ctxt: an XML parser context
337: *
1.40 daniel 338: * xmlPopInput: the current input pointed by ctxt->input came to an end
339: * pop it and return the next char.
1.45 daniel 340: *
1.68 daniel 341: * Returns the current CHAR in the parser context
1.40 daniel 342: */
1.55 daniel 343: CHAR
344: xmlPopInput(xmlParserCtxtPtr ctxt) {
1.40 daniel 345: if (ctxt->inputNr == 1) return(0); /* End of main Input */
1.69 daniel 346: xmlFreeInputStream(inputPop(ctxt));
1.97 ! daniel 347: if ((*ctxt->input->cur == 0) &&
! 348: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
! 349: return(xmlPopInput(ctxt));
1.40 daniel 350: return(CUR);
351: }
352:
1.50 daniel 353: /**
354: * xmlPushInput:
355: * @ctxt: an XML parser context
356: * @input: an XML parser input fragment (entity, XML fragment ...).
357: *
1.40 daniel 358: * xmlPushInput: switch to a new input stream which is stacked on top
359: * of the previous one(s).
360: */
1.55 daniel 361: void
362: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1.40 daniel 363: if (input == NULL) return;
364: inputPush(ctxt, input);
365: }
366:
1.50 daniel 367: /**
1.69 daniel 368: * xmlFreeInputStream:
369: * @input: an xmlParserInputPtr
370: *
371: * Free up an input stream.
372: */
373: void
374: xmlFreeInputStream(xmlParserInputPtr input) {
375: if (input == NULL) return;
376:
377: if (input->filename != NULL) free((char *) input->filename);
1.94 daniel 378: if (input->directory != NULL) free((char *) input->directory);
1.69 daniel 379: if ((input->free != NULL) && (input->base != NULL))
380: input->free((char *) input->base);
1.93 veillard 381: if (input->buf != NULL)
382: xmlFreeParserInputBuffer(input->buf);
1.69 daniel 383: memset(input, -1, sizeof(xmlParserInput));
384: free(input);
385: }
386:
387: /**
1.96 daniel 388: * xmlNewInputStream:
389: * @ctxt: an XML parser context
390: *
391: * Create a new input stream structure
392: * Returns the new input stream or NULL
393: */
394: xmlParserInputPtr
395: xmlNewInputStream(xmlParserCtxtPtr ctxt) {
396: xmlParserInputPtr input;
397:
398: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
399: if (input == NULL) {
400: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
401: ctxt->sax->error(ctxt->userData, "malloc: couldn't allocate a new input stream\n");
402: return(NULL);
403: }
404: input->filename = NULL;
405: input->directory = NULL;
406: input->base = NULL;
407: input->cur = NULL;
408: input->buf = NULL;
409: input->line = 1;
410: input->col = 1;
411: input->buf = NULL;
412: input->free = NULL;
413: input->consumed = 0;
414: return(input);
415: }
416:
417: /**
1.50 daniel 418: * xmlNewEntityInputStream:
419: * @ctxt: an XML parser context
420: * @entity: an Entity pointer
421: *
1.82 daniel 422: * Create a new input stream based on an xmlEntityPtr
1.68 daniel 423: * Returns the new input stream
1.45 daniel 424: */
1.50 daniel 425: xmlParserInputPtr
426: xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 427: xmlParserInputPtr input;
428:
429: if (entity == NULL) {
1.55 daniel 430: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 431: ctxt->sax->error(ctxt->userData,
1.45 daniel 432: "internal: xmlNewEntityInputStream entity = NULL\n");
1.50 daniel 433: return(NULL);
1.45 daniel 434: }
435: if (entity->content == NULL) {
1.55 daniel 436: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 437: ctxt->sax->error(ctxt->userData,
1.45 daniel 438: "internal: xmlNewEntityInputStream entity->input = NULL\n");
1.50 daniel 439: return(NULL);
1.45 daniel 440: }
1.96 daniel 441: input = xmlNewInputStream(ctxt);
1.45 daniel 442: if (input == NULL) {
1.50 daniel 443: return(NULL);
1.45 daniel 444: }
445: input->filename = entity->SystemID; /* TODO !!! char <- CHAR */
446: input->base = entity->content;
447: input->cur = entity->content;
1.50 daniel 448: return(input);
1.45 daniel 449: }
450:
1.59 daniel 451: /**
452: * xmlNewStringInputStream:
453: * @ctxt: an XML parser context
1.96 daniel 454: * @buffer: an memory buffer
1.59 daniel 455: *
456: * Create a new input stream based on a memory buffer.
1.68 daniel 457: * Returns the new input stream
1.59 daniel 458: */
459: xmlParserInputPtr
1.96 daniel 460: xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const CHAR *buffer) {
1.59 daniel 461: xmlParserInputPtr input;
462:
1.96 daniel 463: if (buffer == NULL) {
1.59 daniel 464: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 465: ctxt->sax->error(ctxt->userData,
1.59 daniel 466: "internal: xmlNewStringInputStream string = NULL\n");
467: return(NULL);
468: }
1.96 daniel 469: input = xmlNewInputStream(ctxt);
1.59 daniel 470: if (input == NULL) {
471: return(NULL);
472: }
1.96 daniel 473: input->base = buffer;
474: input->cur = buffer;
1.59 daniel 475: return(input);
476: }
477:
1.76 daniel 478: /**
479: * xmlNewInputFromFile:
480: * @ctxt: an XML parser context
481: * @filename: the filename to use as entity
482: *
483: * Create a new input stream based on a file.
484: *
485: * Returns the new input stream or NULL in case of error
486: */
487: xmlParserInputPtr
1.79 daniel 488: xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1.91 daniel 489: xmlParserInputBufferPtr buf;
1.76 daniel 490: xmlParserInputPtr inputStream;
1.94 daniel 491: const char *directory = NULL;
1.76 daniel 492:
1.96 daniel 493: if (ctxt == NULL) return(NULL);
1.91 daniel 494: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1.94 daniel 495: if (buf == NULL) {
496: if ((ctxt->input != NULL) && (ctxt->input->directory != NULL)) {
497: char name[1024];
498: #ifdef WIN32
499: sprintf(name, "%s\\%s", ctxt->input->directory, filename);
500: #else
501: sprintf(name, "%s/%s", ctxt->input->directory, filename);
502: #endif
503: buf = xmlParserInputBufferCreateFilename(name,
504: XML_CHAR_ENCODING_NONE);
505: if (buf == NULL)
506: return(NULL);
507: directory = strdup(ctxt->input->directory);
508: } else
509: return(NULL);
510: }
511: if (directory == NULL)
512: directory = xmlParserGetDirectory(filename);
1.76 daniel 513:
1.96 daniel 514: inputStream = xmlNewInputStream(ctxt);
1.76 daniel 515: if (inputStream == NULL) {
1.96 daniel 516: if (directory != NULL) free((char *) directory);
1.76 daniel 517: return(NULL);
518: }
519:
520: inputStream->filename = strdup(filename);
1.94 daniel 521: inputStream->directory = directory;
1.91 daniel 522: inputStream->buf = buf;
1.76 daniel 523:
1.91 daniel 524: inputStream->base = inputStream->buf->buffer->content;
525: inputStream->cur = inputStream->buf->buffer->content;
1.76 daniel 526: return(inputStream);
527: }
528:
1.77 daniel 529: /************************************************************************
530: * *
1.97 ! daniel 531: * Commodity functions to handle parser contexts *
! 532: * *
! 533: ************************************************************************/
! 534:
! 535: /**
! 536: * xmlInitParserCtxt:
! 537: * @ctxt: an XML parser context
! 538: *
! 539: * Initialize a parser context
! 540: */
! 541:
! 542: void
! 543: xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
! 544: {
! 545: xmlSAXHandler *sax;
! 546:
! 547: sax = (xmlSAXHandler *) malloc(sizeof(xmlSAXHandler));
! 548: if (sax == NULL) {
! 549: fprintf(stderr, "xmlInitParserCtxt: out of memory\n");
! 550: }
! 551:
! 552: /* Allocate the Input stack */
! 553: ctxt->inputTab = (xmlParserInputPtr *) malloc(5 * sizeof(xmlParserInputPtr));
! 554: ctxt->inputNr = 0;
! 555: ctxt->inputMax = 5;
! 556: ctxt->input = NULL;
! 557: ctxt->version = NULL;
! 558: ctxt->encoding = NULL;
! 559: ctxt->standalone = -1;
! 560: ctxt->html = 0;
! 561: ctxt->instate = XML_PARSER_PROLOG;
! 562: ctxt->token = 0;
! 563:
! 564: /* Allocate the Node stack */
! 565: ctxt->nodeTab = (xmlNodePtr *) malloc(10 * sizeof(xmlNodePtr));
! 566: ctxt->nodeNr = 0;
! 567: ctxt->nodeMax = 10;
! 568: ctxt->node = NULL;
! 569:
! 570: if (sax == NULL) ctxt->sax = &xmlDefaultSAXHandler;
! 571: else {
! 572: ctxt->sax = sax;
! 573: memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
! 574: }
! 575: ctxt->userData = ctxt;
! 576: ctxt->myDoc = NULL;
! 577: ctxt->wellFormed = 1;
! 578: ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
! 579: ctxt->record_info = 0;
! 580: xmlInitNodeInfoSeq(&ctxt->node_seq);
! 581: }
! 582:
! 583: /**
! 584: * xmlFreeParserCtxt:
! 585: * @ctxt: an XML parser context
! 586: *
! 587: * Free all the memory used by a parser context. However the parsed
! 588: * document in ctxt->myDoc is not freed.
! 589: */
! 590:
! 591: void
! 592: xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
! 593: {
! 594: xmlParserInputPtr input;
! 595:
! 596: if (ctxt == NULL) return;
! 597:
! 598: while ((input = inputPop(ctxt)) != NULL) {
! 599: xmlFreeInputStream(input);
! 600: }
! 601:
! 602: if (ctxt->nodeTab != NULL) free(ctxt->nodeTab);
! 603: if (ctxt->inputTab != NULL) free(ctxt->inputTab);
! 604: if (ctxt->version != NULL) free((char *) ctxt->version);
! 605: if (ctxt->encoding != NULL) free((char *) ctxt->encoding);
! 606: if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
! 607: free(ctxt->sax);
! 608: free(ctxt);
! 609: }
! 610:
! 611: /**
! 612: * xmlNewParserCtxt:
! 613: *
! 614: * Allocate and initialize a new parser context.
! 615: *
! 616: * Returns the xmlParserCtxtPtr or NULL
! 617: */
! 618:
! 619: xmlParserCtxtPtr
! 620: xmlNewParserCtxt()
! 621: {
! 622: xmlParserCtxtPtr ctxt;
! 623:
! 624: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
! 625: if (ctxt == NULL) {
! 626: fprintf(stderr, "xmlNewParserCtxt : cannot allocate context\n");
! 627: perror("malloc");
! 628: return(NULL);
! 629: }
! 630: xmlInitParserCtxt(ctxt);
! 631: return(ctxt);
! 632: }
! 633:
! 634: /**
! 635: * xmlClearParserCtxt:
! 636: * @ctxt: an XML parser context
! 637: *
! 638: * Clear (release owned resources) and reinitialize a parser context
! 639: */
! 640:
! 641: void
! 642: xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
! 643: {
! 644: xmlClearNodeInfoSeq(&ctxt->node_seq);
! 645: xmlInitParserCtxt(ctxt);
! 646: }
! 647:
! 648: /************************************************************************
! 649: * *
1.77 daniel 650: * Commodity functions to handle entities *
651: * *
652: ************************************************************************/
653:
1.97 ! daniel 654: void xmlParserHandleReference(xmlParserCtxtPtr ctxt);
! 655: void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
! 656:
! 657: /**
! 658: * xmlParseCharRef:
! 659: * @ctxt: an XML parser context
! 660: *
! 661: * parse Reference declarations
! 662: *
! 663: * [66] CharRef ::= '&#' [0-9]+ ';' |
! 664: * '&#x' [0-9a-fA-F]+ ';'
! 665: *
! 666: * Returns the value parsed (as an int)
1.77 daniel 667: */
1.97 ! daniel 668: int
! 669: xmlParseCharRef(xmlParserCtxtPtr ctxt) {
! 670: int val = 0;
! 671:
! 672: if ((CUR == '&') && (NXT(1) == '#') &&
! 673: (NXT(2) == 'x')) {
! 674: SKIP(3);
! 675: while (CUR != ';') {
! 676: if ((CUR >= '0') && (CUR <= '9'))
! 677: val = val * 16 + (CUR - '0');
! 678: else if ((CUR >= 'a') && (CUR <= 'f'))
! 679: val = val * 16 + (CUR - 'a') + 10;
! 680: else if ((CUR >= 'A') && (CUR <= 'F'))
! 681: val = val * 16 + (CUR - 'A') + 10;
! 682: else {
! 683: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 684: ctxt->sax->error(ctxt->userData,
! 685: "xmlParseCharRef: invalid hexadecimal value\n");
! 686: ctxt->wellFormed = 0;
! 687: val = 0;
! 688: break;
! 689: }
! 690: NEXT;
! 691: }
! 692: if (CUR == ';')
! 693: NEXT;
! 694: } else if ((CUR == '&') && (NXT(1) == '#')) {
! 695: SKIP(2);
! 696: while (CUR != ';') {
! 697: if ((CUR >= '0') && (CUR <= '9'))
! 698: val = val * 10 + (CUR - '0');
! 699: else {
! 700: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 701: ctxt->sax->error(ctxt->userData,
! 702: "xmlParseCharRef: invalid decimal value\n");
! 703: ctxt->wellFormed = 0;
! 704: val = 0;
! 705: break;
! 706: }
! 707: NEXT;
! 708: }
! 709: if (CUR == ';')
! 710: NEXT;
! 711: } else {
! 712: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 713: ctxt->sax->error(ctxt->userData, "xmlParseCharRef: invalid value\n");
! 714: ctxt->wellFormed = 0;
! 715: }
! 716: /*
! 717: * Check the value IS_CHAR ...
! 718: */
! 719: if (IS_CHAR(val)) {
! 720: return(val);
! 721: } else {
! 722: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 723: ctxt->sax->error(ctxt->userData, "xmlParseCharRef: invalid CHAR value %d\n",
! 724: val);
! 725: ctxt->wellFormed = 0;
! 726: }
! 727: return(0);
1.77 daniel 728: }
729:
1.96 daniel 730: /**
731: * xmlParserHandleReference:
732: * @ctxt: the parser context
733: *
1.97 ! daniel 734: * [67] Reference ::= EntityRef | CharRef
! 735: *
1.96 daniel 736: * [68] EntityRef ::= '&' Name ';'
737: *
1.97 ! daniel 738: * [66] CharRef ::= '&#' [0-9]+ ';' |
! 739: * '&#x' [0-9a-fA-F]+ ';'
! 740: *
1.96 daniel 741: * A PEReference may have been detectect in the current input stream
742: * the handling is done accordingly to
743: * http://www.w3.org/TR/REC-xml#entproc
744: */
745: void
746: xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
1.97 ! daniel 747: xmlParserInputPtr input;
! 748: CHAR *name;
! 749: xmlEntityPtr ent = NULL;
! 750:
! 751: if (CUR != '&') return;
! 752: GROW;
! 753: if ((CUR == '&') && (NXT(1) == '#')) {
! 754: switch(ctxt->instate) {
! 755: case XML_PARSER_COMMENT:
! 756: return;
! 757: case XML_PARSER_EOF:
! 758: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 759: ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
! 760: ctxt->wellFormed = 0;
! 761: return;
! 762: case XML_PARSER_PROLOG:
! 763: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 764: ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
! 765: ctxt->wellFormed = 0;
! 766: return;
! 767: case XML_PARSER_EPILOG:
! 768: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 769: ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
! 770: ctxt->wellFormed = 0;
! 771: return;
! 772: case XML_PARSER_DTD:
! 773: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 774: ctxt->sax->error(ctxt->userData,
! 775: "CharRef are forbiden in DTDs!\n");
! 776: ctxt->wellFormed = 0;
! 777: return;
! 778: case XML_PARSER_ENTITY_DECL:
! 779: /* we just ignore it there */
! 780: return;
! 781: case XML_PARSER_ENTITY_VALUE:
! 782: /*
! 783: * NOTE: in the case of entity values, we don't do the
! 784: * substitution here since we need the litteral
! 785: * entity value to be able to save the internal
! 786: * subset of the document.
! 787: * This will be handled by xmlDecodeEntities
! 788: */
! 789: return;
! 790: case XML_PARSER_CONTENT:
! 791: case XML_PARSER_ATTRIBUTE_VALUE:
! 792: /* TODO this may not be Ok for UTF-8, multibyte sequence */
! 793: ctxt->token = xmlParseCharRef(ctxt);
! 794: return;
! 795: }
! 796: return;
! 797: }
! 798:
! 799: switch(ctxt->instate) {
! 800: case XML_PARSER_COMMENT:
! 801: return;
! 802: case XML_PARSER_EOF:
! 803: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 804: ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
! 805: ctxt->wellFormed = 0;
! 806: return;
! 807: case XML_PARSER_PROLOG:
! 808: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 809: ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
! 810: ctxt->wellFormed = 0;
! 811: return;
! 812: case XML_PARSER_EPILOG:
! 813: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 814: ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
! 815: ctxt->wellFormed = 0;
! 816: return;
! 817: case XML_PARSER_ENTITY_VALUE:
! 818: /*
! 819: * NOTE: in the case of entity values, we don't do the
! 820: * substitution here since we need the litteral
! 821: * entity value to be able to save the internal
! 822: * subset of the document.
! 823: * This will be handled by xmlDecodeEntities
! 824: */
! 825: return;
! 826: case XML_PARSER_ATTRIBUTE_VALUE:
! 827: /*
! 828: * NOTE: in the case of attributes values, we don't do the
! 829: * substitution here unless we are in a mode where
! 830: * the parser is explicitely asked to substitute
! 831: * entities. The SAX callback is called with values
! 832: * without entity substitution.
! 833: * This will then be handled by xmlDecodeEntities
! 834: */
! 835: if (ctxt->replaceEntities == 0) return;
! 836: break;
! 837: case XML_PARSER_ENTITY_DECL:
! 838: /*
! 839: * we just ignore it there
! 840: * the substitution will be done once the entity is referenced
! 841: */
! 842: return;
! 843: case XML_PARSER_DTD:
! 844: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 845: ctxt->sax->error(ctxt->userData,
! 846: "Entity references are forbiden in DTDs!\n");
! 847: ctxt->wellFormed = 0;
! 848: return;
! 849: case XML_PARSER_CONTENT:
! 850: /*
! 851: * NOTE: in the case of attributes values, we don't do the
! 852: * substitution here unless we are in a mode where
! 853: * the parser is explicitely asked to substitute
! 854: * entities. The SAX callback is called with values
! 855: * without entity substitution.
! 856: * This will then be handled by xmlDecodeEntities
! 857: */
! 858: if (ctxt->replaceEntities == 0) return;
! 859: break;
! 860: }
! 861:
! 862: NEXT;
! 863: name = xmlScanName(ctxt);
! 864: if (name == NULL) {
! 865: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 866: ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
! 867: ctxt->wellFormed = 0;
! 868: ctxt->token = '&';
! 869: return;
! 870: }
! 871: if (NXT(xmlStrlen(name)) != ';') {
! 872: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 873: ctxt->sax->error(ctxt->userData,
! 874: "Entity reference: ';' expected\n");
! 875: ctxt->wellFormed = 0;
! 876: ctxt->token = '&';
! 877: return;
! 878: }
! 879: SKIP(xmlStrlen(name) + 1);
! 880: if (ctxt->sax != NULL) {
! 881: if (ctxt->sax->getEntity != NULL)
! 882: ent = ctxt->sax->getEntity(ctxt->userData, name);
! 883: }
! 884: if (ent == NULL)
! 885: ent = xmlGetPredefinedEntity(name);
! 886: if (ent == NULL) {
! 887: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 888: ctxt->sax->error(ctxt->userData,
! 889: "Entity reference: entity %s undefined\n");
! 890: ctxt->wellFormed = 0;
! 891: return;
! 892: }
! 893: if (ent->type == XML_INTERNAL_PREDEFINED_ENTITY) {
! 894: ctxt->token = ent->content[0];
! 895: return;
! 896: }
! 897: input = xmlNewEntityInputStream(ctxt, ent);
! 898: xmlPushInput(ctxt, input);
1.96 daniel 899: return;
900: }
901:
902: /**
903: * xmlParserHandlePEReference:
904: * @ctxt: the parser context
905: *
906: * [69] PEReference ::= '%' Name ';'
907: *
908: * A PEReference may have been detectect in the current input stream
909: * the handling is done accordingly to
910: * http://www.w3.org/TR/REC-xml#entproc
911: * i.e.
912: * - Included in literal in entity values
913: * - Included as Paraemeter Entity reference within DTDs
914: */
915: void
916: xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
917: CHAR *name;
918: xmlEntityPtr entity = NULL;
919: xmlParserInputPtr input;
920:
921: switch(ctxt->instate) {
1.97 ! daniel 922: case XML_PARSER_COMMENT:
! 923: return;
1.96 daniel 924: case XML_PARSER_EOF:
925: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
926: ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
927: ctxt->wellFormed = 0;
928: return;
929: case XML_PARSER_PROLOG:
930: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
931: ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
932: ctxt->wellFormed = 0;
933: return;
1.97 ! daniel 934: case XML_PARSER_ENTITY_DECL:
1.96 daniel 935: case XML_PARSER_CONTENT:
936: case XML_PARSER_ATTRIBUTE_VALUE:
937: /* we just ignore it there */
938: return;
939: case XML_PARSER_EPILOG:
940: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.97 ! daniel 941: ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
1.96 daniel 942: ctxt->wellFormed = 0;
943: return;
1.97 ! daniel 944: case XML_PARSER_ENTITY_VALUE:
! 945: /*
! 946: * NOTE: in the case of entity values, we don't do the
! 947: * substitution here since we need the litteral
! 948: * entity value to be able to save the internal
! 949: * subset of the document.
! 950: * This will be handled by xmlDecodeEntities
! 951: */
! 952: return;
1.96 daniel 953: case XML_PARSER_DTD:
954: }
955:
956: NEXT;
957: name = xmlParseName(ctxt);
958: if (name == NULL) {
959: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
960: ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
961: ctxt->wellFormed = 0;
962: } else {
963: if (CUR == ';') {
964: NEXT;
965: if ((ctxt->sax != NULL) && (ctxt->sax->getEntity != NULL))
966: entity = ctxt->sax->getEntity(ctxt->userData, name);
967: if (entity == NULL) {
968: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
969: ctxt->sax->warning(ctxt->userData,
970: "xmlParsePEReference: %%%s; not found\n", name);
971: } else {
972: if ((entity->type == XML_INTERNAL_PARAMETER_ENTITY) ||
973: (entity->type == XML_EXTERNAL_PARAMETER_ENTITY)) {
974: /*
975: * TODO !!!! handle the extra spaces added before and after
976: * c.f. http://www.w3.org/TR/REC-xml#as-PE
977: * TODO !!!! Avoid quote processing in parameters value
978: * c.f. http://www.w3.org/TR/REC-xml#inliteral
979: */
980: input = xmlNewEntityInputStream(ctxt, entity);
981: xmlPushInput(ctxt, input);
982: } else {
983: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
984: ctxt->sax->error(ctxt->userData,
985: "xmlHandlePEReference: %s is not a parameter entity\n",
986: name);
987: ctxt->wellFormed = 0;
988: }
989: }
990: } else {
991: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
992: ctxt->sax->error(ctxt->userData,
993: "xmlHandlePEReference: expecting ';'\n");
994: ctxt->wellFormed = 0;
995: }
1.97 ! daniel 996: free(name);
! 997: }
! 998: }
! 999:
/*
 * growBuffer:
 *
 * Macro used to grow the current buffer: it doubles the size of a
 * heap-allocated CHAR buffer.
 *
 * @buffer names a CHAR * variable; a companion integer variable called
 * <buffer>_size must be in scope, and both are updated in place.
 * If realloc() fails the error is reported with perror() and the process
 * is terminated with exit(1).
 * Since realloc() may move the block, callers have to recompute any
 * pointer they keep into the buffer after invoking the macro.
 */
#define growBuffer(buffer) {                                            \
    buffer##_size *= 2;                                                 \
    buffer = (CHAR *) realloc(buffer, buffer##_size * sizeof(CHAR));    \
    if (buffer == NULL) {                                               \
        perror("realloc failed");                                       \
        exit(1);                                                        \
    }                                                                   \
}
1.77 daniel 1011:
1012: /**
1013: * xmlDecodeEntities:
1014: * @ctxt: the parser context
1015: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1016: * @len: the len to decode (in bytes !), -1 for no size limit
1017: * @end: an end marker CHAR, 0 if none
1018: * @end2: an end marker CHAR, 0 if none
1019: * @end3: an end marker CHAR, 0 if none
1020: *
1021: * [67] Reference ::= EntityRef | CharRef
1022: *
1023: * [69] PEReference ::= '%' Name ';'
1024: *
1025: * Returns A newly allocated string with the substitution done. The caller
1026: * must deallocate it !
1027: */
1028: CHAR *
1029: xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
1030: CHAR end, CHAR end2, CHAR end3) {
1031: CHAR *buffer = NULL;
1.78 daniel 1032: int buffer_size = 0;
1.77 daniel 1033: CHAR *out = NULL;
1.78 daniel 1034:
1.97 ! daniel 1035: CHAR *current = NULL;
1.77 daniel 1036: xmlEntityPtr ent;
1.91 daniel 1037: int nbchars = 0;
1.77 daniel 1038: unsigned int max = (unsigned int) len;
1.97 ! daniel 1039: CHAR cur;
1.77 daniel 1040:
1041: /*
1042: * allocate a translation buffer.
1043: */
1044: buffer_size = 1000;
1045: buffer = (CHAR *) malloc(buffer_size * sizeof(CHAR));
1046: if (buffer == NULL) {
1047: perror("xmlDecodeEntities: malloc failed");
1048: return(NULL);
1049: }
1050: out = buffer;
1051:
1.78 daniel 1052: /*
1053: * Ok loop until we reach one of the ending char or a size limit.
1054: */
1.97 ! daniel 1055: cur = CUR;
! 1056: while ((nbchars < max) && (cur != end) &&
! 1057: (cur != end2) && (cur != end3)) {
1.77 daniel 1058:
1.97 ! daniel 1059: if (cur == '&' && (what & XML_SUBSTITUTE_REF)) {
1.77 daniel 1060: if (NXT(1) == '#') {
1061: int val = xmlParseCharRef(ctxt);
1062: *out++ = val;
1.97 ! daniel 1063: nbchars += 3;
1.77 daniel 1064: } else {
1065: ent = xmlParseEntityRef(ctxt);
1066: if (ent != NULL) {
1.97 ! daniel 1067: current = ent->content;
! 1068: while (*current != 0) {
! 1069: *out++ = *current++;
1.77 daniel 1070: if (out - buffer > buffer_size - 100) {
1071: int index = out - buffer;
1072:
1.78 daniel 1073: growBuffer(buffer);
1.77 daniel 1074: out = &buffer[index];
1075: }
1076: }
1.91 daniel 1077: nbchars += 3 + xmlStrlen(ent->name);
1.77 daniel 1078: }
1079: }
1.97 ! daniel 1080: } else if (cur == '%' && (what & XML_SUBSTITUTE_PEREF)) {
! 1081: /*
1.77 daniel 1082: * a PEReference induce to switch the entity flow,
1083: * we break here to flush the current set of chars
1084: * parsed if any. We will be called back later.
1.97 ! daniel 1085: */
1.91 daniel 1086: if (nbchars != 0) break;
1.77 daniel 1087:
1088: xmlParsePEReference(ctxt);
1.79 daniel 1089:
1.97 ! daniel 1090: /*
1.79 daniel 1091: * Pop-up of finished entities.
1.97 ! daniel 1092: */
1.79 daniel 1093: while ((CUR == 0) && (ctxt->inputNr > 1))
1094: xmlPopInput(ctxt);
1095:
1.78 daniel 1096: break;
1.77 daniel 1097: } else {
1098: /* TODO: invalid for UTF-8 , use COPY(out); */
1.97 ! daniel 1099: *out++ = cur;
1.91 daniel 1100: nbchars++;
1.86 daniel 1101: if (out - buffer > buffer_size - 100) {
1102: int index = out - buffer;
1103:
1104: growBuffer(buffer);
1105: out = &buffer[index];
1106: }
1.77 daniel 1107: NEXT;
1108: }
1.97 ! daniel 1109: cur = CUR;
1.77 daniel 1110: }
1111: *out++ = 0;
1112: return(buffer);
1113: }
1114:
1.1 veillard 1115:
1.28 daniel 1116: /************************************************************************
1117: * *
1.75 daniel 1118: * Commodity functions to handle encodings *
1119: * *
1120: ************************************************************************/
1121:
1122: /**
1123: * xmlSwitchEncoding:
1124: * @ctxt: the parser context
1125: * @len: the len of @cur
1126: *
1127: * change the input functions when discovering the character encoding
1128: * of a given entity.
1129: *
1130: */
1131: void
1132: xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1133: {
1134: switch (enc) {
1135: case XML_CHAR_ENCODING_ERROR:
1136: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1137: ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1138: ctxt->wellFormed = 0;
1139: break;
1140: case XML_CHAR_ENCODING_NONE:
1141: /* let's assume it's UTF-8 without the XML decl */
1142: return;
1143: case XML_CHAR_ENCODING_UTF8:
1144: /* default encoding, no conversion should be needed */
1145: return;
1146: case XML_CHAR_ENCODING_UTF16LE:
1147: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1148: ctxt->sax->error(ctxt->userData,
1149: "char encoding UTF16 little endian not supported\n");
1150: break;
1151: case XML_CHAR_ENCODING_UTF16BE:
1152: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1153: ctxt->sax->error(ctxt->userData,
1154: "char encoding UTF16 big endian not supported\n");
1155: break;
1156: case XML_CHAR_ENCODING_UCS4LE:
1157: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1158: ctxt->sax->error(ctxt->userData,
1159: "char encoding USC4 little endian not supported\n");
1160: break;
1161: case XML_CHAR_ENCODING_UCS4BE:
1162: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1163: ctxt->sax->error(ctxt->userData,
1164: "char encoding USC4 big endian not supported\n");
1165: break;
1166: case XML_CHAR_ENCODING_EBCDIC:
1167: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1168: ctxt->sax->error(ctxt->userData,
1169: "char encoding EBCDIC not supported\n");
1170: break;
1171: case XML_CHAR_ENCODING_UCS4_2143:
1172: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1173: ctxt->sax->error(ctxt->userData,
1174: "char encoding UCS4 2143 not supported\n");
1175: break;
1176: case XML_CHAR_ENCODING_UCS4_3412:
1177: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1178: ctxt->sax->error(ctxt->userData,
1179: "char encoding UCS4 3412 not supported\n");
1180: break;
1181: case XML_CHAR_ENCODING_UCS2:
1182: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1183: ctxt->sax->error(ctxt->userData,
1184: "char encoding UCS2 not supported\n");
1185: break;
1186: case XML_CHAR_ENCODING_8859_1:
1187: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1188: ctxt->sax->error(ctxt->userData,
1189: "char encoding ISO_8859_1 ISO Latin 1 not supported\n");
1190: break;
1191: case XML_CHAR_ENCODING_8859_2:
1192: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1193: ctxt->sax->error(ctxt->userData,
1194: "char encoding ISO_8859_2 ISO Latin 2 not supported\n");
1195: break;
1196: case XML_CHAR_ENCODING_8859_3:
1197: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1198: ctxt->sax->error(ctxt->userData,
1199: "char encoding ISO_8859_3 not supported\n");
1200: break;
1201: case XML_CHAR_ENCODING_8859_4:
1202: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1203: ctxt->sax->error(ctxt->userData,
1204: "char encoding ISO_8859_4 not supported\n");
1205: break;
1206: case XML_CHAR_ENCODING_8859_5:
1207: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1208: ctxt->sax->error(ctxt->userData,
1209: "char encoding ISO_8859_5 not supported\n");
1210: break;
1211: case XML_CHAR_ENCODING_8859_6:
1212: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1213: ctxt->sax->error(ctxt->userData,
1214: "char encoding ISO_8859_6 not supported\n");
1215: break;
1216: case XML_CHAR_ENCODING_8859_7:
1217: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1218: ctxt->sax->error(ctxt->userData,
1219: "char encoding ISO_8859_7 not supported\n");
1220: break;
1221: case XML_CHAR_ENCODING_8859_8:
1222: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1223: ctxt->sax->error(ctxt->userData,
1224: "char encoding ISO_8859_8 not supported\n");
1225: break;
1226: case XML_CHAR_ENCODING_8859_9:
1227: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1228: ctxt->sax->error(ctxt->userData,
1229: "char encoding ISO_8859_9 not supported\n");
1230: break;
1231: case XML_CHAR_ENCODING_2022_JP:
1232: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1233: ctxt->sax->error(ctxt->userData,
1234: "char encoding ISO-2022-JPnot supported\n");
1235: break;
1236: case XML_CHAR_ENCODING_SHIFT_JIS:
1237: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1238: ctxt->sax->error(ctxt->userData,
1239: "char encoding Shift_JISnot supported\n");
1240: break;
1241: case XML_CHAR_ENCODING_EUC_JP:
1242: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1243: ctxt->sax->error(ctxt->userData,
1244: "char encoding EUC-JPnot supported\n");
1245: break;
1246: }
1247: }
1248:
1249: /************************************************************************
1250: * *
1.28 daniel 1251: * Commodity functions to handle CHARs *
1252: * *
1253: ************************************************************************/
1254:
1.50 daniel 1255: /**
1256: * xmlStrndup:
1257: * @cur: the input CHAR *
1258: * @len: the len of @cur
1259: *
1260: * a strndup for array of CHAR's
1.68 daniel 1261: *
1262: * Returns a new CHAR * or NULL
1.1 veillard 1263: */
1.55 daniel 1264: CHAR *
1265: xmlStrndup(const CHAR *cur, int len) {
1.1 veillard 1266: CHAR *ret = malloc((len + 1) * sizeof(CHAR));
1267:
1268: if (ret == NULL) {
1.86 daniel 1269: fprintf(stderr, "malloc of %ld byte failed\n",
1270: (len + 1) * (long)sizeof(CHAR));
1.1 veillard 1271: return(NULL);
1272: }
1273: memcpy(ret, cur, len * sizeof(CHAR));
1274: ret[len] = 0;
1275: return(ret);
1276: }
1277:
1.50 daniel 1278: /**
1279: * xmlStrdup:
1280: * @cur: the input CHAR *
1281: *
1282: * a strdup for array of CHAR's
1.68 daniel 1283: *
1284: * Returns a new CHAR * or NULL
1.1 veillard 1285: */
1.55 daniel 1286: CHAR *
1287: xmlStrdup(const CHAR *cur) {
1.6 httpng 1288: const CHAR *p = cur;
1.1 veillard 1289:
1290: while (IS_CHAR(*p)) p++;
1291: return(xmlStrndup(cur, p - cur));
1292: }
1293:
1.50 daniel 1294: /**
1295: * xmlCharStrndup:
1296: * @cur: the input char *
1297: * @len: the len of @cur
1298: *
1299: * a strndup for char's to CHAR's
1.68 daniel 1300: *
1301: * Returns a new CHAR * or NULL
1.45 daniel 1302: */
1303:
1.55 daniel 1304: CHAR *
1305: xmlCharStrndup(const char *cur, int len) {
1.45 daniel 1306: int i;
1307: CHAR *ret = malloc((len + 1) * sizeof(CHAR));
1308:
1309: if (ret == NULL) {
1.86 daniel 1310: fprintf(stderr, "malloc of %ld byte failed\n",
1311: (len + 1) * (long)sizeof(CHAR));
1.45 daniel 1312: return(NULL);
1313: }
1314: for (i = 0;i < len;i++)
1315: ret[i] = (CHAR) cur[i];
1316: ret[len] = 0;
1317: return(ret);
1318: }
1319:
1.50 daniel 1320: /**
1321: * xmlCharStrdup:
1322: * @cur: the input char *
1323: * @len: the len of @cur
1324: *
1325: * a strdup for char's to CHAR's
1.68 daniel 1326: *
1327: * Returns a new CHAR * or NULL
1.45 daniel 1328: */
1329:
1.55 daniel 1330: CHAR *
1331: xmlCharStrdup(const char *cur) {
1.45 daniel 1332: const char *p = cur;
1333:
1334: while (*p != '\0') p++;
1335: return(xmlCharStrndup(cur, p - cur));
1336: }
1337:
1.50 daniel 1338: /**
1339: * xmlStrcmp:
1340: * @str1: the first CHAR *
1341: * @str2: the second CHAR *
1342: *
1343: * a strcmp for CHAR's
1.68 daniel 1344: *
1345: * Returns the integer result of the comparison
1.14 veillard 1346: */
1347:
1.55 daniel 1348: int
1349: xmlStrcmp(const CHAR *str1, const CHAR *str2) {
1.14 veillard 1350: register int tmp;
1351:
1352: do {
1353: tmp = *str1++ - *str2++;
1354: if (tmp != 0) return(tmp);
1355: } while ((*str1 != 0) && (*str2 != 0));
1356: return (*str1 - *str2);
1357: }
1358:
1.50 daniel 1359: /**
1360: * xmlStrncmp:
1361: * @str1: the first CHAR *
1362: * @str2: the second CHAR *
1363: * @len: the max comparison length
1364: *
1365: * a strncmp for CHAR's
1.68 daniel 1366: *
1367: * Returns the integer result of the comparison
1.14 veillard 1368: */
1369:
1.55 daniel 1370: int
1371: xmlStrncmp(const CHAR *str1, const CHAR *str2, int len) {
1.14 veillard 1372: register int tmp;
1373:
1374: if (len <= 0) return(0);
1375: do {
1376: tmp = *str1++ - *str2++;
1377: if (tmp != 0) return(tmp);
1378: len--;
1379: if (len <= 0) return(0);
1380: } while ((*str1 != 0) && (*str2 != 0));
1381: return (*str1 - *str2);
1382: }
1383:
1.50 daniel 1384: /**
1385: * xmlStrchr:
1386: * @str: the CHAR * array
1387: * @val: the CHAR to search
1388: *
1389: * a strchr for CHAR's
1.68 daniel 1390: *
1391: * Returns the CHAR * for the first occurence or NULL.
1.14 veillard 1392: */
1393:
1.89 daniel 1394: const CHAR *
1.55 daniel 1395: xmlStrchr(const CHAR *str, CHAR val) {
1.14 veillard 1396: while (*str != 0) {
1397: if (*str == val) return((CHAR *) str);
1398: str++;
1399: }
1400: return(NULL);
1.89 daniel 1401: }
1402:
1403: /**
1404: * xmlStrstr:
1405: * @str: the CHAR * array (haystack)
1406: * @val: the CHAR to search (needle)
1407: *
1408: * a strstr for CHAR's
1409: *
1410: * Returns the CHAR * for the first occurence or NULL.
1411: */
1412:
1413: const CHAR *
1414: xmlStrstr(const CHAR *str, CHAR *val) {
1415: int n;
1416:
1417: if (str == NULL) return(NULL);
1418: if (val == NULL) return(NULL);
1419: n = xmlStrlen(val);
1420:
1421: if (n == 0) return(str);
1422: while (*str != 0) {
1423: if (*str == *val) {
1424: if (!xmlStrncmp(str, val, n)) return((const CHAR *) str);
1425: }
1426: str++;
1427: }
1428: return(NULL);
1429: }
1430:
1431: /**
1432: * xmlStrsub:
1433: * @str: the CHAR * array (haystack)
1434: * @start: the index of the first char (zero based)
1435: * @len: the length of the substring
1436: *
1437: * Extract a substring of a given string
1438: *
1439: * Returns the CHAR * for the first occurence or NULL.
1440: */
1441:
1442: CHAR *
1443: xmlStrsub(const CHAR *str, int start, int len) {
1444: int i;
1445:
1446: if (str == NULL) return(NULL);
1447: if (start < 0) return(NULL);
1.90 daniel 1448: if (len < 0) return(NULL);
1.89 daniel 1449:
1450: for (i = 0;i < start;i++) {
1451: if (*str == 0) return(NULL);
1452: str++;
1453: }
1454: if (*str == 0) return(NULL);
1455: return(xmlStrndup(str, len));
1.14 veillard 1456: }
1.28 daniel 1457:
1.50 daniel 1458: /**
1459: * xmlStrlen:
1460: * @str: the CHAR * array
1461: *
1462: * lenght of a CHAR's string
1.68 daniel 1463: *
1464: * Returns the number of CHAR contained in the ARRAY.
1.45 daniel 1465: */
1466:
1.55 daniel 1467: int
1468: xmlStrlen(const CHAR *str) {
1.45 daniel 1469: int len = 0;
1470:
1471: if (str == NULL) return(0);
1472: while (*str != 0) {
1473: str++;
1474: len++;
1475: }
1476: return(len);
1477: }
1478:
1.50 daniel 1479: /**
1480: * xmlStrncat:
1.68 daniel 1481: * @cur: the original CHAR * array
1.50 daniel 1482: * @add: the CHAR * array added
1483: * @len: the length of @add
1484: *
1485: * a strncat for array of CHAR's
1.68 daniel 1486: *
1487: * Returns a new CHAR * containing the concatenated string.
1.45 daniel 1488: */
1489:
1.55 daniel 1490: CHAR *
1491: xmlStrncat(CHAR *cur, const CHAR *add, int len) {
1.45 daniel 1492: int size;
1493: CHAR *ret;
1494:
1495: if ((add == NULL) || (len == 0))
1496: return(cur);
1497: if (cur == NULL)
1498: return(xmlStrndup(add, len));
1499:
1500: size = xmlStrlen(cur);
1501: ret = realloc(cur, (size + len + 1) * sizeof(CHAR));
1502: if (ret == NULL) {
1.86 daniel 1503: fprintf(stderr, "xmlStrncat: realloc of %ld byte failed\n",
1504: (size + len + 1) * (long)sizeof(CHAR));
1.45 daniel 1505: return(cur);
1506: }
1507: memcpy(&ret[size], add, len * sizeof(CHAR));
1508: ret[size + len] = 0;
1509: return(ret);
1510: }
1511:
1.50 daniel 1512: /**
1513: * xmlStrcat:
1.68 daniel 1514: * @cur: the original CHAR * array
1.50 daniel 1515: * @add: the CHAR * array added
1516: *
1517: * a strcat for array of CHAR's
1.68 daniel 1518: *
1519: * Returns a new CHAR * containing the concatenated string.
1.45 daniel 1520: */
1.55 daniel 1521: CHAR *
1522: xmlStrcat(CHAR *cur, const CHAR *add) {
1.45 daniel 1523: const CHAR *p = add;
1524:
1525: if (add == NULL) return(cur);
1526: if (cur == NULL)
1527: return(xmlStrdup(add));
1528:
1529: while (IS_CHAR(*p)) p++;
1530: return(xmlStrncat(cur, add, p - add));
1531: }
1532:
1533: /************************************************************************
1534: * *
1535: * Commodity functions, cleanup needed ? *
1536: * *
1537: ************************************************************************/
1538:
1.50 daniel 1539: /**
1540: * areBlanks:
1541: * @ctxt: an XML parser context
1542: * @str: a CHAR *
1543: * @len: the size of @str
1544: *
1.45 daniel 1545: * Is this a sequence of blank chars that one can ignore ?
1.50 daniel 1546: *
1547: * TODO: to be corrected accodingly to DTD information if available
1.68 daniel 1548: *
1549: * Returns 1 if ignorable 0 otherwise.
1.45 daniel 1550: */
1551:
1552: static int areBlanks(xmlParserCtxtPtr ctxt, const CHAR *str, int len) {
1553: int i;
1554: xmlNodePtr lastChild;
1555:
1556: for (i = 0;i < len;i++)
1557: if (!(IS_BLANK(str[i]))) return(0);
1558:
1559: if (CUR != '<') return(0);
1.72 daniel 1560: if (ctxt->node == NULL) return(0);
1.45 daniel 1561: lastChild = xmlGetLastChild(ctxt->node);
1562: if (lastChild == NULL) {
1563: if (ctxt->node->content != NULL) return(0);
1564: } else if (xmlNodeIsText(lastChild))
1565: return(0);
1566: return(1);
1567: }
1568:
1.50 daniel 1569: /**
1570: * xmlHandleEntity:
1571: * @ctxt: an XML parser context
1572: * @entity: an XML entity pointer.
1573: *
1574: * Default handling of defined entities, when should we define a new input
1.45 daniel 1575: * stream ? When do we just handle that as a set of chars ?
1.50 daniel 1576: * TODO: we should call the SAX handler here and have it resolve the issue
1.45 daniel 1577: */
1578:
1.55 daniel 1579: void
1580: xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 1581: int len;
1.50 daniel 1582: xmlParserInputPtr input;
1.45 daniel 1583:
1584: if (entity->content == NULL) {
1.55 daniel 1585: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1586: ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
1.45 daniel 1587: entity->name);
1.59 daniel 1588: ctxt->wellFormed = 0;
1.45 daniel 1589: return;
1590: }
1591: len = xmlStrlen(entity->content);
1592: if (len <= 2) goto handle_as_char;
1593:
1594: /*
1595: * Redefine its content as an input stream.
1596: */
1.50 daniel 1597: input = xmlNewEntityInputStream(ctxt, entity);
1598: xmlPushInput(ctxt, input);
1.45 daniel 1599: return;
1600:
1601: handle_as_char:
1602: /*
1603: * Just handle the content as a set of chars.
1604: */
1.72 daniel 1605: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
1.74 daniel 1606: ctxt->sax->characters(ctxt->userData, entity->content, len);
1.45 daniel 1607:
1608: }
1609:
/*
 * Forward declarations needed for recursive behaviour.
 */
1.77 daniel 1613: void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1614: void xmlParseReference(xmlParserCtxtPtr ctxt);
1.45 daniel 1615:
1.28 daniel 1616: /************************************************************************
1617: * *
1618: * Extra stuff for namespace support *
1619: * Relates to http://www.w3.org/TR/WD-xml-names *
1620: * *
1621: ************************************************************************/
1622:
1.50 daniel 1623: /**
1624: * xmlNamespaceParseNCName:
1625: * @ctxt: an XML parser context
1626: *
1627: * parse an XML namespace name.
1.28 daniel 1628: *
1629: * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
1630: *
1631: * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
1632: * CombiningChar | Extender
1.68 daniel 1633: *
1634: * Returns the namespace name or NULL
1.28 daniel 1635: */
1636:
1.55 daniel 1637: CHAR *
1638: xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
1.91 daniel 1639: CHAR buf[XML_MAX_NAMELEN];
1640: int len = 0;
1.28 daniel 1641:
1.40 daniel 1642: if (!IS_LETTER(CUR) && (CUR != '_')) return(NULL);
1.28 daniel 1643:
1.40 daniel 1644: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1645: (CUR == '.') || (CUR == '-') ||
1646: (CUR == '_') ||
1647: (IS_COMBINING(CUR)) ||
1.91 daniel 1648: (IS_EXTENDER(CUR))) {
1649: buf[len++] = CUR;
1.40 daniel 1650: NEXT;
1.91 daniel 1651: if (len >= XML_MAX_NAMELEN) {
1652: fprintf(stderr,
1653: "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
1654: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1655: (CUR == '.') || (CUR == '-') ||
1656: (CUR == '_') ||
1657: (IS_COMBINING(CUR)) ||
1658: (IS_EXTENDER(CUR)))
1659: NEXT;
1660: break;
1661: }
1662: }
1663: return(xmlStrndup(buf, len));
1.28 daniel 1664: }
1665:
1.50 daniel 1666: /**
1667: * xmlNamespaceParseQName:
1668: * @ctxt: an XML parser context
1669: * @prefix: a CHAR **
1670: *
1671: * parse an XML qualified name
1.28 daniel 1672: *
1673: * [NS 5] QName ::= (Prefix ':')? LocalPart
1674: *
1675: * [NS 6] Prefix ::= NCName
1676: *
1677: * [NS 7] LocalPart ::= NCName
1.68 daniel 1678: *
1679: * Returns the function returns the local part, and prefix is updated
1.50 daniel 1680: * to get the Prefix if any.
1.28 daniel 1681: */
1682:
1.55 daniel 1683: CHAR *
1684: xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, CHAR **prefix) {
1.28 daniel 1685: CHAR *ret = NULL;
1686:
1687: *prefix = NULL;
1688: ret = xmlNamespaceParseNCName(ctxt);
1.40 daniel 1689: if (CUR == ':') {
1.28 daniel 1690: *prefix = ret;
1.40 daniel 1691: NEXT;
1.28 daniel 1692: ret = xmlNamespaceParseNCName(ctxt);
1693: }
1694:
1695: return(ret);
1696: }
1697:
1.50 daniel 1698: /**
1.72 daniel 1699: * xmlSplitQName:
1700: * @name: an XML parser context
1701: * @prefix: a CHAR **
1702: *
1703: * parse an XML qualified name string
1704: *
1705: * [NS 5] QName ::= (Prefix ':')? LocalPart
1706: *
1707: * [NS 6] Prefix ::= NCName
1708: *
1709: * [NS 7] LocalPart ::= NCName
1710: *
1711: * Returns the function returns the local part, and prefix is updated
1712: * to get the Prefix if any.
1713: */
1714:
1715: CHAR *
1716: xmlSplitQName(const CHAR *name, CHAR **prefix) {
1717: CHAR *ret = NULL;
1718: const CHAR *q;
1719: const CHAR *cur = name;
1720:
1721: *prefix = NULL;
1722: if (!IS_LETTER(*cur) && (*cur != '_')) return(NULL);
1723: q = cur++;
1724:
1725: while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
1726: (*cur == '.') || (*cur == '-') ||
1727: (*cur == '_') ||
1728: (IS_COMBINING(*cur)) ||
1729: (IS_EXTENDER(*cur)))
1730: cur++;
1731:
1732: ret = xmlStrndup(q, cur - q);
1733:
1734: if (*cur == ':') {
1735: cur++;
1736: if (!IS_LETTER(*cur) && (*cur != '_')) return(ret);
1737: *prefix = ret;
1738:
1739: q = cur++;
1740:
1741: while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
1742: (*cur == '.') || (*cur == '-') ||
1743: (*cur == '_') ||
1744: (IS_COMBINING(*cur)) ||
1745: (IS_EXTENDER(*cur)))
1746: cur++;
1747:
1748: ret = xmlStrndup(q, cur - q);
1749: }
1750:
1751: return(ret);
1752: }
1753: /**
1.50 daniel 1754: * xmlNamespaceParseNSDef:
1755: * @ctxt: an XML parser context
1756: *
1757: * parse a namespace prefix declaration
1.28 daniel 1758: *
1759: * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
1760: *
1761: * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
1.68 daniel 1762: *
1763: * Returns the namespace name
1.28 daniel 1764: */
1765:
1.55 daniel 1766: CHAR *
1767: xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
1.28 daniel 1768: CHAR *name = NULL;
1769:
1.40 daniel 1770: if ((CUR == 'x') && (NXT(1) == 'm') &&
1771: (NXT(2) == 'l') && (NXT(3) == 'n') &&
1772: (NXT(4) == 's')) {
1773: SKIP(5);
1774: if (CUR == ':') {
1775: NEXT;
1.28 daniel 1776: name = xmlNamespaceParseNCName(ctxt);
1777: }
1778: }
1.39 daniel 1779: return(name);
1.28 daniel 1780: }
1781:
1.50 daniel 1782: /**
1783: * xmlParseQuotedString:
1784: * @ctxt: an XML parser context
1785: *
1.45 daniel 1786: * [OLD] Parse and return a string between quotes or doublequotes
1.68 daniel 1787: *
1788: * Returns the string parser or NULL.
1.45 daniel 1789: */
1.55 daniel 1790: CHAR *
1791: xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
1.45 daniel 1792: CHAR *ret = NULL;
1793: const CHAR *q;
1794:
1795: if (CUR == '"') {
1796: NEXT;
1797: q = CUR_PTR;
1798: while (IS_CHAR(CUR) && (CUR != '"')) NEXT;
1.55 daniel 1799: if (CUR != '"') {
1800: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1801: ctxt->sax->error(ctxt->userData, "String not closed \"%.50s\"\n", q);
1.59 daniel 1802: ctxt->wellFormed = 0;
1.55 daniel 1803: } else {
1.45 daniel 1804: ret = xmlStrndup(q, CUR_PTR - q);
1805: NEXT;
1806: }
1807: } else if (CUR == '\''){
1808: NEXT;
1809: q = CUR_PTR;
1810: while (IS_CHAR(CUR) && (CUR != '\'')) NEXT;
1.55 daniel 1811: if (CUR != '\'') {
1812: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1813: ctxt->sax->error(ctxt->userData, "String not closed \"%.50s\"\n", q);
1.59 daniel 1814: ctxt->wellFormed = 0;
1.55 daniel 1815: } else {
1.45 daniel 1816: ret = xmlStrndup(q, CUR_PTR - q);
1817: NEXT;
1818: }
1819: }
1820: return(ret);
1821: }
1822:
1.50 daniel 1823: /**
1824: * xmlParseNamespace:
1825: * @ctxt: an XML parser context
1826: *
1.45 daniel 1827: * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
1828: *
1829: * This is what the older xml-name Working Draft specified, a bunch of
1830: * other stuff may still rely on it, so support is still here as
1831: * if ot was declared on the root of the Tree:-(
1832: */
1833:
1.55 daniel 1834: void
1835: xmlParseNamespace(xmlParserCtxtPtr ctxt) {
1.45 daniel 1836: CHAR *href = NULL;
1837: CHAR *prefix = NULL;
1838: int garbage = 0;
1839:
1840: /*
1841: * We just skipped "namespace" or "xml:namespace"
1842: */
1843: SKIP_BLANKS;
1844:
1845: while (IS_CHAR(CUR) && (CUR != '>')) {
1846: /*
1847: * We can have "ns" or "prefix" attributes
1848: * Old encoding as 'href' or 'AS' attributes is still supported
1849: */
1850: if ((CUR == 'n') && (NXT(1) == 's')) {
1851: garbage = 0;
1852: SKIP(2);
1853: SKIP_BLANKS;
1854:
1855: if (CUR != '=') continue;
1856: NEXT;
1857: SKIP_BLANKS;
1858:
1859: href = xmlParseQuotedString(ctxt);
1860: SKIP_BLANKS;
1861: } else if ((CUR == 'h') && (NXT(1) == 'r') &&
1862: (NXT(2) == 'e') && (NXT(3) == 'f')) {
1863: garbage = 0;
1864: SKIP(4);
1865: SKIP_BLANKS;
1866:
1867: if (CUR != '=') continue;
1868: NEXT;
1869: SKIP_BLANKS;
1870:
1871: href = xmlParseQuotedString(ctxt);
1872: SKIP_BLANKS;
1873: } else if ((CUR == 'p') && (NXT(1) == 'r') &&
1874: (NXT(2) == 'e') && (NXT(3) == 'f') &&
1875: (NXT(4) == 'i') && (NXT(5) == 'x')) {
1876: garbage = 0;
1877: SKIP(6);
1878: SKIP_BLANKS;
1879:
1880: if (CUR != '=') continue;
1881: NEXT;
1882: SKIP_BLANKS;
1883:
1884: prefix = xmlParseQuotedString(ctxt);
1885: SKIP_BLANKS;
1886: } else if ((CUR == 'A') && (NXT(1) == 'S')) {
1887: garbage = 0;
1888: SKIP(2);
1889: SKIP_BLANKS;
1890:
1891: if (CUR != '=') continue;
1892: NEXT;
1893: SKIP_BLANKS;
1894:
1895: prefix = xmlParseQuotedString(ctxt);
1896: SKIP_BLANKS;
1897: } else if ((CUR == '?') && (NXT(1) == '>')) {
1898: garbage = 0;
1.91 daniel 1899: NEXT;
1.45 daniel 1900: } else {
1901: /*
1902: * Found garbage when parsing the namespace
1903: */
1904: if (!garbage)
1.55 daniel 1905: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1906: ctxt->sax->error(ctxt->userData, "xmlParseNamespace found garbage\n");
1.59 daniel 1907: ctxt->wellFormed = 0;
1.45 daniel 1908: NEXT;
1909: }
1910: }
1911:
1912: MOVETO_ENDTAG(CUR_PTR);
1913: NEXT;
1914:
1915: /*
1916: * Register the DTD.
1.72 daniel 1917: if (href != NULL)
1918: if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
1.74 daniel 1919: ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
1.45 daniel 1920: */
1921:
1922: if (prefix != NULL) free(prefix);
1923: if (href != NULL) free(href);
1924: }
1925:
1.28 daniel 1926: /************************************************************************
1927: * *
1928: * The parser itself *
1929: * Relates to http://www.w3.org/TR/REC-xml *
1930: * *
1931: ************************************************************************/
1.14 veillard 1932:
1.50 daniel 1933: /**
1.97 ! daniel 1934: * xmlScanName:
! 1935: * @ctxt: an XML parser context
! 1936: *
! 1937: * Trickery: parse an XML name but without consuming the input flow
! 1938: * Needed for rollback cases.
! 1939: *
! 1940: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
! 1941: * CombiningChar | Extender
! 1942: *
! 1943: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
! 1944: *
! 1945: * [6] Names ::= Name (S Name)*
! 1946: *
! 1947: * Returns the Name parsed or NULL
! 1948: */
! 1949:
! 1950: CHAR *
! 1951: xmlScanName(xmlParserCtxtPtr ctxt) {
! 1952: CHAR buf[XML_MAX_NAMELEN];
! 1953: int len = 0;
! 1954:
! 1955: GROW;
! 1956: if (!IS_LETTER(CUR) && (CUR != '_') &&
! 1957: (CUR != ':')) {
! 1958: return(NULL);
! 1959: }
! 1960:
! 1961: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
! 1962: (NXT(len) == '.') || (NXT(len) == '-') ||
! 1963: (NXT(len) == '_') || (NXT(len) == ':') ||
! 1964: (IS_COMBINING(NXT(len))) ||
! 1965: (IS_EXTENDER(NXT(len)))) {
! 1966: buf[len] = NXT(len);
! 1967: len++;
! 1968: if (len >= XML_MAX_NAMELEN) {
! 1969: fprintf(stderr,
! 1970: "xmlScanName: reached XML_MAX_NAMELEN limit\n");
! 1971: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
! 1972: (NXT(len) == '.') || (NXT(len) == '-') ||
! 1973: (NXT(len) == '_') || (NXT(len) == ':') ||
! 1974: (IS_COMBINING(NXT(len))) ||
! 1975: (IS_EXTENDER(NXT(len))))
! 1976: len++;
! 1977: break;
! 1978: }
! 1979: }
! 1980: return(xmlStrndup(buf, len));
! 1981: }
! 1982:
! 1983: /**
1.50 daniel 1984: * xmlParseName:
1985: * @ctxt: an XML parser context
1986: *
1987: * parse an XML name.
1.22 daniel 1988: *
1989: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1990: * CombiningChar | Extender
1991: *
1992: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1993: *
1994: * [6] Names ::= Name (S Name)*
1.68 daniel 1995: *
1996: * Returns the Name parsed or NULL
1.1 veillard 1997: */
1998:
1.55 daniel 1999: CHAR *
2000: xmlParseName(xmlParserCtxtPtr ctxt) {
1.91 daniel 2001: CHAR buf[XML_MAX_NAMELEN];
2002: int len = 0;
1.97 ! daniel 2003: CHAR cur;
1.1 veillard 2004:
1.91 daniel 2005: GROW;
1.97 ! daniel 2006: cur = CUR;
! 2007: if (!IS_LETTER(cur) && (cur != '_') &&
! 2008: (cur != ':')) {
1.91 daniel 2009: return(NULL);
2010: }
1.40 daniel 2011:
1.97 ! daniel 2012: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
! 2013: (cur == '.') || (cur == '-') ||
! 2014: (cur == '_') || (cur == ':') ||
! 2015: (IS_COMBINING(cur)) ||
! 2016: (IS_EXTENDER(cur))) {
! 2017: buf[len++] = cur;
1.40 daniel 2018: NEXT;
1.97 ! daniel 2019: cur = CUR;
1.91 daniel 2020: if (len >= XML_MAX_NAMELEN) {
2021: fprintf(stderr,
2022: "xmlParseName: reached XML_MAX_NAMELEN limit\n");
1.97 ! daniel 2023: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
! 2024: (cur == '.') || (cur == '-') ||
! 2025: (cur == '_') || (cur == ':') ||
! 2026: (IS_COMBINING(cur)) ||
! 2027: (IS_EXTENDER(cur))) {
! 2028: NEXT;
! 2029: cur = CUR;
! 2030: }
1.91 daniel 2031: break;
2032: }
2033: }
2034: return(xmlStrndup(buf, len));
1.22 daniel 2035: }
2036:
1.50 daniel 2037: /**
2038: * xmlParseNmtoken:
2039: * @ctxt: an XML parser context
2040: *
2041: * parse an XML Nmtoken.
1.22 daniel 2042: *
2043: * [7] Nmtoken ::= (NameChar)+
2044: *
2045: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1.68 daniel 2046: *
2047: * Returns the Nmtoken parsed or NULL
1.22 daniel 2048: */
2049:
1.55 daniel 2050: CHAR *
2051: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1.91 daniel 2052: CHAR buf[XML_MAX_NAMELEN];
2053: int len = 0;
1.22 daniel 2054:
1.91 daniel 2055: GROW;
1.40 daniel 2056: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
2057: (CUR == '.') || (CUR == '-') ||
2058: (CUR == '_') || (CUR == ':') ||
2059: (IS_COMBINING(CUR)) ||
1.91 daniel 2060: (IS_EXTENDER(CUR))) {
2061: buf[len++] = CUR;
1.40 daniel 2062: NEXT;
1.91 daniel 2063: if (len >= XML_MAX_NAMELEN) {
2064: fprintf(stderr,
2065: "xmlParseNmtoken: reached XML_MAX_NAMELEN limit\n");
2066: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
2067: (CUR == '.') || (CUR == '-') ||
2068: (CUR == '_') || (CUR == ':') ||
2069: (IS_COMBINING(CUR)) ||
2070: (IS_EXTENDER(CUR)))
2071: NEXT;
2072: break;
2073: }
2074: }
2075: return(xmlStrndup(buf, len));
1.1 veillard 2076: }
2077:
1.50 daniel 2078: /**
2079: * xmlParseEntityValue:
2080: * @ctxt: an XML parser context
1.78 daniel 2081: * @orig: if non-NULL store a copy of the original entity value
1.50 daniel 2082: *
2083: * parse a value for ENTITY decl.
1.24 daniel 2084: *
2085: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2086: * "'" ([^%&'] | PEReference | Reference)* "'"
1.68 daniel 2087: *
1.78 daniel 2088: * Returns the EntityValue parsed with reference substitued or NULL
1.24 daniel 2089: */
2090:
1.55 daniel 2091: CHAR *
1.78 daniel 2092: xmlParseEntityValue(xmlParserCtxtPtr ctxt, CHAR **orig) {
1.77 daniel 2093: CHAR *ret = NULL;
1.78 daniel 2094: const CHAR *org = NULL;
1.79 daniel 2095: const CHAR *tst = NULL;
2096: const CHAR *temp = NULL;
1.24 daniel 2097:
1.91 daniel 2098: SHRINK;
1.40 daniel 2099: if (CUR == '"') {
1.96 daniel 2100: ctxt->instate = XML_PARSER_ENTITY_VALUE;
1.40 daniel 2101: NEXT;
1.78 daniel 2102: org = CUR_PTR;
1.79 daniel 2103: while (CUR != '"') {
2104: tst = CUR_PTR;
2105: temp = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_BOTH, '"', 0, 0);
1.94 daniel 2106:
2107: /*
2108: * Pop-up of finished entities.
2109: */
2110: while ((CUR == 0) && (ctxt->inputNr > 1))
2111: xmlPopInput(ctxt);
2112:
2113: if ((temp == NULL) && (tst == CUR_PTR)) {
2114: ret = xmlStrndup("", 0);
2115: break;
2116: }
2117: if ((temp[0] == 0) && (tst == CUR_PTR)) {
2118: free((char *)temp);
2119: ret = xmlStrndup("", 0);
2120: break;
2121: }
1.79 daniel 2122: ret = xmlStrcat(ret, temp);
1.80 daniel 2123: if (temp != NULL) free((char *)temp);
1.94 daniel 2124: GROW;
1.79 daniel 2125: }
1.77 daniel 2126: if (CUR != '"') {
1.55 daniel 2127: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.79 daniel 2128: ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
1.59 daniel 2129: ctxt->wellFormed = 0;
1.78 daniel 2130: } else {
1.94 daniel 2131: if (orig != NULL) /* TODO !!!!!!!!! */
1.78 daniel 2132: *orig = xmlStrndup(org, CUR_PTR - org);
1.94 daniel 2133: if (ret == NULL)
2134: ret = xmlStrndup("", 0);
1.40 daniel 2135: NEXT;
1.78 daniel 2136: }
1.40 daniel 2137: } else if (CUR == '\'') {
1.96 daniel 2138: ctxt->instate = XML_PARSER_ENTITY_VALUE;
1.40 daniel 2139: NEXT;
1.78 daniel 2140: org = CUR_PTR;
1.80 daniel 2141: while (CUR != '\'') {
1.79 daniel 2142: tst = CUR_PTR;
2143: temp = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_BOTH, '\'', 0, 0);
1.94 daniel 2144:
2145: /*
2146: * Pop-up of finished entities.
2147: */
2148: while ((CUR == 0) && (ctxt->inputNr > 1))
2149: xmlPopInput(ctxt);
2150:
2151: if ((temp == NULL) && (tst == CUR_PTR)) {
2152: ret = xmlStrndup("", 0);
2153: break;
2154: }
2155: if ((temp[0] == 0) && (tst == CUR_PTR)) {
2156: free((char *)temp);
2157: ret = xmlStrndup("", 0);
2158: break;
2159: }
1.79 daniel 2160: ret = xmlStrcat(ret, temp);
1.80 daniel 2161: if (temp != NULL) free((char *)temp);
1.94 daniel 2162: GROW;
1.79 daniel 2163: }
1.77 daniel 2164: if (CUR != '\'') {
1.55 daniel 2165: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 2166: ctxt->sax->error(ctxt->userData, "EntityValue: ' expected\n");
1.59 daniel 2167: ctxt->wellFormed = 0;
1.78 daniel 2168: } else {
1.94 daniel 2169: if (orig != NULL) /* TODO !!!!!!!!! */
1.78 daniel 2170: *orig = xmlStrndup(org, CUR_PTR - org);
1.94 daniel 2171: if (ret == NULL)
2172: ret = xmlStrndup("", 0);
1.40 daniel 2173: NEXT;
1.78 daniel 2174: }
1.24 daniel 2175: } else {
1.55 daniel 2176: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 2177: ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
1.59 daniel 2178: ctxt->wellFormed = 0;
1.24 daniel 2179: }
2180:
2181: return(ret);
2182: }
2183:
1.50 daniel 2184: /**
2185: * xmlParseAttValue:
2186: * @ctxt: an XML parser context
2187: *
2188: * parse a value for an attribute
1.78 daniel 2189: * Note: the parser won't do substitution of entities here, this
1.79 daniel 2190: * will be handled later in xmlStringGetNodeList, unless it was
2191: * asked for ctxt->replaceEntities != 0
1.29 daniel 2192: *
2193: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2194: * "'" ([^<&'] | Reference)* "'"
1.68 daniel 2195: *
2196: * Returns the AttValue parsed or NULL.
1.29 daniel 2197: */
2198:
1.55 daniel 2199: CHAR *
2200: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.77 daniel 2201: CHAR *ret = NULL;
1.29 daniel 2202:
1.91 daniel 2203: SHRINK;
1.40 daniel 2204: if (CUR == '"') {
1.96 daniel 2205: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.40 daniel 2206: NEXT;
1.79 daniel 2207: if (ctxt->replaceEntities != 0)
2208: ret = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_REF, '"', '<', 0);
2209: else
2210: ret = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_NONE, '"', '<', 0);
1.77 daniel 2211: if (CUR == '<') {
2212: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2213: ctxt->sax->error(ctxt->userData,
2214: "Unescaped '<' not allowed in attributes values\n");
2215: ctxt->wellFormed = 0;
1.29 daniel 2216: }
1.77 daniel 2217: if (CUR != '"') {
1.55 daniel 2218: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 2219: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
1.59 daniel 2220: ctxt->wellFormed = 0;
1.77 daniel 2221: } else
1.40 daniel 2222: NEXT;
2223: } else if (CUR == '\'') {
1.96 daniel 2224: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.40 daniel 2225: NEXT;
1.79 daniel 2226: if (ctxt->replaceEntities != 0)
2227: ret = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_REF, '\'', '<', 0);
2228: else
2229: ret = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_NONE, '\'', '<', 0);
1.77 daniel 2230: if (CUR == '<') {
2231: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2232: ctxt->sax->error(ctxt->userData,
2233: "Unescaped '<' not allowed in attributes values\n");
2234: ctxt->wellFormed = 0;
1.29 daniel 2235: }
1.77 daniel 2236: if (CUR != '\'') {
1.55 daniel 2237: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 2238: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
1.59 daniel 2239: ctxt->wellFormed = 0;
1.77 daniel 2240: } else
1.40 daniel 2241: NEXT;
1.29 daniel 2242: } else {
1.55 daniel 2243: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2244: ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
1.59 daniel 2245: ctxt->wellFormed = 0;
1.29 daniel 2246: }
2247:
2248: return(ret);
2249: }
2250:
1.50 daniel 2251: /**
2252: * xmlParseSystemLiteral:
2253: * @ctxt: an XML parser context
2254: *
2255: * parse an XML Literal
1.21 daniel 2256: *
1.22 daniel 2257: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.68 daniel 2258: *
2259: * Returns the SystemLiteral parsed or NULL
1.21 daniel 2260: */
2261:
1.55 daniel 2262: CHAR *
2263: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
1.21 daniel 2264: const CHAR *q;
2265: CHAR *ret = NULL;
2266:
1.91 daniel 2267: SHRINK;
1.40 daniel 2268: if (CUR == '"') {
2269: NEXT;
2270: q = CUR_PTR;
2271: while ((IS_CHAR(CUR)) && (CUR != '"'))
2272: NEXT;
2273: if (!IS_CHAR(CUR)) {
1.55 daniel 2274: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2275: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
1.59 daniel 2276: ctxt->wellFormed = 0;
1.21 daniel 2277: } else {
1.40 daniel 2278: ret = xmlStrndup(q, CUR_PTR - q);
2279: NEXT;
1.21 daniel 2280: }
1.40 daniel 2281: } else if (CUR == '\'') {
2282: NEXT;
2283: q = CUR_PTR;
2284: while ((IS_CHAR(CUR)) && (CUR != '\''))
2285: NEXT;
2286: if (!IS_CHAR(CUR)) {
1.55 daniel 2287: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2288: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
1.59 daniel 2289: ctxt->wellFormed = 0;
1.21 daniel 2290: } else {
1.40 daniel 2291: ret = xmlStrndup(q, CUR_PTR - q);
2292: NEXT;
1.21 daniel 2293: }
2294: } else {
1.55 daniel 2295: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2296: ctxt->sax->error(ctxt->userData, "SystemLiteral \" or ' expected\n");
1.59 daniel 2297: ctxt->wellFormed = 0;
1.21 daniel 2298: }
2299:
2300: return(ret);
2301: }
2302:
1.50 daniel 2303: /**
2304: * xmlParsePubidLiteral:
2305: * @ctxt: an XML parser context
1.21 daniel 2306: *
1.50 daniel 2307: * parse an XML public literal
1.68 daniel 2308: *
2309: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2310: *
2311: * Returns the PubidLiteral parsed or NULL.
1.21 daniel 2312: */
2313:
1.55 daniel 2314: CHAR *
2315: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
1.21 daniel 2316: const CHAR *q;
2317: CHAR *ret = NULL;
2318: /*
2319: * Name ::= (Letter | '_') (NameChar)*
2320: */
1.91 daniel 2321: SHRINK;
1.40 daniel 2322: if (CUR == '"') {
2323: NEXT;
2324: q = CUR_PTR;
2325: while (IS_PUBIDCHAR(CUR)) NEXT;
2326: if (CUR != '"') {
1.55 daniel 2327: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2328: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
1.59 daniel 2329: ctxt->wellFormed = 0;
1.21 daniel 2330: } else {
1.40 daniel 2331: ret = xmlStrndup(q, CUR_PTR - q);
2332: NEXT;
1.21 daniel 2333: }
1.40 daniel 2334: } else if (CUR == '\'') {
2335: NEXT;
2336: q = CUR_PTR;
2337: while ((IS_LETTER(CUR)) && (CUR != '\''))
2338: NEXT;
2339: if (!IS_LETTER(CUR)) {
1.55 daniel 2340: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2341: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
1.59 daniel 2342: ctxt->wellFormed = 0;
1.21 daniel 2343: } else {
1.40 daniel 2344: ret = xmlStrndup(q, CUR_PTR - q);
2345: NEXT;
1.21 daniel 2346: }
2347: } else {
1.55 daniel 2348: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2349: ctxt->sax->error(ctxt->userData, "SystemLiteral \" or ' expected\n");
1.59 daniel 2350: ctxt->wellFormed = 0;
1.21 daniel 2351: }
2352:
2353: return(ret);
2354: }
2355:
1.50 daniel 2356: /**
2357: * xmlParseCharData:
2358: * @ctxt: an XML parser context
2359: * @cdata: int indicating whether we are within a CDATA section
2360: *
2361: * parse a CharData section.
2362: * if we are within a CDATA section ']]>' marks an end of section.
1.27 daniel 2363: *
2364: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2365: */
2366:
1.55 daniel 2367: void
2368: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1.91 daniel 2369: CHAR buf[1000];
2370: int nbchar = 0;
1.97 ! daniel 2371: CHAR cur;
1.27 daniel 2372:
1.91 daniel 2373: SHRINK;
1.97 ! daniel 2374: /*
! 2375: * !!!!!!!!!!!!
! 2376: * NOTE: NXT(0) is used here to avoid breaking on < or &
! 2377: * entities substitutions.
! 2378: */
! 2379: cur = CUR;
! 2380: while ((IS_CHAR(cur)) && (cur != '<') &&
! 2381: (cur != '&')) {
! 2382: if ((cur == ']') && (NXT(1) == ']') &&
1.59 daniel 2383: (NXT(2) == '>')) {
2384: if (cdata) break;
2385: else {
2386: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2387: ctxt->sax->error(ctxt->userData,
1.59 daniel 2388: "Sequence ']]>' not allowed in content\n");
2389: ctxt->wellFormed = 0;
2390: }
2391: }
1.91 daniel 2392: buf[nbchar++] = CUR;
2393: if (nbchar == 1000) {
2394: /*
2395: * Ok the segment is to be consumed as chars.
2396: */
2397: if (ctxt->sax != NULL) {
2398: if (areBlanks(ctxt, buf, nbchar)) {
2399: if (ctxt->sax->ignorableWhitespace != NULL)
2400: ctxt->sax->ignorableWhitespace(ctxt->userData,
2401: buf, nbchar);
2402: } else {
2403: if (ctxt->sax->characters != NULL)
2404: ctxt->sax->characters(ctxt->userData, buf, nbchar);
2405: }
2406: }
2407: nbchar = 0;
2408: }
1.40 daniel 2409: NEXT;
1.97 ! daniel 2410: cur = CUR;
1.27 daniel 2411: }
1.91 daniel 2412: if (nbchar != 0) {
2413: /*
2414: * Ok the segment is to be consumed as chars.
2415: */
2416: if (ctxt->sax != NULL) {
2417: if (areBlanks(ctxt, buf, nbchar)) {
2418: if (ctxt->sax->ignorableWhitespace != NULL)
2419: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2420: } else {
2421: if (ctxt->sax->characters != NULL)
2422: ctxt->sax->characters(ctxt->userData, buf, nbchar);
2423: }
2424: }
1.45 daniel 2425: }
1.27 daniel 2426: }
2427:
1.50 daniel 2428: /**
2429: * xmlParseExternalID:
2430: * @ctxt: an XML parser context
2431: * @publicID: a CHAR** receiving PubidLiteral
1.67 daniel 2432: * @strict: indicate whether we should restrict parsing to only
2433: * production [75], see NOTE below
1.50 daniel 2434: *
1.67 daniel 2435: * Parse an External ID or a Public ID
2436: *
2437: * NOTE: Productions [75] and [83] interract badly since [75] can generate
2438: * 'PUBLIC' S PubidLiteral S SystemLiteral
1.22 daniel 2439: *
2440: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2441: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1.67 daniel 2442: *
2443: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2444: *
1.68 daniel 2445: * Returns the function returns SystemLiteral and in the second
1.67 daniel 2446: * case publicID receives PubidLiteral, is strict is off
2447: * it is possible to return NULL and have publicID set.
1.22 daniel 2448: */
2449:
1.55 daniel 2450: CHAR *
1.67 daniel 2451: xmlParseExternalID(xmlParserCtxtPtr ctxt, CHAR **publicID, int strict) {
1.39 daniel 2452: CHAR *URI = NULL;
1.22 daniel 2453:
1.91 daniel 2454: SHRINK;
1.40 daniel 2455: if ((CUR == 'S') && (NXT(1) == 'Y') &&
2456: (NXT(2) == 'S') && (NXT(3) == 'T') &&
2457: (NXT(4) == 'E') && (NXT(5) == 'M')) {
2458: SKIP(6);
1.59 daniel 2459: if (!IS_BLANK(CUR)) {
2460: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2461: ctxt->sax->error(ctxt->userData,
1.59 daniel 2462: "Space required after 'SYSTEM'\n");
2463: ctxt->wellFormed = 0;
2464: }
1.42 daniel 2465: SKIP_BLANKS;
1.39 daniel 2466: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 2467: if (URI == NULL) {
1.55 daniel 2468: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2469: ctxt->sax->error(ctxt->userData,
1.39 daniel 2470: "xmlParseExternalID: SYSTEM, no URI\n");
1.59 daniel 2471: ctxt->wellFormed = 0;
2472: }
1.40 daniel 2473: } else if ((CUR == 'P') && (NXT(1) == 'U') &&
2474: (NXT(2) == 'B') && (NXT(3) == 'L') &&
2475: (NXT(4) == 'I') && (NXT(5) == 'C')) {
2476: SKIP(6);
1.59 daniel 2477: if (!IS_BLANK(CUR)) {
2478: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2479: ctxt->sax->error(ctxt->userData,
1.59 daniel 2480: "Space required after 'PUBLIC'\n");
2481: ctxt->wellFormed = 0;
2482: }
1.42 daniel 2483: SKIP_BLANKS;
1.39 daniel 2484: *publicID = xmlParsePubidLiteral(ctxt);
1.59 daniel 2485: if (*publicID == NULL) {
1.55 daniel 2486: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2487: ctxt->sax->error(ctxt->userData,
1.39 daniel 2488: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.59 daniel 2489: ctxt->wellFormed = 0;
2490: }
1.67 daniel 2491: if (strict) {
2492: /*
2493: * We don't handle [83] so "S SystemLiteral" is required.
2494: */
2495: if (!IS_BLANK(CUR)) {
2496: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2497: ctxt->sax->error(ctxt->userData,
1.67 daniel 2498: "Space required after the Public Identifier\n");
2499: ctxt->wellFormed = 0;
2500: }
2501: } else {
2502: /*
2503: * We handle [83] so we return immediately, if
2504: * "S SystemLiteral" is not detected. From a purely parsing
2505: * point of view that's a nice mess.
2506: */
2507: const CHAR *ptr = CUR_PTR;
2508: if (!IS_BLANK(*ptr)) return(NULL);
2509:
2510: while (IS_BLANK(*ptr)) ptr++;
2511: if ((*ptr != '\'') || (*ptr != '"')) return(NULL);
1.59 daniel 2512: }
1.42 daniel 2513: SKIP_BLANKS;
1.39 daniel 2514: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 2515: if (URI == NULL) {
1.55 daniel 2516: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2517: ctxt->sax->error(ctxt->userData,
1.39 daniel 2518: "xmlParseExternalID: PUBLIC, no URI\n");
1.59 daniel 2519: ctxt->wellFormed = 0;
2520: }
1.22 daniel 2521: }
1.39 daniel 2522: return(URI);
1.22 daniel 2523: }
2524:
1.50 daniel 2525: /**
2526: * xmlParseComment:
1.69 daniel 2527: * @ctxt: an XML parser context
2528: * @create: should we create a node, or just skip the content
1.50 daniel 2529: *
1.3 veillard 2530: * Skip an XML (SGML) comment <!-- .... -->
1.31 daniel 2531: * This may or may not create a node (depending on the context)
1.38 daniel 2532: * The spec says that "For compatibility, the string "--" (double-hyphen)
2533: * must not occur within comments. "
1.22 daniel 2534: *
2535: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 2536: */
1.72 daniel 2537: void
1.69 daniel 2538: xmlParseComment(xmlParserCtxtPtr ctxt, int create) {
1.17 daniel 2539: const CHAR *q, *start;
2540: const CHAR *r;
1.39 daniel 2541: CHAR *val;
1.3 veillard 2542:
2543: /*
1.22 daniel 2544: * Check that there is a comment right here.
1.3 veillard 2545: */
1.40 daniel 2546: if ((CUR != '<') || (NXT(1) != '!') ||
1.72 daniel 2547: (NXT(2) != '-') || (NXT(3) != '-')) return;
1.3 veillard 2548:
1.97 ! daniel 2549: ctxt->instate = XML_PARSER_COMMENT;
1.91 daniel 2550: SHRINK;
1.40 daniel 2551: SKIP(4);
2552: start = q = CUR_PTR;
2553: NEXT;
2554: r = CUR_PTR;
2555: NEXT;
2556: while (IS_CHAR(CUR) &&
2557: ((CUR == ':') || (CUR != '>') ||
1.16 daniel 2558: (*r != '-') || (*q != '-'))) {
1.59 daniel 2559: if ((*r == '-') && (*q == '-')) {
1.55 daniel 2560: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2561: ctxt->sax->error(ctxt->userData,
1.38 daniel 2562: "Comment must not contain '--' (double-hyphen)`\n");
1.59 daniel 2563: ctxt->wellFormed = 0;
2564: }
1.40 daniel 2565: NEXT;r++;q++;
1.3 veillard 2566: }
1.40 daniel 2567: if (!IS_CHAR(CUR)) {
1.55 daniel 2568: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2569: ctxt->sax->error(ctxt->userData, "Comment not terminated \n<!--%.50s\n", start);
1.59 daniel 2570: ctxt->wellFormed = 0;
1.3 veillard 2571: } else {
1.40 daniel 2572: NEXT;
1.31 daniel 2573: if (create) {
1.39 daniel 2574: val = xmlStrndup(start, q - start);
1.72 daniel 2575: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL))
1.74 daniel 2576: ctxt->sax->comment(ctxt->userData, val);
1.39 daniel 2577: free(val);
1.31 daniel 2578: }
1.3 veillard 2579: }
2580: }
2581:
1.50 daniel 2582: /**
2583: * xmlParsePITarget:
2584: * @ctxt: an XML parser context
2585: *
2586: * parse the name of a PI
1.22 daniel 2587: *
2588: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1.68 daniel 2589: *
2590: * Returns the PITarget name or NULL
1.22 daniel 2591: */
2592:
1.55 daniel 2593: CHAR *
2594: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1.22 daniel 2595: CHAR *name;
2596:
2597: name = xmlParseName(ctxt);
2598: if ((name != NULL) && (name[3] == 0) &&
2599: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 2600: ((name[1] == 'm') || (name[1] == 'M')) &&
2601: ((name[2] == 'l') || (name[2] == 'L'))) {
1.55 daniel 2602: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2603: ctxt->sax->error(ctxt->userData, "xmlParsePItarget: invalid name prefix 'xml'\n");
1.22 daniel 2604: return(NULL);
2605: }
2606: return(name);
2607: }
2608:
1.50 daniel 2609: /**
2610: * xmlParsePI:
2611: * @ctxt: an XML parser context
2612: *
2613: * parse an XML Processing Instruction.
1.22 daniel 2614: *
2615: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.68 daniel 2616: *
1.69 daniel 2617: * The processing is transfered to SAX once parsed.
1.3 veillard 2618: */
2619:
1.55 daniel 2620: void
2621: xmlParsePI(xmlParserCtxtPtr ctxt) {
1.22 daniel 2622: CHAR *target;
2623:
1.40 daniel 2624: if ((CUR == '<') && (NXT(1) == '?')) {
1.3 veillard 2625: /*
2626: * this is a Processing Instruction.
2627: */
1.40 daniel 2628: SKIP(2);
1.91 daniel 2629: SHRINK;
1.3 veillard 2630:
2631: /*
1.22 daniel 2632: * Parse the target name and check for special support like
2633: * namespace.
2634: *
2635: * TODO : PI handling should be dynamically redefinable using an
2636: * API. Only namespace should be in the code IMHO ...
1.3 veillard 2637: */
1.22 daniel 2638: target = xmlParsePITarget(ctxt);
2639: if (target != NULL) {
1.72 daniel 2640: const CHAR *q = CUR_PTR;
2641:
2642: while (IS_CHAR(CUR) &&
2643: ((CUR != '?') || (NXT(1) != '>')))
2644: NEXT;
2645: if (!IS_CHAR(CUR)) {
2646: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2647: ctxt->sax->error(ctxt->userData,
1.72 daniel 2648: "xmlParsePI: PI %s never end ...\n", target);
2649: ctxt->wellFormed = 0;
1.22 daniel 2650: } else {
1.72 daniel 2651: CHAR *data;
1.44 daniel 2652:
1.72 daniel 2653: data = xmlStrndup(q, CUR_PTR - q);
2654: SKIP(2);
1.44 daniel 2655:
1.72 daniel 2656: /*
2657: * SAX: PI detected.
2658: */
2659: if ((ctxt->sax) &&
2660: (ctxt->sax->processingInstruction != NULL))
1.74 daniel 2661: ctxt->sax->processingInstruction(ctxt->userData, target, data);
1.72 daniel 2662: free(data);
1.22 daniel 2663: }
1.39 daniel 2664: free(target);
1.3 veillard 2665: } else {
1.55 daniel 2666: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2667: ctxt->sax->error(ctxt->userData, "xmlParsePI : no target name\n");
1.59 daniel 2668: ctxt->wellFormed = 0;
2669:
1.22 daniel 2670: /********* Should we try to complete parsing the PI ???
1.40 daniel 2671: while (IS_CHAR(CUR) &&
2672: (CUR != '?') && (CUR != '>'))
2673: NEXT;
2674: if (!IS_CHAR(CUR)) {
1.22 daniel 2675: fprintf(stderr, "xmlParsePI: PI %s never end ...\n",
2676: target);
2677: }
2678: ********************************************************/
2679: }
2680: }
2681: }
2682:
1.50 daniel 2683: /**
2684: * xmlParseNotationDecl:
2685: * @ctxt: an XML parser context
2686: *
2687: * parse a notation declaration
1.22 daniel 2688: *
2689: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
2690: *
2691: * Hence there is actually 3 choices:
2692: * 'PUBLIC' S PubidLiteral
2693: * 'PUBLIC' S PubidLiteral S SystemLiteral
2694: * and 'SYSTEM' S SystemLiteral
1.50 daniel 2695: *
1.67 daniel 2696: * See the NOTE on xmlParseExternalID().
1.22 daniel 2697: */
2698:
1.55 daniel 2699: void
2700: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1.22 daniel 2701: CHAR *name;
1.67 daniel 2702: CHAR *Pubid;
2703: CHAR *Systemid;
1.22 daniel 2704:
1.40 daniel 2705: if ((CUR == '<') && (NXT(1) == '!') &&
2706: (NXT(2) == 'N') && (NXT(3) == 'O') &&
2707: (NXT(4) == 'T') && (NXT(5) == 'A') &&
2708: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1.67 daniel 2709: (NXT(8) == 'O') && (NXT(9) == 'N')) {
1.91 daniel 2710: SHRINK;
1.40 daniel 2711: SKIP(10);
1.67 daniel 2712: if (!IS_BLANK(CUR)) {
2713: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2714: ctxt->sax->error(ctxt->userData, "Space required after '<!NOTATION'\n");
1.67 daniel 2715: ctxt->wellFormed = 0;
2716: return;
2717: }
2718: SKIP_BLANKS;
1.22 daniel 2719:
2720: name = xmlParseName(ctxt);
2721: if (name == NULL) {
1.55 daniel 2722: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2723: ctxt->sax->error(ctxt->userData, "NOTATION: Name expected here\n");
1.67 daniel 2724: ctxt->wellFormed = 0;
2725: return;
2726: }
2727: if (!IS_BLANK(CUR)) {
2728: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2729: ctxt->sax->error(ctxt->userData,
1.67 daniel 2730: "Space required after the NOTATION name'\n");
1.59 daniel 2731: ctxt->wellFormed = 0;
1.22 daniel 2732: return;
2733: }
1.42 daniel 2734: SKIP_BLANKS;
1.67 daniel 2735:
1.22 daniel 2736: /*
1.67 daniel 2737: * Parse the IDs.
1.22 daniel 2738: */
1.67 daniel 2739: Systemid = xmlParseExternalID(ctxt, &Pubid, 1);
2740: SKIP_BLANKS;
2741:
2742: if (CUR == '>') {
1.40 daniel 2743: NEXT;
1.72 daniel 2744: if ((ctxt->sax != NULL) && (ctxt->sax->notationDecl != NULL))
1.74 daniel 2745: ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
1.67 daniel 2746: } else {
2747: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2748: ctxt->sax->error(ctxt->userData,
1.67 daniel 2749: "'>' required to close NOTATION declaration\n");
2750: ctxt->wellFormed = 0;
2751: }
1.22 daniel 2752: free(name);
1.67 daniel 2753: if (Systemid != NULL) free(Systemid);
2754: if (Pubid != NULL) free(Pubid);
1.22 daniel 2755: }
2756: }
2757:
1.50 daniel 2758: /**
2759: * xmlParseEntityDecl:
2760: * @ctxt: an XML parser context
2761: *
2762: * parse <!ENTITY declarations
1.22 daniel 2763: *
2764: * [70] EntityDecl ::= GEDecl | PEDecl
2765: *
2766: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
2767: *
2768: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
2769: *
2770: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
2771: *
2772: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 2773: *
2774: * [76] NDataDecl ::= S 'NDATA' S Name
1.22 daniel 2775: */
2776:
1.55 daniel 2777: void
2778: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.39 daniel 2779: CHAR *name = NULL;
1.24 daniel 2780: CHAR *value = NULL;
1.39 daniel 2781: CHAR *URI = NULL, *literal = NULL;
1.24 daniel 2782: CHAR *ndata = NULL;
1.39 daniel 2783: int isParameter = 0;
1.78 daniel 2784: CHAR *orig = NULL;
1.22 daniel 2785:
1.94 daniel 2786: GROW;
1.40 daniel 2787: if ((CUR == '<') && (NXT(1) == '!') &&
2788: (NXT(2) == 'E') && (NXT(3) == 'N') &&
2789: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1.59 daniel 2790: (NXT(6) == 'T') && (NXT(7) == 'Y')) {
1.96 daniel 2791: ctxt->instate = XML_PARSER_ENTITY_DECL;
1.91 daniel 2792: SHRINK;
1.40 daniel 2793: SKIP(8);
1.59 daniel 2794: if (!IS_BLANK(CUR)) {
2795: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2796: ctxt->sax->error(ctxt->userData, "Space required after '<!ENTITY'\n");
1.59 daniel 2797: ctxt->wellFormed = 0;
2798: }
2799: SKIP_BLANKS;
1.40 daniel 2800:
2801: if (CUR == '%') {
2802: NEXT;
1.59 daniel 2803: if (!IS_BLANK(CUR)) {
2804: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2805: ctxt->sax->error(ctxt->userData, "Space required after '%'\n");
1.59 daniel 2806: ctxt->wellFormed = 0;
2807: }
1.42 daniel 2808: SKIP_BLANKS;
1.39 daniel 2809: isParameter = 1;
1.22 daniel 2810: }
2811:
2812: name = xmlParseName(ctxt);
1.24 daniel 2813: if (name == NULL) {
1.55 daniel 2814: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2815: ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
1.59 daniel 2816: ctxt->wellFormed = 0;
1.24 daniel 2817: return;
2818: }
1.59 daniel 2819: if (!IS_BLANK(CUR)) {
2820: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2821: ctxt->sax->error(ctxt->userData,
1.59 daniel 2822: "Space required after the entity name\n");
2823: ctxt->wellFormed = 0;
2824: }
1.42 daniel 2825: SKIP_BLANKS;
1.24 daniel 2826:
1.22 daniel 2827: /*
1.68 daniel 2828: * handle the various case of definitions...
1.22 daniel 2829: */
1.39 daniel 2830: if (isParameter) {
1.40 daniel 2831: if ((CUR == '"') || (CUR == '\''))
1.78 daniel 2832: value = xmlParseEntityValue(ctxt, &orig);
1.39 daniel 2833: if (value) {
1.72 daniel 2834: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 2835: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 2836: XML_INTERNAL_PARAMETER_ENTITY,
2837: NULL, NULL, value);
2838: }
1.24 daniel 2839: else {
1.67 daniel 2840: URI = xmlParseExternalID(ctxt, &literal, 1);
1.39 daniel 2841: if (URI) {
1.72 daniel 2842: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 2843: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 2844: XML_EXTERNAL_PARAMETER_ENTITY,
2845: literal, URI, NULL);
2846: }
1.24 daniel 2847: }
2848: } else {
1.40 daniel 2849: if ((CUR == '"') || (CUR == '\'')) {
1.78 daniel 2850: value = xmlParseEntityValue(ctxt, &orig);
1.72 daniel 2851: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 2852: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 2853: XML_INTERNAL_GENERAL_ENTITY,
2854: NULL, NULL, value);
2855: } else {
1.67 daniel 2856: URI = xmlParseExternalID(ctxt, &literal, 1);
1.59 daniel 2857: if ((CUR != '>') && (!IS_BLANK(CUR))) {
2858: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2859: ctxt->sax->error(ctxt->userData,
1.59 daniel 2860: "Space required before 'NDATA'\n");
2861: ctxt->wellFormed = 0;
2862: }
1.42 daniel 2863: SKIP_BLANKS;
1.40 daniel 2864: if ((CUR == 'N') && (NXT(1) == 'D') &&
2865: (NXT(2) == 'A') && (NXT(3) == 'T') &&
2866: (NXT(4) == 'A')) {
2867: SKIP(5);
1.59 daniel 2868: if (!IS_BLANK(CUR)) {
2869: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2870: ctxt->sax->error(ctxt->userData,
1.59 daniel 2871: "Space required after 'NDATA'\n");
2872: ctxt->wellFormed = 0;
2873: }
1.42 daniel 2874: SKIP_BLANKS;
1.24 daniel 2875: ndata = xmlParseName(ctxt);
1.72 daniel 2876: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 2877: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 2878: XML_EXTERNAL_GENERAL_UNPARSED_ENTITY,
2879: literal, URI, ndata);
2880: } else {
1.72 daniel 2881: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 2882: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 2883: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
2884: literal, URI, NULL);
1.24 daniel 2885: }
2886: }
2887: }
1.42 daniel 2888: SKIP_BLANKS;
1.40 daniel 2889: if (CUR != '>') {
1.55 daniel 2890: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2891: ctxt->sax->error(ctxt->userData,
1.31 daniel 2892: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.59 daniel 2893: ctxt->wellFormed = 0;
1.24 daniel 2894: } else
1.40 daniel 2895: NEXT;
1.78 daniel 2896: if (orig != NULL) {
2897: /*
2898: * TODO: somwhat unclean, extending the SAx API would be better !
2899: */
2900: xmlEntityPtr cur = NULL;
2901:
2902: if ((ctxt->sax != NULL) && (ctxt->sax->getEntity != NULL))
2903: cur = ctxt->sax->getEntity(ctxt, name);
2904: if (cur != NULL)
2905: cur->orig = orig;
2906: else
2907: free(orig);
2908: }
1.39 daniel 2909: if (name != NULL) free(name);
2910: if (value != NULL) free(value);
2911: if (URI != NULL) free(URI);
2912: if (literal != NULL) free(literal);
2913: if (ndata != NULL) free(ndata);
1.22 daniel 2914: }
2915: }
2916:
1.50 daniel 2917: /**
1.59 daniel 2918: * xmlParseDefaultDecl:
2919: * @ctxt: an XML parser context
2920: * @value: Receive a possible fixed default value for the attribute
2921: *
2922: * Parse an attribute default declaration
2923: *
2924: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
2925: *
2926: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
2927: * or XML_ATTRIBUTE_FIXED.
2928: */
2929:
2930: int
2931: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, CHAR **value) {
2932: int val;
2933: CHAR *ret;
2934:
2935: *value = NULL;
2936: if ((CUR == '#') && (NXT(1) == 'R') &&
2937: (NXT(2) == 'E') && (NXT(3) == 'Q') &&
2938: (NXT(4) == 'U') && (NXT(5) == 'I') &&
2939: (NXT(6) == 'R') && (NXT(7) == 'E') &&
2940: (NXT(8) == 'D')) {
2941: SKIP(9);
2942: return(XML_ATTRIBUTE_REQUIRED);
2943: }
2944: if ((CUR == '#') && (NXT(1) == 'I') &&
2945: (NXT(2) == 'M') && (NXT(3) == 'P') &&
2946: (NXT(4) == 'L') && (NXT(5) == 'I') &&
2947: (NXT(6) == 'E') && (NXT(7) == 'D')) {
2948: SKIP(8);
2949: return(XML_ATTRIBUTE_IMPLIED);
2950: }
2951: val = XML_ATTRIBUTE_NONE;
2952: if ((CUR == '#') && (NXT(1) == 'F') &&
2953: (NXT(2) == 'I') && (NXT(3) == 'X') &&
2954: (NXT(4) == 'E') && (NXT(5) == 'D')) {
2955: SKIP(6);
2956: val = XML_ATTRIBUTE_FIXED;
2957: if (!IS_BLANK(CUR)) {
2958: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2959: ctxt->sax->error(ctxt->userData, "Space required after '#FIXED'\n");
1.59 daniel 2960: ctxt->wellFormed = 0;
2961: }
2962: SKIP_BLANKS;
2963: }
2964: ret = xmlParseAttValue(ctxt);
1.96 daniel 2965: ctxt->instate = XML_PARSER_DTD;
1.59 daniel 2966: if (ret == NULL) {
2967: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2968: ctxt->sax->error(ctxt->userData,
1.59 daniel 2969: "Attribute default value declaration error\n");
2970: ctxt->wellFormed = 0;
2971: } else
2972: *value = ret;
2973: return(val);
2974: }
2975:
2976: /**
1.66 daniel 2977: * xmlParseNotationType:
2978: * @ctxt: an XML parser context
2979: *
2980: * parse an Notation attribute type.
2981: *
2982: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
2983: *
2984: * Note: the leading 'NOTATION' S part has already being parsed...
2985: *
2986: * Returns: the notation attribute tree built while parsing
2987: */
2988:
2989: xmlEnumerationPtr
2990: xmlParseNotationType(xmlParserCtxtPtr ctxt) {
2991: CHAR *name;
2992: xmlEnumerationPtr ret = NULL, last = NULL, cur;
2993:
2994: if (CUR != '(') {
2995: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2996: ctxt->sax->error(ctxt->userData, "'(' required to start 'NOTATION'\n");
1.66 daniel 2997: ctxt->wellFormed = 0;
2998: return(NULL);
2999: }
1.91 daniel 3000: SHRINK;
1.66 daniel 3001: do {
3002: NEXT;
3003: SKIP_BLANKS;
3004: name = xmlParseName(ctxt);
3005: if (name == NULL) {
3006: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3007: ctxt->sax->error(ctxt->userData,
1.66 daniel 3008: "Name expected in NOTATION declaration\n");
3009: ctxt->wellFormed = 0;
3010: return(ret);
3011: }
3012: cur = xmlCreateEnumeration(name);
1.67 daniel 3013: free(name);
1.66 daniel 3014: if (cur == NULL) return(ret);
3015: if (last == NULL) ret = last = cur;
3016: else {
3017: last->next = cur;
3018: last = cur;
3019: }
3020: SKIP_BLANKS;
3021: } while (CUR == '|');
3022: if (CUR != ')') {
3023: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3024: ctxt->sax->error(ctxt->userData,
1.66 daniel 3025: "')' required to finish NOTATION declaration\n");
3026: ctxt->wellFormed = 0;
3027: return(ret);
3028: }
3029: NEXT;
3030: return(ret);
3031: }
3032:
3033: /**
3034: * xmlParseEnumerationType:
3035: * @ctxt: an XML parser context
3036: *
3037: * parse an Enumeration attribute type.
3038: *
3039: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3040: *
3041: * Returns: the enumeration attribute tree built while parsing
3042: */
3043:
3044: xmlEnumerationPtr
3045: xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3046: CHAR *name;
3047: xmlEnumerationPtr ret = NULL, last = NULL, cur;
3048:
3049: if (CUR != '(') {
3050: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3051: ctxt->sax->error(ctxt->userData,
1.66 daniel 3052: "'(' required to start ATTLIST enumeration\n");
3053: ctxt->wellFormed = 0;
3054: return(NULL);
3055: }
1.91 daniel 3056: SHRINK;
1.66 daniel 3057: do {
3058: NEXT;
3059: SKIP_BLANKS;
3060: name = xmlParseNmtoken(ctxt);
3061: if (name == NULL) {
3062: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3063: ctxt->sax->error(ctxt->userData,
1.66 daniel 3064: "NmToken expected in ATTLIST enumeration\n");
3065: ctxt->wellFormed = 0;
3066: return(ret);
3067: }
3068: cur = xmlCreateEnumeration(name);
1.67 daniel 3069: free(name);
1.66 daniel 3070: if (cur == NULL) return(ret);
3071: if (last == NULL) ret = last = cur;
3072: else {
3073: last->next = cur;
3074: last = cur;
3075: }
3076: SKIP_BLANKS;
3077: } while (CUR == '|');
3078: if (CUR != ')') {
3079: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3080: ctxt->sax->error(ctxt->userData,
1.66 daniel 3081: "')' required to finish ATTLIST enumeration\n");
3082: ctxt->wellFormed = 0;
3083: return(ret);
3084: }
3085: NEXT;
3086: return(ret);
3087: }
3088:
3089: /**
1.50 daniel 3090: * xmlParseEnumeratedType:
3091: * @ctxt: an XML parser context
1.66 daniel 3092: * @tree: the enumeration tree built while parsing
1.50 daniel 3093: *
1.66 daniel 3094: * parse an Enumerated attribute type.
1.22 daniel 3095: *
3096: * [57] EnumeratedType ::= NotationType | Enumeration
3097: *
3098: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3099: *
1.50 daniel 3100: *
1.66 daniel 3101: * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
1.22 daniel 3102: */
3103:
1.66 daniel 3104: int
3105: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3106: if ((CUR == 'N') && (NXT(1) == 'O') &&
3107: (NXT(2) == 'T') && (NXT(3) == 'A') &&
3108: (NXT(4) == 'T') && (NXT(5) == 'I') &&
3109: (NXT(6) == 'O') && (NXT(7) == 'N')) {
3110: SKIP(8);
3111: if (!IS_BLANK(CUR)) {
3112: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3113: ctxt->sax->error(ctxt->userData, "Space required after 'NOTATION'\n");
1.66 daniel 3114: ctxt->wellFormed = 0;
3115: return(0);
3116: }
3117: SKIP_BLANKS;
3118: *tree = xmlParseNotationType(ctxt);
3119: if (*tree == NULL) return(0);
3120: return(XML_ATTRIBUTE_NOTATION);
3121: }
3122: *tree = xmlParseEnumerationType(ctxt);
3123: if (*tree == NULL) return(0);
3124: return(XML_ATTRIBUTE_ENUMERATION);
1.22 daniel 3125: }
3126:
1.50 daniel 3127: /**
3128: * xmlParseAttributeType:
3129: * @ctxt: an XML parser context
1.66 daniel 3130: * @tree: the enumeration tree built while parsing
1.50 daniel 3131: *
1.59 daniel 3132: * parse the Attribute list def for an element
1.22 daniel 3133: *
3134: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3135: *
3136: * [55] StringType ::= 'CDATA'
3137: *
3138: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3139: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1.50 daniel 3140: *
1.69 daniel 3141: * Returns the attribute type
1.22 daniel 3142: */
1.59 daniel 3143: int
1.66 daniel 3144: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.91 daniel 3145: SHRINK;
1.40 daniel 3146: if ((CUR == 'C') && (NXT(1) == 'D') &&
3147: (NXT(2) == 'A') && (NXT(3) == 'T') &&
3148: (NXT(4) == 'A')) {
3149: SKIP(5);
1.66 daniel 3150: return(XML_ATTRIBUTE_CDATA);
1.40 daniel 3151: } else if ((CUR == 'I') && (NXT(1) == 'D') &&
3152: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.97 ! daniel 3153: (NXT(4) == 'F') && (NXT(5) == 'S')) {
! 3154: SKIP(6);
! 3155: return(XML_ATTRIBUTE_IDREFS);
! 3156: } else if ((CUR == 'I') && (NXT(1) == 'D') &&
! 3157: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.40 daniel 3158: (NXT(4) == 'F')) {
3159: SKIP(5);
1.59 daniel 3160: return(XML_ATTRIBUTE_IDREF);
1.66 daniel 3161: } else if ((CUR == 'I') && (NXT(1) == 'D')) {
3162: SKIP(2);
3163: return(XML_ATTRIBUTE_ID);
1.40 daniel 3164: } else if ((CUR == 'E') && (NXT(1) == 'N') &&
3165: (NXT(2) == 'T') && (NXT(3) == 'I') &&
3166: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3167: SKIP(6);
1.59 daniel 3168: return(XML_ATTRIBUTE_ENTITY);
1.40 daniel 3169: } else if ((CUR == 'E') && (NXT(1) == 'N') &&
3170: (NXT(2) == 'T') && (NXT(3) == 'I') &&
3171: (NXT(4) == 'T') && (NXT(5) == 'I') &&
3172: (NXT(6) == 'E') && (NXT(7) == 'S')) {
3173: SKIP(8);
1.59 daniel 3174: return(XML_ATTRIBUTE_ENTITIES);
1.40 daniel 3175: } else if ((CUR == 'N') && (NXT(1) == 'M') &&
3176: (NXT(2) == 'T') && (NXT(3) == 'O') &&
3177: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.66 daniel 3178: (NXT(6) == 'N') && (NXT(7) == 'S')) {
3179: SKIP(8);
3180: return(XML_ATTRIBUTE_NMTOKENS);
3181: } else if ((CUR == 'N') && (NXT(1) == 'M') &&
3182: (NXT(2) == 'T') && (NXT(3) == 'O') &&
3183: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.40 daniel 3184: (NXT(6) == 'N')) {
3185: SKIP(7);
1.59 daniel 3186: return(XML_ATTRIBUTE_NMTOKEN);
1.22 daniel 3187: }
1.66 daniel 3188: return(xmlParseEnumeratedType(ctxt, tree));
1.22 daniel 3189: }
3190:
1.50 daniel 3191: /**
3192: * xmlParseAttributeListDecl:
3193: * @ctxt: an XML parser context
3194: *
3195: * : parse the Attribute list def for an element
1.22 daniel 3196: *
3197: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3198: *
3199: * [53] AttDef ::= S Name S AttType S DefaultDecl
1.50 daniel 3200: *
1.22 daniel 3201: */
1.55 daniel 3202: void
3203: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1.59 daniel 3204: CHAR *elemName;
3205: CHAR *attrName;
1.66 daniel 3206: xmlEnumerationPtr tree = NULL;
1.22 daniel 3207:
1.40 daniel 3208: if ((CUR == '<') && (NXT(1) == '!') &&
3209: (NXT(2) == 'A') && (NXT(3) == 'T') &&
3210: (NXT(4) == 'T') && (NXT(5) == 'L') &&
3211: (NXT(6) == 'I') && (NXT(7) == 'S') &&
1.59 daniel 3212: (NXT(8) == 'T')) {
1.40 daniel 3213: SKIP(9);
1.59 daniel 3214: if (!IS_BLANK(CUR)) {
3215: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3216: ctxt->sax->error(ctxt->userData, "Space required after '<!ATTLIST'\n");
1.59 daniel 3217: ctxt->wellFormed = 0;
3218: }
1.42 daniel 3219: SKIP_BLANKS;
1.59 daniel 3220: elemName = xmlParseName(ctxt);
3221: if (elemName == NULL) {
1.55 daniel 3222: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3223: ctxt->sax->error(ctxt->userData, "ATTLIST: no name for Element\n");
1.59 daniel 3224: ctxt->wellFormed = 0;
1.22 daniel 3225: return;
3226: }
1.42 daniel 3227: SKIP_BLANKS;
1.40 daniel 3228: while (CUR != '>') {
3229: const CHAR *check = CUR_PTR;
1.59 daniel 3230: int type;
3231: int def;
3232: CHAR *defaultValue = NULL;
3233:
3234: attrName = xmlParseName(ctxt);
3235: if (attrName == NULL) {
3236: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3237: ctxt->sax->error(ctxt->userData, "ATTLIST: no name for Attribute\n");
1.59 daniel 3238: ctxt->wellFormed = 0;
3239: break;
3240: }
1.97 ! daniel 3241: GROW;
1.59 daniel 3242: if (!IS_BLANK(CUR)) {
3243: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3244: ctxt->sax->error(ctxt->userData,
1.59 daniel 3245: "Space required after the attribute name\n");
3246: ctxt->wellFormed = 0;
3247: break;
3248: }
3249: SKIP_BLANKS;
3250:
1.66 daniel 3251: type = xmlParseAttributeType(ctxt, &tree);
1.59 daniel 3252: if (type <= 0) break;
1.22 daniel 3253:
1.97 ! daniel 3254: GROW;
1.59 daniel 3255: if (!IS_BLANK(CUR)) {
3256: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3257: ctxt->sax->error(ctxt->userData,
1.59 daniel 3258: "Space required after the attribute type\n");
3259: ctxt->wellFormed = 0;
3260: break;
3261: }
1.42 daniel 3262: SKIP_BLANKS;
1.59 daniel 3263:
3264: def = xmlParseDefaultDecl(ctxt, &defaultValue);
3265: if (def <= 0) break;
3266:
1.97 ! daniel 3267: GROW;
1.59 daniel 3268: if (CUR != '>') {
3269: if (!IS_BLANK(CUR)) {
3270: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3271: ctxt->sax->error(ctxt->userData,
1.59 daniel 3272: "Space required after the attribute default value\n");
3273: ctxt->wellFormed = 0;
3274: break;
3275: }
3276: SKIP_BLANKS;
3277: }
1.40 daniel 3278: if (check == CUR_PTR) {
1.55 daniel 3279: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3280: ctxt->sax->error(ctxt->userData,
1.59 daniel 3281: "xmlParseAttributeListDecl: detected internal error\n");
1.22 daniel 3282: break;
3283: }
1.72 daniel 3284: if ((ctxt->sax != NULL) && (ctxt->sax->attributeDecl != NULL))
1.74 daniel 3285: ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
1.66 daniel 3286: type, def, defaultValue, tree);
1.59 daniel 3287: if (attrName != NULL)
3288: free(attrName);
3289: if (defaultValue != NULL)
3290: free(defaultValue);
1.97 ! daniel 3291: GROW;
1.22 daniel 3292: }
1.40 daniel 3293: if (CUR == '>')
3294: NEXT;
1.22 daniel 3295:
1.59 daniel 3296: free(elemName);
1.22 daniel 3297: }
3298: }
3299:
1.50 daniel 3300: /**
1.61 daniel 3301: * xmlParseElementMixedContentDecl:
3302: * @ctxt: an XML parser context
3303: *
3304: * parse the declaration for a Mixed Element content
3305: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3306: *
3307: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
3308: * '(' S? '#PCDATA' S? ')'
3309: *
3310: * returns: the list of the xmlElementContentPtr describing the element choices
3311: */
3312: xmlElementContentPtr
1.62 daniel 3313: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
1.64 daniel 3314: xmlElementContentPtr ret = NULL, cur = NULL, n;
1.61 daniel 3315: CHAR *elem = NULL;
3316:
1.97 ! daniel 3317: GROW;
1.61 daniel 3318: if ((CUR == '#') && (NXT(1) == 'P') &&
3319: (NXT(2) == 'C') && (NXT(3) == 'D') &&
3320: (NXT(4) == 'A') && (NXT(5) == 'T') &&
3321: (NXT(6) == 'A')) {
3322: SKIP(7);
3323: SKIP_BLANKS;
1.91 daniel 3324: SHRINK;
1.63 daniel 3325: if (CUR == ')') {
3326: NEXT;
3327: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3328: return(ret);
3329: }
1.61 daniel 3330: if ((CUR == '(') || (CUR == '|')) {
3331: ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3332: if (ret == NULL) return(NULL);
1.63 daniel 3333: } /********** else {
1.61 daniel 3334: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3335: ctxt->sax->error(ctxt->userData,
1.61 daniel 3336: "xmlParseElementMixedContentDecl : '|' or ')' expected\n");
3337: ctxt->wellFormed = 0;
3338: return(NULL);
1.63 daniel 3339: } **********/
1.61 daniel 3340: while (CUR == '|') {
1.64 daniel 3341: NEXT;
1.61 daniel 3342: if (elem == NULL) {
3343: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3344: if (ret == NULL) return(NULL);
3345: ret->c1 = cur;
1.64 daniel 3346: cur = ret;
1.61 daniel 3347: } else {
1.64 daniel 3348: n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3349: if (n == NULL) return(NULL);
3350: n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
3351: cur->c2 = n;
3352: cur = n;
1.66 daniel 3353: free(elem);
1.61 daniel 3354: }
3355: SKIP_BLANKS;
3356: elem = xmlParseName(ctxt);
3357: if (elem == NULL) {
3358: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3359: ctxt->sax->error(ctxt->userData,
1.61 daniel 3360: "xmlParseElementMixedContentDecl : Name expected\n");
3361: ctxt->wellFormed = 0;
3362: xmlFreeElementContent(cur);
3363: return(NULL);
3364: }
3365: SKIP_BLANKS;
1.97 ! daniel 3366: GROW;
1.61 daniel 3367: }
1.63 daniel 3368: if ((CUR == ')') && (NXT(1) == '*')) {
1.66 daniel 3369: if (elem != NULL) {
1.61 daniel 3370: cur->c2 = xmlNewElementContent(elem,
3371: XML_ELEMENT_CONTENT_ELEMENT);
1.66 daniel 3372: free(elem);
3373: }
1.65 daniel 3374: ret->ocur = XML_ELEMENT_CONTENT_MULT;
1.64 daniel 3375: SKIP(2);
1.61 daniel 3376: } else {
1.66 daniel 3377: if (elem != NULL) free(elem);
1.61 daniel 3378: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3379: ctxt->sax->error(ctxt->userData,
1.63 daniel 3380: "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
1.61 daniel 3381: ctxt->wellFormed = 0;
3382: xmlFreeElementContent(ret);
3383: return(NULL);
3384: }
3385:
3386: } else {
3387: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3388: ctxt->sax->error(ctxt->userData,
1.61 daniel 3389: "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
3390: ctxt->wellFormed = 0;
3391: }
3392: return(ret);
3393: }
3394:
3395: /**
3396: * xmlParseElementChildrenContentDecl:
1.50 daniel 3397: * @ctxt: an XML parser context
3398: *
1.61 daniel 3399: * parse the declaration for a Mixed Element content
3400: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
1.22 daniel 3401: *
1.61 daniel 3402: *
1.22 daniel 3403: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
3404: *
3405: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
3406: *
3407: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
3408: *
3409: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
3410: *
1.62 daniel 3411: * returns: the tree of xmlElementContentPtr describing the element
1.61 daniel 3412: * hierarchy.
3413: */
3414: xmlElementContentPtr
1.62 daniel 3415: xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
1.63 daniel 3416: xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
1.62 daniel 3417: CHAR *elem;
3418: CHAR type = 0;
3419:
1.94 daniel 3420: /* !!!!!!!!!!!!!!!! PE Refs can occur here !!!!!!!!!!! */
1.62 daniel 3421: SKIP_BLANKS;
1.94 daniel 3422: GROW;
1.62 daniel 3423: if (CUR == '(') {
1.63 daniel 3424: /* Recurse on first child */
1.62 daniel 3425: NEXT;
3426: SKIP_BLANKS;
3427: cur = ret = xmlParseElementChildrenContentDecl(ctxt);
3428: SKIP_BLANKS;
3429: } else {
3430: elem = xmlParseName(ctxt);
3431: if (elem == NULL) {
3432: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3433: ctxt->sax->error(ctxt->userData,
1.62 daniel 3434: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
3435: ctxt->wellFormed = 0;
3436: return(NULL);
3437: }
3438: cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
3439: if (CUR == '?') {
3440: ret->ocur = XML_ELEMENT_CONTENT_OPT;
3441: NEXT;
3442: } else if (CUR == '*') {
3443: ret->ocur = XML_ELEMENT_CONTENT_MULT;
3444: NEXT;
3445: } else if (CUR == '+') {
3446: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
3447: NEXT;
3448: } else {
3449: ret->ocur = XML_ELEMENT_CONTENT_ONCE;
3450: }
1.66 daniel 3451: free(elem);
1.62 daniel 3452: }
3453: SKIP_BLANKS;
1.91 daniel 3454: SHRINK;
1.62 daniel 3455: while (CUR != ')') {
1.63 daniel 3456: /*
3457: * Each loop we parse one separator and one element.
3458: */
1.62 daniel 3459: if (CUR == ',') {
3460: if (type == 0) type = CUR;
3461:
3462: /*
3463: * Detect "Name | Name , Name" error
3464: */
3465: else if (type != CUR) {
3466: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3467: ctxt->sax->error(ctxt->userData,
1.62 daniel 3468: "xmlParseElementChildrenContentDecl : '%c' expected\n",
3469: type);
3470: ctxt->wellFormed = 0;
3471: xmlFreeElementContent(ret);
3472: return(NULL);
3473: }
1.64 daniel 3474: NEXT;
1.62 daniel 3475:
1.63 daniel 3476: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
3477: if (op == NULL) {
3478: xmlFreeElementContent(ret);
3479: return(NULL);
3480: }
3481: if (last == NULL) {
3482: op->c1 = ret;
1.65 daniel 3483: ret = cur = op;
1.63 daniel 3484: } else {
3485: cur->c2 = op;
3486: op->c1 = last;
3487: cur =op;
1.65 daniel 3488: last = NULL;
1.63 daniel 3489: }
1.62 daniel 3490: } else if (CUR == '|') {
3491: if (type == 0) type = CUR;
3492:
3493: /*
1.63 daniel 3494: * Detect "Name , Name | Name" error
1.62 daniel 3495: */
3496: else if (type != CUR) {
3497: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3498: ctxt->sax->error(ctxt->userData,
1.62 daniel 3499: "xmlParseElementChildrenContentDecl : '%c' expected\n",
3500: type);
3501: ctxt->wellFormed = 0;
3502: xmlFreeElementContent(ret);
3503: return(NULL);
3504: }
1.64 daniel 3505: NEXT;
1.62 daniel 3506:
1.63 daniel 3507: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3508: if (op == NULL) {
3509: xmlFreeElementContent(ret);
3510: return(NULL);
3511: }
3512: if (last == NULL) {
3513: op->c1 = ret;
1.65 daniel 3514: ret = cur = op;
1.63 daniel 3515: } else {
3516: cur->c2 = op;
3517: op->c1 = last;
3518: cur =op;
1.65 daniel 3519: last = NULL;
1.63 daniel 3520: }
1.62 daniel 3521: } else {
3522: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3523: ctxt->sax->error(ctxt->userData,
1.62 daniel 3524: "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
3525: ctxt->wellFormed = 0;
3526: xmlFreeElementContent(ret);
3527: return(NULL);
3528: }
3529: SKIP_BLANKS;
3530: if (CUR == '(') {
1.63 daniel 3531: /* Recurse on second child */
1.62 daniel 3532: NEXT;
3533: SKIP_BLANKS;
1.65 daniel 3534: last = xmlParseElementChildrenContentDecl(ctxt);
1.62 daniel 3535: SKIP_BLANKS;
3536: } else {
3537: elem = xmlParseName(ctxt);
3538: if (elem == NULL) {
3539: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3540: ctxt->sax->error(ctxt->userData,
1.62 daniel 3541: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
3542: ctxt->wellFormed = 0;
3543: return(NULL);
3544: }
1.65 daniel 3545: last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.66 daniel 3546: free(elem);
1.62 daniel 3547: }
1.63 daniel 3548: if (CUR == '?') {
3549: ret->ocur = XML_ELEMENT_CONTENT_OPT;
3550: NEXT;
3551: } else if (CUR == '*') {
3552: ret->ocur = XML_ELEMENT_CONTENT_MULT;
3553: NEXT;
3554: } else if (CUR == '+') {
3555: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
3556: NEXT;
3557: } else {
3558: ret->ocur = XML_ELEMENT_CONTENT_ONCE;
3559: }
3560: SKIP_BLANKS;
1.97 ! daniel 3561: GROW;
1.64 daniel 3562: }
1.65 daniel 3563: if ((cur != NULL) && (last != NULL)) {
3564: cur->c2 = last;
1.62 daniel 3565: }
3566: NEXT;
3567: if (CUR == '?') {
3568: ret->ocur = XML_ELEMENT_CONTENT_OPT;
3569: NEXT;
3570: } else if (CUR == '*') {
3571: ret->ocur = XML_ELEMENT_CONTENT_MULT;
3572: NEXT;
3573: } else if (CUR == '+') {
3574: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
3575: NEXT;
3576: } else {
3577: ret->ocur = XML_ELEMENT_CONTENT_ONCE;
3578: }
3579: return(ret);
1.61 daniel 3580: }
3581:
3582: /**
3583: * xmlParseElementContentDecl:
3584: * @ctxt: an XML parser context
3585: * @name: the name of the element being defined.
3586: * @result: the Element Content pointer will be stored here if any
1.22 daniel 3587: *
1.61 daniel 3588: * parse the declaration for an Element content either Mixed or Children,
3589: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
3590: *
3591: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1.50 daniel 3592: *
1.61 daniel 3593: * returns: the type of element content XML_ELEMENT_TYPE_xxx
1.22 daniel 3594: */
3595:
1.61 daniel 3596: int
3597: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, CHAR *name,
3598: xmlElementContentPtr *result) {
3599:
3600: xmlElementContentPtr tree = NULL;
3601: int res;
3602:
3603: *result = NULL;
3604:
3605: if (CUR != '(') {
3606: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3607: ctxt->sax->error(ctxt->userData,
1.61 daniel 3608: "xmlParseElementContentDecl : '(' expected\n");
3609: ctxt->wellFormed = 0;
3610: return(-1);
3611: }
3612: NEXT;
1.97 ! daniel 3613: GROW;
1.61 daniel 3614: SKIP_BLANKS;
3615: if ((CUR == '#') && (NXT(1) == 'P') &&
3616: (NXT(2) == 'C') && (NXT(3) == 'D') &&
3617: (NXT(4) == 'A') && (NXT(5) == 'T') &&
3618: (NXT(6) == 'A')) {
1.62 daniel 3619: tree = xmlParseElementMixedContentDecl(ctxt);
1.61 daniel 3620: res = XML_ELEMENT_TYPE_MIXED;
3621: } else {
1.62 daniel 3622: tree = xmlParseElementChildrenContentDecl(ctxt);
1.61 daniel 3623: res = XML_ELEMENT_TYPE_ELEMENT;
3624: }
3625: SKIP_BLANKS;
1.63 daniel 3626: /****************************
1.61 daniel 3627: if (CUR != ')') {
3628: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3629: ctxt->sax->error(ctxt->userData,
1.61 daniel 3630: "xmlParseElementContentDecl : ')' expected\n");
3631: ctxt->wellFormed = 0;
3632: return(-1);
3633: }
1.63 daniel 3634: ****************************/
3635: *result = tree;
1.61 daniel 3636: return(res);
1.22 daniel 3637: }
3638:
1.50 daniel 3639: /**
3640: * xmlParseElementDecl:
3641: * @ctxt: an XML parser context
3642: *
3643: * parse an Element declaration.
1.22 daniel 3644: *
3645: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
3646: *
3647: * TODO There is a check [ VC: Unique Element Type Declaration ]
1.69 daniel 3648: *
3649: * Returns the type of the element, or -1 in case of error
1.22 daniel 3650: */
1.59 daniel 3651: int
1.55 daniel 3652: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1.22 daniel 3653: CHAR *name;
1.59 daniel 3654: int ret = -1;
1.61 daniel 3655: xmlElementContentPtr content = NULL;
1.22 daniel 3656:
1.97 ! daniel 3657: GROW;
1.40 daniel 3658: if ((CUR == '<') && (NXT(1) == '!') &&
3659: (NXT(2) == 'E') && (NXT(3) == 'L') &&
3660: (NXT(4) == 'E') && (NXT(5) == 'M') &&
3661: (NXT(6) == 'E') && (NXT(7) == 'N') &&
1.59 daniel 3662: (NXT(8) == 'T')) {
1.40 daniel 3663: SKIP(9);
1.59 daniel 3664: if (!IS_BLANK(CUR)) {
3665: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3666: ctxt->sax->error(ctxt->userData,
1.59 daniel 3667: "Space required after 'ELEMENT'\n");
3668: ctxt->wellFormed = 0;
3669: }
1.42 daniel 3670: SKIP_BLANKS;
1.22 daniel 3671: name = xmlParseName(ctxt);
3672: if (name == NULL) {
1.55 daniel 3673: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3674: ctxt->sax->error(ctxt->userData,
1.59 daniel 3675: "xmlParseElementDecl: no name for Element\n");
3676: ctxt->wellFormed = 0;
3677: return(-1);
3678: }
3679: if (!IS_BLANK(CUR)) {
3680: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3681: ctxt->sax->error(ctxt->userData,
1.59 daniel 3682: "Space required after the element name\n");
3683: ctxt->wellFormed = 0;
1.22 daniel 3684: }
1.42 daniel 3685: SKIP_BLANKS;
1.40 daniel 3686: if ((CUR == 'E') && (NXT(1) == 'M') &&
3687: (NXT(2) == 'P') && (NXT(3) == 'T') &&
3688: (NXT(4) == 'Y')) {
3689: SKIP(5);
1.22 daniel 3690: /*
3691: * Element must always be empty.
3692: */
1.59 daniel 3693: ret = XML_ELEMENT_TYPE_EMPTY;
1.40 daniel 3694: } else if ((CUR == 'A') && (NXT(1) == 'N') &&
3695: (NXT(2) == 'Y')) {
3696: SKIP(3);
1.22 daniel 3697: /*
3698: * Element is a generic container.
3699: */
1.59 daniel 3700: ret = XML_ELEMENT_TYPE_ANY;
1.61 daniel 3701: } else if (CUR == '(') {
3702: ret = xmlParseElementContentDecl(ctxt, name, &content);
1.22 daniel 3703: } else {
1.61 daniel 3704: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3705: ctxt->sax->error(ctxt->userData,
1.61 daniel 3706: "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
3707: ctxt->wellFormed = 0;
3708: if (name != NULL) free(name);
3709: return(-1);
1.22 daniel 3710: }
1.42 daniel 3711: SKIP_BLANKS;
1.40 daniel 3712: if (CUR != '>') {
1.55 daniel 3713: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3714: ctxt->sax->error(ctxt->userData,
1.31 daniel 3715: "xmlParseElementDecl: expected '>' at the end\n");
1.59 daniel 3716: ctxt->wellFormed = 0;
1.61 daniel 3717: } else {
1.40 daniel 3718: NEXT;
1.72 daniel 3719: if ((ctxt->sax != NULL) && (ctxt->sax->elementDecl != NULL))
1.76 daniel 3720: ctxt->sax->elementDecl(ctxt->userData, name, ret,
3721: content);
1.61 daniel 3722: }
1.84 daniel 3723: if (content != NULL) {
3724: xmlFreeElementContent(content);
3725: }
1.61 daniel 3726: if (name != NULL) {
3727: free(name);
3728: }
1.22 daniel 3729: }
1.59 daniel 3730: return(ret);
1.22 daniel 3731: }
3732:
1.50 daniel 3733: /**
3734: * xmlParseMarkupDecl:
3735: * @ctxt: an XML parser context
3736: *
3737: * parse Markup declarations
1.22 daniel 3738: *
3739: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
3740: * NotationDecl | PI | Comment
3741: *
3742: * TODO There is a check [ VC: Proper Declaration/PE Nesting ]
3743: */
1.55 daniel 3744: void
3745: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1.94 daniel 3746: GROW;
1.22 daniel 3747: xmlParseElementDecl(ctxt);
3748: xmlParseAttributeListDecl(ctxt);
3749: xmlParseEntityDecl(ctxt);
3750: xmlParseNotationDecl(ctxt);
3751: xmlParsePI(ctxt);
1.31 daniel 3752: xmlParseComment(ctxt, 0);
1.97 ! daniel 3753: ctxt->instate = XML_PARSER_DTD;
1.22 daniel 3754: }
3755:
1.50 daniel 3756: /**
1.76 daniel 3757: * xmlParseTextDecl:
3758: * @ctxt: an XML parser context
3759: *
3760: * parse an XML declaration header for external entities
3761: *
3762: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
3763: *
3764: * Returns the only valuable info for an external parsed entity, the encoding
3765: */
3766:
3767: CHAR *
3768: xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
3769: CHAR *version;
3770: CHAR *encoding = NULL;
3771:
3772: /*
3773: * We know that '<?xml' is here.
3774: */
3775: SKIP(5);
3776:
3777: if (!IS_BLANK(CUR)) {
3778: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3779: ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
3780: ctxt->wellFormed = 0;
3781: }
3782: SKIP_BLANKS;
3783:
3784: /*
3785: * We may have the VersionInfo here.
3786: */
3787: version = xmlParseVersionInfo(ctxt);
3788: /* TODO: we should actually inherit from the referencing doc if absent
3789: if (version == NULL)
3790: version = xmlCharStrdup(XML_DEFAULT_VERSION);
3791: ctxt->version = xmlStrdup(version);
3792: */
3793: if (version != NULL)
3794: free(version);
3795:
3796: /*
3797: * We must have the encoding declaration
3798: */
3799: if (!IS_BLANK(CUR)) {
3800: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3801: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
3802: ctxt->wellFormed = 0;
3803: }
3804: encoding = xmlParseEncodingDecl(ctxt);
3805:
3806: SKIP_BLANKS;
3807: if ((CUR == '?') && (NXT(1) == '>')) {
3808: SKIP(2);
3809: } else if (CUR == '>') {
3810: /* Deprecated old WD ... */
3811: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3812: ctxt->sax->error(ctxt->userData, "XML declaration must end-up with '?>'\n");
3813: ctxt->wellFormed = 0;
3814: NEXT;
3815: } else {
3816: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3817: ctxt->sax->error(ctxt->userData, "parsing XML declaration: '?>' expected\n");
3818: ctxt->wellFormed = 0;
3819: MOVETO_ENDTAG(CUR_PTR);
3820: NEXT;
3821: }
3822: return(encoding);
3823: }
3824:
3825: /*
3826: * xmlParseConditionalSections
3827: * @ctxt: an XML parser context
3828: *
3829: * TODO : Conditionnal section are not yet supported !
3830: *
3831: * [61] conditionalSect ::= includeSect | ignoreSect
3832: * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
3833: * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
3834: * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
3835: * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
3836: */
3837:
3838: void
3839: xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
3840: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
3841: ctxt->sax->warning(ctxt->userData,
3842: "XML conditional section not supported\n");
3843: /*
3844: * Skip up to the end of the conditionnal section.
3845: */
3846: while ((CUR != 0) && ((CUR != ']') || (NXT(1) != ']') || (NXT(2) != '>')))
3847: NEXT;
3848: if (CUR == 0) {
3849: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3850: ctxt->sax->error(ctxt->userData,
3851: "XML conditional section not closed\n");
3852: ctxt->wellFormed = 0;
3853: }
3854: }
3855:
3856: /**
3857: * xmlParseExternalSubset
3858: * @ctxt: an XML parser context
3859: *
3860: * parse Markup declarations from an external subset
3861: *
3862: * [30] extSubset ::= textDecl? extSubsetDecl
3863: *
3864: * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
3865: *
3866: * TODO There is a check [ VC: Proper Declaration/PE Nesting ]
3867: */
3868: void
1.79 daniel 3869: xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const CHAR *ExternalID,
3870: const CHAR *SystemID) {
1.76 daniel 3871: if ((CUR == '<') && (NXT(1) == '?') &&
3872: (NXT(2) == 'x') && (NXT(3) == 'm') &&
3873: (NXT(4) == 'l')) {
3874: xmlParseTextDecl(ctxt);
3875: }
1.79 daniel 3876: if (ctxt->myDoc == NULL) {
3877: ctxt->myDoc = xmlNewDoc("1.0");
3878: }
3879: if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
3880: xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
3881:
1.96 daniel 3882: ctxt->instate = XML_PARSER_DTD;
1.76 daniel 3883: while (((CUR == '<') && (NXT(1) == '?')) ||
3884: ((CUR == '<') && (NXT(1) == '!')) ||
3885: IS_BLANK(CUR)) {
3886: if ((CUR == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
3887: xmlParseConditionalSections(ctxt);
3888: } else if (IS_BLANK(CUR)) {
3889: NEXT;
3890: } else if (CUR == '%') {
3891: xmlParsePEReference(ctxt);
3892: } else
3893: xmlParseMarkupDecl(ctxt);
1.77 daniel 3894:
3895: /*
3896: * Pop-up of finished entities.
3897: */
3898: while ((CUR == 0) && (ctxt->inputNr > 1))
3899: xmlPopInput(ctxt);
3900:
1.76 daniel 3901: }
3902:
3903: if (CUR != 0) {
3904: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3905: ctxt->sax->error(ctxt->userData,
3906: "Extra content at the end of the document\n");
3907: ctxt->wellFormed = 0;
3908: }
3909:
3910: }
3911:
3912: /**
1.77 daniel 3913: * xmlParseReference:
3914: * @ctxt: an XML parser context
3915: *
3916: * parse and handle entity references in content, depending on the SAX
3917: * interface, this may end-up in a call to character() if this is a
1.79 daniel 3918: * CharRef, a predefined entity, if there is no reference() callback.
3919: * or if the parser was asked to switch to that mode.
1.77 daniel 3920: *
3921: * [67] Reference ::= EntityRef | CharRef
3922: */
3923: void
3924: xmlParseReference(xmlParserCtxtPtr ctxt) {
3925: xmlEntityPtr ent;
3926: CHAR *val;
3927: if (CUR != '&') return;
3928:
3929: if (NXT(1) == '#') {
3930: CHAR out[2];
3931: int val = xmlParseCharRef(ctxt);
3932: /* TODO: invalid for UTF-8 variable encoding !!! */
3933: out[0] = val;
3934: out[1] = 0;
3935: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
3936: ctxt->sax->characters(ctxt->userData, out, 1);
3937: } else {
3938: ent = xmlParseEntityRef(ctxt);
3939: if (ent == NULL) return;
3940: if ((ent->name != NULL) &&
3941: (ent->type != XML_INTERNAL_PREDEFINED_ENTITY) &&
1.79 daniel 3942: (ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
3943: (ctxt->replaceEntities == 0)) {
3944:
1.77 daniel 3945: /*
3946: * Create a node.
3947: */
3948: ctxt->sax->reference(ctxt->userData, ent->name);
3949: return;
3950: }
3951: val = ent->content;
3952: if (val == NULL) return;
3953: /*
3954: * inline the entity.
3955: */
3956: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
3957: ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
3958: }
1.24 daniel 3959: }
3960:
1.50 daniel 3961: /**
3962: * xmlParseEntityRef:
3963: * @ctxt: an XML parser context
3964: *
3965: * parse ENTITY references declarations
1.24 daniel 3966: *
3967: * [68] EntityRef ::= '&' Name ';'
1.68 daniel 3968: *
1.77 daniel 3969: * Returns the xmlEntityPtr if found, or NULL otherwise.
1.24 daniel 3970: */
1.77 daniel 3971: xmlEntityPtr
1.55 daniel 3972: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1.24 daniel 3973: CHAR *name;
1.72 daniel 3974: xmlEntityPtr ent = NULL;
1.24 daniel 3975:
1.91 daniel 3976: GROW;
1.40 daniel 3977: if (CUR == '&') {
3978: NEXT;
1.24 daniel 3979: name = xmlParseName(ctxt);
3980: if (name == NULL) {
1.55 daniel 3981: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3982: ctxt->sax->error(ctxt->userData, "xmlParseEntityRef: no name\n");
1.59 daniel 3983: ctxt->wellFormed = 0;
1.24 daniel 3984: } else {
1.40 daniel 3985: if (CUR == ';') {
3986: NEXT;
1.24 daniel 3987: /*
1.77 daniel 3988: * Ask first SAX for entity resolution, otherwise try the
3989: * predefined set.
3990: */
3991: if (ctxt->sax != NULL) {
3992: if (ctxt->sax->getEntity != NULL)
3993: ent = ctxt->sax->getEntity(ctxt->userData, name);
3994: if (ent == NULL)
3995: ent = xmlGetPredefinedEntity(name);
3996: }
3997:
3998: /*
1.59 daniel 3999: * Well Formedness Constraint if:
4000: * - standalone
4001: * or
4002: * - no external subset and no external parameter entities
4003: * referenced
4004: * then
4005: * the entity referenced must have been declared
4006: *
1.72 daniel 4007: * TODO: to be double checked !!! This is wrong !
1.59 daniel 4008: */
1.77 daniel 4009: if (ent == NULL) {
4010: if (ctxt->sax != NULL) {
1.72 daniel 4011: if (((ctxt->sax->isStandalone != NULL) &&
1.77 daniel 4012: ctxt->sax->isStandalone(ctxt->userData) == 1) ||
1.72 daniel 4013: (((ctxt->sax->hasInternalSubset == NULL) ||
1.74 daniel 4014: ctxt->sax->hasInternalSubset(ctxt->userData) == 0) &&
1.72 daniel 4015: ((ctxt->sax->hasExternalSubset == NULL) ||
1.74 daniel 4016: ctxt->sax->hasExternalSubset(ctxt->userData) == 0))) {
1.77 daniel 4017: if (ctxt->sax->error != NULL)
4018: ctxt->sax->error(ctxt->userData,
4019: "Entity '%s' not defined\n", name);
4020: ctxt->wellFormed = 0;
4021: }
4022: } else {
4023: fprintf(stderr, "Entity '%s' not defined\n", name);
4024: ctxt->wellFormed = 0;
1.59 daniel 4025: }
1.77 daniel 4026: }
1.59 daniel 4027:
4028: /*
4029: * Well Formedness Constraint :
4030: * The referenced entity must be a parsed entity.
4031: */
4032: if (ent != NULL) {
4033: switch (ent->type) {
4034: case XML_INTERNAL_PARAMETER_ENTITY:
4035: case XML_EXTERNAL_PARAMETER_ENTITY:
4036: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4037: ctxt->sax->error(ctxt->userData,
1.59 daniel 4038: "Attempt to reference the parameter entity '%s'\n", name);
4039: ctxt->wellFormed = 0;
4040: break;
4041:
4042: case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
4043: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4044: ctxt->sax->error(ctxt->userData,
1.59 daniel 4045: "Attempt to reference unparsed entity '%s'\n", name);
4046: ctxt->wellFormed = 0;
4047: break;
4048: }
4049: }
4050:
4051: /*
1.77 daniel 4052: * TODO: !!!
1.59 daniel 4053: * Well Formedness Constraint :
4054: * The referenced entity must not lead to recursion !
4055: */
4056:
1.77 daniel 4057:
1.24 daniel 4058: } else {
1.55 daniel 4059: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4060: ctxt->sax->error(ctxt->userData,
1.59 daniel 4061: "xmlParseEntityRef: expecting ';'\n");
4062: ctxt->wellFormed = 0;
1.24 daniel 4063: }
1.45 daniel 4064: free(name);
1.24 daniel 4065: }
4066: }
1.77 daniel 4067: return(ent);
1.24 daniel 4068: }
4069:
1.50 daniel 4070: /**
4071: * xmlParsePEReference:
4072: * @ctxt: an XML parser context
4073: *
4074: * parse PEReference declarations
1.77 daniel 4075: * The entity content is handled directly by pushing it's content as
4076: * a new input stream.
1.22 daniel 4077: *
4078: * [69] PEReference ::= '%' Name ';'
1.68 daniel 4079: *
1.22 daniel 4080: */
1.77 daniel 4081: void
1.55 daniel 4082: xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.22 daniel 4083: CHAR *name;
1.72 daniel 4084: xmlEntityPtr entity = NULL;
1.50 daniel 4085: xmlParserInputPtr input;
1.22 daniel 4086:
1.40 daniel 4087: if (CUR == '%') {
4088: NEXT;
1.22 daniel 4089: name = xmlParseName(ctxt);
4090: if (name == NULL) {
1.55 daniel 4091: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4092: ctxt->sax->error(ctxt->userData, "xmlParsePEReference: no name\n");
1.59 daniel 4093: ctxt->wellFormed = 0;
1.22 daniel 4094: } else {
1.40 daniel 4095: if (CUR == ';') {
4096: NEXT;
1.72 daniel 4097: if ((ctxt->sax != NULL) && (ctxt->sax->getEntity != NULL))
1.79 daniel 4098: entity = ctxt->sax->getEntity(ctxt->userData, name);
1.72 daniel 4099: /* TODO !!!! Must check that it's of the proper type !!! */
1.45 daniel 4100: if (entity == NULL) {
1.55 daniel 4101: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1.74 daniel 4102: ctxt->sax->warning(ctxt->userData,
1.59 daniel 4103: "xmlParsePEReference: %%%s; not found\n", name);
1.50 daniel 4104: } else {
4105: input = xmlNewEntityInputStream(ctxt, entity);
4106: xmlPushInput(ctxt, input);
1.45 daniel 4107: }
1.22 daniel 4108: } else {
1.55 daniel 4109: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4110: ctxt->sax->error(ctxt->userData,
1.59 daniel 4111: "xmlParsePEReference: expecting ';'\n");
4112: ctxt->wellFormed = 0;
1.22 daniel 4113: }
1.45 daniel 4114: free(name);
1.3 veillard 4115: }
4116: }
4117: }
4118:
1.50 daniel 4119: /**
4120: * xmlParseDocTypeDecl :
4121: * @ctxt: an XML parser context
4122: *
4123: * parse a DOCTYPE declaration
1.21 daniel 4124: *
1.22 daniel 4125: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
4126: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.21 daniel 4127: */
4128:
1.55 daniel 4129: void
4130: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.21 daniel 4131: CHAR *name;
4132: CHAR *ExternalID = NULL;
1.39 daniel 4133: CHAR *URI = NULL;
1.21 daniel 4134:
4135: /*
4136: * We know that '<!DOCTYPE' has been detected.
4137: */
1.40 daniel 4138: SKIP(9);
1.21 daniel 4139:
1.42 daniel 4140: SKIP_BLANKS;
1.21 daniel 4141:
4142: /*
4143: * Parse the DOCTYPE name.
4144: */
4145: name = xmlParseName(ctxt);
4146: if (name == NULL) {
1.55 daniel 4147: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4148: ctxt->sax->error(ctxt->userData, "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.59 daniel 4149: ctxt->wellFormed = 0;
1.21 daniel 4150: }
4151:
1.42 daniel 4152: SKIP_BLANKS;
1.21 daniel 4153:
4154: /*
1.22 daniel 4155: * Check for SystemID and ExternalID
4156: */
1.67 daniel 4157: URI = xmlParseExternalID(ctxt, &ExternalID, 1);
1.42 daniel 4158: SKIP_BLANKS;
1.36 daniel 4159:
1.76 daniel 4160: /*
4161: * NOTE: the SAX callback may try to fetch the external subset
4162: * entity and fill it up !
4163: */
1.72 daniel 4164: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL))
1.74 daniel 4165: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
1.22 daniel 4166:
4167: /*
4168: * Is there any DTD definition ?
4169: */
1.40 daniel 4170: if (CUR == '[') {
1.96 daniel 4171: ctxt->instate = XML_PARSER_DTD;
1.40 daniel 4172: NEXT;
1.22 daniel 4173: /*
4174: * Parse the succession of Markup declarations and
4175: * PEReferences.
4176: * Subsequence (markupdecl | PEReference | S)*
4177: */
1.40 daniel 4178: while (CUR != ']') {
4179: const CHAR *check = CUR_PTR;
1.22 daniel 4180:
1.42 daniel 4181: SKIP_BLANKS;
1.22 daniel 4182: xmlParseMarkupDecl(ctxt);
1.50 daniel 4183: xmlParsePEReference(ctxt);
1.22 daniel 4184:
1.40 daniel 4185: if (CUR_PTR == check) {
1.55 daniel 4186: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4187: ctxt->sax->error(ctxt->userData,
1.31 daniel 4188: "xmlParseDocTypeDecl: error detected in Markup declaration\n");
1.59 daniel 4189: ctxt->wellFormed = 0;
1.22 daniel 4190: break;
4191: }
1.77 daniel 4192:
4193: /*
4194: * Pop-up of finished entities.
4195: */
4196: while ((CUR == 0) && (ctxt->inputNr > 1))
4197: xmlPopInput(ctxt);
4198:
1.22 daniel 4199: }
1.40 daniel 4200: if (CUR == ']') NEXT;
1.22 daniel 4201: }
4202:
4203: /*
4204: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 4205: */
1.40 daniel 4206: if (CUR != '>') {
1.55 daniel 4207: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4208: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
1.59 daniel 4209: ctxt->wellFormed = 0;
1.22 daniel 4210: /* We shouldn't try to resynchronize ... */
1.21 daniel 4211: }
1.40 daniel 4212: NEXT;
1.22 daniel 4213:
4214: /*
4215: * Cleanup, since we don't use all those identifiers
4216: * TODO : the DOCTYPE if available should be stored !
4217: */
1.39 daniel 4218: if (URI != NULL) free(URI);
1.22 daniel 4219: if (ExternalID != NULL) free(ExternalID);
4220: if (name != NULL) free(name);
1.21 daniel 4221: }
4222:
1.50 daniel 4223: /**
4224: * xmlParseAttribute:
4225: * @ctxt: an XML parser context
1.72 daniel 4226: * @value: a CHAR ** used to store the value of the attribute
1.50 daniel 4227: *
4228: * parse an attribute
1.3 veillard 4229: *
1.22 daniel 4230: * [41] Attribute ::= Name Eq AttValue
4231: *
4232: * [25] Eq ::= S? '=' S?
4233: *
1.29 daniel 4234: * With namespace:
4235: *
4236: * [NS 11] Attribute ::= QName Eq AttValue
1.43 daniel 4237: *
4238: * Also the case QName == xmlns:??? is handled independently as a namespace
4239: * definition.
1.69 daniel 4240: *
1.72 daniel 4241: * Returns the attribute name, and the value in *value.
1.3 veillard 4242: */
4243:
1.72 daniel 4244: CHAR *
4245: xmlParseAttribute(xmlParserCtxtPtr ctxt, CHAR **value) {
1.59 daniel 4246: CHAR *name, *val;
1.3 veillard 4247:
1.72 daniel 4248: *value = NULL;
4249: name = xmlParseName(ctxt);
1.22 daniel 4250: if (name == NULL) {
1.55 daniel 4251: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4252: ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
1.59 daniel 4253: ctxt->wellFormed = 0;
1.52 daniel 4254: return(NULL);
1.3 veillard 4255: }
4256:
4257: /*
1.29 daniel 4258: * read the value
1.3 veillard 4259: */
1.42 daniel 4260: SKIP_BLANKS;
1.40 daniel 4261: if (CUR == '=') {
4262: NEXT;
1.42 daniel 4263: SKIP_BLANKS;
1.72 daniel 4264: val = xmlParseAttValue(ctxt);
1.96 daniel 4265: ctxt->instate = XML_PARSER_CONTENT;
1.29 daniel 4266: } else {
1.55 daniel 4267: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4268: ctxt->sax->error(ctxt->userData,
1.59 daniel 4269: "Specification mandate value for attribute %s\n", name);
4270: ctxt->wellFormed = 0;
1.52 daniel 4271: return(NULL);
1.43 daniel 4272: }
4273:
1.72 daniel 4274: *value = val;
4275: return(name);
1.3 veillard 4276: }
4277:
1.50 daniel 4278: /**
4279: * xmlParseStartTag:
4280: * @ctxt: an XML parser context
4281: *
4282: * parse a start of tag either for rule element or
4283: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 4284: *
4285: * [40] STag ::= '<' Name (S Attribute)* S? '>'
4286: *
1.29 daniel 4287: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
4288: *
4289: * With namespace:
4290: *
4291: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
4292: *
4293: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.83 daniel 4294: *
4295: * Returns the element name parsed
1.2 veillard 4296: */
4297:
1.83 daniel 4298: CHAR *
1.69 daniel 4299: xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.72 daniel 4300: CHAR *name;
4301: CHAR *attname;
4302: CHAR *attvalue;
4303: const CHAR **atts = NULL;
4304: int nbatts = 0;
4305: int maxatts = 0;
4306: int i;
1.2 veillard 4307:
1.83 daniel 4308: if (CUR != '<') return(NULL);
1.40 daniel 4309: NEXT;
1.3 veillard 4310:
1.72 daniel 4311: name = xmlParseName(ctxt);
1.59 daniel 4312: if (name == NULL) {
4313: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4314: ctxt->sax->error(ctxt->userData,
1.59 daniel 4315: "xmlParseStartTag: invalid element name\n");
4316: ctxt->wellFormed = 0;
1.83 daniel 4317: return(NULL);
1.50 daniel 4318: }
4319:
4320: /*
1.3 veillard 4321: * Now parse the attributes, it ends up with the ending
4322: *
4323: * (S Attribute)* S?
4324: */
1.42 daniel 4325: SKIP_BLANKS;
1.91 daniel 4326: GROW;
1.40 daniel 4327: while ((IS_CHAR(CUR)) &&
4328: (CUR != '>') &&
4329: ((CUR != '/') || (NXT(1) != '>'))) {
4330: const CHAR *q = CUR_PTR;
1.91 daniel 4331: int cons = ctxt->input->consumed;
1.29 daniel 4332:
1.72 daniel 4333: attname = xmlParseAttribute(ctxt, &attvalue);
4334: if ((attname != NULL) && (attvalue != NULL)) {
4335: /*
4336: * Well formedness requires at most one declaration of an attribute
4337: */
4338: for (i = 0; i < nbatts;i += 2) {
4339: if (!xmlStrcmp(atts[i], attname)) {
4340: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4341: ctxt->sax->error(ctxt->userData, "Attribute %s redefined\n",
1.72 daniel 4342: name);
4343: ctxt->wellFormed = 0;
4344: free(attname);
4345: free(attvalue);
4346: break;
4347: }
4348: }
4349:
4350: /*
4351: * Add the pair to atts
4352: */
4353: if (atts == NULL) {
4354: maxatts = 10;
4355: atts = (const CHAR **) malloc(maxatts * sizeof(CHAR *));
4356: if (atts == NULL) {
1.86 daniel 4357: fprintf(stderr, "malloc of %ld byte failed\n",
4358: maxatts * (long)sizeof(CHAR *));
1.83 daniel 4359: return(NULL);
1.72 daniel 4360: }
4361: } else if (nbatts + 2 < maxatts) {
4362: maxatts *= 2;
4363: atts = (const CHAR **) realloc(atts, maxatts * sizeof(CHAR *));
4364: if (atts == NULL) {
1.86 daniel 4365: fprintf(stderr, "realloc of %ld byte failed\n",
4366: maxatts * (long)sizeof(CHAR *));
1.83 daniel 4367: return(NULL);
1.72 daniel 4368: }
4369: }
4370: atts[nbatts++] = attname;
4371: atts[nbatts++] = attvalue;
4372: atts[nbatts] = NULL;
4373: atts[nbatts + 1] = NULL;
4374: }
4375:
1.42 daniel 4376: SKIP_BLANKS;
1.91 daniel 4377: if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
1.55 daniel 4378: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4379: ctxt->sax->error(ctxt->userData,
1.31 daniel 4380: "xmlParseStartTag: problem parsing attributes\n");
1.59 daniel 4381: ctxt->wellFormed = 0;
1.29 daniel 4382: break;
1.3 veillard 4383: }
1.91 daniel 4384: GROW;
1.3 veillard 4385: }
4386:
1.43 daniel 4387: /*
1.72 daniel 4388: * SAX: Start of Element !
1.43 daniel 4389: */
1.72 daniel 4390: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1.74 daniel 4391: ctxt->sax->startElement(ctxt->userData, name, atts);
1.43 daniel 4392:
1.72 daniel 4393: if (atts != NULL) {
4394: for (i = 0;i < nbatts;i++) free((CHAR *) atts[i]);
4395: free(atts);
4396: }
1.83 daniel 4397: return(name);
1.3 veillard 4398: }
4399:
1.50 daniel 4400: /**
4401: * xmlParseEndTag:
4402: * @ctxt: an XML parser context
1.83 daniel 4403: * @tagname: the tag name as parsed in the opening tag.
1.50 daniel 4404: *
4405: * parse an end of tag
1.27 daniel 4406: *
4407: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 4408: *
4409: * With namespace
4410: *
1.72 daniel 4411: * [NS 9] ETag ::= '</' QName S? '>'
1.7 veillard 4412: */
4413:
1.55 daniel 4414: void
1.83 daniel 4415: xmlParseEndTag(xmlParserCtxtPtr ctxt, CHAR *tagname) {
1.72 daniel 4416: CHAR *name;
1.7 veillard 4417:
1.91 daniel 4418: GROW;
1.40 daniel 4419: if ((CUR != '<') || (NXT(1) != '/')) {
1.55 daniel 4420: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4421: ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
1.59 daniel 4422: ctxt->wellFormed = 0;
1.27 daniel 4423: return;
4424: }
1.40 daniel 4425: SKIP(2);
1.7 veillard 4426:
1.72 daniel 4427: name = xmlParseName(ctxt);
1.7 veillard 4428:
4429: /*
4430: * We should definitely be at the ending "S? '>'" part
4431: */
1.91 daniel 4432: GROW;
1.42 daniel 4433: SKIP_BLANKS;
1.40 daniel 4434: if ((!IS_CHAR(CUR)) || (CUR != '>')) {
1.55 daniel 4435: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4436: ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
1.59 daniel 4437: ctxt->wellFormed = 0;
1.7 veillard 4438: } else
1.40 daniel 4439: NEXT;
1.7 veillard 4440:
1.72 daniel 4441: /*
1.83 daniel 4442: * Well formedness constraints, opening and closing must match.
4443: */
4444: if (xmlStrcmp(name, tagname)) {
4445: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4446: ctxt->sax->error(ctxt->userData,
4447: "Opening and ending tag mismatch: %s and %s\n", tagname, name);
4448: ctxt->wellFormed = 0;
4449: }
4450:
4451: /*
1.72 daniel 4452: * SAX: End of Tag
4453: */
4454: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1.74 daniel 4455: ctxt->sax->endElement(ctxt->userData, name);
1.72 daniel 4456:
4457: if (name != NULL)
4458: free(name);
4459:
1.7 veillard 4460: return;
4461: }
4462:
1.50 daniel 4463: /**
4464: * xmlParseCDSect:
4465: * @ctxt: an XML parser context
4466: *
4467: * Parse escaped pure raw content.
1.29 daniel 4468: *
4469: * [18] CDSect ::= CDStart CData CDEnd
4470: *
4471: * [19] CDStart ::= '<![CDATA['
4472: *
4473: * [20] Data ::= (Char* - (Char* ']]>' Char*))
4474: *
4475: * [21] CDEnd ::= ']]>'
1.3 veillard 4476: */
1.55 daniel 4477: void
4478: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.17 daniel 4479: const CHAR *r, *s, *base;
1.3 veillard 4480:
1.40 daniel 4481: if ((CUR == '<') && (NXT(1) == '!') &&
4482: (NXT(2) == '[') && (NXT(3) == 'C') &&
4483: (NXT(4) == 'D') && (NXT(5) == 'A') &&
4484: (NXT(6) == 'T') && (NXT(7) == 'A') &&
4485: (NXT(8) == '[')) {
4486: SKIP(9);
1.29 daniel 4487: } else
1.45 daniel 4488: return;
1.40 daniel 4489: base = CUR_PTR;
4490: if (!IS_CHAR(CUR)) {
1.55 daniel 4491: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4492: ctxt->sax->error(ctxt->userData, "CData section not finished\n%.50s\n", base);
1.59 daniel 4493: ctxt->wellFormed = 0;
1.45 daniel 4494: return;
1.3 veillard 4495: }
1.91 daniel 4496: r = CUR_PTR;
4497: NEXT;
1.40 daniel 4498: if (!IS_CHAR(CUR)) {
1.55 daniel 4499: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4500: ctxt->sax->error(ctxt->userData, "CData section not finished\n%.50s\n", base);
1.59 daniel 4501: ctxt->wellFormed = 0;
1.45 daniel 4502: return;
1.3 veillard 4503: }
1.91 daniel 4504: s = CUR_PTR;
4505: NEXT;
1.40 daniel 4506: while (IS_CHAR(CUR) &&
4507: ((*r != ']') || (*s != ']') || (CUR != '>'))) {
4508: r++;s++;NEXT;
1.3 veillard 4509: }
1.40 daniel 4510: if (!IS_CHAR(CUR)) {
1.55 daniel 4511: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4512: ctxt->sax->error(ctxt->userData, "CData section not finished\n%.50s\n", base);
1.59 daniel 4513: ctxt->wellFormed = 0;
1.45 daniel 4514: return;
1.3 veillard 4515: }
1.16 daniel 4516:
1.45 daniel 4517: /*
4518: * Ok the segment [base CUR_PTR] is to be consumed as chars.
4519: */
4520: if (ctxt->sax != NULL) {
1.72 daniel 4521: if (areBlanks(ctxt, base, CUR_PTR - base)) {
4522: if (ctxt->sax->ignorableWhitespace != NULL)
1.74 daniel 4523: ctxt->sax->ignorableWhitespace(ctxt->userData, base,
1.72 daniel 4524: (CUR_PTR - base) - 2);
4525: } else {
4526: if (ctxt->sax->characters != NULL)
1.74 daniel 4527: ctxt->sax->characters(ctxt->userData, base, (CUR_PTR - base) - 2);
1.72 daniel 4528: }
1.45 daniel 4529: }
1.2 veillard 4530: }
4531:
1.50 daniel 4532: /**
4533: * xmlParseContent:
4534: * @ctxt: an XML parser context
4535: *
4536: * Parse a content:
1.2 veillard 4537: *
1.27 daniel 4538: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 4539: */
4540:
1.55 daniel 4541: void
4542: xmlParseContent(xmlParserCtxtPtr ctxt) {
1.97 ! daniel 4543: GROW;
1.40 daniel 4544: while ((CUR != '<') || (NXT(1) != '/')) {
4545: const CHAR *test = CUR_PTR;
1.91 daniel 4546: int cons = ctxt->input->consumed;
1.27 daniel 4547:
4548: /*
4549: * First case : a Processing Instruction.
4550: */
1.40 daniel 4551: if ((CUR == '<') && (NXT(1) == '?')) {
1.27 daniel 4552: xmlParsePI(ctxt);
4553: }
1.72 daniel 4554:
1.27 daniel 4555: /*
4556: * Second case : a CDSection
4557: */
1.40 daniel 4558: else if ((CUR == '<') && (NXT(1) == '!') &&
4559: (NXT(2) == '[') && (NXT(3) == 'C') &&
4560: (NXT(4) == 'D') && (NXT(5) == 'A') &&
4561: (NXT(6) == 'T') && (NXT(7) == 'A') &&
4562: (NXT(8) == '[')) {
1.45 daniel 4563: xmlParseCDSect(ctxt);
1.27 daniel 4564: }
1.72 daniel 4565:
1.27 daniel 4566: /*
4567: * Third case : a comment
4568: */
1.40 daniel 4569: else if ((CUR == '<') && (NXT(1) == '!') &&
4570: (NXT(2) == '-') && (NXT(3) == '-')) {
1.72 daniel 4571: xmlParseComment(ctxt, 1);
1.97 ! daniel 4572: ctxt->instate = XML_PARSER_CONTENT;
1.27 daniel 4573: }
1.72 daniel 4574:
1.27 daniel 4575: /*
4576: * Fourth case : a sub-element.
4577: */
1.40 daniel 4578: else if (CUR == '<') {
1.72 daniel 4579: xmlParseElement(ctxt);
1.45 daniel 4580: }
1.72 daniel 4581:
1.45 daniel 4582: /*
1.50 daniel 4583: * Fifth case : a reference. If if has not been resolved,
4584: * parsing returns it's Name, create the node
1.45 daniel 4585: */
1.97 ! daniel 4586:
1.45 daniel 4587: else if (CUR == '&') {
1.77 daniel 4588: xmlParseReference(ctxt);
1.27 daniel 4589: }
1.72 daniel 4590:
1.27 daniel 4591: /*
4592: * Last case, text. Note that References are handled directly.
4593: */
4594: else {
1.45 daniel 4595: xmlParseCharData(ctxt, 0);
1.3 veillard 4596: }
1.14 veillard 4597:
1.91 daniel 4598: GROW;
1.14 veillard 4599: /*
1.45 daniel 4600: * Pop-up of finished entities.
1.14 veillard 4601: */
1.69 daniel 4602: while ((CUR == 0) && (ctxt->inputNr > 1))
4603: xmlPopInput(ctxt);
1.45 daniel 4604:
1.91 daniel 4605: if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
1.55 daniel 4606: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4607: ctxt->sax->error(ctxt->userData,
1.59 daniel 4608: "detected an error in element content\n");
4609: ctxt->wellFormed = 0;
1.29 daniel 4610: break;
4611: }
1.3 veillard 4612: }
1.2 veillard 4613: }
4614:
1.50 daniel 4615: /**
4616: * xmlParseElement:
4617: * @ctxt: an XML parser context
4618: *
4619: * parse an XML element, this is highly recursive
1.26 daniel 4620: *
4621: * [39] element ::= EmptyElemTag | STag content ETag
4622: *
4623: * [41] Attribute ::= Name Eq AttValue
1.2 veillard 4624: */
1.26 daniel 4625:
1.72 daniel 4626: void
1.69 daniel 4627: xmlParseElement(xmlParserCtxtPtr ctxt) {
1.40 daniel 4628: const CHAR *openTag = CUR_PTR;
1.83 daniel 4629: CHAR *name;
1.32 daniel 4630: xmlParserNodeInfo node_info;
1.2 veillard 4631:
1.32 daniel 4632: /* Capture start position */
1.40 daniel 4633: node_info.begin_pos = CUR_PTR - ctxt->input->base;
4634: node_info.begin_line = ctxt->input->line;
1.32 daniel 4635:
1.83 daniel 4636: name = xmlParseStartTag(ctxt);
4637: if (name == NULL) {
4638: return;
4639: }
1.2 veillard 4640:
4641: /*
4642: * Check for an Empty Element.
4643: */
1.40 daniel 4644: if ((CUR == '/') && (NXT(1) == '>')) {
4645: SKIP(2);
1.72 daniel 4646: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1.83 daniel 4647: ctxt->sax->endElement(ctxt->userData, name);
4648: free(name);
1.72 daniel 4649: return;
1.2 veillard 4650: }
1.91 daniel 4651: if (CUR == '>') {
4652: NEXT;
4653: } else {
1.55 daniel 4654: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4655: ctxt->sax->error(ctxt->userData, "Couldn't find end of Start Tag\n%.30s\n",
1.57 daniel 4656: openTag);
1.59 daniel 4657: ctxt->wellFormed = 0;
1.45 daniel 4658:
4659: /*
4660: * end of parsing of this node.
4661: */
4662: nodePop(ctxt);
1.83 daniel 4663: free(name);
1.72 daniel 4664: return;
1.2 veillard 4665: }
4666:
4667: /*
4668: * Parse the content of the element:
4669: */
1.45 daniel 4670: xmlParseContent(ctxt);
1.40 daniel 4671: if (!IS_CHAR(CUR)) {
1.55 daniel 4672: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4673: ctxt->sax->error(ctxt->userData,
1.57 daniel 4674: "Premature end of data in tag %.30s\n", openTag);
1.59 daniel 4675: ctxt->wellFormed = 0;
1.45 daniel 4676:
4677: /*
4678: * end of parsing of this node.
4679: */
4680: nodePop(ctxt);
1.83 daniel 4681: free(name);
1.72 daniel 4682: return;
1.2 veillard 4683: }
4684:
4685: /*
1.27 daniel 4686: * parse the end of tag: '</' should be here.
1.2 veillard 4687: */
1.83 daniel 4688: xmlParseEndTag(ctxt, name);
4689: free(name);
1.2 veillard 4690: }
4691:
1.50 daniel 4692: /**
4693: * xmlParseVersionNum:
4694: * @ctxt: an XML parser context
4695: *
4696: * parse the XML version value.
1.29 daniel 4697: *
4698: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
1.68 daniel 4699: *
4700: * Returns the string giving the XML version number, or NULL
1.29 daniel 4701: */
1.55 daniel 4702: CHAR *
4703: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.40 daniel 4704: const CHAR *q = CUR_PTR;
1.29 daniel 4705: CHAR *ret;
4706:
1.40 daniel 4707: while (IS_CHAR(CUR) &&
4708: (((CUR >= 'a') && (CUR <= 'z')) ||
4709: ((CUR >= 'A') && (CUR <= 'Z')) ||
4710: ((CUR >= '0') && (CUR <= '9')) ||
4711: (CUR == '_') || (CUR == '.') ||
4712: (CUR == ':') || (CUR == '-'))) NEXT;
4713: ret = xmlStrndup(q, CUR_PTR - q);
1.29 daniel 4714: return(ret);
4715: }
4716:
1.50 daniel 4717: /**
4718: * xmlParseVersionInfo:
4719: * @ctxt: an XML parser context
4720: *
4721: * parse the XML version.
1.29 daniel 4722: *
4723: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
4724: *
4725: * [25] Eq ::= S? '=' S?
1.50 daniel 4726: *
1.68 daniel 4727: * Returns the version string, e.g. "1.0"
1.29 daniel 4728: */
4729:
1.55 daniel 4730: CHAR *
4731: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
1.29 daniel 4732: CHAR *version = NULL;
4733: const CHAR *q;
4734:
1.40 daniel 4735: if ((CUR == 'v') && (NXT(1) == 'e') &&
4736: (NXT(2) == 'r') && (NXT(3) == 's') &&
4737: (NXT(4) == 'i') && (NXT(5) == 'o') &&
4738: (NXT(6) == 'n')) {
4739: SKIP(7);
1.42 daniel 4740: SKIP_BLANKS;
1.40 daniel 4741: if (CUR != '=') {
1.55 daniel 4742: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4743: ctxt->sax->error(ctxt->userData, "xmlParseVersionInfo : expected '='\n");
1.59 daniel 4744: ctxt->wellFormed = 0;
1.31 daniel 4745: return(NULL);
4746: }
1.40 daniel 4747: NEXT;
1.42 daniel 4748: SKIP_BLANKS;
1.40 daniel 4749: if (CUR == '"') {
4750: NEXT;
4751: q = CUR_PTR;
1.29 daniel 4752: version = xmlParseVersionNum(ctxt);
1.55 daniel 4753: if (CUR != '"') {
4754: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4755: ctxt->sax->error(ctxt->userData, "String not closed\n%.50s\n", q);
1.59 daniel 4756: ctxt->wellFormed = 0;
1.55 daniel 4757: } else
1.40 daniel 4758: NEXT;
4759: } else if (CUR == '\''){
4760: NEXT;
4761: q = CUR_PTR;
1.29 daniel 4762: version = xmlParseVersionNum(ctxt);
1.55 daniel 4763: if (CUR != '\'') {
4764: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4765: ctxt->sax->error(ctxt->userData, "String not closed\n%.50s\n", q);
1.59 daniel 4766: ctxt->wellFormed = 0;
1.55 daniel 4767: } else
1.40 daniel 4768: NEXT;
1.31 daniel 4769: } else {
1.55 daniel 4770: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4771: ctxt->sax->error(ctxt->userData,
1.59 daniel 4772: "xmlParseVersionInfo : expected ' or \"\n");
4773: ctxt->wellFormed = 0;
1.29 daniel 4774: }
4775: }
4776: return(version);
4777: }
4778:
1.50 daniel 4779: /**
4780: * xmlParseEncName:
4781: * @ctxt: an XML parser context
4782: *
4783: * parse the XML encoding name
1.29 daniel 4784: *
4785: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1.50 daniel 4786: *
1.68 daniel 4787: * Returns the encoding name value or NULL
1.29 daniel 4788: */
1.55 daniel 4789: CHAR *
4790: xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.40 daniel 4791: const CHAR *q = CUR_PTR;
1.29 daniel 4792: CHAR *ret = NULL;
4793:
1.40 daniel 4794: if (((CUR >= 'a') && (CUR <= 'z')) ||
4795: ((CUR >= 'A') && (CUR <= 'Z'))) {
4796: NEXT;
4797: while (IS_CHAR(CUR) &&
4798: (((CUR >= 'a') && (CUR <= 'z')) ||
4799: ((CUR >= 'A') && (CUR <= 'Z')) ||
4800: ((CUR >= '0') && (CUR <= '9')) ||
4801: (CUR == '-'))) NEXT;
4802: ret = xmlStrndup(q, CUR_PTR - q);
1.29 daniel 4803: } else {
1.55 daniel 4804: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4805: ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
1.59 daniel 4806: ctxt->wellFormed = 0;
1.29 daniel 4807: }
4808: return(ret);
4809: }
4810:
1.50 daniel 4811: /**
4812: * xmlParseEncodingDecl:
4813: * @ctxt: an XML parser context
4814: *
4815: * parse the XML encoding declaration
1.29 daniel 4816: *
4817: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
1.50 daniel 4818: *
4819: * TODO: this should setup the conversion filters.
4820: *
1.68 daniel 4821: * Returns the encoding value or NULL
1.29 daniel 4822: */
4823:
1.55 daniel 4824: CHAR *
4825: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 4826: CHAR *encoding = NULL;
4827: const CHAR *q;
4828:
1.42 daniel 4829: SKIP_BLANKS;
1.40 daniel 4830: if ((CUR == 'e') && (NXT(1) == 'n') &&
4831: (NXT(2) == 'c') && (NXT(3) == 'o') &&
4832: (NXT(4) == 'd') && (NXT(5) == 'i') &&
4833: (NXT(6) == 'n') && (NXT(7) == 'g')) {
4834: SKIP(8);
1.42 daniel 4835: SKIP_BLANKS;
1.40 daniel 4836: if (CUR != '=') {
1.55 daniel 4837: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4838: ctxt->sax->error(ctxt->userData, "xmlParseEncodingDecl : expected '='\n");
1.59 daniel 4839: ctxt->wellFormed = 0;
1.31 daniel 4840: return(NULL);
4841: }
1.40 daniel 4842: NEXT;
1.42 daniel 4843: SKIP_BLANKS;
1.40 daniel 4844: if (CUR == '"') {
4845: NEXT;
4846: q = CUR_PTR;
1.29 daniel 4847: encoding = xmlParseEncName(ctxt);
1.55 daniel 4848: if (CUR != '"') {
4849: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4850: ctxt->sax->error(ctxt->userData, "String not closed\n%.50s\n", q);
1.59 daniel 4851: ctxt->wellFormed = 0;
1.55 daniel 4852: } else
1.40 daniel 4853: NEXT;
4854: } else if (CUR == '\''){
4855: NEXT;
4856: q = CUR_PTR;
1.29 daniel 4857: encoding = xmlParseEncName(ctxt);
1.55 daniel 4858: if (CUR != '\'') {
4859: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4860: ctxt->sax->error(ctxt->userData, "String not closed\n%.50s\n", q);
1.59 daniel 4861: ctxt->wellFormed = 0;
1.55 daniel 4862: } else
1.40 daniel 4863: NEXT;
4864: } else if (CUR == '"'){
1.55 daniel 4865: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4866: ctxt->sax->error(ctxt->userData,
1.59 daniel 4867: "xmlParseEncodingDecl : expected ' or \"\n");
4868: ctxt->wellFormed = 0;
1.29 daniel 4869: }
4870: }
4871: return(encoding);
4872: }
4873:
1.50 daniel 4874: /**
4875: * xmlParseSDDecl:
4876: * @ctxt: an XML parser context
4877: *
4878: * parse the XML standalone declaration
1.29 daniel 4879: *
4880: * [32] SDDecl ::= S 'standalone' Eq
4881: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
1.68 daniel 4882: *
4883: * Returns 1 if standalone, 0 otherwise
1.29 daniel 4884: */
4885:
1.55 daniel 4886: int
4887: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 4888: int standalone = -1;
4889:
1.42 daniel 4890: SKIP_BLANKS;
1.40 daniel 4891: if ((CUR == 's') && (NXT(1) == 't') &&
4892: (NXT(2) == 'a') && (NXT(3) == 'n') &&
4893: (NXT(4) == 'd') && (NXT(5) == 'a') &&
4894: (NXT(6) == 'l') && (NXT(7) == 'o') &&
4895: (NXT(8) == 'n') && (NXT(9) == 'e')) {
4896: SKIP(10);
1.81 daniel 4897: SKIP_BLANKS;
1.40 daniel 4898: if (CUR != '=') {
1.55 daniel 4899: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4900: ctxt->sax->error(ctxt->userData,
1.59 daniel 4901: "XML standalone declaration : expected '='\n");
4902: ctxt->wellFormed = 0;
1.32 daniel 4903: return(standalone);
4904: }
1.40 daniel 4905: NEXT;
1.42 daniel 4906: SKIP_BLANKS;
1.40 daniel 4907: if (CUR == '\''){
4908: NEXT;
4909: if ((CUR == 'n') && (NXT(1) == 'o')) {
1.29 daniel 4910: standalone = 0;
1.40 daniel 4911: SKIP(2);
4912: } else if ((CUR == 'y') && (NXT(1) == 'e') &&
4913: (NXT(2) == 's')) {
1.29 daniel 4914: standalone = 1;
1.40 daniel 4915: SKIP(3);
1.29 daniel 4916: } else {
1.55 daniel 4917: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4918: ctxt->sax->error(ctxt->userData, "standalone accepts only 'yes' or 'no'\n");
1.59 daniel 4919: ctxt->wellFormed = 0;
1.29 daniel 4920: }
1.55 daniel 4921: if (CUR != '\'') {
4922: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4923: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 4924: ctxt->wellFormed = 0;
1.55 daniel 4925: } else
1.40 daniel 4926: NEXT;
4927: } else if (CUR == '"'){
4928: NEXT;
4929: if ((CUR == 'n') && (NXT(1) == 'o')) {
1.29 daniel 4930: standalone = 0;
1.40 daniel 4931: SKIP(2);
4932: } else if ((CUR == 'y') && (NXT(1) == 'e') &&
4933: (NXT(2) == 's')) {
1.29 daniel 4934: standalone = 1;
1.40 daniel 4935: SKIP(3);
1.29 daniel 4936: } else {
1.55 daniel 4937: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4938: ctxt->sax->error(ctxt->userData,
1.59 daniel 4939: "standalone accepts only 'yes' or 'no'\n");
4940: ctxt->wellFormed = 0;
1.29 daniel 4941: }
1.55 daniel 4942: if (CUR != '"') {
4943: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4944: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 4945: ctxt->wellFormed = 0;
1.55 daniel 4946: } else
1.40 daniel 4947: NEXT;
1.37 daniel 4948: } else {
1.55 daniel 4949: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4950: ctxt->sax->error(ctxt->userData, "Standalone value not found\n");
1.59 daniel 4951: ctxt->wellFormed = 0;
1.37 daniel 4952: }
1.29 daniel 4953: }
4954: return(standalone);
4955: }
4956:
1.50 daniel 4957: /**
4958: * xmlParseXMLDecl:
4959: * @ctxt: an XML parser context
4960: *
4961: * parse an XML declaration header
1.29 daniel 4962: *
4963: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 4964: */
4965:
1.55 daniel 4966: void
4967: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.1 veillard 4968: CHAR *version;
4969:
4970: /*
1.19 daniel 4971: * We know that '<?xml' is here.
1.1 veillard 4972: */
1.40 daniel 4973: SKIP(5);
1.1 veillard 4974:
1.59 daniel 4975: if (!IS_BLANK(CUR)) {
4976: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4977: ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
1.59 daniel 4978: ctxt->wellFormed = 0;
4979: }
1.42 daniel 4980: SKIP_BLANKS;
1.1 veillard 4981:
4982: /*
1.29 daniel 4983: * We should have the VersionInfo here.
1.1 veillard 4984: */
1.29 daniel 4985: version = xmlParseVersionInfo(ctxt);
4986: if (version == NULL)
1.45 daniel 4987: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.72 daniel 4988: ctxt->version = xmlStrdup(version);
1.45 daniel 4989: free(version);
1.29 daniel 4990:
4991: /*
4992: * We may have the encoding declaration
4993: */
1.59 daniel 4994: if (!IS_BLANK(CUR)) {
4995: if ((CUR == '?') && (NXT(1) == '>')) {
4996: SKIP(2);
4997: return;
4998: }
4999: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5000: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 5001: ctxt->wellFormed = 0;
5002: }
1.72 daniel 5003: ctxt->encoding = xmlParseEncodingDecl(ctxt);
1.1 veillard 5004:
5005: /*
1.29 daniel 5006: * We may have the standalone status.
1.1 veillard 5007: */
1.72 daniel 5008: if ((ctxt->encoding != NULL) && (!IS_BLANK(CUR))) {
1.59 daniel 5009: if ((CUR == '?') && (NXT(1) == '>')) {
5010: SKIP(2);
5011: return;
5012: }
5013: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5014: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 5015: ctxt->wellFormed = 0;
5016: }
5017: SKIP_BLANKS;
1.72 daniel 5018: ctxt->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 5019:
1.42 daniel 5020: SKIP_BLANKS;
1.40 daniel 5021: if ((CUR == '?') && (NXT(1) == '>')) {
5022: SKIP(2);
5023: } else if (CUR == '>') {
1.31 daniel 5024: /* Deprecated old WD ... */
1.55 daniel 5025: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5026: ctxt->sax->error(ctxt->userData, "XML declaration must end-up with '?>'\n");
1.59 daniel 5027: ctxt->wellFormed = 0;
1.40 daniel 5028: NEXT;
1.29 daniel 5029: } else {
1.55 daniel 5030: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5031: ctxt->sax->error(ctxt->userData, "parsing XML declaration: '?>' expected\n");
1.59 daniel 5032: ctxt->wellFormed = 0;
1.40 daniel 5033: MOVETO_ENDTAG(CUR_PTR);
5034: NEXT;
1.29 daniel 5035: }
1.1 veillard 5036: }
5037:
1.50 daniel 5038: /**
5039: * xmlParseMisc:
5040: * @ctxt: an XML parser context
5041: *
5042: * parse an XML Misc* optionnal field.
1.21 daniel 5043: *
1.22 daniel 5044: * [27] Misc ::= Comment | PI | S
1.1 veillard 5045: */
5046:
1.55 daniel 5047: void
5048: xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.40 daniel 5049: while (((CUR == '<') && (NXT(1) == '?')) ||
5050: ((CUR == '<') && (NXT(1) == '!') &&
5051: (NXT(2) == '-') && (NXT(3) == '-')) ||
5052: IS_BLANK(CUR)) {
5053: if ((CUR == '<') && (NXT(1) == '?')) {
1.16 daniel 5054: xmlParsePI(ctxt);
1.40 daniel 5055: } else if (IS_BLANK(CUR)) {
5056: NEXT;
1.1 veillard 5057: } else
1.31 daniel 5058: xmlParseComment(ctxt, 0);
1.1 veillard 5059: }
5060: }
5061:
1.50 daniel 5062: /**
5063: * xmlParseDocument :
5064: * @ctxt: an XML parser context
5065: *
5066: * parse an XML document (and build a tree if using the standard SAX
5067: * interface).
1.21 daniel 5068: *
1.22 daniel 5069: * [1] document ::= prolog element Misc*
1.29 daniel 5070: *
5071: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.50 daniel 5072: *
1.68 daniel 5073: * Returns 0, -1 in case of error. the parser context is augmented
1.50 daniel 5074: * as a result of the parsing.
1.1 veillard 5075: */
5076:
1.55 daniel 5077: int
5078: xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.45 daniel 5079: xmlDefaultSAXHandlerInit();
5080:
1.91 daniel 5081: GROW;
5082:
1.14 veillard 5083: /*
1.44 daniel 5084: * SAX: beginning of the document processing.
5085: */
1.72 daniel 5086: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
1.74 daniel 5087: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
1.44 daniel 5088:
5089: /*
1.14 veillard 5090: * We should check for encoding here and plug-in some
5091: * conversion code TODO !!!!
5092: */
1.1 veillard 5093:
5094: /*
5095: * Wipe out everything which is before the first '<'
5096: */
1.59 daniel 5097: if (IS_BLANK(CUR)) {
5098: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5099: ctxt->sax->error(ctxt->userData,
1.59 daniel 5100: "Extra spaces at the beginning of the document are not allowed\n");
5101: ctxt->wellFormed = 0;
5102: SKIP_BLANKS;
5103: }
5104:
5105: if (CUR == 0) {
5106: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5107: ctxt->sax->error(ctxt->userData, "Document is empty\n");
1.59 daniel 5108: ctxt->wellFormed = 0;
5109: }
1.1 veillard 5110:
5111: /*
5112: * Check for the XMLDecl in the Prolog.
5113: */
1.91 daniel 5114: GROW;
1.40 daniel 5115: if ((CUR == '<') && (NXT(1) == '?') &&
5116: (NXT(2) == 'x') && (NXT(3) == 'm') &&
5117: (NXT(4) == 'l')) {
1.19 daniel 5118: xmlParseXMLDecl(ctxt);
5119: /* SKIP_EOL(cur); */
1.42 daniel 5120: SKIP_BLANKS;
1.40 daniel 5121: } else if ((CUR == '<') && (NXT(1) == '?') &&
5122: (NXT(2) == 'X') && (NXT(3) == 'M') &&
5123: (NXT(4) == 'L')) {
1.19 daniel 5124: /*
5125: * The first drafts were using <?XML and the final W3C REC
5126: * now use <?xml ...
5127: */
1.16 daniel 5128: xmlParseXMLDecl(ctxt);
1.1 veillard 5129: /* SKIP_EOL(cur); */
1.42 daniel 5130: SKIP_BLANKS;
1.1 veillard 5131: } else {
1.72 daniel 5132: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.1 veillard 5133: }
1.72 daniel 5134: if ((ctxt->sax) && (ctxt->sax->startDocument))
1.74 daniel 5135: ctxt->sax->startDocument(ctxt->userData);
1.1 veillard 5136:
5137: /*
5138: * The Misc part of the Prolog
5139: */
1.91 daniel 5140: GROW;
1.16 daniel 5141: xmlParseMisc(ctxt);
1.1 veillard 5142:
5143: /*
1.29 daniel 5144: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 5145: * (doctypedecl Misc*)?
5146: */
1.91 daniel 5147: GROW;
1.40 daniel 5148: if ((CUR == '<') && (NXT(1) == '!') &&
5149: (NXT(2) == 'D') && (NXT(3) == 'O') &&
5150: (NXT(4) == 'C') && (NXT(5) == 'T') &&
5151: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
5152: (NXT(8) == 'E')) {
1.22 daniel 5153: xmlParseDocTypeDecl(ctxt);
1.96 daniel 5154: ctxt->instate = XML_PARSER_PROLOG;
1.22 daniel 5155: xmlParseMisc(ctxt);
1.21 daniel 5156: }
5157:
5158: /*
5159: * Time to start parsing the tree itself
1.1 veillard 5160: */
1.91 daniel 5161: GROW;
1.96 daniel 5162: ctxt->instate = XML_PARSER_CONTENT;
1.72 daniel 5163: xmlParseElement(ctxt);
1.96 daniel 5164: ctxt->instate = XML_PARSER_EPILOG;
1.33 daniel 5165:
5166: /*
5167: * The Misc part at the end
5168: */
5169: xmlParseMisc(ctxt);
1.16 daniel 5170:
1.59 daniel 5171: if (CUR != 0) {
5172: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5173: ctxt->sax->error(ctxt->userData,
1.59 daniel 5174: "Extra content at the end of the document\n");
5175: ctxt->wellFormed = 0;
5176: }
1.96 daniel 5177: ctxt->instate = XML_PARSER_EOF;
1.59 daniel 5178:
1.44 daniel 5179: /*
5180: * SAX: end of the document processing.
5181: */
1.72 daniel 5182: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
1.74 daniel 5183: ctxt->sax->endDocument(ctxt->userData);
1.59 daniel 5184: if (! ctxt->wellFormed) return(-1);
1.16 daniel 5185: return(0);
5186: }
5187:
1.50 daniel 5188: /**
1.86 daniel 5189: * xmlCreateDocParserCtxt :
1.50 daniel 5190: * @cur: a pointer to an array of CHAR
5191: *
1.69 daniel 5192: * Create a parser context for an XML in-memory document.
5193: *
5194: * Returns the new parser context or NULL
1.16 daniel 5195: */
1.69 daniel 5196: xmlParserCtxtPtr
5197: xmlCreateDocParserCtxt(CHAR *cur) {
1.16 daniel 5198: xmlParserCtxtPtr ctxt;
1.40 daniel 5199: xmlParserInputPtr input;
1.75 daniel 5200: xmlCharEncoding enc;
1.16 daniel 5201:
1.97 ! daniel 5202: ctxt = xmlNewParserCtxt();
1.16 daniel 5203: if (ctxt == NULL) {
5204: return(NULL);
5205: }
1.96 daniel 5206: input = xmlNewInputStream(ctxt);
1.40 daniel 5207: if (input == NULL) {
1.97 ! daniel 5208: xmlFreeParserCtxt(ctxt);
1.40 daniel 5209: return(NULL);
5210: }
5211:
1.75 daniel 5212: /*
5213: * plug some encoding conversion routines here. !!!
5214: */
5215: enc = xmlDetectCharEncoding(cur);
5216: xmlSwitchEncoding(ctxt, enc);
5217:
1.40 daniel 5218: input->base = cur;
5219: input->cur = cur;
5220:
5221: inputPush(ctxt, input);
1.69 daniel 5222: return(ctxt);
5223: }
5224:
5225: /**
5226: * xmlSAXParseDoc :
5227: * @sax: the SAX handler block
5228: * @cur: a pointer to an array of CHAR
5229: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
5230: * documents
5231: *
5232: * parse an XML in-memory document and build a tree.
5233: * It use the given SAX function block to handle the parsing callback.
5234: * If sax is NULL, fallback to the default DOM tree building routines.
5235: *
5236: * Returns the resulting document tree
5237: */
5238:
5239: xmlDocPtr
5240: xmlSAXParseDoc(xmlSAXHandlerPtr sax, CHAR *cur, int recovery) {
5241: xmlDocPtr ret;
5242: xmlParserCtxtPtr ctxt;
5243:
5244: if (cur == NULL) return(NULL);
1.16 daniel 5245:
5246:
1.69 daniel 5247: ctxt = xmlCreateDocParserCtxt(cur);
5248: if (ctxt == NULL) return(NULL);
1.74 daniel 5249: if (sax != NULL) {
5250: ctxt->sax = sax;
5251: ctxt->userData = NULL;
5252: }
1.69 daniel 5253:
1.16 daniel 5254: xmlParseDocument(ctxt);
1.72 daniel 5255: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 5256: else {
5257: ret = NULL;
1.72 daniel 5258: xmlFreeDoc(ctxt->myDoc);
5259: ctxt->myDoc = NULL;
1.59 daniel 5260: }
1.86 daniel 5261: if (sax != NULL)
5262: ctxt->sax = NULL;
1.69 daniel 5263: xmlFreeParserCtxt(ctxt);
1.16 daniel 5264:
1.1 veillard 5265: return(ret);
5266: }
5267:
1.50 daniel 5268: /**
1.55 daniel 5269: * xmlParseDoc :
5270: * @cur: a pointer to an array of CHAR
5271: *
5272: * parse an XML in-memory document and build a tree.
5273: *
1.68 daniel 5274: * Returns the resulting document tree
1.55 daniel 5275: */
5276:
1.69 daniel 5277: xmlDocPtr
5278: xmlParseDoc(CHAR *cur) {
1.59 daniel 5279: return(xmlSAXParseDoc(NULL, cur, 0));
1.76 daniel 5280: }
5281:
5282: /**
5283: * xmlSAXParseDTD :
5284: * @sax: the SAX handler block
5285: * @ExternalID: a NAME* containing the External ID of the DTD
5286: * @SystemID: a NAME* containing the URL to the DTD
5287: *
5288: * Load and parse an external subset.
5289: *
5290: * Returns the resulting xmlDtdPtr or NULL in case of error.
5291: */
5292:
5293: xmlDtdPtr
5294: xmlSAXParseDTD(xmlSAXHandlerPtr sax, const CHAR *ExternalID,
5295: const CHAR *SystemID) {
5296: xmlDtdPtr ret = NULL;
5297: xmlParserCtxtPtr ctxt;
1.83 daniel 5298: xmlParserInputPtr input = NULL;
1.76 daniel 5299: xmlCharEncoding enc;
5300:
5301: if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
5302:
1.97 ! daniel 5303: ctxt = xmlNewParserCtxt();
1.76 daniel 5304: if (ctxt == NULL) {
5305: return(NULL);
5306: }
5307:
5308: /*
5309: * Set-up the SAX context
5310: */
5311: if (ctxt == NULL) return(NULL);
5312: if (sax != NULL) {
1.93 veillard 5313: if (ctxt->sax != NULL)
5314: free(ctxt->sax);
1.76 daniel 5315: ctxt->sax = sax;
5316: ctxt->userData = NULL;
5317: }
5318:
5319: /*
5320: * Ask the Entity resolver to load the damn thing
5321: */
5322:
5323: if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
5324: input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
5325: if (input == NULL) {
1.86 daniel 5326: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 5327: xmlFreeParserCtxt(ctxt);
5328: return(NULL);
5329: }
5330:
5331: /*
5332: * plug some encoding conversion routines here. !!!
5333: */
5334: xmlPushInput(ctxt, input);
5335: enc = xmlDetectCharEncoding(ctxt->input->cur);
5336: xmlSwitchEncoding(ctxt, enc);
5337:
1.95 veillard 5338: if (input->filename == NULL)
5339: input->filename = xmlStrdup(SystemID);
1.76 daniel 5340: input->line = 1;
5341: input->col = 1;
5342: input->base = ctxt->input->cur;
5343: input->cur = ctxt->input->cur;
5344: input->free = NULL;
5345:
5346: /*
5347: * let's parse that entity knowing it's an external subset.
5348: */
1.79 daniel 5349: xmlParseExternalSubset(ctxt, ExternalID, SystemID);
1.76 daniel 5350:
5351: if (ctxt->myDoc != NULL) {
5352: if (ctxt->wellFormed) {
5353: ret = ctxt->myDoc->intSubset;
5354: ctxt->myDoc->intSubset = NULL;
5355: } else {
5356: ret = NULL;
5357: }
5358: xmlFreeDoc(ctxt->myDoc);
5359: ctxt->myDoc = NULL;
5360: }
1.86 daniel 5361: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 5362: xmlFreeParserCtxt(ctxt);
5363:
5364: return(ret);
5365: }
5366:
5367: /**
5368: * xmlParseDTD :
5369: * @ExternalID: a NAME* containing the External ID of the DTD
5370: * @SystemID: a NAME* containing the URL to the DTD
5371: *
5372: * Load and parse an external subset.
5373: *
5374: * Returns the resulting xmlDtdPtr or NULL in case of error.
5375: */
5376:
5377: xmlDtdPtr
5378: xmlParseDTD(const CHAR *ExternalID, const CHAR *SystemID) {
5379: return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
1.59 daniel 5380: }
5381:
5382: /**
5383: * xmlRecoverDoc :
5384: * @cur: a pointer to an array of CHAR
5385: *
5386: * parse an XML in-memory document and build a tree.
5387: * In the case the document is not Well Formed, a tree is built anyway
5388: *
1.68 daniel 5389: * Returns the resulting document tree
1.59 daniel 5390: */
5391:
1.69 daniel 5392: xmlDocPtr
5393: xmlRecoverDoc(CHAR *cur) {
1.59 daniel 5394: return(xmlSAXParseDoc(NULL, cur, 1));
1.55 daniel 5395: }
5396:
5397: /**
1.69 daniel 5398: * xmlCreateFileParserCtxt :
1.50 daniel 5399: * @filename: the filename
5400: *
1.69 daniel 5401: * Create a parser context for a file content.
5402: * Automatic support for ZLIB/Compress compressed document is provided
5403: * by default if found at compile-time.
1.50 daniel 5404: *
1.69 daniel 5405: * Returns the new parser context or NULL
1.9 httpng 5406: */
1.69 daniel 5407: xmlParserCtxtPtr
5408: xmlCreateFileParserCtxt(const char *filename)
5409: {
5410: xmlParserCtxtPtr ctxt;
1.40 daniel 5411: xmlParserInputPtr inputStream;
1.91 daniel 5412: xmlParserInputBufferPtr buf;
1.9 httpng 5413:
1.91 daniel 5414: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
5415: if (buf == NULL) return(NULL);
1.9 httpng 5416:
1.97 ! daniel 5417: ctxt = xmlNewParserCtxt();
1.16 daniel 5418: if (ctxt == NULL) {
5419: return(NULL);
5420: }
1.97 ! daniel 5421:
1.96 daniel 5422: inputStream = xmlNewInputStream(ctxt);
1.40 daniel 5423: if (inputStream == NULL) {
1.97 ! daniel 5424: xmlFreeParserCtxt(ctxt);
1.40 daniel 5425: return(NULL);
5426: }
5427:
5428: inputStream->filename = strdup(filename);
1.91 daniel 5429: inputStream->buf = buf;
5430: inputStream->base = inputStream->buf->buffer->content;
5431: inputStream->cur = inputStream->buf->buffer->content;
1.16 daniel 5432:
1.40 daniel 5433: inputPush(ctxt, inputStream);
1.69 daniel 5434: return(ctxt);
5435: }
5436:
5437: /**
5438: * xmlSAXParseFile :
5439: * @sax: the SAX handler block
5440: * @filename: the filename
5441: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
5442: * documents
5443: *
5444: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
5445: * compressed document is provided by default if found at compile-time.
5446: * It use the given SAX function block to handle the parsing callback.
5447: * If sax is NULL, fallback to the default DOM tree building routines.
5448: *
5449: * Returns the resulting document tree
5450: */
5451:
1.79 daniel 5452: xmlDocPtr
5453: xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
1.69 daniel 5454: int recovery) {
5455: xmlDocPtr ret;
5456: xmlParserCtxtPtr ctxt;
5457:
5458: ctxt = xmlCreateFileParserCtxt(filename);
5459: if (ctxt == NULL) return(NULL);
1.74 daniel 5460: if (sax != NULL) {
1.93 veillard 5461: if (ctxt->sax != NULL)
5462: free(ctxt->sax);
1.74 daniel 5463: ctxt->sax = sax;
5464: ctxt->userData = NULL;
5465: }
1.16 daniel 5466:
5467: xmlParseDocument(ctxt);
1.40 daniel 5468:
1.72 daniel 5469: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 5470: else {
5471: ret = NULL;
1.72 daniel 5472: xmlFreeDoc(ctxt->myDoc);
5473: ctxt->myDoc = NULL;
1.59 daniel 5474: }
1.86 daniel 5475: if (sax != NULL)
5476: ctxt->sax = NULL;
1.69 daniel 5477: xmlFreeParserCtxt(ctxt);
1.20 daniel 5478:
5479: return(ret);
5480: }
5481:
1.55 daniel 5482: /**
5483: * xmlParseFile :
5484: * @filename: the filename
5485: *
5486: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
5487: * compressed document is provided by default if found at compile-time.
5488: *
1.68 daniel 5489: * Returns the resulting document tree
1.55 daniel 5490: */
5491:
1.79 daniel 5492: xmlDocPtr
5493: xmlParseFile(const char *filename) {
1.59 daniel 5494: return(xmlSAXParseFile(NULL, filename, 0));
5495: }
5496:
5497: /**
5498: * xmlRecoverFile :
5499: * @filename: the filename
5500: *
5501: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
5502: * compressed document is provided by default if found at compile-time.
5503: * In the case the document is not Well Formed, a tree is built anyway
5504: *
1.68 daniel 5505: * Returns the resulting document tree
1.59 daniel 5506: */
5507:
1.79 daniel 5508: xmlDocPtr
5509: xmlRecoverFile(const char *filename) {
1.59 daniel 5510: return(xmlSAXParseFile(NULL, filename, 1));
1.55 daniel 5511: }
1.32 daniel 5512:
1.50 daniel 5513: /**
1.82 daniel 5514: * xmlSubstituteEntitiesDefault :
5515: * @val: int 0 or 1
1.79 daniel 5516: *
5517: * Set and return the previous value for default entity support.
5518: * Initially the parser always keep entity references instead of substituting
5519: * entity values in the output. This function has to be used to change the
5520: * default parser behaviour
5521: * SAX::subtituteEntities() has to be used for changing that on a file by
5522: * file basis.
5523: *
5524: * Returns the last value for 0 for no substitution, 1 for substitution.
5525: */
5526:
5527: int
5528: xmlSubstituteEntitiesDefault(int val) {
5529: int old = xmlSubstituteEntitiesDefaultValue;
5530:
5531: xmlSubstituteEntitiesDefaultValue = val;
5532: return(old);
5533: }
5534:
5535: /**
1.69 daniel 5536: * xmlCreateMemoryParserCtxt :
1.68 daniel 5537: * @buffer: an pointer to a char array
1.50 daniel 5538: * @size: the siwe of the array
5539: *
1.69 daniel 5540: * Create a parser context for an XML in-memory document.
1.50 daniel 5541: *
1.69 daniel 5542: * Returns the new parser context or NULL
1.20 daniel 5543: */
1.69 daniel 5544: xmlParserCtxtPtr
5545: xmlCreateMemoryParserCtxt(char *buffer, int size) {
1.20 daniel 5546: xmlParserCtxtPtr ctxt;
1.40 daniel 5547: xmlParserInputPtr input;
1.75 daniel 5548: xmlCharEncoding enc;
1.40 daniel 5549:
5550: buffer[size - 1] = '\0';
5551:
1.97 ! daniel 5552: ctxt = xmlNewParserCtxt();
1.20 daniel 5553: if (ctxt == NULL) {
5554: return(NULL);
5555: }
1.97 ! daniel 5556:
1.96 daniel 5557: input = xmlNewInputStream(ctxt);
1.40 daniel 5558: if (input == NULL) {
1.97 ! daniel 5559: xmlFreeParserCtxt(ctxt);
1.40 daniel 5560: return(NULL);
5561: }
1.20 daniel 5562:
1.40 daniel 5563: input->filename = NULL;
5564: input->line = 1;
5565: input->col = 1;
1.96 daniel 5566: input->buf = NULL;
1.91 daniel 5567: input->consumed = 0;
1.45 daniel 5568:
5569: /*
1.75 daniel 5570: * plug some encoding conversion routines here. !!!
1.45 daniel 5571: */
1.75 daniel 5572: enc = xmlDetectCharEncoding(buffer);
5573: xmlSwitchEncoding(ctxt, enc);
5574:
1.40 daniel 5575: input->base = buffer;
5576: input->cur = buffer;
1.69 daniel 5577: input->free = NULL;
1.20 daniel 5578:
1.40 daniel 5579: inputPush(ctxt, input);
1.69 daniel 5580: return(ctxt);
5581: }
5582:
5583: /**
5584: * xmlSAXParseMemory :
5585: * @sax: the SAX handler block
5586: * @buffer: an pointer to a char array
5587: * @size: the siwe of the array
5588: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
5589: * documents
5590: *
5591: * parse an XML in-memory block and use the given SAX function block
5592: * to handle the parsing callback. If sax is NULL, fallback to the default
5593: * DOM tree building routines.
5594: *
5595: * Returns the resulting document tree
5596: */
5597: xmlDocPtr
5598: xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
5599: xmlDocPtr ret;
5600: xmlParserCtxtPtr ctxt;
5601:
5602: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
5603: if (ctxt == NULL) return(NULL);
1.74 daniel 5604: if (sax != NULL) {
5605: ctxt->sax = sax;
5606: ctxt->userData = NULL;
5607: }
1.20 daniel 5608:
5609: xmlParseDocument(ctxt);
1.40 daniel 5610:
1.72 daniel 5611: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 5612: else {
5613: ret = NULL;
1.72 daniel 5614: xmlFreeDoc(ctxt->myDoc);
5615: ctxt->myDoc = NULL;
1.59 daniel 5616: }
1.86 daniel 5617: if (sax != NULL)
5618: ctxt->sax = NULL;
1.69 daniel 5619: xmlFreeParserCtxt(ctxt);
1.16 daniel 5620:
1.9 httpng 5621: return(ret);
1.17 daniel 5622: }
5623:
1.55 daniel 5624: /**
5625: * xmlParseMemory :
1.68 daniel 5626: * @buffer: an pointer to a char array
1.55 daniel 5627: * @size: the size of the array
5628: *
5629: * parse an XML in-memory block and build a tree.
5630: *
1.68 daniel 5631: * Returns the resulting document tree
1.55 daniel 5632: */
5633:
5634: xmlDocPtr xmlParseMemory(char *buffer, int size) {
1.59 daniel 5635: return(xmlSAXParseMemory(NULL, buffer, size, 0));
5636: }
5637:
5638: /**
5639: * xmlRecoverMemory :
1.68 daniel 5640: * @buffer: an pointer to a char array
1.59 daniel 5641: * @size: the size of the array
5642: *
5643: * parse an XML in-memory block and build a tree.
5644: * In the case the document is not Well Formed, a tree is built anyway
5645: *
1.68 daniel 5646: * Returns the resulting document tree
1.59 daniel 5647: */
5648:
5649: xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
5650: return(xmlSAXParseMemory(NULL, buffer, size, 1));
1.17 daniel 5651: }
5652:
5653:
1.50 daniel 5654: /**
5655: * xmlSetupParserForBuffer:
5656: * @ctxt: an XML parser context
5657: * @buffer: a CHAR * buffer
5658: * @filename: a file name
5659: *
1.19 daniel 5660: * Setup the parser context to parse a new buffer; Clears any prior
5661: * contents from the parser context. The buffer parameter must not be
5662: * NULL, but the filename parameter can be
5663: */
1.55 daniel 5664: void
5665: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const CHAR* buffer,
1.17 daniel 5666: const char* filename)
5667: {
1.96 daniel 5668: xmlParserInputPtr input;
1.40 daniel 5669:
1.96 daniel 5670: input = xmlNewInputStream(ctxt);
5671: if (input == NULL) {
5672: perror("malloc");
5673: free(ctxt);
5674: exit(1);
5675: }
5676:
5677: xmlClearParserCtxt(ctxt);
5678: if (filename != NULL)
5679: input->filename = strdup(filename);
5680: input->base = buffer;
5681: input->cur = buffer;
5682: inputPush(ctxt, input);
1.17 daniel 5683: }
5684:
1.32 daniel 5685:
1.50 daniel 5686: /**
5687: * xmlParserFindNodeInfo:
5688: * @ctxt: an XML parser context
5689: * @node: an XML node within the tree
5690: *
5691: * Find the parser node info struct for a given node
5692: *
1.68 daniel 5693: * Returns an xmlParserNodeInfo block pointer or NULL
1.32 daniel 5694: */
5695: const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
5696: const xmlNode* node)
5697: {
5698: unsigned long pos;
5699:
5700: /* Find position where node should be at */
5701: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
5702: if ( ctx->node_seq.buffer[pos].node == node )
5703: return &ctx->node_seq.buffer[pos];
5704: else
5705: return NULL;
5706: }
5707:
5708:
1.50 daniel 5709: /**
5710: * xmlInitNodeInfoSeq :
5711: * @seq: a node info sequence pointer
5712: *
5713: * -- Initialize (set to initial state) node info sequence
1.32 daniel 5714: */
1.55 daniel 5715: void
5716: xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 5717: {
5718: seq->length = 0;
5719: seq->maximum = 0;
5720: seq->buffer = NULL;
5721: }
5722:
1.50 daniel 5723: /**
5724: * xmlClearNodeInfoSeq :
5725: * @seq: a node info sequence pointer
5726: *
5727: * -- Clear (release memory and reinitialize) node
1.32 daniel 5728: * info sequence
5729: */
1.55 daniel 5730: void
5731: xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 5732: {
5733: if ( seq->buffer != NULL )
5734: free(seq->buffer);
5735: xmlInitNodeInfoSeq(seq);
5736: }
5737:
5738:
1.50 daniel 5739: /**
5740: * xmlParserFindNodeInfoIndex:
5741: * @seq: a node info sequence pointer
5742: * @node: an XML node pointer
5743: *
5744: *
1.32 daniel 5745: * xmlParserFindNodeInfoIndex : Find the index that the info record for
5746: * the given node is or should be at in a sorted sequence
1.68 daniel 5747: *
5748: * Returns a long indicating the position of the record
1.32 daniel 5749: */
5750: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
5751: const xmlNode* node)
5752: {
5753: unsigned long upper, lower, middle;
5754: int found = 0;
5755:
5756: /* Do a binary search for the key */
5757: lower = 1;
5758: upper = seq->length;
5759: middle = 0;
5760: while ( lower <= upper && !found) {
5761: middle = lower + (upper - lower) / 2;
5762: if ( node == seq->buffer[middle - 1].node )
5763: found = 1;
5764: else if ( node < seq->buffer[middle - 1].node )
5765: upper = middle - 1;
5766: else
5767: lower = middle + 1;
5768: }
5769:
5770: /* Return position */
5771: if ( middle == 0 || seq->buffer[middle - 1].node < node )
5772: return middle;
5773: else
5774: return middle - 1;
5775: }
5776:
5777:
1.50 daniel 5778: /**
5779: * xmlParserAddNodeInfo:
5780: * @ctxt: an XML parser context
1.68 daniel 5781: * @info: a node info sequence pointer
1.50 daniel 5782: *
5783: * Insert node info record into the sorted sequence
1.32 daniel 5784: */
1.55 daniel 5785: void
5786: xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
1.68 daniel 5787: const xmlParserNodeInfo* info)
1.32 daniel 5788: {
5789: unsigned long pos;
5790: static unsigned int block_size = 5;
5791:
5792: /* Find pos and check to see if node is already in the sequence */
1.55 daniel 5793: pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
5794: if ( pos < ctxt->node_seq.length
5795: && ctxt->node_seq.buffer[pos].node == info->node ) {
5796: ctxt->node_seq.buffer[pos] = *info;
1.32 daniel 5797: }
5798:
5799: /* Otherwise, we need to add new node to buffer */
5800: else {
5801: /* Expand buffer by 5 if needed */
1.55 daniel 5802: if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
1.32 daniel 5803: xmlParserNodeInfo* tmp_buffer;
1.55 daniel 5804: unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
5805: *(ctxt->node_seq.maximum + block_size));
1.32 daniel 5806:
1.55 daniel 5807: if ( ctxt->node_seq.buffer == NULL )
1.32 daniel 5808: tmp_buffer = (xmlParserNodeInfo*)malloc(byte_size);
5809: else
1.55 daniel 5810: tmp_buffer = (xmlParserNodeInfo*)realloc(ctxt->node_seq.buffer, byte_size);
1.32 daniel 5811:
5812: if ( tmp_buffer == NULL ) {
1.55 daniel 5813: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5814: ctxt->sax->error(ctxt->userData, "Out of memory\n");
1.32 daniel 5815: return;
5816: }
1.55 daniel 5817: ctxt->node_seq.buffer = tmp_buffer;
5818: ctxt->node_seq.maximum += block_size;
1.32 daniel 5819: }
5820:
5821: /* If position is not at end, move elements out of the way */
1.55 daniel 5822: if ( pos != ctxt->node_seq.length ) {
1.32 daniel 5823: unsigned long i;
5824:
1.55 daniel 5825: for ( i = ctxt->node_seq.length; i > pos; i-- )
5826: ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
1.32 daniel 5827: }
5828:
5829: /* Copy element and increase length */
1.55 daniel 5830: ctxt->node_seq.buffer[pos] = *info;
5831: ctxt->node_seq.length++;
1.32 daniel 5832: }
5833: }
1.77 daniel 5834:
5835:
Webmaster