Annotation of XML/parser.c, revision 1.70
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.15 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.60 daniel 6: * Daniel.Veillard@w3.org
1.1 veillard 7: */
8:
1.26 daniel 9: #ifdef WIN32
10: #define HAVE_FCNTL_H
11: #include <io.h>
12: #else
1.9 httpng 13: #include <config.h>
1.26 daniel 14: #endif
1.1 veillard 15: #include <stdio.h>
16: #include <ctype.h>
1.14 veillard 17: #include <string.h> /* for memset() only */
1.50 daniel 18: #include <stdlib.h>
1.9 httpng 19: #include <sys/stat.h>
20: #ifdef HAVE_FCNTL_H
21: #include <fcntl.h>
22: #endif
1.10 httpng 23: #ifdef HAVE_UNISTD_H
24: #include <unistd.h>
25: #endif
1.20 daniel 26: #ifdef HAVE_ZLIB_H
27: #include <zlib.h>
28: #endif
1.1 veillard 29:
1.14 veillard 30: #include "tree.h"
1.1 veillard 31: #include "parser.h"
1.14 veillard 32: #include "entities.h"
1.61 daniel 33: #include "valid.h"
1.69 daniel 34: #include "parserInternals.h"
1.1 veillard 35:
1.45 daniel 36: /************************************************************************
37: * *
38: * Parser stacks related functions and macros *
39: * *
40: ************************************************************************/
1.1 veillard 41: /*
1.40 daniel 42: * Generic function for accessing stacks in the Parser Context
1.1 veillard 43: */
44:
/*
 * PUSH_AND_POP(type, name) expands to two functions working on the
 * ctxt->name##Tab array, its fill level ctxt->name##Nr and its capacity
 * ctxt->name##Max:
 *
 *   name##Push  stores value at index name##Nr, makes it the current
 *               ctxt->name and returns the index it was stored at. When
 *               the table is full, name##Max is doubled and the table is
 *               realloc'ed; the process exits if realloc fails.
 *   name##Pop   returns the top entry (0 if the stack is empty), zeroes
 *               its slot and makes the entry below it, or NULL when none
 *               is left, the current ctxt->name.
 *
 * NOTE(review): doubling cannot grow a table whose name##Max is 0 --
 * presumably the context is created with a non-zero capacity; confirm.
 */
1.31 daniel 45: #define PUSH_AND_POP(type, name) \
1.40 daniel 46: int name##Push(xmlParserCtxtPtr ctxt, type value) { \
1.31 daniel 47: if (ctxt->name##Nr >= ctxt->name##Max) { \
48: ctxt->name##Max *= 2; \
1.40 daniel 49: ctxt->name##Tab = (void *) realloc(ctxt->name##Tab, \
50: ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
51: if (ctxt->name##Tab == NULL) { \
1.31 daniel 52: fprintf(stderr, "realloc failed !\n"); \
53: exit(1); \
54: } \
55: } \
1.40 daniel 56: ctxt->name##Tab[ctxt->name##Nr] = value; \
57: ctxt->name = value; \
58: return(ctxt->name##Nr++); \
1.31 daniel 59: } \
1.40 daniel 60: type name##Pop(xmlParserCtxtPtr ctxt) { \
1.69 daniel 61: type ret; \
1.40 daniel 62: if (ctxt->name##Nr <= 0) return(0); \
63: ctxt->name##Nr--; \
1.50 daniel 64: if (ctxt->name##Nr > 0) \
65: ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
66: else \
67: ctxt->name = NULL; \
1.69 daniel 68: ret = ctxt->name##Tab[ctxt->name##Nr]; \
69: ctxt->name##Tab[ctxt->name##Nr] = 0; \
70: return(ret); \
1.31 daniel 71: } \
72:
/* Instantiations: inputPush()/inputPop() and nodePush()/nodePop(). */
1.40 daniel 73: PUSH_AND_POP(xmlParserInputPtr, input)
1.41 daniel 74: PUSH_AND_POP(xmlNodePtr, node)
1.40 daniel 75:
1.55 daniel 76: /*
77: * Macros for accessing the content. Those should be used only by the parser,
78: * and not exported.
79: *
80: * Dirty macros, i.e. one needs to make assumptions about the context to use them
81: *
82: * CUR_PTR return the current pointer to the CHAR to be parsed.
83: * CUR returns the current CHAR value, i.e. a 8 bit value if compiled
84: * in ISO-Latin or UTF-8, and the current 16 bit value if compiled
85: * in UNICODE mode. This should be used internally by the parser
86: * only to compare to ASCII values otherwise it would break when
87: * running with UTF-8 encoding.
88: * NXT(n) returns the n'th next CHAR. Same as CUR, it should be used only
89: * to compare on ASCII based substring.
90: * SKIP(n) Skip n CHAR, and must also be used only to skip ASCII defined
91: * strings within the parser.
92: *
93: * Clean macros, not dependent of an ASCII context.
94: *
95: * CURRENT Returns the current char value, with the full decoding of
96: * UTF-8 if we are using this mode. It returns an int.
97: * NEXT Skip to the next character, this does the proper decoding
98: * in UTF-8 mode. It also pop-up unfinished entities on the fly.
99: * It returns the pointer to the current CHAR.
100: */
1.45 daniel 101:
/*
 * All of the macros below expand to expressions on ctxt->input->cur, so
 * they can only be used where a variable named ctxt is in scope.
 * SKIP, NXT and CUR_PTR expand without enclosing parentheses.
 */
102: #define CUR (*ctxt->input->cur)
1.55 daniel 103: #define SKIP(val) ctxt->input->cur += (val)
104: #define NXT(val) ctxt->input->cur[(val)]
105: #define CUR_PTR ctxt->input->cur
106:
/*
 * SKIP_BLANKS applies NEXT for as long as the current CHAR is a blank.
 * It expands to a bare while statement without a trailing semicolon.
 */
107: #define SKIP_BLANKS \
108: while (IS_BLANK(*(ctxt->input->cur))) NEXT
109:
110: #ifndef USE_UTF_8
111: #define CURRENT (*ctxt->input->cur)
/*
 * NEXT, when the current CHAR is non-zero: bumps line and resets col to 1
 * on '\n', otherwise bumps col, then post-increments cur (so the value of
 * the expression is the pointer before the increment).
 * When the current CHAR is zero: calls xmlPopInput(ctxt) and yields the
 * cur pointer of whatever input is current afterwards.
 */
1.45 daniel 112: #define NEXT ((*ctxt->input->cur) ? \
113: (((*(ctxt->input->cur) == '\n') ? \
114: (ctxt->input->line++, ctxt->input->col = 1) : \
115: (ctxt->input->col++)), ctxt->input->cur++) : \
116: (xmlPopInput(ctxt), ctxt->input->cur))
1.55 daniel 117: #else
/* USE_UTF_8: neither CURRENT nor NEXT is defined in this branch. */
118: #endif
1.42 daniel 119:
1.40 daniel 120:
1.50 daniel 121: /**
122: * xmlPopInput:
123: * @ctxt: an XML parser context
124: *
1.40 daniel 125: * xmlPopInput: the current input pointed by ctxt->input came to an end
126: * pop it and return the next char.
1.45 daniel 127: *
128: * TODO A deallocation of the popped Input structure is needed
1.68 daniel 129: *
130: * Returns the current CHAR in the parser context
1.40 daniel 131: */
1.55 daniel 132: CHAR
133: xmlPopInput(xmlParserCtxtPtr ctxt) {
1.40 daniel 134: if (ctxt->inputNr == 1) return(0); /* End of main Input */
1.69 daniel 135: xmlFreeInputStream(inputPop(ctxt));
1.40 daniel 136: return(CUR);
137: }
138:
1.50 daniel 139: /**
140: * xmlPushInput:
141: * @ctxt: an XML parser context
142: * @input: an XML parser input fragment (entity, XML fragment ...).
143: *
1.40 daniel 144: * xmlPushInput: switch to a new input stream which is stacked on top
145: * of the previous one(s).
146: */
1.55 daniel 147: void
148: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1.40 daniel 149: if (input == NULL) return;
150: inputPush(ctxt, input);
151: }
152:
1.50 daniel 153: /**
1.69 daniel 154: * xmlFreeInputStream:
155: * @input: an xmlParserInputPtr
156: *
157: * Free up an input stream.
158: */
159: void
160: xmlFreeInputStream(xmlParserInputPtr input) {
161: if (input == NULL) return;
162:
163: if (input->filename != NULL) free((char *) input->filename);
164: if ((input->free != NULL) && (input->base != NULL))
165: input->free((char *) input->base);
166: memset(input, -1, sizeof(xmlParserInput));
167: free(input);
168: }
169:
170: /**
1.50 daniel 171: * xmlNewEntityInputStream:
172: * @ctxt: an XML parser context
173: * @entity: an Entity pointer
174: *
1.45 daniel 175: * Create a new input stream based on a memory buffer.
1.68 daniel 176: * Returns the new input stream
1.45 daniel 177: */
1.50 daniel 178: xmlParserInputPtr
179: xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 180: xmlParserInputPtr input;
181:
182: if (entity == NULL) {
1.55 daniel 183: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
184: ctxt->sax->error(ctxt,
1.45 daniel 185: "internal: xmlNewEntityInputStream entity = NULL\n");
1.50 daniel 186: return(NULL);
1.45 daniel 187: }
188: if (entity->content == NULL) {
1.55 daniel 189: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
190: ctxt->sax->error(ctxt,
1.45 daniel 191: "internal: xmlNewEntityInputStream entity->input = NULL\n");
1.50 daniel 192: return(NULL);
1.45 daniel 193: }
194: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
195: if (input == NULL) {
1.55 daniel 196: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
197: ctxt->sax->error(ctxt, "malloc: couldn't allocate a new input stream\n");
1.50 daniel 198: return(NULL);
1.45 daniel 199: }
200: input->filename = entity->SystemID; /* TODO !!! char <- CHAR */
201: input->base = entity->content;
202: input->cur = entity->content;
203: input->line = 1;
204: input->col = 1;
1.69 daniel 205: input->free = NULL;
1.50 daniel 206: return(input);
1.45 daniel 207: }
208:
1.59 daniel 209: /**
210: * xmlNewStringInputStream:
211: * @ctxt: an XML parser context
212: * @entity: an Entity pointer
213: *
214: * Create a new input stream based on a memory buffer.
1.68 daniel 215: * Returns the new input stream
1.59 daniel 216: */
217: xmlParserInputPtr
218: xmlNewStringInputStream(xmlParserCtxtPtr ctxt, CHAR *string) {
219: xmlParserInputPtr input;
220:
221: if (string == NULL) {
222: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
223: ctxt->sax->error(ctxt,
224: "internal: xmlNewStringInputStream string = NULL\n");
225: return(NULL);
226: }
227: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
228: if (input == NULL) {
229: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
230: ctxt->sax->error(ctxt, "malloc: couldn't allocate a new input stream\n");
231: return(NULL);
232: }
233: input->filename = NULL;
234: input->base = string;
235: input->cur = string;
236: input->line = 1;
237: input->col = 1;
1.69 daniel 238: input->free = NULL;
1.59 daniel 239: return(input);
240: }
241:
1.45 daniel 242: /*
1.40 daniel 243: * A few macros needed to help building the parser.
244: */
245:
1.1 veillard 246: #ifdef UNICODE
1.30 daniel 247: /************************************************************************
248: * *
249: * UNICODE version of the macros. *
250: * *
251: ************************************************************************/
/*
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 *
 * Written as one clause per alternative of the production. The argument
 * is evaluated several times and must be free of side effects.
 */
#define IS_CHAR(c)                                                    \
    (((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D) ||                \
     (((c) >= 0x20) && ((c) <= 0xD7FF)) ||                             \
     (((c) >= 0xE000) && ((c) <= 0xFFFD)) ||                           \
     (((c) >= 0x10000) && ((c) <= 0x10FFFF)))
1.1 veillard 262:
1.22 daniel 263: /*
264: * [3] S ::= (#x20 | #x9 | #xD | #xA)+
265: */
/*
 * True for space, tab, line feed and carriage return.
 * IS_BLANK is defined a second time, unconditionally, further down in this
 * file; the two replacement lists have to stay token-for-token identical
 * for the redefinition to be legal.
 */
1.42 daniel 266: #define IS_BLANK(c) (((c) == 0x20) || ((c) == 0x09) || ((c) == 0xa) || \
267: ((c) == 0x0D))
1.1 veillard 268:
1.22 daniel 269: /*
1.30 daniel 270: * [85] BaseChar ::= ... long list see REC ...
1.22 daniel 271: *
1.30 daniel 272: * VI is your friend !
273: * :1,$ s/\[#x\([0-9A-Z]*\)-#x\([0-9A-Z]*\)\]/ (((c) >= 0x\1) \&\& ((c) <= 0x\2)) ||/
274: * and
275: * :1,$ s/#x\([0-9A-Z]*\)/ ((c) == 0x\1) ||/
1.22 daniel 276: */
/*
 * UNICODE build: non-zero when c falls in one of the listed code points
 * or inclusive ranges (entries are in ascending order). The argument is
 * evaluated once per clause and must be free of side effects.
 */
1.1 veillard 277: #define IS_BASECHAR(c) \
1.30 daniel 278: ((((c) >= 0x0041) && ((c) <= 0x005A)) || \
279: (((c) >= 0x0061) && ((c) <= 0x007A)) || \
280: (((c) >= 0x00C0) && ((c) <= 0x00D6)) || \
281: (((c) >= 0x00D8) && ((c) <= 0x00F6)) || \
282: (((c) >= 0x00F8) && ((c) <= 0x00FF)) || \
283: (((c) >= 0x0100) && ((c) <= 0x0131)) || \
284: (((c) >= 0x0134) && ((c) <= 0x013E)) || \
285: (((c) >= 0x0141) && ((c) <= 0x0148)) || \
286: (((c) >= 0x014A) && ((c) <= 0x017E)) || \
287: (((c) >= 0x0180) && ((c) <= 0x01C3)) || \
288: (((c) >= 0x01CD) && ((c) <= 0x01F0)) || \
289: (((c) >= 0x01F4) && ((c) <= 0x01F5)) || \
290: (((c) >= 0x01FA) && ((c) <= 0x0217)) || \
291: (((c) >= 0x0250) && ((c) <= 0x02A8)) || \
292: (((c) >= 0x02BB) && ((c) <= 0x02C1)) || \
293: ((c) == 0x0386) || \
294: (((c) >= 0x0388) && ((c) <= 0x038A)) || \
295: ((c) == 0x038C) || \
296: (((c) >= 0x038E) && ((c) <= 0x03A1)) || \
297: (((c) >= 0x03A3) && ((c) <= 0x03CE)) || \
298: (((c) >= 0x03D0) && ((c) <= 0x03D6)) || \
299: ((c) == 0x03DA) || \
300: ((c) == 0x03DC) || \
301: ((c) == 0x03DE) || \
302: ((c) == 0x03E0) || \
303: (((c) >= 0x03E2) && ((c) <= 0x03F3)) || \
304: (((c) >= 0x0401) && ((c) <= 0x040C)) || \
305: (((c) >= 0x040E) && ((c) <= 0x044F)) || \
306: (((c) >= 0x0451) && ((c) <= 0x045C)) || \
307: (((c) >= 0x045E) && ((c) <= 0x0481)) || \
308: (((c) >= 0x0490) && ((c) <= 0x04C4)) || \
309: (((c) >= 0x04C7) && ((c) <= 0x04C8)) || \
310: (((c) >= 0x04CB) && ((c) <= 0x04CC)) || \
311: (((c) >= 0x04D0) && ((c) <= 0x04EB)) || \
312: (((c) >= 0x04EE) && ((c) <= 0x04F5)) || \
313: (((c) >= 0x04F8) && ((c) <= 0x04F9)) || \
314: (((c) >= 0x0531) && ((c) <= 0x0556)) || \
315: ((c) == 0x0559) || \
316: (((c) >= 0x0561) && ((c) <= 0x0586)) || \
317: (((c) >= 0x05D0) && ((c) <= 0x05EA)) || \
318: (((c) >= 0x05F0) && ((c) <= 0x05F2)) || \
319: (((c) >= 0x0621) && ((c) <= 0x063A)) || \
320: (((c) >= 0x0641) && ((c) <= 0x064A)) || \
321: (((c) >= 0x0671) && ((c) <= 0x06B7)) || \
322: (((c) >= 0x06BA) && ((c) <= 0x06BE)) || \
323: (((c) >= 0x06C0) && ((c) <= 0x06CE)) || \
324: (((c) >= 0x06D0) && ((c) <= 0x06D3)) || \
325: ((c) == 0x06D5) || \
326: (((c) >= 0x06E5) && ((c) <= 0x06E6)) || \
327: (((c) >= 0x0905) && ((c) <= 0x0939)) || \
328: ((c) == 0x093D) || \
329: (((c) >= 0x0958) && ((c) <= 0x0961)) || \
330: (((c) >= 0x0985) && ((c) <= 0x098C)) || \
331: (((c) >= 0x098F) && ((c) <= 0x0990)) || \
332: (((c) >= 0x0993) && ((c) <= 0x09A8)) || \
333: (((c) >= 0x09AA) && ((c) <= 0x09B0)) || \
334: ((c) == 0x09B2) || \
335: (((c) >= 0x09B6) && ((c) <= 0x09B9)) || \
336: (((c) >= 0x09DC) && ((c) <= 0x09DD)) || \
337: (((c) >= 0x09DF) && ((c) <= 0x09E1)) || \
338: (((c) >= 0x09F0) && ((c) <= 0x09F1)) || \
339: (((c) >= 0x0A05) && ((c) <= 0x0A0A)) || \
340: (((c) >= 0x0A0F) && ((c) <= 0x0A10)) || \
341: (((c) >= 0x0A13) && ((c) <= 0x0A28)) || \
342: (((c) >= 0x0A2A) && ((c) <= 0x0A30)) || \
343: (((c) >= 0x0A32) && ((c) <= 0x0A33)) || \
344: (((c) >= 0x0A35) && ((c) <= 0x0A36)) || \
345: (((c) >= 0x0A38) && ((c) <= 0x0A39)) || \
346: (((c) >= 0x0A59) && ((c) <= 0x0A5C)) || \
347: ((c) == 0x0A5E) || \
348: (((c) >= 0x0A72) && ((c) <= 0x0A74)) || \
349: (((c) >= 0x0A85) && ((c) <= 0x0A8B)) || \
350: ((c) == 0x0A8D) || \
351: (((c) >= 0x0A8F) && ((c) <= 0x0A91)) || \
352: (((c) >= 0x0A93) && ((c) <= 0x0AA8)) || \
353: (((c) >= 0x0AAA) && ((c) <= 0x0AB0)) || \
354: (((c) >= 0x0AB2) && ((c) <= 0x0AB3)) || \
355: (((c) >= 0x0AB5) && ((c) <= 0x0AB9)) || \
356: ((c) == 0x0ABD) || \
357: ((c) == 0x0AE0) || \
358: (((c) >= 0x0B05) && ((c) <= 0x0B0C)) || \
359: (((c) >= 0x0B0F) && ((c) <= 0x0B10)) || \
360: (((c) >= 0x0B13) && ((c) <= 0x0B28)) || \
361: (((c) >= 0x0B2A) && ((c) <= 0x0B30)) || \
362: (((c) >= 0x0B32) && ((c) <= 0x0B33)) || \
363: (((c) >= 0x0B36) && ((c) <= 0x0B39)) || \
364: ((c) == 0x0B3D) || \
365: (((c) >= 0x0B5C) && ((c) <= 0x0B5D)) || \
366: (((c) >= 0x0B5F) && ((c) <= 0x0B61)) || \
367: (((c) >= 0x0B85) && ((c) <= 0x0B8A)) || \
368: (((c) >= 0x0B8E) && ((c) <= 0x0B90)) || \
369: (((c) >= 0x0B92) && ((c) <= 0x0B95)) || \
370: (((c) >= 0x0B99) && ((c) <= 0x0B9A)) || \
371: ((c) == 0x0B9C) || \
372: (((c) >= 0x0B9E) && ((c) <= 0x0B9F)) || \
373: (((c) >= 0x0BA3) && ((c) <= 0x0BA4)) || \
374: (((c) >= 0x0BA8) && ((c) <= 0x0BAA)) || \
375: (((c) >= 0x0BAE) && ((c) <= 0x0BB5)) || \
376: (((c) >= 0x0BB7) && ((c) <= 0x0BB9)) || \
377: (((c) >= 0x0C05) && ((c) <= 0x0C0C)) || \
378: (((c) >= 0x0C0E) && ((c) <= 0x0C10)) || \
379: (((c) >= 0x0C12) && ((c) <= 0x0C28)) || \
380: (((c) >= 0x0C2A) && ((c) <= 0x0C33)) || \
381: (((c) >= 0x0C35) && ((c) <= 0x0C39)) || \
382: (((c) >= 0x0C60) && ((c) <= 0x0C61)) || \
383: (((c) >= 0x0C85) && ((c) <= 0x0C8C)) || \
384: (((c) >= 0x0C8E) && ((c) <= 0x0C90)) || \
385: (((c) >= 0x0C92) && ((c) <= 0x0CA8)) || \
386: (((c) >= 0x0CAA) && ((c) <= 0x0CB3)) || \
387: (((c) >= 0x0CB5) && ((c) <= 0x0CB9)) || \
388: ((c) == 0x0CDE) || \
389: (((c) >= 0x0CE0) && ((c) <= 0x0CE1)) || \
390: (((c) >= 0x0D05) && ((c) <= 0x0D0C)) || \
391: (((c) >= 0x0D0E) && ((c) <= 0x0D10)) || \
392: (((c) >= 0x0D12) && ((c) <= 0x0D28)) || \
393: (((c) >= 0x0D2A) && ((c) <= 0x0D39)) || \
394: (((c) >= 0x0D60) && ((c) <= 0x0D61)) || \
395: (((c) >= 0x0E01) && ((c) <= 0x0E2E)) || \
396: ((c) == 0x0E30) || \
397: (((c) >= 0x0E32) && ((c) <= 0x0E33)) || \
398: (((c) >= 0x0E40) && ((c) <= 0x0E45)) || \
399: (((c) >= 0x0E81) && ((c) <= 0x0E82)) || \
400: ((c) == 0x0E84) || \
401: (((c) >= 0x0E87) && ((c) <= 0x0E88)) || \
402: ((c) == 0x0E8A) || \
403: ((c) == 0x0E8D) || \
404: (((c) >= 0x0E94) && ((c) <= 0x0E97)) || \
405: (((c) >= 0x0E99) && ((c) <= 0x0E9F)) || \
406: (((c) >= 0x0EA1) && ((c) <= 0x0EA3)) || \
407: ((c) == 0x0EA5) || \
408: ((c) == 0x0EA7) || \
409: (((c) >= 0x0EAA) && ((c) <= 0x0EAB)) || \
410: (((c) >= 0x0EAD) && ((c) <= 0x0EAE)) || \
411: ((c) == 0x0EB0) || \
412: (((c) >= 0x0EB2) && ((c) <= 0x0EB3)) || \
413: ((c) == 0x0EBD) || \
414: (((c) >= 0x0EC0) && ((c) <= 0x0EC4)) || \
415: (((c) >= 0x0F40) && ((c) <= 0x0F47)) || \
416: (((c) >= 0x0F49) && ((c) <= 0x0F69)) || \
417: (((c) >= 0x10A0) && ((c) <= 0x10C5)) || \
418: (((c) >= 0x10D0) && ((c) <= 0x10F6)) || \
419: ((c) == 0x1100) || \
420: (((c) >= 0x1102) && ((c) <= 0x1103)) || \
421: (((c) >= 0x1105) && ((c) <= 0x1107)) || \
422: ((c) == 0x1109) || \
423: (((c) >= 0x110B) && ((c) <= 0x110C)) || \
424: (((c) >= 0x110E) && ((c) <= 0x1112)) || \
425: ((c) == 0x113C) || \
426: ((c) == 0x113E) || \
427: ((c) == 0x1140) || \
428: ((c) == 0x114C) || \
429: ((c) == 0x114E) || \
430: ((c) == 0x1150) || \
431: (((c) >= 0x1154) && ((c) <= 0x1155)) || \
432: ((c) == 0x1159) || \
433: (((c) >= 0x115F) && ((c) <= 0x1161)) || \
434: ((c) == 0x1163) || \
435: ((c) == 0x1165) || \
436: ((c) == 0x1167) || \
437: ((c) == 0x1169) || \
438: (((c) >= 0x116D) && ((c) <= 0x116E)) || \
439: (((c) >= 0x1172) && ((c) <= 0x1173)) || \
440: ((c) == 0x1175) || \
441: ((c) == 0x119E) || \
442: ((c) == 0x11A8) || \
443: ((c) == 0x11AB) || \
444: (((c) >= 0x11AE) && ((c) <= 0x11AF)) || \
445: (((c) >= 0x11B7) && ((c) <= 0x11B8)) || \
446: ((c) == 0x11BA) || \
447: (((c) >= 0x11BC) && ((c) <= 0x11C2)) || \
448: ((c) == 0x11EB) || \
449: ((c) == 0x11F0) || \
450: ((c) == 0x11F9) || \
451: (((c) >= 0x1E00) && ((c) <= 0x1E9B)) || \
452: (((c) >= 0x1EA0) && ((c) <= 0x1EF9)) || \
453: (((c) >= 0x1F00) && ((c) <= 0x1F15)) || \
454: (((c) >= 0x1F18) && ((c) <= 0x1F1D)) || \
455: (((c) >= 0x1F20) && ((c) <= 0x1F45)) || \
456: (((c) >= 0x1F48) && ((c) <= 0x1F4D)) || \
457: (((c) >= 0x1F50) && ((c) <= 0x1F57)) || \
458: ((c) == 0x1F59) || \
459: ((c) == 0x1F5B) || \
460: ((c) == 0x1F5D) || \
461: (((c) >= 0x1F5F) && ((c) <= 0x1F7D)) || \
462: (((c) >= 0x1F80) && ((c) <= 0x1FB4)) || \
463: (((c) >= 0x1FB6) && ((c) <= 0x1FBC)) || \
464: ((c) == 0x1FBE) || \
465: (((c) >= 0x1FC2) && ((c) <= 0x1FC4)) || \
466: (((c) >= 0x1FC6) && ((c) <= 0x1FCC)) || \
467: (((c) >= 0x1FD0) && ((c) <= 0x1FD3)) || \
468: (((c) >= 0x1FD6) && ((c) <= 0x1FDB)) || \
469: (((c) >= 0x1FE0) && ((c) <= 0x1FEC)) || \
470: (((c) >= 0x1FF2) && ((c) <= 0x1FF4)) || \
471: (((c) >= 0x1FF6) && ((c) <= 0x1FFC)) || \
472: ((c) == 0x2126) || \
473: (((c) >= 0x212A) && ((c) <= 0x212B)) || \
474: ((c) == 0x212E) || \
475: (((c) >= 0x2180) && ((c) <= 0x2182)) || \
476: (((c) >= 0x3041) && ((c) <= 0x3094)) || \
477: (((c) >= 0x30A1) && ((c) <= 0x30FA)) || \
478: (((c) >= 0x3105) && ((c) <= 0x312C)) || \
479: (((c) >= 0xAC00) && ((c) <= 0xD7A3)))
1.1 veillard 480:
1.22 daniel 481: /*
482: * [88] Digit ::= ... long list see REC ...
483: */
/*
 * UNICODE build: non-zero when c lies in one of the fifteen inclusive
 * ranges below, the first being ASCII 0x30-0x39. The argument is
 * evaluated once per clause and must be free of side effects.
 */
1.30 daniel 484: #define IS_DIGIT(c) \
485: ((((c) >= 0x0030) && ((c) <= 0x0039)) || \
486: (((c) >= 0x0660) && ((c) <= 0x0669)) || \
487: (((c) >= 0x06F0) && ((c) <= 0x06F9)) || \
488: (((c) >= 0x0966) && ((c) <= 0x096F)) || \
489: (((c) >= 0x09E6) && ((c) <= 0x09EF)) || \
490: (((c) >= 0x0A66) && ((c) <= 0x0A6F)) || \
491: (((c) >= 0x0AE6) && ((c) <= 0x0AEF)) || \
492: (((c) >= 0x0B66) && ((c) <= 0x0B6F)) || \
493: (((c) >= 0x0BE7) && ((c) <= 0x0BEF)) || \
494: (((c) >= 0x0C66) && ((c) <= 0x0C6F)) || \
495: (((c) >= 0x0CE6) && ((c) <= 0x0CEF)) || \
496: (((c) >= 0x0D66) && ((c) <= 0x0D6F)) || \
497: (((c) >= 0x0E50) && ((c) <= 0x0E59)) || \
498: (((c) >= 0x0ED0) && ((c) <= 0x0ED9)) || \
499: (((c) >= 0x0F20) && ((c) <= 0x0F29)))
1.1 veillard 500:
1.22 daniel 501: /*
502: * [87] CombiningChar ::= ... long list see REC ...
503: */
/*
 * UNICODE build: non-zero when c is one of the listed code points or lies
 * in one of the listed inclusive ranges. Some neighbouring entries are
 * contiguous (e.g. 0x06D6-0x06DC, 0x06DD-0x06DF, 0x06E0-0x06E4) and are
 * kept as separate clauses. The argument is evaluated once per clause and
 * must be free of side effects.
 */
1.30 daniel 504: #define IS_COMBINING(c) \
505: ((((c) >= 0x0300) && ((c) <= 0x0345)) || \
506: (((c) >= 0x0360) && ((c) <= 0x0361)) || \
507: (((c) >= 0x0483) && ((c) <= 0x0486)) || \
508: (((c) >= 0x0591) && ((c) <= 0x05A1)) || \
509: (((c) >= 0x05A3) && ((c) <= 0x05B9)) || \
510: (((c) >= 0x05BB) && ((c) <= 0x05BD)) || \
511: ((c) == 0x05BF) || \
512: (((c) >= 0x05C1) && ((c) <= 0x05C2)) || \
513: ((c) == 0x05C4) || \
514: (((c) >= 0x064B) && ((c) <= 0x0652)) || \
515: ((c) == 0x0670) || \
516: (((c) >= 0x06D6) && ((c) <= 0x06DC)) || \
517: (((c) >= 0x06DD) && ((c) <= 0x06DF)) || \
518: (((c) >= 0x06E0) && ((c) <= 0x06E4)) || \
519: (((c) >= 0x06E7) && ((c) <= 0x06E8)) || \
520: (((c) >= 0x06EA) && ((c) <= 0x06ED)) || \
521: (((c) >= 0x0901) && ((c) <= 0x0903)) || \
522: ((c) == 0x093C) || \
523: (((c) >= 0x093E) && ((c) <= 0x094C)) || \
524: ((c) == 0x094D) || \
525: (((c) >= 0x0951) && ((c) <= 0x0954)) || \
526: (((c) >= 0x0962) && ((c) <= 0x0963)) || \
527: (((c) >= 0x0981) && ((c) <= 0x0983)) || \
528: ((c) == 0x09BC) || \
529: ((c) == 0x09BE) || \
530: ((c) == 0x09BF) || \
531: (((c) >= 0x09C0) && ((c) <= 0x09C4)) || \
532: (((c) >= 0x09C7) && ((c) <= 0x09C8)) || \
533: (((c) >= 0x09CB) && ((c) <= 0x09CD)) || \
534: ((c) == 0x09D7) || \
535: (((c) >= 0x09E2) && ((c) <= 0x09E3)) || \
536: ((c) == 0x0A02) || \
537: ((c) == 0x0A3C) || \
538: ((c) == 0x0A3E) || \
539: ((c) == 0x0A3F) || \
540: (((c) >= 0x0A40) && ((c) <= 0x0A42)) || \
541: (((c) >= 0x0A47) && ((c) <= 0x0A48)) || \
542: (((c) >= 0x0A4B) && ((c) <= 0x0A4D)) || \
543: (((c) >= 0x0A70) && ((c) <= 0x0A71)) || \
544: (((c) >= 0x0A81) && ((c) <= 0x0A83)) || \
545: ((c) == 0x0ABC) || \
546: (((c) >= 0x0ABE) && ((c) <= 0x0AC5)) || \
547: (((c) >= 0x0AC7) && ((c) <= 0x0AC9)) || \
548: (((c) >= 0x0ACB) && ((c) <= 0x0ACD)) || \
549: (((c) >= 0x0B01) && ((c) <= 0x0B03)) || \
550: ((c) == 0x0B3C) || \
551: (((c) >= 0x0B3E) && ((c) <= 0x0B43)) || \
552: (((c) >= 0x0B47) && ((c) <= 0x0B48)) || \
553: (((c) >= 0x0B4B) && ((c) <= 0x0B4D)) || \
554: (((c) >= 0x0B56) && ((c) <= 0x0B57)) || \
555: (((c) >= 0x0B82) && ((c) <= 0x0B83)) || \
556: (((c) >= 0x0BBE) && ((c) <= 0x0BC2)) || \
557: (((c) >= 0x0BC6) && ((c) <= 0x0BC8)) || \
558: (((c) >= 0x0BCA) && ((c) <= 0x0BCD)) || \
559: ((c) == 0x0BD7) || \
560: (((c) >= 0x0C01) && ((c) <= 0x0C03)) || \
561: (((c) >= 0x0C3E) && ((c) <= 0x0C44)) || \
562: (((c) >= 0x0C46) && ((c) <= 0x0C48)) || \
563: (((c) >= 0x0C4A) && ((c) <= 0x0C4D)) || \
564: (((c) >= 0x0C55) && ((c) <= 0x0C56)) || \
565: (((c) >= 0x0C82) && ((c) <= 0x0C83)) || \
566: (((c) >= 0x0CBE) && ((c) <= 0x0CC4)) || \
567: (((c) >= 0x0CC6) && ((c) <= 0x0CC8)) || \
568: (((c) >= 0x0CCA) && ((c) <= 0x0CCD)) || \
569: (((c) >= 0x0CD5) && ((c) <= 0x0CD6)) || \
570: (((c) >= 0x0D02) && ((c) <= 0x0D03)) || \
571: (((c) >= 0x0D3E) && ((c) <= 0x0D43)) || \
572: (((c) >= 0x0D46) && ((c) <= 0x0D48)) || \
573: (((c) >= 0x0D4A) && ((c) <= 0x0D4D)) || \
574: ((c) == 0x0D57) || \
575: ((c) == 0x0E31) || \
576: (((c) >= 0x0E34) && ((c) <= 0x0E3A)) || \
577: (((c) >= 0x0E47) && ((c) <= 0x0E4E)) || \
578: ((c) == 0x0EB1) || \
579: (((c) >= 0x0EB4) && ((c) <= 0x0EB9)) || \
580: (((c) >= 0x0EBB) && ((c) <= 0x0EBC)) || \
581: (((c) >= 0x0EC8) && ((c) <= 0x0ECD)) || \
582: (((c) >= 0x0F18) && ((c) <= 0x0F19)) || \
583: ((c) == 0x0F35) || \
584: ((c) == 0x0F37) || \
585: ((c) == 0x0F39) || \
586: ((c) == 0x0F3E) || \
587: ((c) == 0x0F3F) || \
588: (((c) >= 0x0F71) && ((c) <= 0x0F84)) || \
589: (((c) >= 0x0F86) && ((c) <= 0x0F8B)) || \
590: (((c) >= 0x0F90) && ((c) <= 0x0F95)) || \
591: ((c) == 0x0F97) || \
592: (((c) >= 0x0F99) && ((c) <= 0x0FAD)) || \
593: (((c) >= 0x0FB1) && ((c) <= 0x0FB7)) || \
594: ((c) == 0x0FB9) || \
595: (((c) >= 0x20D0) && ((c) <= 0x20DC)) || \
596: ((c) == 0x20E1) || \
597: (((c) >= 0x302A) && ((c) <= 0x302F)) || \
598: ((c) == 0x3099) || \
599: ((c) == 0x309A))
1.3 veillard 600:
/*
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * One clause per alternative of the production; every clause is joined
 * with || and the Hiragana iteration marks start at 0x309D as the
 * production requires. The argument is evaluated several times and must
 * be free of side effects.
 */
#define IS_EXTENDER(c) \
    (((c) == 0xb7) || ((c) == 0x2d0) || ((c) == 0x2d1) || \
     ((c) == 0x387) || ((c) == 0x640) || ((c) == 0xe46) || \
     ((c) == 0xec6) || ((c) == 0x3005) || \
     (((c) >= 0x3031) && ((c) <= 0x3035)) || \
     (((c) >= 0x309d) && ((c) <= 0x309e)) || \
     (((c) >= 0x30fc) && ((c) <= 0x30fe)))
1.3 veillard 613:
/*
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * Exactly the three alternatives of the production. The block
 * 0xF900-0xFA2D is not part of production [86] and is therefore not
 * accepted. The argument is evaluated several times and must be free of
 * side effects.
 */
#define IS_IDEOGRAPHIC(c) \
    ((((c) >= 0x4e00) && ((c) <= 0x9fa5)) || \
     ((c) == 0x3007) || \
     (((c) >= 0x3021) && ((c) <= 0x3029)))
622:
/*
 * [84] Letter ::= BaseChar | Ideographic
 *
 * A letter is anything matched by IS_BASECHAR or by IS_IDEOGRAPHIC.
 */
#define IS_LETTER(c) \
    ((IS_BASECHAR(c)) || (IS_IDEOGRAPHIC(c)))
627:
628: #else
1.55 daniel 629: #ifndef USE_UTF_8
1.30 daniel 630: /************************************************************************
631: * *
1.55 daniel 632: * 8bits / ISO-Latin version of the macros. *
1.30 daniel 633: * *
634: ************************************************************************/
/*
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 *
 * Written as one clause per alternative of the production. The argument
 * is evaluated several times and must be free of side effects.
 */
#define IS_CHAR(c)                                                    \
    (((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D) ||                \
     (((c) >= 0x20) && ((c) <= 0xD7FF)) ||                             \
     (((c) >= 0xE000) && ((c) <= 0xFFFD)) ||                           \
     (((c) >= 0x10000) && ((c) <= 0x10FFFF)))
1.1 veillard 644:
/*
 * [85] BaseChar ::= ... long list see REC ...
 *
 * 8-bit build: ASCII letters, the Latin-1 letter ranges 0xC0-0xD6,
 * 0xD8-0xF6 and 0xF8-0xFF, plus 0xBA.
 * The former clause ((c) >= 0xaa) && ((c) <= 0x5b) could never be true
 * and has been dropped; the accepted set is unchanged.
 * NOTE(review): 0xBA is accepted here but is absent from the UNICODE
 * variant of this macro, and 0xAA (probably what the dead clause was
 * meant to test) is accepted by neither -- confirm against the REC
 * whether 0xBA should stay.
 */
#define IS_BASECHAR(c) \
    ((((c) >= 0x41) && ((c) <= 0x5a)) || \
     (((c) >= 0x61) && ((c) <= 0x7a)) || \
     (((c) >= 0xc0) && ((c) <= 0xd6)) || \
     (((c) >= 0xd8) && ((c) <= 0xf6)) || \
     (((c) >= 0xf8) && ((c) <= 0xff)) || \
     ((c) == 0xba))
656:
/*
 * [88] Digit ::= ... long list see REC ...
 *
 * 8-bit build: only the ASCII digits 0x30-0x39 are recognised.
 */
#define IS_DIGIT(c) (!(((c) < 0x30) || ((c) > 0x39)))
661:
1.22 daniel 662: /*
663: * [84] Letter ::= BaseChar | Ideographic
664: */
/* 8-bit build: no IS_IDEOGRAPHIC exists in this branch, so a Letter is simply a BaseChar. */
1.1 veillard 665: #define IS_LETTER(c) IS_BASECHAR(c)
666:
1.22 daniel 667:
668: /*
669: * [87] CombiningChar ::= ... long list see REC ...
670: */
/* 8-bit build: constant 0, i.e. no character is a CombiningChar; the argument is never evaluated. */
1.1 veillard 671: #define IS_COMBINING(c) 0
672:
/*
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * 8-bit build: only the first alternative, 0xB7, is tested.
 */
#define IS_EXTENDER(c) (0xb7 == (c))
679:
1.55 daniel 680: #else /* USE_UTF_8 */
681: /************************************************************************
682: * *
683: * 8bits / UTF-8 version of the macros. *
684: * *
685: ************************************************************************/
686:
687: TODO !!!
688: #endif /* USE_UTF_8 */
1.21 daniel 689: #endif /* !UNICODE */
1.1 veillard 690:
1.22 daniel 691: /*
692: * Blank chars.
693: *
694: * [3] S ::= (#x20 | #x9 | #xD | #xA)+
695: */
/*
 * Unconditional definition: true for space, tab, line feed and carriage
 * return. The UNICODE branch earlier in this file defines IS_BLANK with
 * the same replacement list; keep the two token-for-token identical.
 */
696: #define IS_BLANK(c) (((c) == 0x20) || ((c) == 0x09) || ((c) == 0xa) || \
697: ((c) == 0x0D))
698:
/*
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Alphanumerics are tested first, then the three whitespace codes, then
 * the nineteen punctuation characters of the production. The argument is
 * evaluated several times and must be free of side effects.
 */
#define IS_PUBIDCHAR(c)                                                  \
    ((((c) >= '0') && ((c) <= '9')) ||                                   \
     (((c) >= 'A') && ((c) <= 'Z')) ||                                   \
     (((c) >= 'a') && ((c) <= 'z')) ||                                   \
     ((c) == 0x20) || ((c) == 0x0A) || ((c) == 0x0D) ||                  \
     ((c) == '-') || ((c) == '\'') || ((c) == '(') || ((c) == ')') ||    \
     ((c) == '+') || ((c) == ',') || ((c) == '.') || ((c) == '/') ||     \
     ((c) == ':') || ((c) == '=') || ((c) == '?') || ((c) == ';') ||     \
     ((c) == '!') || ((c) == '*') || ((c) == '#') || ((c) == '@') ||     \
     ((c) == '$') || ((c) == '_') || ((c) == '%'))
1.1 veillard 712:
713: #define SKIP_EOL(p) \
714: if (*(p) == 0x13) { p++ ; if (*(p) == 0x10) p++; } \
715: if (*(p) == 0x10) { p++ ; if (*(p) == 0x13) p++; }
716:
/*
 * MOVETO_ENDTAG(p): advance p until it points at '>' or at a CHAR that
 * IS_CHAR rejects (which includes the 0 terminator).
 * p must be a modifiable pointer lvalue.
 */
#define MOVETO_ENDTAG(p) \
    for (; IS_CHAR(*(p)) && (*(p) != '>'); (p)++)
1.1 veillard 719:
/*
 * MOVETO_STARTTAG(p): advance p until it points at '<' or at a CHAR that
 * IS_CHAR rejects (which includes the 0 terminator).
 * p must be a modifiable pointer lvalue.
 */
#define MOVETO_STARTTAG(p) \
    for (; IS_CHAR(*(p)) && (*(p) != '<'); (p)++)
1.1 veillard 722:
1.28 daniel 723: /************************************************************************
724: * *
725: * Commodity functions to handle CHARs *
726: * *
727: ************************************************************************/
728:
1.50 daniel 729: /**
730: * xmlStrndup:
731: * @cur: the input CHAR *
732: * @len: the len of @cur
733: *
734: * a strndup for array of CHAR's
1.68 daniel 735: *
736: * Returns a new CHAR * or NULL
1.1 veillard 737: */
1.55 daniel 738: CHAR *
739: xmlStrndup(const CHAR *cur, int len) {
1.1 veillard 740: CHAR *ret = malloc((len + 1) * sizeof(CHAR));
741:
742: if (ret == NULL) {
743: fprintf(stderr, "malloc of %d byte failed\n",
744: (len + 1) * sizeof(CHAR));
745: return(NULL);
746: }
747: memcpy(ret, cur, len * sizeof(CHAR));
748: ret[len] = 0;
749: return(ret);
750: }
751:
1.50 daniel 752: /**
753: * xmlStrdup:
754: * @cur: the input CHAR *
755: *
756: * a strdup for array of CHAR's
1.68 daniel 757: *
758: * Returns a new CHAR * or NULL
1.1 veillard 759: */
1.55 daniel 760: CHAR *
761: xmlStrdup(const CHAR *cur) {
1.6 httpng 762: const CHAR *p = cur;
1.1 veillard 763:
764: while (IS_CHAR(*p)) p++;
765: return(xmlStrndup(cur, p - cur));
766: }
767:
1.50 daniel 768: /**
769: * xmlCharStrndup:
770: * @cur: the input char *
771: * @len: the len of @cur
772: *
773: * a strndup for char's to CHAR's
1.68 daniel 774: *
775: * Returns a new CHAR * or NULL
1.45 daniel 776: */
777:
1.55 daniel 778: CHAR *
779: xmlCharStrndup(const char *cur, int len) {
1.45 daniel 780: int i;
781: CHAR *ret = malloc((len + 1) * sizeof(CHAR));
782:
783: if (ret == NULL) {
784: fprintf(stderr, "malloc of %d byte failed\n",
785: (len + 1) * sizeof(CHAR));
786: return(NULL);
787: }
788: for (i = 0;i < len;i++)
789: ret[i] = (CHAR) cur[i];
790: ret[len] = 0;
791: return(ret);
792: }
793:
1.50 daniel 794: /**
795: * xmlCharStrdup:
796: * @cur: the input char *
797: * @len: the len of @cur
798: *
799: * a strdup for char's to CHAR's
1.68 daniel 800: *
801: * Returns a new CHAR * or NULL
1.45 daniel 802: */
803:
1.55 daniel 804: CHAR *
805: xmlCharStrdup(const char *cur) {
1.45 daniel 806: const char *p = cur;
807:
808: while (*p != '\0') p++;
809: return(xmlCharStrndup(cur, p - cur));
810: }
811:
1.50 daniel 812: /**
813: * xmlStrcmp:
814: * @str1: the first CHAR *
815: * @str2: the second CHAR *
816: *
817: * a strcmp for CHAR's
1.68 daniel 818: *
819: * Returns the integer result of the comparison
1.14 veillard 820: */
821:
1.55 daniel 822: int
823: xmlStrcmp(const CHAR *str1, const CHAR *str2) {
1.14 veillard 824: register int tmp;
825:
826: do {
827: tmp = *str1++ - *str2++;
828: if (tmp != 0) return(tmp);
829: } while ((*str1 != 0) && (*str2 != 0));
830: return (*str1 - *str2);
831: }
832:
1.50 daniel 833: /**
834: * xmlStrncmp:
835: * @str1: the first CHAR *
836: * @str2: the second CHAR *
837: * @len: the max comparison length
838: *
839: * a strncmp for CHAR's
1.68 daniel 840: *
841: * Returns the integer result of the comparison
1.14 veillard 842: */
843:
1.55 daniel 844: int
845: xmlStrncmp(const CHAR *str1, const CHAR *str2, int len) {
1.14 veillard 846: register int tmp;
847:
848: if (len <= 0) return(0);
849: do {
850: tmp = *str1++ - *str2++;
851: if (tmp != 0) return(tmp);
852: len--;
853: if (len <= 0) return(0);
854: } while ((*str1 != 0) && (*str2 != 0));
855: return (*str1 - *str2);
856: }
857:
1.50 daniel 858: /**
859: * xmlStrchr:
860: * @str: the CHAR * array
861: * @val: the CHAR to search
862: *
863: * a strchr for CHAR's
1.68 daniel 864: *
865: * Returns the CHAR * for the first occurence or NULL.
1.14 veillard 866: */
867:
1.55 daniel 868: CHAR *
869: xmlStrchr(const CHAR *str, CHAR val) {
1.14 veillard 870: while (*str != 0) {
871: if (*str == val) return((CHAR *) str);
872: str++;
873: }
874: return(NULL);
875: }
1.28 daniel 876:
1.50 daniel 877: /**
878: * xmlStrlen:
879: * @str: the CHAR * array
880: *
881: * lenght of a CHAR's string
1.68 daniel 882: *
883: * Returns the number of CHAR contained in the ARRAY.
1.45 daniel 884: */
885:
1.55 daniel 886: int
887: xmlStrlen(const CHAR *str) {
1.45 daniel 888: int len = 0;
889:
890: if (str == NULL) return(0);
891: while (*str != 0) {
892: str++;
893: len++;
894: }
895: return(len);
896: }
897:
1.50 daniel 898: /**
899: * xmlStrncat:
1.68 daniel 900: * @cur: the original CHAR * array
1.50 daniel 901: * @add: the CHAR * array added
902: * @len: the length of @add
903: *
904: * a strncat for array of CHAR's
1.68 daniel 905: *
906: * Returns a new CHAR * containing the concatenated string.
1.45 daniel 907: */
908:
1.55 daniel 909: CHAR *
910: xmlStrncat(CHAR *cur, const CHAR *add, int len) {
1.45 daniel 911: int size;
912: CHAR *ret;
913:
914: if ((add == NULL) || (len == 0))
915: return(cur);
916: if (cur == NULL)
917: return(xmlStrndup(add, len));
918:
919: size = xmlStrlen(cur);
920: ret = realloc(cur, (size + len + 1) * sizeof(CHAR));
921: if (ret == NULL) {
922: fprintf(stderr, "xmlStrncat: realloc of %d byte failed\n",
923: (size + len + 1) * sizeof(CHAR));
924: return(cur);
925: }
926: memcpy(&ret[size], add, len * sizeof(CHAR));
927: ret[size + len] = 0;
928: return(ret);
929: }
930:
1.50 daniel 931: /**
932: * xmlStrcat:
1.68 daniel 933: * @cur: the original CHAR * array
1.50 daniel 934: * @add: the CHAR * array added
935: *
936: * a strcat for array of CHAR's
1.68 daniel 937: *
938: * Returns a new CHAR * containing the concatenated string.
1.45 daniel 939: */
1.55 daniel 940: CHAR *
941: xmlStrcat(CHAR *cur, const CHAR *add) {
1.45 daniel 942: const CHAR *p = add;
943:
944: if (add == NULL) return(cur);
945: if (cur == NULL)
946: return(xmlStrdup(add));
947:
948: while (IS_CHAR(*p)) p++;
949: return(xmlStrncat(cur, add, p - add));
950: }
951:
952: /************************************************************************
953: * *
954: * Commodity functions, cleanup needed ? *
955: * *
956: ************************************************************************/
957:
1.50 daniel 958: /**
959: * areBlanks:
960: * @ctxt: an XML parser context
961: * @str: a CHAR *
962: * @len: the size of @str
963: *
1.45 daniel 964: * Is this a sequence of blank chars that one can ignore ?
1.50 daniel 965: *
966: * TODO: to be corrected accodingly to DTD information if available
1.68 daniel 967: *
968: * Returns 1 if ignorable 0 otherwise.
1.45 daniel 969: */
970:
971: static int areBlanks(xmlParserCtxtPtr ctxt, const CHAR *str, int len) {
972: int i;
973: xmlNodePtr lastChild;
974:
975: for (i = 0;i < len;i++)
976: if (!(IS_BLANK(str[i]))) return(0);
977:
978: if (CUR != '<') return(0);
979: lastChild = xmlGetLastChild(ctxt->node);
980: if (lastChild == NULL) {
981: if (ctxt->node->content != NULL) return(0);
982: } else if (xmlNodeIsText(lastChild))
983: return(0);
984: return(1);
985: }
986:
1.50 daniel 987: /**
988: * xmlHandleEntity:
989: * @ctxt: an XML parser context
990: * @entity: an XML entity pointer.
991: *
992: * Default handling of defined entities, when should we define a new input
1.45 daniel 993: * stream ? When do we just handle that as a set of chars ?
1.50 daniel 994: * TODO: we should call the SAX handler here and have it resolve the issue
1.45 daniel 995: */
996:
1.55 daniel 997: void
998: xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 999: int len;
1.50 daniel 1000: xmlParserInputPtr input;
1.45 daniel 1001:
1002: if (entity->content == NULL) {
1.55 daniel 1003: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1004: ctxt->sax->error(ctxt, "xmlHandleEntity %s: content == NULL\n",
1.45 daniel 1005: entity->name);
1.59 daniel 1006: ctxt->wellFormed = 0;
1.45 daniel 1007: return;
1008: }
1009: len = xmlStrlen(entity->content);
1010: if (len <= 2) goto handle_as_char;
1011:
1012: /*
1013: * Redefine its content as an input stream.
1014: */
1.50 daniel 1015: input = xmlNewEntityInputStream(ctxt, entity);
1016: xmlPushInput(ctxt, input);
1.45 daniel 1017: return;
1018:
1019: handle_as_char:
1020: /*
1021: * Just handle the content as a set of chars.
1022: */
1023: if (ctxt->sax != NULL)
1024: ctxt->sax->characters(ctxt, entity->content, 0, len);
1025:
1026: }
1027:
/*
 * Forward declarations needed for recursive behaviour.
 */
1031: xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt);
1.50 daniel 1032: CHAR *xmlParsePEReference(xmlParserCtxtPtr ctxt);
1033: CHAR *xmlParseReference(xmlParserCtxtPtr ctxt);
1.45 daniel 1034:
1.28 daniel 1035: /************************************************************************
1036: * *
1037: * Extra stuff for namespace support *
1038: * Relates to http://www.w3.org/TR/WD-xml-names *
1039: * *
1040: ************************************************************************/
1041:
1.50 daniel 1042: /**
1043: * xmlNamespaceParseNCName:
1044: * @ctxt: an XML parser context
1045: *
1046: * parse an XML namespace name.
1.28 daniel 1047: *
1048: * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
1049: *
1050: * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
1051: * CombiningChar | Extender
1.68 daniel 1052: *
1053: * Returns the namespace name or NULL
1.28 daniel 1054: */
1055:
1.55 daniel 1056: CHAR *
1057: xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
1.28 daniel 1058: const CHAR *q;
1059: CHAR *ret = NULL;
1060:
1.40 daniel 1061: if (!IS_LETTER(CUR) && (CUR != '_')) return(NULL);
1062: q = NEXT;
1.28 daniel 1063:
1.40 daniel 1064: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1065: (CUR == '.') || (CUR == '-') ||
1066: (CUR == '_') ||
1067: (IS_COMBINING(CUR)) ||
1068: (IS_EXTENDER(CUR)))
1069: NEXT;
1.28 daniel 1070:
1.40 daniel 1071: ret = xmlStrndup(q, CUR_PTR - q);
1.28 daniel 1072:
1073: return(ret);
1074: }
1075:
1.50 daniel 1076: /**
1077: * xmlNamespaceParseQName:
1078: * @ctxt: an XML parser context
1079: * @prefix: a CHAR **
1080: *
1081: * parse an XML qualified name
1.28 daniel 1082: *
1083: * [NS 5] QName ::= (Prefix ':')? LocalPart
1084: *
1085: * [NS 6] Prefix ::= NCName
1086: *
1087: * [NS 7] LocalPart ::= NCName
1.68 daniel 1088: *
1089: * Returns the function returns the local part, and prefix is updated
1.50 daniel 1090: * to get the Prefix if any.
1.28 daniel 1091: */
1092:
1.55 daniel 1093: CHAR *
1094: xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, CHAR **prefix) {
1.28 daniel 1095: CHAR *ret = NULL;
1096:
1097: *prefix = NULL;
1098: ret = xmlNamespaceParseNCName(ctxt);
1.40 daniel 1099: if (CUR == ':') {
1.28 daniel 1100: *prefix = ret;
1.40 daniel 1101: NEXT;
1.28 daniel 1102: ret = xmlNamespaceParseNCName(ctxt);
1103: }
1104:
1105: return(ret);
1106: }
1107:
1.50 daniel 1108: /**
1109: * xmlNamespaceParseNSDef:
1110: * @ctxt: an XML parser context
1111: *
1112: * parse a namespace prefix declaration
1.28 daniel 1113: *
1114: * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
1115: *
1116: * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
1.68 daniel 1117: *
1118: * Returns the namespace name
1.28 daniel 1119: */
1120:
1.55 daniel 1121: CHAR *
1122: xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
1.28 daniel 1123: CHAR *name = NULL;
1124:
1.40 daniel 1125: if ((CUR == 'x') && (NXT(1) == 'm') &&
1126: (NXT(2) == 'l') && (NXT(3) == 'n') &&
1127: (NXT(4) == 's')) {
1128: SKIP(5);
1129: if (CUR == ':') {
1130: NEXT;
1.28 daniel 1131: name = xmlNamespaceParseNCName(ctxt);
1132: }
1133: }
1.39 daniel 1134: return(name);
1.28 daniel 1135: }
1136:
1.50 daniel 1137: /**
1138: * xmlParseQuotedString:
1139: * @ctxt: an XML parser context
1140: *
1.45 daniel 1141: * [OLD] Parse and return a string between quotes or doublequotes
1.68 daniel 1142: *
1143: * Returns the string parser or NULL.
1.45 daniel 1144: */
1.55 daniel 1145: CHAR *
1146: xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
1.45 daniel 1147: CHAR *ret = NULL;
1148: const CHAR *q;
1149:
1150: if (CUR == '"') {
1151: NEXT;
1152: q = CUR_PTR;
1153: while (IS_CHAR(CUR) && (CUR != '"')) NEXT;
1.55 daniel 1154: if (CUR != '"') {
1155: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.58 daniel 1156: ctxt->sax->error(ctxt, "String not closed \"%.50s\"\n", q);
1.59 daniel 1157: ctxt->wellFormed = 0;
1.55 daniel 1158: } else {
1.45 daniel 1159: ret = xmlStrndup(q, CUR_PTR - q);
1160: NEXT;
1161: }
1162: } else if (CUR == '\''){
1163: NEXT;
1164: q = CUR_PTR;
1165: while (IS_CHAR(CUR) && (CUR != '\'')) NEXT;
1.55 daniel 1166: if (CUR != '\'') {
1167: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.58 daniel 1168: ctxt->sax->error(ctxt, "String not closed \"%.50s\"\n", q);
1.59 daniel 1169: ctxt->wellFormed = 0;
1.55 daniel 1170: } else {
1.45 daniel 1171: ret = xmlStrndup(q, CUR_PTR - q);
1172: NEXT;
1173: }
1174: }
1175: return(ret);
1176: }
1177:
1.50 daniel 1178: /**
1179: * xmlParseNamespace:
1180: * @ctxt: an XML parser context
1181: *
1.45 daniel 1182: * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
1183: *
1184: * This is what the older xml-name Working Draft specified, a bunch of
1185: * other stuff may still rely on it, so support is still here as
1186: * if ot was declared on the root of the Tree:-(
1187: */
1188:
1.55 daniel 1189: void
1190: xmlParseNamespace(xmlParserCtxtPtr ctxt) {
1.45 daniel 1191: CHAR *href = NULL;
1192: CHAR *prefix = NULL;
1193: int garbage = 0;
1194:
1195: /*
1196: * We just skipped "namespace" or "xml:namespace"
1197: */
1198: SKIP_BLANKS;
1199:
1200: while (IS_CHAR(CUR) && (CUR != '>')) {
1201: /*
1202: * We can have "ns" or "prefix" attributes
1203: * Old encoding as 'href' or 'AS' attributes is still supported
1204: */
1205: if ((CUR == 'n') && (NXT(1) == 's')) {
1206: garbage = 0;
1207: SKIP(2);
1208: SKIP_BLANKS;
1209:
1210: if (CUR != '=') continue;
1211: NEXT;
1212: SKIP_BLANKS;
1213:
1214: href = xmlParseQuotedString(ctxt);
1215: SKIP_BLANKS;
1216: } else if ((CUR == 'h') && (NXT(1) == 'r') &&
1217: (NXT(2) == 'e') && (NXT(3) == 'f')) {
1218: garbage = 0;
1219: SKIP(4);
1220: SKIP_BLANKS;
1221:
1222: if (CUR != '=') continue;
1223: NEXT;
1224: SKIP_BLANKS;
1225:
1226: href = xmlParseQuotedString(ctxt);
1227: SKIP_BLANKS;
1228: } else if ((CUR == 'p') && (NXT(1) == 'r') &&
1229: (NXT(2) == 'e') && (NXT(3) == 'f') &&
1230: (NXT(4) == 'i') && (NXT(5) == 'x')) {
1231: garbage = 0;
1232: SKIP(6);
1233: SKIP_BLANKS;
1234:
1235: if (CUR != '=') continue;
1236: NEXT;
1237: SKIP_BLANKS;
1238:
1239: prefix = xmlParseQuotedString(ctxt);
1240: SKIP_BLANKS;
1241: } else if ((CUR == 'A') && (NXT(1) == 'S')) {
1242: garbage = 0;
1243: SKIP(2);
1244: SKIP_BLANKS;
1245:
1246: if (CUR != '=') continue;
1247: NEXT;
1248: SKIP_BLANKS;
1249:
1250: prefix = xmlParseQuotedString(ctxt);
1251: SKIP_BLANKS;
1252: } else if ((CUR == '?') && (NXT(1) == '>')) {
1253: garbage = 0;
1254: CUR_PTR ++;
1255: } else {
1256: /*
1257: * Found garbage when parsing the namespace
1258: */
1259: if (!garbage)
1.55 daniel 1260: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1261: ctxt->sax->error(ctxt, "xmlParseNamespace found garbage\n");
1.59 daniel 1262: ctxt->wellFormed = 0;
1.45 daniel 1263: NEXT;
1264: }
1265: }
1266:
1267: MOVETO_ENDTAG(CUR_PTR);
1268: NEXT;
1269:
1270: /*
1271: * Register the DTD.
1272: */
1273: if (href != NULL)
1274: xmlNewGlobalNs(ctxt->doc, href, prefix);
1275:
1276: if (prefix != NULL) free(prefix);
1277: if (href != NULL) free(href);
1278: }
1279:
1.28 daniel 1280: /************************************************************************
1281: * *
1282: * The parser itself *
1283: * Relates to http://www.w3.org/TR/REC-xml *
1284: * *
1285: ************************************************************************/
1.14 veillard 1286:
1.50 daniel 1287: /**
1288: * xmlParseName:
1289: * @ctxt: an XML parser context
1290: *
1291: * parse an XML name.
1.22 daniel 1292: *
1293: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1294: * CombiningChar | Extender
1295: *
1296: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1297: *
1298: * [6] Names ::= Name (S Name)*
1.68 daniel 1299: *
1300: * Returns the Name parsed or NULL
1.1 veillard 1301: */
1302:
1.55 daniel 1303: CHAR *
1304: xmlParseName(xmlParserCtxtPtr ctxt) {
1.17 daniel 1305: const CHAR *q;
1306: CHAR *ret = NULL;
1.1 veillard 1307:
1.40 daniel 1308: if (!IS_LETTER(CUR) && (CUR != '_') &&
1309: (CUR != ':')) return(NULL);
1310: q = NEXT;
1311:
1312: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1313: (CUR == '.') || (CUR == '-') ||
1314: (CUR == '_') || (CUR == ':') ||
1315: (IS_COMBINING(CUR)) ||
1316: (IS_EXTENDER(CUR)))
1317: NEXT;
1.22 daniel 1318:
1.40 daniel 1319: ret = xmlStrndup(q, CUR_PTR - q);
1.22 daniel 1320:
1321: return(ret);
1322: }
1323:
1.50 daniel 1324: /**
1325: * xmlParseNmtoken:
1326: * @ctxt: an XML parser context
1327: *
1328: * parse an XML Nmtoken.
1.22 daniel 1329: *
1330: * [7] Nmtoken ::= (NameChar)+
1331: *
1332: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1.68 daniel 1333: *
1334: * Returns the Nmtoken parsed or NULL
1.22 daniel 1335: */
1336:
1.55 daniel 1337: CHAR *
1338: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1.22 daniel 1339: const CHAR *q;
1340: CHAR *ret = NULL;
1341:
1.40 daniel 1342: q = NEXT;
1.22 daniel 1343:
1.40 daniel 1344: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1345: (CUR == '.') || (CUR == '-') ||
1346: (CUR == '_') || (CUR == ':') ||
1347: (IS_COMBINING(CUR)) ||
1348: (IS_EXTENDER(CUR)))
1349: NEXT;
1.3 veillard 1350:
1.40 daniel 1351: ret = xmlStrndup(q, CUR_PTR - q);
1.1 veillard 1352:
1.3 veillard 1353: return(ret);
1.1 veillard 1354: }
1355:
1.50 daniel 1356: /**
1357: * xmlParseEntityValue:
1358: * @ctxt: an XML parser context
1359: *
1360: * parse a value for ENTITY decl.
1.24 daniel 1361: *
1362: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1363: * "'" ([^%&'] | PEReference | Reference)* "'"
1.68 daniel 1364: *
1365: * Returns the EntityValue parsed or NULL
1.24 daniel 1366: */
1367:
1.55 daniel 1368: CHAR *
1369: xmlParseEntityValue(xmlParserCtxtPtr ctxt) {
1.46 daniel 1370: CHAR *ret = NULL, *cur;
1.24 daniel 1371: const CHAR *q;
1372:
1.40 daniel 1373: if (CUR == '"') {
1374: NEXT;
1.24 daniel 1375:
1.40 daniel 1376: q = CUR_PTR;
1377: while ((IS_CHAR(CUR)) && (CUR != '"')) {
1378: if (CUR == '%') {
1.46 daniel 1379: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.50 daniel 1380: cur = xmlParsePEReference(ctxt);
1.46 daniel 1381: ret = xmlStrcat(ret, cur);
1382: q = CUR_PTR;
1.40 daniel 1383: } else if (CUR == '&') {
1.46 daniel 1384: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.50 daniel 1385: cur = xmlParseReference(ctxt);
1386: if (cur != NULL) {
1387: CHAR buf[2];
1388: buf[0] = '&';
1389: buf[1] = 0;
1390: ret = xmlStrncat(ret, buf, 1);
1391: ret = xmlStrcat(ret, cur);
1392: buf[0] = ';';
1393: buf[1] = 0;
1394: ret = xmlStrncat(ret, buf, 1);
1395: }
1.46 daniel 1396: q = CUR_PTR;
1.24 daniel 1397: } else
1.40 daniel 1398: NEXT;
1.24 daniel 1399: }
1.40 daniel 1400: if (!IS_CHAR(CUR)) {
1.55 daniel 1401: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1402: ctxt->sax->error(ctxt, "Unfinished EntityValue\n");
1.59 daniel 1403: ctxt->wellFormed = 0;
1.24 daniel 1404: } else {
1.46 daniel 1405: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.40 daniel 1406: NEXT;
1.24 daniel 1407: }
1.40 daniel 1408: } else if (CUR == '\'') {
1409: NEXT;
1410: q = CUR_PTR;
1411: while ((IS_CHAR(CUR)) && (CUR != '\'')) {
1412: if (CUR == '%') {
1.46 daniel 1413: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.50 daniel 1414: cur = xmlParsePEReference(ctxt);
1.46 daniel 1415: ret = xmlStrcat(ret, cur);
1416: q = CUR_PTR;
1.40 daniel 1417: } else if (CUR == '&') {
1.46 daniel 1418: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.50 daniel 1419: cur = xmlParseReference(ctxt);
1420: if (cur != NULL) {
1421: CHAR buf[2];
1422: buf[0] = '&';
1423: buf[1] = 0;
1424: ret = xmlStrncat(ret, buf, 1);
1425: ret = xmlStrcat(ret, cur);
1426: buf[0] = ';';
1427: buf[1] = 0;
1428: ret = xmlStrncat(ret, buf, 1);
1429: }
1.46 daniel 1430: q = CUR_PTR;
1.24 daniel 1431: } else
1.40 daniel 1432: NEXT;
1.24 daniel 1433: }
1.40 daniel 1434: if (!IS_CHAR(CUR)) {
1.55 daniel 1435: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1436: ctxt->sax->error(ctxt, "Unfinished EntityValue\n");
1.59 daniel 1437: ctxt->wellFormed = 0;
1.24 daniel 1438: } else {
1.46 daniel 1439: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.40 daniel 1440: NEXT;
1.24 daniel 1441: }
1442: } else {
1.55 daniel 1443: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1444: ctxt->sax->error(ctxt, "xmlParseEntityValue \" or ' expected\n");
1.59 daniel 1445: ctxt->wellFormed = 0;
1.24 daniel 1446: }
1447:
1448: return(ret);
1449: }
1450:
1.50 daniel 1451: /**
1452: * xmlParseAttValue:
1453: * @ctxt: an XML parser context
1454: *
1455: * parse a value for an attribute
1.29 daniel 1456: *
1457: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
1458: * "'" ([^<&'] | Reference)* "'"
1.68 daniel 1459: *
1460: * Returns the AttValue parsed or NULL.
1.29 daniel 1461: */
1462:
1.55 daniel 1463: CHAR *
1464: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.46 daniel 1465: CHAR *ret = NULL, *cur;
1.29 daniel 1466: const CHAR *q;
1467:
1.40 daniel 1468: if (CUR == '"') {
1469: NEXT;
1.29 daniel 1470:
1.40 daniel 1471: q = CUR_PTR;
1472: while ((IS_CHAR(CUR)) && (CUR != '"')) {
1.59 daniel 1473: if (CUR == '<') {
1474: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1475: ctxt->sax->error(ctxt,
1476: "Unescaped '<' not allowed in attributes values\n");
1477: ctxt->wellFormed = 0;
1478: }
1.40 daniel 1479: if (CUR == '&') {
1.46 daniel 1480: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.50 daniel 1481: cur = xmlParseReference(ctxt);
1482: if (cur != NULL) {
1483: /*
1484: * Special case for '&', we don't want to
1485: * resolve it here since it will break later
1486: * when searching entities in the string.
1487: */
1488: if ((cur[0] == '&') && (cur[1] == 0)) {
1489: CHAR buf[6] = { '&', 'a', 'm', 'p', ';', 0 };
1490: ret = xmlStrncat(ret, buf, 5);
1491: } else
1492: ret = xmlStrcat(ret, cur);
1493: free(cur);
1494: }
1.46 daniel 1495: q = CUR_PTR;
1.29 daniel 1496: } else
1.40 daniel 1497: NEXT;
1.50 daniel 1498: /*
1499: * Pop out finished entity references.
1500: */
1501: while ((CUR == 0) && (ctxt->inputNr > 1)) {
1502: if (CUR_PTR != q)
1503: ret = xmlStrncat(ret, q, CUR_PTR - q);
1504: xmlPopInput(ctxt);
1505: q = CUR_PTR;
1506: }
1.29 daniel 1507: }
1.40 daniel 1508: if (!IS_CHAR(CUR)) {
1.55 daniel 1509: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1510: ctxt->sax->error(ctxt, "Unfinished AttValue\n");
1.59 daniel 1511: ctxt->wellFormed = 0;
1.29 daniel 1512: } else {
1.46 daniel 1513: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.40 daniel 1514: NEXT;
1.29 daniel 1515: }
1.40 daniel 1516: } else if (CUR == '\'') {
1517: NEXT;
1518: q = CUR_PTR;
1519: while ((IS_CHAR(CUR)) && (CUR != '\'')) {
1.59 daniel 1520: if (CUR == '<') {
1521: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1522: ctxt->sax->error(ctxt,
1523: "Unescaped '<' not allowed in attributes values\n");
1524: ctxt->wellFormed = 0;
1525: }
1.40 daniel 1526: if (CUR == '&') {
1.46 daniel 1527: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.50 daniel 1528: cur = xmlParseReference(ctxt);
1529: if (cur != NULL) {
1530: /*
1531: * Special case for '&', we don't want to
1532: * resolve it here since it will break later
1533: * when searching entities in the string.
1534: */
1535: if ((cur[0] == '&') && (cur[1] == 0)) {
1536: CHAR buf[6] = { '&', 'a', 'm', 'p', ';', 0 };
1537: ret = xmlStrncat(ret, buf, 5);
1538: } else
1539: ret = xmlStrcat(ret, cur);
1540: free(cur);
1541: }
1.46 daniel 1542: q = CUR_PTR;
1.29 daniel 1543: } else
1.40 daniel 1544: NEXT;
1.50 daniel 1545: /*
1546: * Pop out finished entity references.
1547: */
1548: while ((CUR == 0) && (ctxt->inputNr > 1)) {
1549: if (CUR_PTR != q)
1550: ret = xmlStrncat(ret, q, CUR_PTR - q);
1551: xmlPopInput(ctxt);
1552: q = CUR_PTR;
1553: }
1.29 daniel 1554: }
1.40 daniel 1555: if (!IS_CHAR(CUR)) {
1.55 daniel 1556: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1557: ctxt->sax->error(ctxt, "Unfinished AttValue\n");
1.59 daniel 1558: ctxt->wellFormed = 0;
1.29 daniel 1559: } else {
1.46 daniel 1560: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.40 daniel 1561: NEXT;
1.29 daniel 1562: }
1563: } else {
1.55 daniel 1564: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1565: ctxt->sax->error(ctxt, "AttValue: \" or ' expected\n");
1.59 daniel 1566: ctxt->wellFormed = 0;
1.29 daniel 1567: }
1568:
1569: return(ret);
1570: }
1571:
1.50 daniel 1572: /**
1573: * xmlParseSystemLiteral:
1574: * @ctxt: an XML parser context
1575: *
1576: * parse an XML Literal
1.21 daniel 1577: *
1.22 daniel 1578: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.68 daniel 1579: *
1580: * Returns the SystemLiteral parsed or NULL
1.21 daniel 1581: */
1582:
1.55 daniel 1583: CHAR *
1584: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
1.21 daniel 1585: const CHAR *q;
1586: CHAR *ret = NULL;
1587:
1.40 daniel 1588: if (CUR == '"') {
1589: NEXT;
1590: q = CUR_PTR;
1591: while ((IS_CHAR(CUR)) && (CUR != '"'))
1592: NEXT;
1593: if (!IS_CHAR(CUR)) {
1.55 daniel 1594: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1595: ctxt->sax->error(ctxt, "Unfinished SystemLiteral\n");
1.59 daniel 1596: ctxt->wellFormed = 0;
1.21 daniel 1597: } else {
1.40 daniel 1598: ret = xmlStrndup(q, CUR_PTR - q);
1599: NEXT;
1.21 daniel 1600: }
1.40 daniel 1601: } else if (CUR == '\'') {
1602: NEXT;
1603: q = CUR_PTR;
1604: while ((IS_CHAR(CUR)) && (CUR != '\''))
1605: NEXT;
1606: if (!IS_CHAR(CUR)) {
1.55 daniel 1607: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1608: ctxt->sax->error(ctxt, "Unfinished SystemLiteral\n");
1.59 daniel 1609: ctxt->wellFormed = 0;
1.21 daniel 1610: } else {
1.40 daniel 1611: ret = xmlStrndup(q, CUR_PTR - q);
1612: NEXT;
1.21 daniel 1613: }
1614: } else {
1.55 daniel 1615: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1616: ctxt->sax->error(ctxt, "SystemLiteral \" or ' expected\n");
1.59 daniel 1617: ctxt->wellFormed = 0;
1.21 daniel 1618: }
1619:
1620: return(ret);
1621: }
1622:
1.50 daniel 1623: /**
1624: * xmlParsePubidLiteral:
1625: * @ctxt: an XML parser context
1.21 daniel 1626: *
1.50 daniel 1627: * parse an XML public literal
1.68 daniel 1628: *
1629: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1630: *
1631: * Returns the PubidLiteral parsed or NULL.
1.21 daniel 1632: */
1633:
1.55 daniel 1634: CHAR *
1635: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
1.21 daniel 1636: const CHAR *q;
1637: CHAR *ret = NULL;
1638: /*
1639: * Name ::= (Letter | '_') (NameChar)*
1640: */
1.40 daniel 1641: if (CUR == '"') {
1642: NEXT;
1643: q = CUR_PTR;
1644: while (IS_PUBIDCHAR(CUR)) NEXT;
1645: if (CUR != '"') {
1.55 daniel 1646: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1647: ctxt->sax->error(ctxt, "Unfinished PubidLiteral\n");
1.59 daniel 1648: ctxt->wellFormed = 0;
1.21 daniel 1649: } else {
1.40 daniel 1650: ret = xmlStrndup(q, CUR_PTR - q);
1651: NEXT;
1.21 daniel 1652: }
1.40 daniel 1653: } else if (CUR == '\'') {
1654: NEXT;
1655: q = CUR_PTR;
1656: while ((IS_LETTER(CUR)) && (CUR != '\''))
1657: NEXT;
1658: if (!IS_LETTER(CUR)) {
1.55 daniel 1659: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1660: ctxt->sax->error(ctxt, "Unfinished PubidLiteral\n");
1.59 daniel 1661: ctxt->wellFormed = 0;
1.21 daniel 1662: } else {
1.40 daniel 1663: ret = xmlStrndup(q, CUR_PTR - q);
1664: NEXT;
1.21 daniel 1665: }
1666: } else {
1.55 daniel 1667: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1668: ctxt->sax->error(ctxt, "SystemLiteral \" or ' expected\n");
1.59 daniel 1669: ctxt->wellFormed = 0;
1.21 daniel 1670: }
1671:
1672: return(ret);
1673: }
1674:
1.50 daniel 1675: /**
1676: * xmlParseCharData:
1677: * @ctxt: an XML parser context
1678: * @cdata: int indicating whether we are within a CDATA section
1679: *
1680: * parse a CharData section.
1681: * if we are within a CDATA section ']]>' marks an end of section.
1.27 daniel 1682: *
1683: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
1684: */
1685:
1.55 daniel 1686: void
1687: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1.27 daniel 1688: const CHAR *q;
1689:
1.40 daniel 1690: q = CUR_PTR;
1691: while ((IS_CHAR(CUR)) && (CUR != '<') &&
1692: (CUR != '&')) {
1.59 daniel 1693: if ((CUR == ']') && (NXT(1) == ']') &&
1694: (NXT(2) == '>')) {
1695: if (cdata) break;
1696: else {
1697: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1698: ctxt->sax->error(ctxt,
1699: "Sequence ']]>' not allowed in content\n");
1700: ctxt->wellFormed = 0;
1701: }
1702: }
1.40 daniel 1703: NEXT;
1.27 daniel 1704: }
1.45 daniel 1705: if (q == CUR_PTR) return;
1706:
1707: /*
1708: * Ok the segment [q CUR_PTR] is to be consumed as chars.
1709: */
1710: if (ctxt->sax != NULL) {
1711: if (areBlanks(ctxt, q, CUR_PTR - q))
1712: ctxt->sax->ignorableWhitespace(ctxt, q, 0, CUR_PTR - q);
1713: else
1714: ctxt->sax->characters(ctxt, q, 0, CUR_PTR - q);
1715: }
1.27 daniel 1716: }
1717:
1.50 daniel 1718: /**
1719: * xmlParseExternalID:
1720: * @ctxt: an XML parser context
1721: * @publicID: a CHAR** receiving PubidLiteral
1.67 daniel 1722: * @strict: indicate whether we should restrict parsing to only
1723: * production [75], see NOTE below
1.50 daniel 1724: *
1.67 daniel 1725: * Parse an External ID or a Public ID
1726: *
1727: * NOTE: Productions [75] and [83] interract badly since [75] can generate
1728: * 'PUBLIC' S PubidLiteral S SystemLiteral
1.22 daniel 1729: *
1730: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
1731: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1.67 daniel 1732: *
1733: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
1734: *
1.68 daniel 1735: * Returns the function returns SystemLiteral and in the second
1.67 daniel 1736: * case publicID receives PubidLiteral, is strict is off
1737: * it is possible to return NULL and have publicID set.
1.22 daniel 1738: */
1739:
1.55 daniel 1740: CHAR *
1.67 daniel 1741: xmlParseExternalID(xmlParserCtxtPtr ctxt, CHAR **publicID, int strict) {
1.39 daniel 1742: CHAR *URI = NULL;
1.22 daniel 1743:
1.40 daniel 1744: if ((CUR == 'S') && (NXT(1) == 'Y') &&
1745: (NXT(2) == 'S') && (NXT(3) == 'T') &&
1746: (NXT(4) == 'E') && (NXT(5) == 'M')) {
1747: SKIP(6);
1.59 daniel 1748: if (!IS_BLANK(CUR)) {
1749: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1750: ctxt->sax->error(ctxt,
1751: "Space required after 'SYSTEM'\n");
1752: ctxt->wellFormed = 0;
1753: }
1.42 daniel 1754: SKIP_BLANKS;
1.39 daniel 1755: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 1756: if (URI == NULL) {
1.55 daniel 1757: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1758: ctxt->sax->error(ctxt,
1.39 daniel 1759: "xmlParseExternalID: SYSTEM, no URI\n");
1.59 daniel 1760: ctxt->wellFormed = 0;
1761: }
1.40 daniel 1762: } else if ((CUR == 'P') && (NXT(1) == 'U') &&
1763: (NXT(2) == 'B') && (NXT(3) == 'L') &&
1764: (NXT(4) == 'I') && (NXT(5) == 'C')) {
1765: SKIP(6);
1.59 daniel 1766: if (!IS_BLANK(CUR)) {
1767: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1768: ctxt->sax->error(ctxt,
1769: "Space required after 'PUBLIC'\n");
1770: ctxt->wellFormed = 0;
1771: }
1.42 daniel 1772: SKIP_BLANKS;
1.39 daniel 1773: *publicID = xmlParsePubidLiteral(ctxt);
1.59 daniel 1774: if (*publicID == NULL) {
1.55 daniel 1775: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1776: ctxt->sax->error(ctxt,
1.39 daniel 1777: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.59 daniel 1778: ctxt->wellFormed = 0;
1779: }
1.67 daniel 1780: if (strict) {
1781: /*
1782: * We don't handle [83] so "S SystemLiteral" is required.
1783: */
1784: if (!IS_BLANK(CUR)) {
1785: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1786: ctxt->sax->error(ctxt,
1787: "Space required after the Public Identifier\n");
1788: ctxt->wellFormed = 0;
1789: }
1790: } else {
1791: /*
1792: * We handle [83] so we return immediately, if
1793: * "S SystemLiteral" is not detected. From a purely parsing
1794: * point of view that's a nice mess.
1795: */
1796: const CHAR *ptr = CUR_PTR;
1797: if (!IS_BLANK(*ptr)) return(NULL);
1798:
1799: while (IS_BLANK(*ptr)) ptr++;
1800: if ((*ptr != '\'') || (*ptr != '"')) return(NULL);
1.59 daniel 1801: }
1.42 daniel 1802: SKIP_BLANKS;
1.39 daniel 1803: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 1804: if (URI == NULL) {
1.55 daniel 1805: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1806: ctxt->sax->error(ctxt,
1.39 daniel 1807: "xmlParseExternalID: PUBLIC, no URI\n");
1.59 daniel 1808: ctxt->wellFormed = 0;
1809: }
1.22 daniel 1810: }
1.39 daniel 1811: return(URI);
1.22 daniel 1812: }
1813:
1.50 daniel 1814: /**
1815: * xmlParseComment:
1.69 daniel 1816: * @ctxt: an XML parser context
1817: * @create: should we create a node, or just skip the content
1.50 daniel 1818: *
1.3 veillard 1819: * Skip an XML (SGML) comment <!-- .... -->
1.31 daniel 1820: * This may or may not create a node (depending on the context)
1.38 daniel 1821: * The spec says that "For compatibility, the string "--" (double-hyphen)
1822: * must not occur within comments. "
1.22 daniel 1823: *
1824: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.50 daniel 1825: *
1826: * TODO: this should call a SAX function which will handle (or not) the
1827: * creation of the comment !
1.68 daniel 1828: *
1.69 daniel 1829: * Returns the comment node, or NULL
1.3 veillard 1830: */
1.69 daniel 1831: xmlNodePtr
1832: xmlParseComment(xmlParserCtxtPtr ctxt, int create) {
1.31 daniel 1833: xmlNodePtr ret = NULL;
1.17 daniel 1834: const CHAR *q, *start;
1835: const CHAR *r;
1.39 daniel 1836: CHAR *val;
1.3 veillard 1837:
1838: /*
1.22 daniel 1839: * Check that there is a comment right here.
1.3 veillard 1840: */
1.40 daniel 1841: if ((CUR != '<') || (NXT(1) != '!') ||
1842: (NXT(2) != '-') || (NXT(3) != '-')) return(NULL);
1.3 veillard 1843:
1.40 daniel 1844: SKIP(4);
1845: start = q = CUR_PTR;
1846: NEXT;
1847: r = CUR_PTR;
1848: NEXT;
1849: while (IS_CHAR(CUR) &&
1850: ((CUR == ':') || (CUR != '>') ||
1.16 daniel 1851: (*r != '-') || (*q != '-'))) {
1.59 daniel 1852: if ((*r == '-') && (*q == '-')) {
1.55 daniel 1853: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1854: ctxt->sax->error(ctxt,
1.38 daniel 1855: "Comment must not contain '--' (double-hyphen)`\n");
1.59 daniel 1856: ctxt->wellFormed = 0;
1857: }
1.40 daniel 1858: NEXT;r++;q++;
1.3 veillard 1859: }
1.40 daniel 1860: if (!IS_CHAR(CUR)) {
1.55 daniel 1861: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1862: ctxt->sax->error(ctxt, "Comment not terminated \n<!--%.50s\n", start);
1.59 daniel 1863: ctxt->wellFormed = 0;
1.3 veillard 1864: } else {
1.40 daniel 1865: NEXT;
1.31 daniel 1866: if (create) {
1.39 daniel 1867: val = xmlStrndup(start, q - start);
1.50 daniel 1868: ret = xmlNewDocComment(ctxt->doc, val);
1.39 daniel 1869: free(val);
1.31 daniel 1870: }
1.3 veillard 1871: }
1.39 daniel 1872: return(ret);
1.3 veillard 1873: }
1874:
1.50 daniel 1875: /**
1876: * xmlParsePITarget:
1877: * @ctxt: an XML parser context
1878: *
1879: * parse the name of a PI
1.22 daniel 1880: *
1881: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1.68 daniel 1882: *
1883: * Returns the PITarget name or NULL
1.22 daniel 1884: */
1885:
1.55 daniel 1886: CHAR *
1887: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1.22 daniel 1888: CHAR *name;
1889:
1890: name = xmlParseName(ctxt);
1891: if ((name != NULL) && (name[3] == 0) &&
1892: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 1893: ((name[1] == 'm') || (name[1] == 'M')) &&
1894: ((name[2] == 'l') || (name[2] == 'L'))) {
1.55 daniel 1895: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1896: ctxt->sax->error(ctxt, "xmlParsePItarget: invalid name prefix 'xml'\n");
1.22 daniel 1897: return(NULL);
1898: }
1899: return(name);
1900: }
1901:
1.50 daniel 1902: /**
1903: * xmlParsePI:
1904: * @ctxt: an XML parser context
1905: *
1906: * parse an XML Processing Instruction.
1.22 daniel 1907: *
1908: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.68 daniel 1909: *
1.69 daniel 1910: * The processing is transfered to SAX once parsed.
1.3 veillard 1911: */
1912:
1.55 daniel 1913: void
1914: xmlParsePI(xmlParserCtxtPtr ctxt) {
1.22 daniel 1915: CHAR *target;
1916:
1.40 daniel 1917: if ((CUR == '<') && (NXT(1) == '?')) {
1.3 veillard 1918: /*
1919: * this is a Processing Instruction.
1920: */
1.40 daniel 1921: SKIP(2);
1.3 veillard 1922:
1923: /*
1.22 daniel 1924: * Parse the target name and check for special support like
1925: * namespace.
1926: *
1927: * TODO : PI handling should be dynamically redefinable using an
1928: * API. Only namespace should be in the code IMHO ...
1.3 veillard 1929: */
1.22 daniel 1930: target = xmlParsePITarget(ctxt);
1931: if (target != NULL) {
1932: /*
1.44 daniel 1933: * Support for the old Processing Instruction related to namespace.
1.22 daniel 1934: */
1935: if ((target[0] == 'n') && (target[1] == 'a') &&
1936: (target[2] == 'm') && (target[3] == 'e') &&
1937: (target[4] == 's') && (target[5] == 'p') &&
1938: (target[6] == 'a') && (target[7] == 'c') &&
1939: (target[8] == 'e')) {
1940: xmlParseNamespace(ctxt);
1941: } else if ((target[0] == 'x') && (target[1] == 'm') &&
1942: (target[2] == 'l') && (target[3] == ':') &&
1943: (target[4] == 'n') && (target[5] == 'a') &&
1944: (target[6] == 'm') && (target[7] == 'e') &&
1945: (target[8] == 's') && (target[9] == 'p') &&
1946: (target[10] == 'a') && (target[11] == 'c') &&
1947: (target[12] == 'e')) {
1948: xmlParseNamespace(ctxt);
1949: } else {
1.44 daniel 1950: const CHAR *q = CUR_PTR;
1951:
1.40 daniel 1952: while (IS_CHAR(CUR) &&
1953: ((CUR != '?') || (NXT(1) != '>')))
1954: NEXT;
1955: if (!IS_CHAR(CUR)) {
1.55 daniel 1956: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.59 daniel 1957: ctxt->sax->error(ctxt,
1958: "xmlParsePI: PI %s never end ...\n", target);
1959: ctxt->wellFormed = 0;
1.44 daniel 1960: } else {
1961: CHAR *data;
1962:
1963: data = xmlStrndup(CUR_PTR, CUR_PTR - q);
1.40 daniel 1964: SKIP(2);
1.44 daniel 1965:
1966: /*
1967: * SAX: PI detected.
1968: */
1969: if (ctxt->sax)
1970: ctxt->sax->processingInstruction(ctxt, target, data);
1971: /*
1972: * Unknown PI, ignore it !
1973: */
1974: else
1975: xmlParserWarning(ctxt,
1976: "xmlParsePI : skipping unknown PI %s\n",
1977: target);
1978: free(data);
1979: }
1.22 daniel 1980: }
1.39 daniel 1981: free(target);
1.3 veillard 1982: } else {
1.55 daniel 1983: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1984: ctxt->sax->error(ctxt, "xmlParsePI : no target name\n");
1.59 daniel 1985: ctxt->wellFormed = 0;
1986:
1.22 daniel 1987: /********* Should we try to complete parsing the PI ???
1.40 daniel 1988: while (IS_CHAR(CUR) &&
1989: (CUR != '?') && (CUR != '>'))
1990: NEXT;
1991: if (!IS_CHAR(CUR)) {
1.22 daniel 1992: fprintf(stderr, "xmlParsePI: PI %s never end ...\n",
1993: target);
1994: }
1995: ********************************************************/
1996: }
1997: }
1998: }
1999:
1.50 daniel 2000: /**
2001: * xmlParseNotationDecl:
2002: * @ctxt: an XML parser context
2003: *
2004: * parse a notation declaration
1.22 daniel 2005: *
2006: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
2007: *
2008: * Hence there is actually 3 choices:
2009: * 'PUBLIC' S PubidLiteral
2010: * 'PUBLIC' S PubidLiteral S SystemLiteral
2011: * and 'SYSTEM' S SystemLiteral
1.50 daniel 2012: *
1.67 daniel 2013: * See the NOTE on xmlParseExternalID().
1.22 daniel 2014: */
2015:
1.55 daniel 2016: void
2017: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1.22 daniel 2018: CHAR *name;
1.67 daniel 2019: CHAR *Pubid;
2020: CHAR *Systemid;
1.22 daniel 2021:
1.40 daniel 2022: if ((CUR == '<') && (NXT(1) == '!') &&
2023: (NXT(2) == 'N') && (NXT(3) == 'O') &&
2024: (NXT(4) == 'T') && (NXT(5) == 'A') &&
2025: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1.67 daniel 2026: (NXT(8) == 'O') && (NXT(9) == 'N')) {
1.40 daniel 2027: SKIP(10);
1.67 daniel 2028: if (!IS_BLANK(CUR)) {
2029: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2030: ctxt->sax->error(ctxt, "Space required after '<!NOTATION'\n");
2031: ctxt->wellFormed = 0;
2032: return;
2033: }
2034: SKIP_BLANKS;
1.22 daniel 2035:
2036: name = xmlParseName(ctxt);
2037: if (name == NULL) {
1.55 daniel 2038: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.67 daniel 2039: ctxt->sax->error(ctxt, "NOTATION: Name expected here\n");
2040: ctxt->wellFormed = 0;
2041: return;
2042: }
2043: if (!IS_BLANK(CUR)) {
2044: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2045: ctxt->sax->error(ctxt,
2046: "Space required after the NOTATION name'\n");
1.59 daniel 2047: ctxt->wellFormed = 0;
1.22 daniel 2048: return;
2049: }
1.42 daniel 2050: SKIP_BLANKS;
1.67 daniel 2051:
1.22 daniel 2052: /*
1.67 daniel 2053: * Parse the IDs.
1.22 daniel 2054: */
1.67 daniel 2055: Systemid = xmlParseExternalID(ctxt, &Pubid, 1);
2056: SKIP_BLANKS;
2057:
2058: if (CUR == '>') {
1.40 daniel 2059: NEXT;
1.67 daniel 2060: xmlAddNotationDecl(ctxt->doc->intSubset, name, Pubid, Systemid);
2061: } else {
2062: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2063: ctxt->sax->error(ctxt,
2064: "'>' required to close NOTATION declaration\n");
2065: ctxt->wellFormed = 0;
2066: }
1.22 daniel 2067: free(name);
1.67 daniel 2068: if (Systemid != NULL) free(Systemid);
2069: if (Pubid != NULL) free(Pubid);
1.22 daniel 2070: }
2071: }
2072:
1.50 daniel 2073: /**
2074: * xmlParseEntityDecl:
2075: * @ctxt: an XML parser context
2076: *
2077: * parse <!ENTITY declarations
1.22 daniel 2078: *
2079: * [70] EntityDecl ::= GEDecl | PEDecl
2080: *
2081: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
2082: *
2083: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
2084: *
2085: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
2086: *
2087: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 2088: *
2089: * [76] NDataDecl ::= S 'NDATA' S Name
1.22 daniel 2090: */
2091:
1.55 daniel 2092: void
2093: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.39 daniel 2094: CHAR *name = NULL;
1.24 daniel 2095: CHAR *value = NULL;
1.39 daniel 2096: CHAR *URI = NULL, *literal = NULL;
1.24 daniel 2097: CHAR *ndata = NULL;
1.39 daniel 2098: int isParameter = 0;
1.22 daniel 2099:
1.40 daniel 2100: if ((CUR == '<') && (NXT(1) == '!') &&
2101: (NXT(2) == 'E') && (NXT(3) == 'N') &&
2102: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1.59 daniel 2103: (NXT(6) == 'T') && (NXT(7) == 'Y')) {
1.40 daniel 2104: SKIP(8);
1.59 daniel 2105: if (!IS_BLANK(CUR)) {
2106: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2107: ctxt->sax->error(ctxt, "Space required after '<!ENTITY'\n");
2108: ctxt->wellFormed = 0;
2109: }
2110: SKIP_BLANKS;
1.40 daniel 2111:
2112: if (CUR == '%') {
2113: NEXT;
1.59 daniel 2114: if (!IS_BLANK(CUR)) {
2115: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2116: ctxt->sax->error(ctxt, "Space required after '%'\n");
2117: ctxt->wellFormed = 0;
2118: }
1.42 daniel 2119: SKIP_BLANKS;
1.39 daniel 2120: isParameter = 1;
1.22 daniel 2121: }
2122:
2123: name = xmlParseName(ctxt);
1.24 daniel 2124: if (name == NULL) {
1.55 daniel 2125: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2126: ctxt->sax->error(ctxt, "xmlParseEntityDecl: no name\n");
1.59 daniel 2127: ctxt->wellFormed = 0;
1.24 daniel 2128: return;
2129: }
1.59 daniel 2130: if (!IS_BLANK(CUR)) {
2131: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2132: ctxt->sax->error(ctxt,
2133: "Space required after the entity name\n");
2134: ctxt->wellFormed = 0;
2135: }
1.42 daniel 2136: SKIP_BLANKS;
1.24 daniel 2137:
1.22 daniel 2138: /*
1.68 daniel 2139: * handle the various case of definitions...
1.22 daniel 2140: */
1.39 daniel 2141: if (isParameter) {
1.40 daniel 2142: if ((CUR == '"') || (CUR == '\''))
1.24 daniel 2143: value = xmlParseEntityValue(ctxt);
1.39 daniel 2144: if (value) {
2145: xmlAddDocEntity(ctxt->doc, name,
2146: XML_INTERNAL_PARAMETER_ENTITY,
2147: NULL, NULL, value);
2148: }
1.24 daniel 2149: else {
1.67 daniel 2150: URI = xmlParseExternalID(ctxt, &literal, 1);
1.39 daniel 2151: if (URI) {
2152: xmlAddDocEntity(ctxt->doc, name,
2153: XML_EXTERNAL_PARAMETER_ENTITY,
2154: literal, URI, NULL);
2155: }
1.24 daniel 2156: }
2157: } else {
1.40 daniel 2158: if ((CUR == '"') || (CUR == '\'')) {
1.24 daniel 2159: value = xmlParseEntityValue(ctxt);
1.39 daniel 2160: xmlAddDocEntity(ctxt->doc, name,
2161: XML_INTERNAL_GENERAL_ENTITY,
2162: NULL, NULL, value);
2163: } else {
1.67 daniel 2164: URI = xmlParseExternalID(ctxt, &literal, 1);
1.59 daniel 2165: if ((CUR != '>') && (!IS_BLANK(CUR))) {
2166: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2167: ctxt->sax->error(ctxt,
2168: "Space required before 'NDATA'\n");
2169: ctxt->wellFormed = 0;
2170: }
1.42 daniel 2171: SKIP_BLANKS;
1.40 daniel 2172: if ((CUR == 'N') && (NXT(1) == 'D') &&
2173: (NXT(2) == 'A') && (NXT(3) == 'T') &&
2174: (NXT(4) == 'A')) {
2175: SKIP(5);
1.59 daniel 2176: if (!IS_BLANK(CUR)) {
2177: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2178: ctxt->sax->error(ctxt,
2179: "Space required after 'NDATA'\n");
2180: ctxt->wellFormed = 0;
2181: }
1.42 daniel 2182: SKIP_BLANKS;
1.24 daniel 2183: ndata = xmlParseName(ctxt);
1.39 daniel 2184: xmlAddDocEntity(ctxt->doc, name,
2185: XML_EXTERNAL_GENERAL_UNPARSED_ENTITY,
2186: literal, URI, ndata);
2187: } else {
2188: xmlAddDocEntity(ctxt->doc, name,
2189: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
2190: literal, URI, NULL);
1.24 daniel 2191: }
2192: }
2193: }
1.42 daniel 2194: SKIP_BLANKS;
1.40 daniel 2195: if (CUR != '>') {
1.55 daniel 2196: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2197: ctxt->sax->error(ctxt,
1.31 daniel 2198: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.59 daniel 2199: ctxt->wellFormed = 0;
1.24 daniel 2200: } else
1.40 daniel 2201: NEXT;
1.39 daniel 2202: if (name != NULL) free(name);
2203: if (value != NULL) free(value);
2204: if (URI != NULL) free(URI);
2205: if (literal != NULL) free(literal);
2206: if (ndata != NULL) free(ndata);
1.22 daniel 2207: }
2208: }
2209:
1.50 daniel 2210: /**
1.59 daniel 2211: * xmlParseDefaultDecl:
2212: * @ctxt: an XML parser context
2213: * @value: Receive a possible fixed default value for the attribute
2214: *
2215: * Parse an attribute default declaration
2216: *
2217: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
2218: *
2219: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
2220: * or XML_ATTRIBUTE_FIXED.
2221: */
2222:
2223: int
2224: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, CHAR **value) {
2225: int val;
2226: CHAR *ret;
2227:
2228: *value = NULL;
2229: if ((CUR == '#') && (NXT(1) == 'R') &&
2230: (NXT(2) == 'E') && (NXT(3) == 'Q') &&
2231: (NXT(4) == 'U') && (NXT(5) == 'I') &&
2232: (NXT(6) == 'R') && (NXT(7) == 'E') &&
2233: (NXT(8) == 'D')) {
2234: SKIP(9);
2235: return(XML_ATTRIBUTE_REQUIRED);
2236: }
2237: if ((CUR == '#') && (NXT(1) == 'I') &&
2238: (NXT(2) == 'M') && (NXT(3) == 'P') &&
2239: (NXT(4) == 'L') && (NXT(5) == 'I') &&
2240: (NXT(6) == 'E') && (NXT(7) == 'D')) {
2241: SKIP(8);
2242: return(XML_ATTRIBUTE_IMPLIED);
2243: }
2244: val = XML_ATTRIBUTE_NONE;
2245: if ((CUR == '#') && (NXT(1) == 'F') &&
2246: (NXT(2) == 'I') && (NXT(3) == 'X') &&
2247: (NXT(4) == 'E') && (NXT(5) == 'D')) {
2248: SKIP(6);
2249: val = XML_ATTRIBUTE_FIXED;
2250: if (!IS_BLANK(CUR)) {
2251: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2252: ctxt->sax->error(ctxt, "Space required after '#FIXED'\n");
2253: ctxt->wellFormed = 0;
2254: }
2255: SKIP_BLANKS;
2256: }
2257: ret = xmlParseAttValue(ctxt);
2258: if (ret == NULL) {
2259: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2260: ctxt->sax->error(ctxt,
2261: "Attribute default value declaration error\n");
2262: ctxt->wellFormed = 0;
2263: } else
2264: *value = ret;
2265: return(val);
2266: }
2267:
2268: /**
1.66 daniel 2269: * xmlParseNotationType:
2270: * @ctxt: an XML parser context
2271: *
2272: * parse an Notation attribute type.
2273: *
2274: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
2275: *
2276: * Note: the leading 'NOTATION' S part has already being parsed...
2277: *
2278: * Returns: the notation attribute tree built while parsing
2279: */
2280:
2281: xmlEnumerationPtr
2282: xmlParseNotationType(xmlParserCtxtPtr ctxt) {
2283: CHAR *name;
2284: xmlEnumerationPtr ret = NULL, last = NULL, cur;
2285:
2286: if (CUR != '(') {
2287: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2288: ctxt->sax->error(ctxt, "'(' required to start 'NOTATION'\n");
2289: ctxt->wellFormed = 0;
2290: return(NULL);
2291: }
2292: do {
2293: NEXT;
2294: SKIP_BLANKS;
2295: name = xmlParseName(ctxt);
2296: if (name == NULL) {
2297: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2298: ctxt->sax->error(ctxt,
2299: "Name expected in NOTATION declaration\n");
2300: ctxt->wellFormed = 0;
2301: return(ret);
2302: }
2303: cur = xmlCreateEnumeration(name);
1.67 daniel 2304: free(name);
1.66 daniel 2305: if (cur == NULL) return(ret);
2306: if (last == NULL) ret = last = cur;
2307: else {
2308: last->next = cur;
2309: last = cur;
2310: }
2311: SKIP_BLANKS;
2312: } while (CUR == '|');
2313: if (CUR != ')') {
2314: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2315: ctxt->sax->error(ctxt,
2316: "')' required to finish NOTATION declaration\n");
2317: ctxt->wellFormed = 0;
2318: return(ret);
2319: }
2320: NEXT;
2321: return(ret);
2322: }
2323:
2324: /**
2325: * xmlParseEnumerationType:
2326: * @ctxt: an XML parser context
2327: *
2328: * parse an Enumeration attribute type.
2329: *
2330: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
2331: *
2332: * Returns: the enumeration attribute tree built while parsing
2333: */
2334:
2335: xmlEnumerationPtr
2336: xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
2337: CHAR *name;
2338: xmlEnumerationPtr ret = NULL, last = NULL, cur;
2339:
2340: if (CUR != '(') {
2341: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2342: ctxt->sax->error(ctxt,
2343: "'(' required to start ATTLIST enumeration\n");
2344: ctxt->wellFormed = 0;
2345: return(NULL);
2346: }
2347: do {
2348: NEXT;
2349: SKIP_BLANKS;
2350: name = xmlParseNmtoken(ctxt);
2351: if (name == NULL) {
2352: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2353: ctxt->sax->error(ctxt,
2354: "NmToken expected in ATTLIST enumeration\n");
2355: ctxt->wellFormed = 0;
2356: return(ret);
2357: }
2358: cur = xmlCreateEnumeration(name);
1.67 daniel 2359: free(name);
1.66 daniel 2360: if (cur == NULL) return(ret);
2361: if (last == NULL) ret = last = cur;
2362: else {
2363: last->next = cur;
2364: last = cur;
2365: }
2366: SKIP_BLANKS;
2367: } while (CUR == '|');
2368: if (CUR != ')') {
2369: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2370: ctxt->sax->error(ctxt,
2371: "')' required to finish ATTLIST enumeration\n");
2372: ctxt->wellFormed = 0;
2373: return(ret);
2374: }
2375: NEXT;
2376: return(ret);
2377: }
2378:
2379: /**
1.50 daniel 2380: * xmlParseEnumeratedType:
2381: * @ctxt: an XML parser context
1.66 daniel 2382: * @tree: the enumeration tree built while parsing
1.50 daniel 2383: *
1.66 daniel 2384: * parse an Enumerated attribute type.
1.22 daniel 2385: *
2386: * [57] EnumeratedType ::= NotationType | Enumeration
2387: *
2388: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
2389: *
1.50 daniel 2390: *
1.66 daniel 2391: * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
1.22 daniel 2392: */
2393:
1.66 daniel 2394: int
2395: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
2396: if ((CUR == 'N') && (NXT(1) == 'O') &&
2397: (NXT(2) == 'T') && (NXT(3) == 'A') &&
2398: (NXT(4) == 'T') && (NXT(5) == 'I') &&
2399: (NXT(6) == 'O') && (NXT(7) == 'N')) {
2400: SKIP(8);
2401: if (!IS_BLANK(CUR)) {
2402: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2403: ctxt->sax->error(ctxt, "Space required after 'NOTATION'\n");
2404: ctxt->wellFormed = 0;
2405: return(0);
2406: }
2407: SKIP_BLANKS;
2408: *tree = xmlParseNotationType(ctxt);
2409: if (*tree == NULL) return(0);
2410: return(XML_ATTRIBUTE_NOTATION);
2411: }
2412: *tree = xmlParseEnumerationType(ctxt);
2413: if (*tree == NULL) return(0);
2414: return(XML_ATTRIBUTE_ENUMERATION);
1.22 daniel 2415: }
2416:
1.50 daniel 2417: /**
2418: * xmlParseAttributeType:
2419: * @ctxt: an XML parser context
1.66 daniel 2420: * @tree: the enumeration tree built while parsing
1.50 daniel 2421: *
1.59 daniel 2422: * parse the Attribute list def for an element
1.22 daniel 2423: *
2424: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
2425: *
2426: * [55] StringType ::= 'CDATA'
2427: *
2428: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
2429: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1.50 daniel 2430: *
1.69 daniel 2431: * Returns the attribute type
1.22 daniel 2432: */
1.59 daniel 2433: int
1.66 daniel 2434: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.40 daniel 2435: if ((CUR == 'C') && (NXT(1) == 'D') &&
2436: (NXT(2) == 'A') && (NXT(3) == 'T') &&
2437: (NXT(4) == 'A')) {
2438: SKIP(5);
1.66 daniel 2439: return(XML_ATTRIBUTE_CDATA);
1.40 daniel 2440: } else if ((CUR == 'I') && (NXT(1) == 'D') &&
2441: (NXT(2) == 'R') && (NXT(3) == 'E') &&
2442: (NXT(4) == 'F')) {
2443: SKIP(5);
1.59 daniel 2444: return(XML_ATTRIBUTE_IDREF);
1.66 daniel 2445: } else if ((CUR == 'I') && (NXT(1) == 'D')) {
2446: SKIP(2);
2447: return(XML_ATTRIBUTE_ID);
1.40 daniel 2448: } else if ((CUR == 'I') && (NXT(1) == 'D') &&
2449: (NXT(2) == 'R') && (NXT(3) == 'E') &&
2450: (NXT(4) == 'F') && (NXT(5) == 'S')) {
2451: SKIP(6);
1.59 daniel 2452: return(XML_ATTRIBUTE_IDREFS);
1.40 daniel 2453: } else if ((CUR == 'E') && (NXT(1) == 'N') &&
2454: (NXT(2) == 'T') && (NXT(3) == 'I') &&
2455: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
2456: SKIP(6);
1.59 daniel 2457: return(XML_ATTRIBUTE_ENTITY);
1.40 daniel 2458: } else if ((CUR == 'E') && (NXT(1) == 'N') &&
2459: (NXT(2) == 'T') && (NXT(3) == 'I') &&
2460: (NXT(4) == 'T') && (NXT(5) == 'I') &&
2461: (NXT(6) == 'E') && (NXT(7) == 'S')) {
2462: SKIP(8);
1.59 daniel 2463: return(XML_ATTRIBUTE_ENTITIES);
1.40 daniel 2464: } else if ((CUR == 'N') && (NXT(1) == 'M') &&
2465: (NXT(2) == 'T') && (NXT(3) == 'O') &&
2466: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.66 daniel 2467: (NXT(6) == 'N') && (NXT(7) == 'S')) {
2468: SKIP(8);
2469: return(XML_ATTRIBUTE_NMTOKENS);
2470: } else if ((CUR == 'N') && (NXT(1) == 'M') &&
2471: (NXT(2) == 'T') && (NXT(3) == 'O') &&
2472: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.40 daniel 2473: (NXT(6) == 'N')) {
2474: SKIP(7);
1.59 daniel 2475: return(XML_ATTRIBUTE_NMTOKEN);
1.22 daniel 2476: }
1.66 daniel 2477: return(xmlParseEnumeratedType(ctxt, tree));
1.22 daniel 2478: }
2479:
1.50 daniel 2480: /**
2481: * xmlParseAttributeListDecl:
2482: * @ctxt: an XML parser context
2483: *
2484: * : parse the Attribute list def for an element
1.22 daniel 2485: *
2486: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
2487: *
2488: * [53] AttDef ::= S Name S AttType S DefaultDecl
1.50 daniel 2489: *
1.22 daniel 2490: */
1.55 daniel 2491: void
2492: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1.59 daniel 2493: CHAR *elemName;
2494: CHAR *attrName;
1.66 daniel 2495: xmlEnumerationPtr tree = NULL;
1.22 daniel 2496:
1.40 daniel 2497: if ((CUR == '<') && (NXT(1) == '!') &&
2498: (NXT(2) == 'A') && (NXT(3) == 'T') &&
2499: (NXT(4) == 'T') && (NXT(5) == 'L') &&
2500: (NXT(6) == 'I') && (NXT(7) == 'S') &&
1.59 daniel 2501: (NXT(8) == 'T')) {
1.40 daniel 2502: SKIP(9);
1.59 daniel 2503: if (!IS_BLANK(CUR)) {
2504: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2505: ctxt->sax->error(ctxt, "Space required after '<!ATTLIST'\n");
2506: ctxt->wellFormed = 0;
2507: }
1.42 daniel 2508: SKIP_BLANKS;
1.59 daniel 2509: elemName = xmlParseName(ctxt);
2510: if (elemName == NULL) {
1.55 daniel 2511: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.59 daniel 2512: ctxt->sax->error(ctxt, "ATTLIST: no name for Element\n");
2513: ctxt->wellFormed = 0;
1.22 daniel 2514: return;
2515: }
1.42 daniel 2516: SKIP_BLANKS;
1.40 daniel 2517: while (CUR != '>') {
2518: const CHAR *check = CUR_PTR;
1.59 daniel 2519: int type;
2520: int def;
2521: CHAR *defaultValue = NULL;
2522:
2523: attrName = xmlParseName(ctxt);
2524: if (attrName == NULL) {
2525: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2526: ctxt->sax->error(ctxt, "ATTLIST: no name for Attribute\n");
2527: ctxt->wellFormed = 0;
2528: break;
2529: }
2530: if (!IS_BLANK(CUR)) {
2531: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2532: ctxt->sax->error(ctxt,
2533: "Space required after the attribute name\n");
2534: ctxt->wellFormed = 0;
2535: break;
2536: }
2537: SKIP_BLANKS;
2538:
1.66 daniel 2539: type = xmlParseAttributeType(ctxt, &tree);
1.59 daniel 2540: if (type <= 0) break;
1.22 daniel 2541:
1.59 daniel 2542: if (!IS_BLANK(CUR)) {
2543: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2544: ctxt->sax->error(ctxt,
2545: "Space required after the attribute type\n");
2546: ctxt->wellFormed = 0;
2547: break;
2548: }
1.42 daniel 2549: SKIP_BLANKS;
1.59 daniel 2550:
2551: def = xmlParseDefaultDecl(ctxt, &defaultValue);
2552: if (def <= 0) break;
2553:
2554: if (CUR != '>') {
2555: if (!IS_BLANK(CUR)) {
2556: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2557: ctxt->sax->error(ctxt,
2558: "Space required after the attribute default value\n");
2559: ctxt->wellFormed = 0;
2560: break;
2561: }
2562: SKIP_BLANKS;
2563: }
1.40 daniel 2564: if (check == CUR_PTR) {
1.55 daniel 2565: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2566: ctxt->sax->error(ctxt,
1.59 daniel 2567: "xmlParseAttributeListDecl: detected internal error\n");
1.22 daniel 2568: break;
2569: }
1.66 daniel 2570: xmlAddAttributeDecl(ctxt->doc->intSubset, elemName, attrName,
2571: type, def, defaultValue, tree);
1.59 daniel 2572: if (attrName != NULL)
2573: free(attrName);
2574: if (defaultValue != NULL)
2575: free(defaultValue);
1.22 daniel 2576: }
1.40 daniel 2577: if (CUR == '>')
2578: NEXT;
1.22 daniel 2579:
1.59 daniel 2580: free(elemName);
1.22 daniel 2581: }
2582: }
2583:
1.50 daniel 2584: /**
1.61 daniel 2585: * xmlParseElementMixedContentDecl:
2586: * @ctxt: an XML parser context
2587: *
2588: * parse the declaration for a Mixed Element content
2589: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
2590: *
2591: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
2592: * '(' S? '#PCDATA' S? ')'
2593: *
2594: * returns: the list of the xmlElementContentPtr describing the element choices
2595: */
2596: xmlElementContentPtr
1.62 daniel 2597: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
1.64 daniel 2598: xmlElementContentPtr ret = NULL, cur = NULL, n;
1.61 daniel 2599: CHAR *elem = NULL;
2600:
2601: if ((CUR == '#') && (NXT(1) == 'P') &&
2602: (NXT(2) == 'C') && (NXT(3) == 'D') &&
2603: (NXT(4) == 'A') && (NXT(5) == 'T') &&
2604: (NXT(6) == 'A')) {
2605: SKIP(7);
2606: SKIP_BLANKS;
1.63 daniel 2607: if (CUR == ')') {
2608: NEXT;
2609: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
2610: return(ret);
2611: }
1.61 daniel 2612: if ((CUR == '(') || (CUR == '|')) {
2613: ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
2614: if (ret == NULL) return(NULL);
1.63 daniel 2615: } /********** else {
1.61 daniel 2616: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2617: ctxt->sax->error(ctxt,
2618: "xmlParseElementMixedContentDecl : '|' or ')' expected\n");
2619: ctxt->wellFormed = 0;
2620: return(NULL);
1.63 daniel 2621: } **********/
1.61 daniel 2622: while (CUR == '|') {
1.64 daniel 2623: NEXT;
1.61 daniel 2624: if (elem == NULL) {
2625: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
2626: if (ret == NULL) return(NULL);
2627: ret->c1 = cur;
1.64 daniel 2628: cur = ret;
1.61 daniel 2629: } else {
1.64 daniel 2630: n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
2631: if (n == NULL) return(NULL);
2632: n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
2633: cur->c2 = n;
2634: cur = n;
1.66 daniel 2635: free(elem);
1.61 daniel 2636: }
2637: SKIP_BLANKS;
2638: elem = xmlParseName(ctxt);
2639: if (elem == NULL) {
2640: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2641: ctxt->sax->error(ctxt,
2642: "xmlParseElementMixedContentDecl : Name expected\n");
2643: ctxt->wellFormed = 0;
2644: xmlFreeElementContent(cur);
2645: return(NULL);
2646: }
2647: SKIP_BLANKS;
2648: }
1.63 daniel 2649: if ((CUR == ')') && (NXT(1) == '*')) {
1.66 daniel 2650: if (elem != NULL) {
1.61 daniel 2651: cur->c2 = xmlNewElementContent(elem,
2652: XML_ELEMENT_CONTENT_ELEMENT);
1.66 daniel 2653: free(elem);
2654: }
1.65 daniel 2655: ret->ocur = XML_ELEMENT_CONTENT_MULT;
1.64 daniel 2656: SKIP(2);
1.61 daniel 2657: } else {
1.66 daniel 2658: if (elem != NULL) free(elem);
1.61 daniel 2659: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2660: ctxt->sax->error(ctxt,
1.63 daniel 2661: "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
1.61 daniel 2662: ctxt->wellFormed = 0;
2663: xmlFreeElementContent(ret);
2664: return(NULL);
2665: }
2666:
2667: } else {
2668: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2669: ctxt->sax->error(ctxt,
2670: "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
2671: ctxt->wellFormed = 0;
2672: }
2673: return(ret);
2674: }
2675:
2676: /**
2677: * xmlParseElementChildrenContentDecl:
1.50 daniel 2678: * @ctxt: an XML parser context
2679: *
1.61 daniel 2680: * parse the declaration for a Mixed Element content
2681: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
1.22 daniel 2682: *
1.61 daniel 2683: *
1.22 daniel 2684: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
2685: *
2686: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
2687: *
2688: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
2689: *
2690: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
2691: *
1.62 daniel 2692: * returns: the tree of xmlElementContentPtr describing the element
1.61 daniel 2693: * hierarchy.
2694: */
2695: xmlElementContentPtr
1.62 daniel 2696: xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
1.63 daniel 2697: xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
1.62 daniel 2698: CHAR *elem;
2699: CHAR type = 0;
2700:
2701: SKIP_BLANKS;
2702: if (CUR == '(') {
1.63 daniel 2703: /* Recurse on first child */
1.62 daniel 2704: NEXT;
2705: SKIP_BLANKS;
2706: cur = ret = xmlParseElementChildrenContentDecl(ctxt);
2707: SKIP_BLANKS;
2708: } else {
2709: elem = xmlParseName(ctxt);
2710: if (elem == NULL) {
2711: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2712: ctxt->sax->error(ctxt,
2713: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
2714: ctxt->wellFormed = 0;
2715: return(NULL);
2716: }
2717: cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
2718: if (CUR == '?') {
2719: ret->ocur = XML_ELEMENT_CONTENT_OPT;
2720: NEXT;
2721: } else if (CUR == '*') {
2722: ret->ocur = XML_ELEMENT_CONTENT_MULT;
2723: NEXT;
2724: } else if (CUR == '+') {
2725: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
2726: NEXT;
2727: } else {
2728: ret->ocur = XML_ELEMENT_CONTENT_ONCE;
2729: }
1.66 daniel 2730: free(elem);
1.62 daniel 2731: }
2732: SKIP_BLANKS;
2733: while (CUR != ')') {
1.63 daniel 2734: /*
2735: * Each loop we parse one separator and one element.
2736: */
1.62 daniel 2737: if (CUR == ',') {
2738: if (type == 0) type = CUR;
2739:
2740: /*
2741: * Detect "Name | Name , Name" error
2742: */
2743: else if (type != CUR) {
2744: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2745: ctxt->sax->error(ctxt,
2746: "xmlParseElementChildrenContentDecl : '%c' expected\n",
2747: type);
2748: ctxt->wellFormed = 0;
2749: xmlFreeElementContent(ret);
2750: return(NULL);
2751: }
1.64 daniel 2752: NEXT;
1.62 daniel 2753:
1.63 daniel 2754: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
2755: if (op == NULL) {
2756: xmlFreeElementContent(ret);
2757: return(NULL);
2758: }
2759: if (last == NULL) {
2760: op->c1 = ret;
1.65 daniel 2761: ret = cur = op;
1.63 daniel 2762: } else {
2763: cur->c2 = op;
2764: op->c1 = last;
2765: cur =op;
1.65 daniel 2766: last = NULL;
1.63 daniel 2767: }
1.62 daniel 2768: } else if (CUR == '|') {
2769: if (type == 0) type = CUR;
2770:
2771: /*
1.63 daniel 2772: * Detect "Name , Name | Name" error
1.62 daniel 2773: */
2774: else if (type != CUR) {
2775: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2776: ctxt->sax->error(ctxt,
2777: "xmlParseElementChildrenContentDecl : '%c' expected\n",
2778: type);
2779: ctxt->wellFormed = 0;
2780: xmlFreeElementContent(ret);
2781: return(NULL);
2782: }
1.64 daniel 2783: NEXT;
1.62 daniel 2784:
1.63 daniel 2785: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
2786: if (op == NULL) {
2787: xmlFreeElementContent(ret);
2788: return(NULL);
2789: }
2790: if (last == NULL) {
2791: op->c1 = ret;
1.65 daniel 2792: ret = cur = op;
1.63 daniel 2793: } else {
2794: cur->c2 = op;
2795: op->c1 = last;
2796: cur =op;
1.65 daniel 2797: last = NULL;
1.63 daniel 2798: }
1.62 daniel 2799: } else {
2800: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2801: ctxt->sax->error(ctxt,
2802: "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
2803: ctxt->wellFormed = 0;
2804: xmlFreeElementContent(ret);
2805: return(NULL);
2806: }
2807: SKIP_BLANKS;
2808: if (CUR == '(') {
1.63 daniel 2809: /* Recurse on second child */
1.62 daniel 2810: NEXT;
2811: SKIP_BLANKS;
1.65 daniel 2812: last = xmlParseElementChildrenContentDecl(ctxt);
1.62 daniel 2813: SKIP_BLANKS;
2814: } else {
2815: elem = xmlParseName(ctxt);
2816: if (elem == NULL) {
2817: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2818: ctxt->sax->error(ctxt,
2819: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
2820: ctxt->wellFormed = 0;
2821: return(NULL);
2822: }
1.65 daniel 2823: last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.66 daniel 2824: free(elem);
1.62 daniel 2825: }
1.63 daniel 2826: if (CUR == '?') {
2827: ret->ocur = XML_ELEMENT_CONTENT_OPT;
2828: NEXT;
2829: } else if (CUR == '*') {
2830: ret->ocur = XML_ELEMENT_CONTENT_MULT;
2831: NEXT;
2832: } else if (CUR == '+') {
2833: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
2834: NEXT;
2835: } else {
2836: ret->ocur = XML_ELEMENT_CONTENT_ONCE;
2837: }
2838: SKIP_BLANKS;
1.64 daniel 2839: }
1.65 daniel 2840: if ((cur != NULL) && (last != NULL)) {
2841: cur->c2 = last;
1.62 daniel 2842: }
2843: NEXT;
2844: if (CUR == '?') {
2845: ret->ocur = XML_ELEMENT_CONTENT_OPT;
2846: NEXT;
2847: } else if (CUR == '*') {
2848: ret->ocur = XML_ELEMENT_CONTENT_MULT;
2849: NEXT;
2850: } else if (CUR == '+') {
2851: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
2852: NEXT;
2853: } else {
2854: ret->ocur = XML_ELEMENT_CONTENT_ONCE;
2855: }
2856: return(ret);
1.61 daniel 2857: }
2858:
2859: /**
2860: * xmlParseElementContentDecl:
2861: * @ctxt: an XML parser context
2862: * @name: the name of the element being defined.
2863: * @result: the Element Content pointer will be stored here if any
1.22 daniel 2864: *
1.61 daniel 2865: * parse the declaration for an Element content either Mixed or Children,
2866: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
2867: *
2868: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1.50 daniel 2869: *
1.61 daniel 2870: * returns: the type of element content XML_ELEMENT_TYPE_xxx
1.22 daniel 2871: */
2872:
1.61 daniel 2873: int
2874: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, CHAR *name,
2875: xmlElementContentPtr *result) {
2876:
2877: xmlElementContentPtr tree = NULL;
2878: int res;
2879:
2880: *result = NULL;
2881:
2882: if (CUR != '(') {
2883: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2884: ctxt->sax->error(ctxt,
2885: "xmlParseElementContentDecl : '(' expected\n");
2886: ctxt->wellFormed = 0;
2887: return(-1);
2888: }
2889: NEXT;
2890: SKIP_BLANKS;
2891: if ((CUR == '#') && (NXT(1) == 'P') &&
2892: (NXT(2) == 'C') && (NXT(3) == 'D') &&
2893: (NXT(4) == 'A') && (NXT(5) == 'T') &&
2894: (NXT(6) == 'A')) {
1.62 daniel 2895: tree = xmlParseElementMixedContentDecl(ctxt);
1.61 daniel 2896: res = XML_ELEMENT_TYPE_MIXED;
2897: } else {
1.62 daniel 2898: tree = xmlParseElementChildrenContentDecl(ctxt);
1.61 daniel 2899: res = XML_ELEMENT_TYPE_ELEMENT;
2900: }
2901: SKIP_BLANKS;
1.63 daniel 2902: /****************************
1.61 daniel 2903: if (CUR != ')') {
2904: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2905: ctxt->sax->error(ctxt,
2906: "xmlParseElementContentDecl : ')' expected\n");
2907: ctxt->wellFormed = 0;
2908: return(-1);
2909: }
1.63 daniel 2910: ****************************/
2911: *result = tree;
1.61 daniel 2912: return(res);
1.22 daniel 2913: }
2914:
1.50 daniel 2915: /**
2916: * xmlParseElementDecl:
2917: * @ctxt: an XML parser context
2918: *
2919: * parse an Element declaration.
1.22 daniel 2920: *
2921: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
2922: *
2923: * TODO There is a check [ VC: Unique Element Type Declaration ]
1.69 daniel 2924: *
2925: * Returns the type of the element, or -1 in case of error
1.22 daniel 2926: */
1.59 daniel 2927: int
1.55 daniel 2928: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1.22 daniel 2929: CHAR *name;
1.59 daniel 2930: int ret = -1;
1.61 daniel 2931: xmlElementContentPtr content = NULL;
1.22 daniel 2932:
1.40 daniel 2933: if ((CUR == '<') && (NXT(1) == '!') &&
2934: (NXT(2) == 'E') && (NXT(3) == 'L') &&
2935: (NXT(4) == 'E') && (NXT(5) == 'M') &&
2936: (NXT(6) == 'E') && (NXT(7) == 'N') &&
1.59 daniel 2937: (NXT(8) == 'T')) {
1.40 daniel 2938: SKIP(9);
1.59 daniel 2939: if (!IS_BLANK(CUR)) {
2940: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2941: ctxt->sax->error(ctxt,
2942: "Space required after 'ELEMENT'\n");
2943: ctxt->wellFormed = 0;
2944: }
1.42 daniel 2945: SKIP_BLANKS;
1.22 daniel 2946: name = xmlParseName(ctxt);
2947: if (name == NULL) {
1.55 daniel 2948: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.59 daniel 2949: ctxt->sax->error(ctxt,
2950: "xmlParseElementDecl: no name for Element\n");
2951: ctxt->wellFormed = 0;
2952: return(-1);
2953: }
2954: if (!IS_BLANK(CUR)) {
2955: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2956: ctxt->sax->error(ctxt,
2957: "Space required after the element name\n");
2958: ctxt->wellFormed = 0;
1.22 daniel 2959: }
1.42 daniel 2960: SKIP_BLANKS;
1.40 daniel 2961: if ((CUR == 'E') && (NXT(1) == 'M') &&
2962: (NXT(2) == 'P') && (NXT(3) == 'T') &&
2963: (NXT(4) == 'Y')) {
2964: SKIP(5);
1.22 daniel 2965: /*
2966: * Element must always be empty.
2967: */
1.59 daniel 2968: ret = XML_ELEMENT_TYPE_EMPTY;
1.40 daniel 2969: } else if ((CUR == 'A') && (NXT(1) == 'N') &&
2970: (NXT(2) == 'Y')) {
2971: SKIP(3);
1.22 daniel 2972: /*
2973: * Element is a generic container.
2974: */
1.59 daniel 2975: ret = XML_ELEMENT_TYPE_ANY;
1.61 daniel 2976: } else if (CUR == '(') {
2977: ret = xmlParseElementContentDecl(ctxt, name, &content);
1.22 daniel 2978: } else {
1.61 daniel 2979: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2980: ctxt->sax->error(ctxt,
2981: "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
2982: ctxt->wellFormed = 0;
2983: if (name != NULL) free(name);
2984: return(-1);
1.22 daniel 2985: }
1.42 daniel 2986: SKIP_BLANKS;
1.40 daniel 2987: if (CUR != '>') {
1.55 daniel 2988: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2989: ctxt->sax->error(ctxt,
1.31 daniel 2990: "xmlParseElementDecl: expected '>' at the end\n");
1.59 daniel 2991: ctxt->wellFormed = 0;
1.61 daniel 2992: } else {
1.40 daniel 2993: NEXT;
1.61 daniel 2994: xmlAddElementDecl(ctxt->doc->intSubset, name, ret, content);
2995: }
2996: if (name != NULL) {
2997: free(name);
2998: }
1.22 daniel 2999: }
1.59 daniel 3000: return(ret);
1.22 daniel 3001: }
3002:
1.50 daniel 3003: /**
3004: * xmlParseMarkupDecl:
3005: * @ctxt: an XML parser context
3006: *
3007: * parse Markup declarations
1.22 daniel 3008: *
3009: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
3010: * NotationDecl | PI | Comment
3011: *
3012: * TODO There is a check [ VC: Proper Declaration/PE Nesting ]
3013: */
1.55 daniel 3014: void
3015: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1.22 daniel 3016: xmlParseElementDecl(ctxt);
3017: xmlParseAttributeListDecl(ctxt);
3018: xmlParseEntityDecl(ctxt);
3019: xmlParseNotationDecl(ctxt);
3020: xmlParsePI(ctxt);
1.31 daniel 3021: xmlParseComment(ctxt, 0);
1.22 daniel 3022: }
3023:
1.50 daniel 3024: /**
3025: * xmlParseCharRef:
3026: * @ctxt: an XML parser context
3027: *
3028: * parse Reference declarations
1.24 daniel 3029: *
3030: * [66] CharRef ::= '&#' [0-9]+ ';' |
3031: * '&#x' [0-9a-fA-F]+ ';'
1.68 daniel 3032: *
3033: * Returns the value parsed
1.24 daniel 3034: */
1.55 daniel 3035: CHAR *
3036: xmlParseCharRef(xmlParserCtxtPtr ctxt) {
1.29 daniel 3037: int val = 0;
1.44 daniel 3038: CHAR buf[2];
1.24 daniel 3039:
1.40 daniel 3040: if ((CUR == '&') && (NXT(1) == '#') &&
3041: (NXT(2) == 'x')) {
3042: SKIP(3);
3043: while (CUR != ';') {
3044: if ((CUR >= '0') && (CUR <= '9'))
3045: val = val * 16 + (CUR - '0');
3046: else if ((CUR >= 'a') && (CUR <= 'f'))
3047: val = val * 16 + (CUR - 'a') + 10;
3048: else if ((CUR >= 'A') && (CUR <= 'F'))
3049: val = val * 16 + (CUR - 'A') + 10;
1.24 daniel 3050: else {
1.55 daniel 3051: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.58 daniel 3052: ctxt->sax->error(ctxt,
1.59 daniel 3053: "xmlParseCharRef: invalid hexadecimal value\n");
3054: ctxt->wellFormed = 0;
1.29 daniel 3055: val = 0;
1.24 daniel 3056: break;
3057: }
1.47 daniel 3058: NEXT;
1.24 daniel 3059: }
1.55 daniel 3060: if (CUR == ';')
1.40 daniel 3061: NEXT;
3062: } else if ((CUR == '&') && (NXT(1) == '#')) {
3063: SKIP(2);
3064: while (CUR != ';') {
3065: if ((CUR >= '0') && (CUR <= '9'))
1.55 daniel 3066: val = val * 10 + (CUR - '0');
1.24 daniel 3067: else {
1.55 daniel 3068: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.58 daniel 3069: ctxt->sax->error(ctxt,
3070: "xmlParseCharRef: invalid decimal value\n");
1.59 daniel 3071: ctxt->wellFormed = 0;
1.29 daniel 3072: val = 0;
1.24 daniel 3073: break;
3074: }
1.47 daniel 3075: NEXT;
1.24 daniel 3076: }
1.55 daniel 3077: if (CUR == ';')
1.40 daniel 3078: NEXT;
1.24 daniel 3079: } else {
1.55 daniel 3080: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3081: ctxt->sax->error(ctxt, "xmlParseCharRef: invalid value\n");
1.59 daniel 3082: ctxt->wellFormed = 0;
1.24 daniel 3083: }
1.29 daniel 3084: /*
3085: * Check the value IS_CHAR ...
3086: */
1.44 daniel 3087: if (IS_CHAR(val)) {
3088: buf[0] = (CHAR) val;
3089: buf[1] = 0;
1.50 daniel 3090: return(xmlStrndup(buf, 1));
1.44 daniel 3091: } else {
1.55 daniel 3092: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.58 daniel 3093: ctxt->sax->error(ctxt, "xmlParseCharRef: invalid CHAR value %d\n",
3094: val);
1.59 daniel 3095: ctxt->wellFormed = 0;
1.29 daniel 3096: }
1.46 daniel 3097: return(NULL);
1.24 daniel 3098: }
3099:
1.50 daniel 3100: /**
3101: * xmlParseEntityRef:
3102: * @ctxt: an XML parser context
3103: *
3104: * parse ENTITY references declarations
1.24 daniel 3105: *
3106: * [68] EntityRef ::= '&' Name ';'
1.68 daniel 3107: *
3108: * Returns the entity ref string or NULL if directly as input stream.
1.24 daniel 3109: */
1.55 daniel 3110: CHAR *
3111: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1.46 daniel 3112: CHAR *ret = NULL;
1.50 daniel 3113: const CHAR *q;
1.24 daniel 3114: CHAR *name;
1.59 daniel 3115: xmlEntityPtr ent;
1.50 daniel 3116: xmlParserInputPtr input = NULL;
1.24 daniel 3117:
1.50 daniel 3118: q = CUR_PTR;
1.40 daniel 3119: if (CUR == '&') {
3120: NEXT;
1.24 daniel 3121: name = xmlParseName(ctxt);
3122: if (name == NULL) {
1.55 daniel 3123: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3124: ctxt->sax->error(ctxt, "xmlParseEntityRef: no name\n");
1.59 daniel 3125: ctxt->wellFormed = 0;
1.24 daniel 3126: } else {
1.40 daniel 3127: if (CUR == ';') {
3128: NEXT;
1.24 daniel 3129: /*
1.59 daniel 3130: * Well Formedness Constraint if:
3131: * - standalone
3132: * or
3133: * - no external subset and no external parameter entities
3134: * referenced
3135: * then
3136: * the entity referenced must have been declared
3137: *
3138: * TODO: to be double checked !!!
3139: */
3140: ent = xmlGetDocEntity(ctxt->doc, name);
1.70 ! daniel 3141: if ((ctxt->doc->standalone == 1) ||
1.59 daniel 3142: ((ctxt->doc->intSubset == NULL) &&
3143: (ctxt->doc->extSubset == NULL))) {
3144: if (ent == NULL) {
3145: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3146: ctxt->sax->error(ctxt,
3147: "Entity '%s' not defined\n", name);
3148: ctxt->wellFormed = 0;
3149: }
3150: }
3151:
3152: /*
3153: * Well Formedness Constraint :
3154: * The referenced entity must be a parsed entity.
3155: */
3156: if (ent != NULL) {
3157: switch (ent->type) {
3158: case XML_INTERNAL_PARAMETER_ENTITY:
3159: case XML_EXTERNAL_PARAMETER_ENTITY:
3160: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3161: ctxt->sax->error(ctxt,
3162: "Attempt to reference the parameter entity '%s'\n", name);
3163: ctxt->wellFormed = 0;
3164: break;
3165:
3166: case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
3167: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3168: ctxt->sax->error(ctxt,
3169: "Attempt to reference unparsed entity '%s'\n", name);
3170: ctxt->wellFormed = 0;
3171: break;
3172: }
3173: }
3174:
3175: /*
3176: * Well Formedness Constraint :
3177: * The referenced entity must not lead to recursion !
3178: */
3179:
3180: /*
1.52 daniel 3181: * We parsed the entity reference correctly, call SAX
3182: * interface for the proper behaviour:
3183: * - get a new input stream
3184: * - or keep the reference inline
1.24 daniel 3185: */
1.52 daniel 3186: if (ctxt->sax)
3187: input = ctxt->sax->resolveEntity(ctxt, NULL, name);
3188: if (input != NULL)
3189: xmlPushInput(ctxt, input);
3190: else {
3191: ret = xmlStrndup(q, CUR_PTR - q);
3192: }
1.24 daniel 3193: } else {
1.46 daniel 3194: char cst[2] = { '&', 0 };
3195:
1.55 daniel 3196: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.59 daniel 3197: ctxt->sax->error(ctxt,
3198: "xmlParseEntityRef: expecting ';'\n");
3199: ctxt->wellFormed = 0;
1.46 daniel 3200: ret = xmlStrndup(cst, 1);
3201: ret = xmlStrcat(ret, name);
1.24 daniel 3202: }
1.45 daniel 3203: free(name);
1.24 daniel 3204: }
3205: }
1.46 daniel 3206: return(ret);
1.24 daniel 3207: }
3208:
1.50 daniel 3209: /**
3210: * xmlParseReference:
3211: * @ctxt: an XML parser context
3212: *
3213: * parse Reference declarations
1.24 daniel 3214: *
3215: * [67] Reference ::= EntityRef | CharRef
1.68 daniel 3216: *
3217: * Returns the entity string or NULL if handled directly by pushing
1.52 daniel 3218: * the entity value as the input.
1.24 daniel 3219: */
1.55 daniel 3220: CHAR *
3221: xmlParseReference(xmlParserCtxtPtr ctxt) {
1.44 daniel 3222: if ((CUR == '&') && (NXT(1) == '#')) {
1.59 daniel 3223: CHAR *val = xmlParseCharRef(ctxt);
3224: xmlParserInputPtr in;
3225:
3226: if (val != NULL) {
3227: in = xmlNewStringInputStream(ctxt, val);
3228: xmlPushInput(ctxt, in);
3229: }
3230: return(NULL);
1.44 daniel 3231: } else if (CUR == '&') {
1.50 daniel 3232: return(xmlParseEntityRef(ctxt));
1.24 daniel 3233: }
1.46 daniel 3234: return(NULL);
1.24 daniel 3235: }
3236:
1.50 daniel 3237: /**
3238: * xmlParsePEReference:
3239: * @ctxt: an XML parser context
3240: *
3241: * parse PEReference declarations
1.22 daniel 3242: *
3243: * [69] PEReference ::= '%' Name ';'
1.68 daniel 3244: *
3245: * Returns the entity content or NULL if handled directly.
1.22 daniel 3246: */
1.55 daniel 3247: CHAR *
3248: xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.46 daniel 3249: CHAR *ret = NULL;
1.22 daniel 3250: CHAR *name;
1.45 daniel 3251: xmlEntityPtr entity;
1.50 daniel 3252: xmlParserInputPtr input;
1.22 daniel 3253:
1.40 daniel 3254: if (CUR == '%') {
3255: NEXT;
1.22 daniel 3256: name = xmlParseName(ctxt);
3257: if (name == NULL) {
1.55 daniel 3258: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3259: ctxt->sax->error(ctxt, "xmlParsePEReference: no name\n");
1.59 daniel 3260: ctxt->wellFormed = 0;
1.22 daniel 3261: } else {
1.40 daniel 3262: if (CUR == ';') {
3263: NEXT;
1.45 daniel 3264: entity = xmlGetDtdEntity(ctxt->doc, name);
3265: if (entity == NULL) {
1.55 daniel 3266: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
3267: ctxt->sax->warning(ctxt,
1.59 daniel 3268: "xmlParsePEReference: %%%s; not found\n", name);
1.50 daniel 3269: } else {
3270: input = xmlNewEntityInputStream(ctxt, entity);
3271: xmlPushInput(ctxt, input);
1.45 daniel 3272: }
1.22 daniel 3273: } else {
1.50 daniel 3274: char cst[2] = { '%', 0 };
1.46 daniel 3275:
1.55 daniel 3276: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.59 daniel 3277: ctxt->sax->error(ctxt,
3278: "xmlParsePEReference: expecting ';'\n");
3279: ctxt->wellFormed = 0;
1.46 daniel 3280: ret = xmlStrndup(cst, 1);
3281: ret = xmlStrcat(ret, name);
1.22 daniel 3282: }
1.45 daniel 3283: free(name);
1.3 veillard 3284: }
3285: }
1.46 daniel 3286: return(ret);
1.3 veillard 3287: }
3288:
1.50 daniel 3289: /**
3290: * xmlParseDocTypeDecl :
3291: * @ctxt: an XML parser context
3292: *
3293: * parse a DOCTYPE declaration
1.21 daniel 3294: *
1.22 daniel 3295: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
3296: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.21 daniel 3297: */
3298:
1.55 daniel 3299: void
3300: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.36 daniel 3301: xmlDtdPtr dtd;
1.21 daniel 3302: CHAR *name;
3303: CHAR *ExternalID = NULL;
1.39 daniel 3304: CHAR *URI = NULL;
1.21 daniel 3305:
3306: /*
3307: * We know that '<!DOCTYPE' has been detected.
3308: */
1.40 daniel 3309: SKIP(9);
1.21 daniel 3310:
1.42 daniel 3311: SKIP_BLANKS;
1.21 daniel 3312:
3313: /*
3314: * Parse the DOCTYPE name.
3315: */
3316: name = xmlParseName(ctxt);
3317: if (name == NULL) {
1.55 daniel 3318: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3319: ctxt->sax->error(ctxt, "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.59 daniel 3320: ctxt->wellFormed = 0;
1.21 daniel 3321: }
3322:
1.42 daniel 3323: SKIP_BLANKS;
1.21 daniel 3324:
3325: /*
1.22 daniel 3326: * Check for SystemID and ExternalID
3327: */
1.67 daniel 3328: URI = xmlParseExternalID(ctxt, &ExternalID, 1);
1.42 daniel 3329: SKIP_BLANKS;
1.36 daniel 3330:
1.59 daniel 3331: dtd = xmlCreateIntSubset(ctxt->doc, name, ExternalID, URI);
1.22 daniel 3332:
3333: /*
3334: * Is there any DTD definition ?
3335: */
1.40 daniel 3336: if (CUR == '[') {
3337: NEXT;
1.22 daniel 3338: /*
3339: * Parse the succession of Markup declarations and
3340: * PEReferences.
3341: * Subsequence (markupdecl | PEReference | S)*
3342: */
1.40 daniel 3343: while (CUR != ']') {
3344: const CHAR *check = CUR_PTR;
1.22 daniel 3345:
1.42 daniel 3346: SKIP_BLANKS;
1.22 daniel 3347: xmlParseMarkupDecl(ctxt);
1.50 daniel 3348: xmlParsePEReference(ctxt);
1.22 daniel 3349:
1.40 daniel 3350: if (CUR_PTR == check) {
1.55 daniel 3351: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3352: ctxt->sax->error(ctxt,
1.31 daniel 3353: "xmlParseDocTypeDecl: error detected in Markup declaration\n");
1.59 daniel 3354: ctxt->wellFormed = 0;
1.22 daniel 3355: break;
3356: }
3357: }
1.40 daniel 3358: if (CUR == ']') NEXT;
1.22 daniel 3359: }
3360:
3361: /*
3362: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 3363: */
1.40 daniel 3364: if (CUR != '>') {
1.55 daniel 3365: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3366: ctxt->sax->error(ctxt, "DOCTYPE unproperly terminated\n");
1.59 daniel 3367: ctxt->wellFormed = 0;
1.22 daniel 3368: /* We shouldn't try to resynchronize ... */
1.21 daniel 3369: }
1.40 daniel 3370: NEXT;
1.22 daniel 3371:
3372: /*
3373: * Cleanup, since we don't use all those identifiers
3374: * TODO : the DOCTYPE if available should be stored !
3375: */
1.39 daniel 3376: if (URI != NULL) free(URI);
1.22 daniel 3377: if (ExternalID != NULL) free(ExternalID);
3378: if (name != NULL) free(name);
1.21 daniel 3379: }
3380:
1.50 daniel 3381: /**
3382: * xmlParseAttribute:
3383: * @ctxt: an XML parser context
3384: * @node: the node carrying the attribute
3385: *
3386: * parse an attribute
1.3 veillard 3387: *
1.22 daniel 3388: * [41] Attribute ::= Name Eq AttValue
3389: *
3390: * [25] Eq ::= S? '=' S?
3391: *
1.29 daniel 3392: * With namespace:
3393: *
3394: * [NS 11] Attribute ::= QName Eq AttValue
1.43 daniel 3395: *
3396: * Also the case QName == xmlns:??? is handled independently as a namespace
3397: * definition.
1.69 daniel 3398: *
3399: * Returns the attribute just parsed of NULL in case of error.
1.3 veillard 3400: */
3401:
1.69 daniel 3402: xmlAttrPtr
3403: xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlNodePtr node) {
1.59 daniel 3404: CHAR *name, *val;
1.29 daniel 3405: CHAR *ns;
1.52 daniel 3406: CHAR *value = NULL;
3407: xmlAttrPtr ret;
1.3 veillard 3408:
1.29 daniel 3409: name = xmlNamespaceParseQName(ctxt, &ns);
1.22 daniel 3410: if (name == NULL) {
1.55 daniel 3411: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3412: ctxt->sax->error(ctxt, "error parsing attribute name\n");
1.59 daniel 3413: ctxt->wellFormed = 0;
1.52 daniel 3414: return(NULL);
1.3 veillard 3415: }
3416:
3417: /*
1.29 daniel 3418: * read the value
1.3 veillard 3419: */
1.42 daniel 3420: SKIP_BLANKS;
1.40 daniel 3421: if (CUR == '=') {
3422: NEXT;
1.42 daniel 3423: SKIP_BLANKS;
1.29 daniel 3424: value = xmlParseAttValue(ctxt);
3425: } else {
1.55 daniel 3426: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.59 daniel 3427: ctxt->sax->error(ctxt,
3428: "Specification mandate value for attribute %s\n", name);
3429: ctxt->wellFormed = 0;
1.3 veillard 3430: }
3431:
3432: /*
1.43 daniel 3433: * Check whether it's a namespace definition
3434: */
3435: if ((ns == NULL) &&
3436: (name[0] == 'x') && (name[1] == 'm') && (name[2] == 'l') &&
3437: (name[3] == 'n') && (name[4] == 's') && (name[5] == 0)) {
3438: /* a default namespace definition */
3439: xmlNewNs(node, value, NULL);
3440: if (name != NULL)
3441: free(name);
3442: if (value != NULL)
3443: free(value);
1.52 daniel 3444: return(NULL);
1.43 daniel 3445: }
3446: if ((ns != NULL) && (ns[0] == 'x') && (ns[1] == 'm') && (ns[2] == 'l') &&
3447: (ns[3] == 'n') && (ns[4] == 's') && (ns[5] == 0)) {
3448: /* a standard namespace definition */
3449: xmlNewNs(node, value, name);
1.50 daniel 3450: free(ns);
1.43 daniel 3451: if (name != NULL)
3452: free(name);
3453: if (value != NULL)
3454: free(value);
1.52 daniel 3455: return(NULL);
1.43 daniel 3456: }
3457:
1.59 daniel 3458: /*
3459: * Well formedness requires at most one declaration of an attribute
3460: */
3461: if ((val = xmlGetProp(ctxt->node, name)) != NULL) {
3462: free(val);
3463: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3464: ctxt->sax->error(ctxt, "Attribute %s redefined\n", name);
3465: ctxt->wellFormed = 0;
3466: ret = NULL;
3467: } else {
3468: ret = xmlNewProp(ctxt->node, name, NULL);
3469: if (ret != NULL)
3470: ret->val = xmlStringGetNodeList(ctxt->doc, value);
3471: }
1.53 daniel 3472:
3473: if (ns != NULL)
3474: free(ns);
3475: if (value != NULL)
3476: free(value);
3477: free(name);
1.52 daniel 3478: return(ret);
1.3 veillard 3479: }
3480:
1.50 daniel 3481: /**
3482: * xmlParseStartTag:
3483: * @ctxt: an XML parser context
3484: *
3485: * parse a start of tag either for rule element or
3486: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 3487: *
3488: * [40] STag ::= '<' Name (S Attribute)* S? '>'
3489: *
1.29 daniel 3490: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
3491: *
3492: * With namespace:
3493: *
3494: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
3495: *
3496: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.50 daniel 3497: *
1.68 daniel 3498: * Returns the XML new node or NULL.
1.2 veillard 3499: */
3500:
1.69 daniel 3501: xmlNodePtr
3502: xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.34 daniel 3503: CHAR *namespace, *name;
3504: xmlNsPtr ns = NULL;
1.2 veillard 3505: xmlNodePtr ret = NULL;
1.50 daniel 3506: xmlNodePtr parent = ctxt->node;
1.2 veillard 3507:
1.40 daniel 3508: if (CUR != '<') return(NULL);
3509: NEXT;
1.3 veillard 3510:
1.34 daniel 3511: name = xmlNamespaceParseQName(ctxt, &namespace);
1.59 daniel 3512: if (name == NULL) {
3513: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3514: ctxt->sax->error(ctxt,
3515: "xmlParseStartTag: invalid element name\n");
3516: ctxt->wellFormed = 0;
3517: return(NULL);
3518: }
1.3 veillard 3519:
1.43 daniel 3520: /*
3521: * Note : the namespace resolution is deferred until the end of the
3522: * attributes parsing, since local namespace can be defined as
3523: * an attribute at this level.
3524: */
1.50 daniel 3525: ret = xmlNewDocNode(ctxt->doc, ns, name, NULL);
3526: if (ret == NULL) {
3527: if (namespace != NULL)
3528: free(namespace);
3529: free(name);
3530: return(NULL);
3531: }
3532:
3533: /*
3534: * We are parsing a new node.
3535: */
3536: nodePush(ctxt, ret);
1.2 veillard 3537:
1.3 veillard 3538: /*
3539: * Now parse the attributes, it ends up with the ending
3540: *
3541: * (S Attribute)* S?
3542: */
1.42 daniel 3543: SKIP_BLANKS;
1.40 daniel 3544: while ((IS_CHAR(CUR)) &&
3545: (CUR != '>') &&
3546: ((CUR != '/') || (NXT(1) != '>'))) {
3547: const CHAR *q = CUR_PTR;
1.29 daniel 3548:
3549: xmlParseAttribute(ctxt, ret);
1.42 daniel 3550: SKIP_BLANKS;
1.29 daniel 3551:
1.40 daniel 3552: if (q == CUR_PTR) {
1.55 daniel 3553: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3554: ctxt->sax->error(ctxt,
1.31 daniel 3555: "xmlParseStartTag: problem parsing attributes\n");
1.59 daniel 3556: ctxt->wellFormed = 0;
1.29 daniel 3557: break;
1.3 veillard 3558: }
3559: }
3560:
1.43 daniel 3561: /*
3562: * Search the namespace
3563: */
3564: ns = xmlSearchNs(ctxt->doc, ret, namespace);
3565: if (ns == NULL) /* ret still doesn't have a parent yet ! */
1.50 daniel 3566: ns = xmlSearchNs(ctxt->doc, parent, namespace);
1.43 daniel 3567: xmlSetNs(ret, ns);
3568: if (namespace != NULL)
3569: free(namespace);
3570:
1.44 daniel 3571: /*
3572: * SAX: Start of Element !
3573: */
3574: if (ctxt->sax != NULL)
3575: ctxt->sax->startElement(ctxt, name);
1.52 daniel 3576: free(name);
3577:
3578: /*
3579: * Link the child element
3580: */
3581: if (ctxt->nodeNr < 2) return(ret);
3582: parent = ctxt->nodeTab[ctxt->nodeNr - 2];
3583: if (parent != NULL)
3584: xmlAddChild(parent, ctxt->node);
1.44 daniel 3585:
1.3 veillard 3586: return(ret);
3587: }
3588:
1.50 daniel 3589: /**
3590: * xmlParseEndTag:
3591: * @ctxt: an XML parser context
3592: * @nsPtr: the current node namespace definition
3593: * @tagPtr: CHAR** receive the tag value
3594: *
3595: * parse an end of tag
1.27 daniel 3596: *
3597: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 3598: *
3599: * With namespace
3600: *
3601: * [9] ETag ::= '</' QName S? '>'
1.50 daniel 3602: *
1.69 daniel 3603: * tagPtr receive the tag name just read
1.7 veillard 3604: */
3605:
1.55 daniel 3606: void
3607: xmlParseEndTag(xmlParserCtxtPtr ctxt, xmlNsPtr *nsPtr, CHAR **tagPtr) {
1.34 daniel 3608: CHAR *namespace, *name;
3609: xmlNsPtr ns = NULL;
1.7 veillard 3610:
1.34 daniel 3611: *nsPtr = NULL;
1.7 veillard 3612: *tagPtr = NULL;
3613:
1.40 daniel 3614: if ((CUR != '<') || (NXT(1) != '/')) {
1.55 daniel 3615: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3616: ctxt->sax->error(ctxt, "xmlParseEndTag: '</' not found\n");
1.59 daniel 3617: ctxt->wellFormed = 0;
1.27 daniel 3618: return;
3619: }
1.40 daniel 3620: SKIP(2);
1.7 veillard 3621:
1.34 daniel 3622: name = xmlNamespaceParseQName(ctxt, &namespace);
1.43 daniel 3623:
3624: /*
3625: * Search the namespace
3626: */
3627: ns = xmlSearchNs(ctxt->doc, ctxt->node, namespace);
3628: if (namespace != NULL)
1.34 daniel 3629: free(namespace);
1.7 veillard 3630:
1.34 daniel 3631: *nsPtr = ns;
1.7 veillard 3632: *tagPtr = name;
3633:
3634: /*
3635: * We should definitely be at the ending "S? '>'" part
3636: */
1.42 daniel 3637: SKIP_BLANKS;
1.40 daniel 3638: if ((!IS_CHAR(CUR)) || (CUR != '>')) {
1.55 daniel 3639: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3640: ctxt->sax->error(ctxt, "End tag : expected '>'\n");
1.59 daniel 3641: ctxt->wellFormed = 0;
1.7 veillard 3642: } else
1.40 daniel 3643: NEXT;
1.7 veillard 3644:
3645: return;
3646: }
3647:
1.50 daniel 3648: /**
3649: * xmlParseCDSect:
3650: * @ctxt: an XML parser context
3651: *
3652: * Parse escaped pure raw content.
1.29 daniel 3653: *
3654: * [18] CDSect ::= CDStart CData CDEnd
3655: *
3656: * [19] CDStart ::= '<![CDATA['
3657: *
3658: * [20] Data ::= (Char* - (Char* ']]>' Char*))
3659: *
3660: * [21] CDEnd ::= ']]>'
1.3 veillard 3661: */
1.55 daniel 3662: void
3663: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.17 daniel 3664: const CHAR *r, *s, *base;
1.3 veillard 3665:
1.40 daniel 3666: if ((CUR == '<') && (NXT(1) == '!') &&
3667: (NXT(2) == '[') && (NXT(3) == 'C') &&
3668: (NXT(4) == 'D') && (NXT(5) == 'A') &&
3669: (NXT(6) == 'T') && (NXT(7) == 'A') &&
3670: (NXT(8) == '[')) {
3671: SKIP(9);
1.29 daniel 3672: } else
1.45 daniel 3673: return;
1.40 daniel 3674: base = CUR_PTR;
3675: if (!IS_CHAR(CUR)) {
1.55 daniel 3676: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3677: ctxt->sax->error(ctxt, "CData section not finished\n%.50s\n", base);
1.59 daniel 3678: ctxt->wellFormed = 0;
1.45 daniel 3679: return;
1.3 veillard 3680: }
1.40 daniel 3681: r = NEXT;
3682: if (!IS_CHAR(CUR)) {
1.55 daniel 3683: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3684: ctxt->sax->error(ctxt, "CData section not finished\n%.50s\n", base);
1.59 daniel 3685: ctxt->wellFormed = 0;
1.45 daniel 3686: return;
1.3 veillard 3687: }
1.40 daniel 3688: s = NEXT;
3689: while (IS_CHAR(CUR) &&
3690: ((*r != ']') || (*s != ']') || (CUR != '>'))) {
3691: r++;s++;NEXT;
1.3 veillard 3692: }
1.40 daniel 3693: if (!IS_CHAR(CUR)) {
1.55 daniel 3694: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3695: ctxt->sax->error(ctxt, "CData section not finished\n%.50s\n", base);
1.59 daniel 3696: ctxt->wellFormed = 0;
1.45 daniel 3697: return;
1.3 veillard 3698: }
1.16 daniel 3699:
1.45 daniel 3700: /*
3701: * Ok the segment [base CUR_PTR] is to be consumed as chars.
3702: */
3703: if (ctxt->sax != NULL) {
3704: if (areBlanks(ctxt, base, CUR_PTR - base))
1.59 daniel 3705: ctxt->sax->ignorableWhitespace(ctxt, base, 0, (CUR_PTR - base) - 2);
1.45 daniel 3706: else
1.59 daniel 3707: ctxt->sax->characters(ctxt, base, 0, (CUR_PTR - base) - 2);
1.45 daniel 3708: }
1.2 veillard 3709: }
3710:
1.50 daniel 3711: /**
3712: * xmlParseContent:
3713: * @ctxt: an XML parser context
3714: *
3715: * Parse a content:
1.2 veillard 3716: *
1.27 daniel 3717: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 3718: */
3719:
1.55 daniel 3720: void
3721: xmlParseContent(xmlParserCtxtPtr ctxt) {
1.2 veillard 3722: xmlNodePtr ret = NULL;
3723:
1.40 daniel 3724: while ((CUR != '<') || (NXT(1) != '/')) {
3725: const CHAR *test = CUR_PTR;
1.27 daniel 3726: ret = NULL;
3727:
3728: /*
3729: * First case : a Processing Instruction.
3730: */
1.40 daniel 3731: if ((CUR == '<') && (NXT(1) == '?')) {
1.27 daniel 3732: xmlParsePI(ctxt);
3733: }
3734: /*
3735: * Second case : a CDSection
3736: */
1.40 daniel 3737: else if ((CUR == '<') && (NXT(1) == '!') &&
3738: (NXT(2) == '[') && (NXT(3) == 'C') &&
3739: (NXT(4) == 'D') && (NXT(5) == 'A') &&
3740: (NXT(6) == 'T') && (NXT(7) == 'A') &&
3741: (NXT(8) == '[')) {
1.45 daniel 3742: xmlParseCDSect(ctxt);
1.27 daniel 3743: }
3744: /*
3745: * Third case : a comment
3746: */
1.40 daniel 3747: else if ((CUR == '<') && (NXT(1) == '!') &&
3748: (NXT(2) == '-') && (NXT(3) == '-')) {
1.31 daniel 3749: ret = xmlParseComment(ctxt, 1);
1.27 daniel 3750: }
3751: /*
3752: * Fourth case : a sub-element.
3753: */
1.40 daniel 3754: else if (CUR == '<') {
1.45 daniel 3755: ret = xmlParseElement(ctxt);
3756: }
3757: /*
1.50 daniel 3758: * Fifth case : a reference. If if has not been resolved,
3759: * parsing returns it's Name, create the node
1.45 daniel 3760: */
3761: else if (CUR == '&') {
1.50 daniel 3762: CHAR *val = xmlParseReference(ctxt);
3763: if (val != NULL) {
3764: if (val[0] != '&') {
3765: /*
3766: * inline predefined entity.
3767: */
3768: if (ctxt->sax != NULL)
3769: ctxt->sax->characters(ctxt, val, 0, xmlStrlen(val));
3770: } else {
3771: /*
3772: * user defined entity, create a node.
3773: */
3774: ret = xmlNewReference(ctxt->doc, val);
3775: xmlAddChild(ctxt->node, ret);
1.69 daniel 3776: ret = NULL;
1.50 daniel 3777: }
3778: free(val);
3779: }
1.27 daniel 3780: }
3781: /*
3782: * Last case, text. Note that References are handled directly.
3783: */
3784: else {
1.45 daniel 3785: xmlParseCharData(ctxt, 0);
1.3 veillard 3786: }
1.14 veillard 3787:
3788: /*
1.45 daniel 3789: * Pop-up of finished entities.
1.14 veillard 3790: */
1.69 daniel 3791: while ((CUR == 0) && (ctxt->inputNr > 1))
3792: xmlPopInput(ctxt);
1.45 daniel 3793:
1.40 daniel 3794: if (test == CUR_PTR) {
1.55 daniel 3795: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.59 daniel 3796: ctxt->sax->error(ctxt,
3797: "detected an error in element content\n");
3798: ctxt->wellFormed = 0;
1.29 daniel 3799: break;
3800: }
1.3 veillard 3801: }
1.2 veillard 3802: }
3803:
1.50 daniel 3804: /**
3805: * xmlParseElement:
3806: * @ctxt: an XML parser context
3807: *
3808: * parse an XML element, this is highly recursive
1.26 daniel 3809: *
3810: * [39] element ::= EmptyElemTag | STag content ETag
3811: *
3812: * [41] Attribute ::= Name Eq AttValue
1.68 daniel 3813: *
3814: * Returns the XML new node or NULL
1.2 veillard 3815: */
1.26 daniel 3816:
1.2 veillard 3817:
1.69 daniel 3818: xmlNodePtr
3819: xmlParseElement(xmlParserCtxtPtr ctxt) {
1.27 daniel 3820: xmlNodePtr ret;
1.40 daniel 3821: const CHAR *openTag = CUR_PTR;
1.32 daniel 3822: xmlParserNodeInfo node_info;
1.27 daniel 3823: CHAR *endTag;
1.34 daniel 3824: xmlNsPtr endNs;
1.2 veillard 3825:
1.32 daniel 3826: /* Capture start position */
1.40 daniel 3827: node_info.begin_pos = CUR_PTR - ctxt->input->base;
3828: node_info.begin_line = ctxt->input->line;
1.32 daniel 3829:
1.16 daniel 3830: ret = xmlParseStartTag(ctxt);
1.3 veillard 3831: if (ret == NULL) {
3832: return(NULL);
3833: }
1.2 veillard 3834:
3835: /*
3836: * Check for an Empty Element.
3837: */
1.40 daniel 3838: if ((CUR == '/') && (NXT(1) == '>')) {
3839: SKIP(2);
1.45 daniel 3840: if (ctxt->sax != NULL)
3841: ctxt->sax->endElement(ctxt, ret->name);
3842:
3843: /*
3844: * end of parsing of this node.
3845: */
3846: nodePop(ctxt);
3847:
1.2 veillard 3848: return(ret);
3849: }
1.40 daniel 3850: if (CUR == '>') NEXT;
1.2 veillard 3851: else {
1.55 daniel 3852: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.57 daniel 3853: ctxt->sax->error(ctxt, "Couldn't find end of Start Tag\n%.30s\n",
3854: openTag);
1.59 daniel 3855: ctxt->wellFormed = 0;
1.45 daniel 3856:
3857: /*
3858: * end of parsing of this node.
3859: */
3860: nodePop(ctxt);
3861:
1.16 daniel 3862: return(NULL);
1.2 veillard 3863: }
3864:
3865: /*
3866: * Parse the content of the element:
3867: */
1.45 daniel 3868: xmlParseContent(ctxt);
1.40 daniel 3869: if (!IS_CHAR(CUR)) {
1.55 daniel 3870: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.57 daniel 3871: ctxt->sax->error(ctxt,
3872: "Premature end of data in tag %.30s\n", openTag);
1.59 daniel 3873: ctxt->wellFormed = 0;
1.45 daniel 3874:
3875: /*
3876: * end of parsing of this node.
3877: */
3878: nodePop(ctxt);
3879:
1.16 daniel 3880: return(NULL);
1.2 veillard 3881: }
3882:
3883: /*
1.27 daniel 3884: * parse the end of tag: '</' should be here.
1.2 veillard 3885: */
1.34 daniel 3886: xmlParseEndTag(ctxt, &endNs, &endTag);
1.7 veillard 3887:
1.27 daniel 3888: /*
3889: * Check that the Name in the ETag is the same as in the STag.
3890: */
1.34 daniel 3891: if (endNs != ret->ns) {
1.55 daniel 3892: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3893: ctxt->sax->error(ctxt,
1.43 daniel 3894: "Start and End tags don't use the same namespace\n%.30s\n%.30s\n",
1.31 daniel 3895: openTag, endTag);
1.59 daniel 3896: ctxt->wellFormed = 0;
1.27 daniel 3897: }
1.32 daniel 3898: if (endTag == NULL ) {
1.55 daniel 3899: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3900: ctxt->sax->error(ctxt, "The End tag has no name\n%.30s\n", openTag);
1.59 daniel 3901: ctxt->wellFormed = 0;
1.45 daniel 3902: } else if (xmlStrcmp(ret->name, endTag)) {
1.55 daniel 3903: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3904: ctxt->sax->error(ctxt,
1.31 daniel 3905: "Start and End tags don't use the same name\n%.30s\n%.30s\n",
3906: openTag, endTag);
1.59 daniel 3907: ctxt->wellFormed = 0;
1.27 daniel 3908: }
1.44 daniel 3909: /*
3910: * SAX: End of Tag
3911: */
3912: else if (ctxt->sax != NULL)
3913: ctxt->sax->endElement(ctxt, endTag);
1.7 veillard 3914:
1.44 daniel 3915: if (endTag != NULL)
3916: free(endTag);
1.2 veillard 3917:
1.32 daniel 3918: /* Capture end position and add node */
3919: if ( ret != NULL && ctxt->record_info ) {
1.40 daniel 3920: node_info.end_pos = CUR_PTR - ctxt->input->base;
3921: node_info.end_line = ctxt->input->line;
1.32 daniel 3922: node_info.node = ret;
3923: xmlParserAddNodeInfo(ctxt, &node_info);
3924: }
1.43 daniel 3925:
3926: /*
3927: * end of parsing of this node.
3928: */
3929: nodePop(ctxt);
3930:
1.2 veillard 3931: return(ret);
3932: }
3933:
1.50 daniel 3934: /**
3935: * xmlParseVersionNum:
3936: * @ctxt: an XML parser context
3937: *
3938: * parse the XML version value.
1.29 daniel 3939: *
3940: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
1.68 daniel 3941: *
3942: * Returns the string giving the XML version number, or NULL
1.29 daniel 3943: */
1.55 daniel 3944: CHAR *
3945: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.40 daniel 3946: const CHAR *q = CUR_PTR;
1.29 daniel 3947: CHAR *ret;
3948:
1.40 daniel 3949: while (IS_CHAR(CUR) &&
3950: (((CUR >= 'a') && (CUR <= 'z')) ||
3951: ((CUR >= 'A') && (CUR <= 'Z')) ||
3952: ((CUR >= '0') && (CUR <= '9')) ||
3953: (CUR == '_') || (CUR == '.') ||
3954: (CUR == ':') || (CUR == '-'))) NEXT;
3955: ret = xmlStrndup(q, CUR_PTR - q);
1.29 daniel 3956: return(ret);
3957: }
3958:
1.50 daniel 3959: /**
3960: * xmlParseVersionInfo:
3961: * @ctxt: an XML parser context
3962: *
3963: * parse the XML version.
1.29 daniel 3964: *
3965: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
3966: *
3967: * [25] Eq ::= S? '=' S?
1.50 daniel 3968: *
1.68 daniel 3969: * Returns the version string, e.g. "1.0"
1.29 daniel 3970: */
3971:
1.55 daniel 3972: CHAR *
3973: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
1.29 daniel 3974: CHAR *version = NULL;
3975: const CHAR *q;
3976:
1.40 daniel 3977: if ((CUR == 'v') && (NXT(1) == 'e') &&
3978: (NXT(2) == 'r') && (NXT(3) == 's') &&
3979: (NXT(4) == 'i') && (NXT(5) == 'o') &&
3980: (NXT(6) == 'n')) {
3981: SKIP(7);
1.42 daniel 3982: SKIP_BLANKS;
1.40 daniel 3983: if (CUR != '=') {
1.55 daniel 3984: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3985: ctxt->sax->error(ctxt, "xmlParseVersionInfo : expected '='\n");
1.59 daniel 3986: ctxt->wellFormed = 0;
1.31 daniel 3987: return(NULL);
3988: }
1.40 daniel 3989: NEXT;
1.42 daniel 3990: SKIP_BLANKS;
1.40 daniel 3991: if (CUR == '"') {
3992: NEXT;
3993: q = CUR_PTR;
1.29 daniel 3994: version = xmlParseVersionNum(ctxt);
1.55 daniel 3995: if (CUR != '"') {
3996: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3997: ctxt->sax->error(ctxt, "String not closed\n%.50s\n", q);
1.59 daniel 3998: ctxt->wellFormed = 0;
1.55 daniel 3999: } else
1.40 daniel 4000: NEXT;
4001: } else if (CUR == '\''){
4002: NEXT;
4003: q = CUR_PTR;
1.29 daniel 4004: version = xmlParseVersionNum(ctxt);
1.55 daniel 4005: if (CUR != '\'') {
4006: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4007: ctxt->sax->error(ctxt, "String not closed\n%.50s\n", q);
1.59 daniel 4008: ctxt->wellFormed = 0;
1.55 daniel 4009: } else
1.40 daniel 4010: NEXT;
1.31 daniel 4011: } else {
1.55 daniel 4012: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.59 daniel 4013: ctxt->sax->error(ctxt,
4014: "xmlParseVersionInfo : expected ' or \"\n");
4015: ctxt->wellFormed = 0;
1.29 daniel 4016: }
4017: }
4018: return(version);
4019: }
4020:
1.50 daniel 4021: /**
4022: * xmlParseEncName:
4023: * @ctxt: an XML parser context
4024: *
4025: * parse the XML encoding name
1.29 daniel 4026: *
4027: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1.50 daniel 4028: *
1.68 daniel 4029: * Returns the encoding name value or NULL
1.29 daniel 4030: */
1.55 daniel 4031: CHAR *
4032: xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.40 daniel 4033: const CHAR *q = CUR_PTR;
1.29 daniel 4034: CHAR *ret = NULL;
4035:
1.40 daniel 4036: if (((CUR >= 'a') && (CUR <= 'z')) ||
4037: ((CUR >= 'A') && (CUR <= 'Z'))) {
4038: NEXT;
4039: while (IS_CHAR(CUR) &&
4040: (((CUR >= 'a') && (CUR <= 'z')) ||
4041: ((CUR >= 'A') && (CUR <= 'Z')) ||
4042: ((CUR >= '0') && (CUR <= '9')) ||
4043: (CUR == '-'))) NEXT;
4044: ret = xmlStrndup(q, CUR_PTR - q);
1.29 daniel 4045: } else {
1.55 daniel 4046: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4047: ctxt->sax->error(ctxt, "Invalid XML encoding name\n");
1.59 daniel 4048: ctxt->wellFormed = 0;
1.29 daniel 4049: }
4050: return(ret);
4051: }
4052:
1.50 daniel 4053: /**
4054: * xmlParseEncodingDecl:
4055: * @ctxt: an XML parser context
4056: *
4057: * parse the XML encoding declaration
1.29 daniel 4058: *
4059: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
1.50 daniel 4060: *
4061: * TODO: this should setup the conversion filters.
4062: *
1.68 daniel 4063: * Returns the encoding value or NULL
1.29 daniel 4064: */
4065:
1.55 daniel 4066: CHAR *
4067: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 4068: CHAR *encoding = NULL;
4069: const CHAR *q;
4070:
1.42 daniel 4071: SKIP_BLANKS;
1.40 daniel 4072: if ((CUR == 'e') && (NXT(1) == 'n') &&
4073: (NXT(2) == 'c') && (NXT(3) == 'o') &&
4074: (NXT(4) == 'd') && (NXT(5) == 'i') &&
4075: (NXT(6) == 'n') && (NXT(7) == 'g')) {
4076: SKIP(8);
1.42 daniel 4077: SKIP_BLANKS;
1.40 daniel 4078: if (CUR != '=') {
1.55 daniel 4079: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4080: ctxt->sax->error(ctxt, "xmlParseEncodingDecl : expected '='\n");
1.59 daniel 4081: ctxt->wellFormed = 0;
1.31 daniel 4082: return(NULL);
4083: }
1.40 daniel 4084: NEXT;
1.42 daniel 4085: SKIP_BLANKS;
1.40 daniel 4086: if (CUR == '"') {
4087: NEXT;
4088: q = CUR_PTR;
1.29 daniel 4089: encoding = xmlParseEncName(ctxt);
1.55 daniel 4090: if (CUR != '"') {
4091: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4092: ctxt->sax->error(ctxt, "String not closed\n%.50s\n", q);
1.59 daniel 4093: ctxt->wellFormed = 0;
1.55 daniel 4094: } else
1.40 daniel 4095: NEXT;
4096: } else if (CUR == '\''){
4097: NEXT;
4098: q = CUR_PTR;
1.29 daniel 4099: encoding = xmlParseEncName(ctxt);
1.55 daniel 4100: if (CUR != '\'') {
4101: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4102: ctxt->sax->error(ctxt, "String not closed\n%.50s\n", q);
1.59 daniel 4103: ctxt->wellFormed = 0;
1.55 daniel 4104: } else
1.40 daniel 4105: NEXT;
4106: } else if (CUR == '"'){
1.55 daniel 4107: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.59 daniel 4108: ctxt->sax->error(ctxt,
4109: "xmlParseEncodingDecl : expected ' or \"\n");
4110: ctxt->wellFormed = 0;
1.29 daniel 4111: }
4112: }
4113: return(encoding);
4114: }
4115:
1.50 daniel 4116: /**
4117: * xmlParseSDDecl:
4118: * @ctxt: an XML parser context
4119: *
4120: * parse the XML standalone declaration
1.29 daniel 4121: *
4122: * [32] SDDecl ::= S 'standalone' Eq
4123: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
1.68 daniel 4124: *
4125: * Returns 1 if standalone, 0 otherwise
1.29 daniel 4126: */
4127:
1.55 daniel 4128: int
4129: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 4130: int standalone = -1;
4131:
1.42 daniel 4132: SKIP_BLANKS;
1.40 daniel 4133: if ((CUR == 's') && (NXT(1) == 't') &&
4134: (NXT(2) == 'a') && (NXT(3) == 'n') &&
4135: (NXT(4) == 'd') && (NXT(5) == 'a') &&
4136: (NXT(6) == 'l') && (NXT(7) == 'o') &&
4137: (NXT(8) == 'n') && (NXT(9) == 'e')) {
4138: SKIP(10);
4139: if (CUR != '=') {
1.55 daniel 4140: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.59 daniel 4141: ctxt->sax->error(ctxt,
4142: "XML standalone declaration : expected '='\n");
4143: ctxt->wellFormed = 0;
1.32 daniel 4144: return(standalone);
4145: }
1.40 daniel 4146: NEXT;
1.42 daniel 4147: SKIP_BLANKS;
1.40 daniel 4148: if (CUR == '\''){
4149: NEXT;
4150: if ((CUR == 'n') && (NXT(1) == 'o')) {
1.29 daniel 4151: standalone = 0;
1.40 daniel 4152: SKIP(2);
4153: } else if ((CUR == 'y') && (NXT(1) == 'e') &&
4154: (NXT(2) == 's')) {
1.29 daniel 4155: standalone = 1;
1.40 daniel 4156: SKIP(3);
1.29 daniel 4157: } else {
1.55 daniel 4158: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4159: ctxt->sax->error(ctxt, "standalone accepts only 'yes' or 'no'\n");
1.59 daniel 4160: ctxt->wellFormed = 0;
1.29 daniel 4161: }
1.55 daniel 4162: if (CUR != '\'') {
4163: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4164: ctxt->sax->error(ctxt, "String not closed\n");
1.59 daniel 4165: ctxt->wellFormed = 0;
1.55 daniel 4166: } else
1.40 daniel 4167: NEXT;
4168: } else if (CUR == '"'){
4169: NEXT;
4170: if ((CUR == 'n') && (NXT(1) == 'o')) {
1.29 daniel 4171: standalone = 0;
1.40 daniel 4172: SKIP(2);
4173: } else if ((CUR == 'y') && (NXT(1) == 'e') &&
4174: (NXT(2) == 's')) {
1.29 daniel 4175: standalone = 1;
1.40 daniel 4176: SKIP(3);
1.29 daniel 4177: } else {
1.55 daniel 4178: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.59 daniel 4179: ctxt->sax->error(ctxt,
4180: "standalone accepts only 'yes' or 'no'\n");
4181: ctxt->wellFormed = 0;
1.29 daniel 4182: }
1.55 daniel 4183: if (CUR != '"') {
4184: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4185: ctxt->sax->error(ctxt, "String not closed\n");
1.59 daniel 4186: ctxt->wellFormed = 0;
1.55 daniel 4187: } else
1.40 daniel 4188: NEXT;
1.37 daniel 4189: } else {
1.55 daniel 4190: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4191: ctxt->sax->error(ctxt, "Standalone value not found\n");
1.59 daniel 4192: ctxt->wellFormed = 0;
1.37 daniel 4193: }
1.29 daniel 4194: }
4195: return(standalone);
4196: }
4197:
1.50 daniel 4198: /**
4199: * xmlParseXMLDecl:
4200: * @ctxt: an XML parser context
4201: *
4202: * parse an XML declaration header
1.29 daniel 4203: *
4204: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 4205: */
4206:
1.55 daniel 4207: void
4208: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.1 veillard 4209: CHAR *version;
4210:
4211: /*
1.19 daniel 4212: * We know that '<?xml' is here.
1.1 veillard 4213: */
1.40 daniel 4214: SKIP(5);
1.1 veillard 4215:
1.59 daniel 4216: if (!IS_BLANK(CUR)) {
4217: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4218: ctxt->sax->error(ctxt, "Blank needed after '<?xml'\n");
4219: ctxt->wellFormed = 0;
4220: }
1.42 daniel 4221: SKIP_BLANKS;
1.1 veillard 4222:
4223: /*
1.29 daniel 4224: * We should have the VersionInfo here.
1.1 veillard 4225: */
1.29 daniel 4226: version = xmlParseVersionInfo(ctxt);
4227: if (version == NULL)
1.45 daniel 4228: version = xmlCharStrdup(XML_DEFAULT_VERSION);
4229: ctxt->doc = xmlNewDoc(version);
4230: free(version);
1.29 daniel 4231:
4232: /*
4233: * We may have the encoding declaration
4234: */
1.59 daniel 4235: if (!IS_BLANK(CUR)) {
4236: if ((CUR == '?') && (NXT(1) == '>')) {
4237: SKIP(2);
4238: return;
4239: }
4240: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4241: ctxt->sax->error(ctxt, "Blank needed here\n");
4242: ctxt->wellFormed = 0;
4243: }
1.32 daniel 4244: ctxt->doc->encoding = xmlParseEncodingDecl(ctxt);
1.1 veillard 4245:
4246: /*
1.29 daniel 4247: * We may have the standalone status.
1.1 veillard 4248: */
1.59 daniel 4249: if ((ctxt->doc->encoding != NULL) && (!IS_BLANK(CUR))) {
4250: if ((CUR == '?') && (NXT(1) == '>')) {
4251: SKIP(2);
4252: return;
4253: }
4254: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4255: ctxt->sax->error(ctxt, "Blank needed here\n");
4256: ctxt->wellFormed = 0;
4257: }
4258: SKIP_BLANKS;
1.32 daniel 4259: ctxt->doc->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 4260:
1.42 daniel 4261: SKIP_BLANKS;
1.40 daniel 4262: if ((CUR == '?') && (NXT(1) == '>')) {
4263: SKIP(2);
4264: } else if (CUR == '>') {
1.31 daniel 4265: /* Deprecated old WD ... */
1.55 daniel 4266: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4267: ctxt->sax->error(ctxt, "XML declaration must end-up with '?>'\n");
1.59 daniel 4268: ctxt->wellFormed = 0;
1.40 daniel 4269: NEXT;
1.29 daniel 4270: } else {
1.55 daniel 4271: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4272: ctxt->sax->error(ctxt, "parsing XML declaration: '?>' expected\n");
1.59 daniel 4273: ctxt->wellFormed = 0;
1.40 daniel 4274: MOVETO_ENDTAG(CUR_PTR);
4275: NEXT;
1.29 daniel 4276: }
1.1 veillard 4277: }
4278:
1.50 daniel 4279: /**
4280: * xmlParseMisc:
4281: * @ctxt: an XML parser context
4282: *
4283: * parse an XML Misc* optionnal field.
1.21 daniel 4284: *
1.22 daniel 4285: * [27] Misc ::= Comment | PI | S
1.1 veillard 4286: */
4287:
1.55 daniel 4288: void
4289: xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.40 daniel 4290: while (((CUR == '<') && (NXT(1) == '?')) ||
4291: ((CUR == '<') && (NXT(1) == '!') &&
4292: (NXT(2) == '-') && (NXT(3) == '-')) ||
4293: IS_BLANK(CUR)) {
4294: if ((CUR == '<') && (NXT(1) == '?')) {
1.16 daniel 4295: xmlParsePI(ctxt);
1.40 daniel 4296: } else if (IS_BLANK(CUR)) {
4297: NEXT;
1.1 veillard 4298: } else
1.31 daniel 4299: xmlParseComment(ctxt, 0);
1.1 veillard 4300: }
4301: }
4302:
1.50 daniel 4303: /**
4304: * xmlParseDocument :
4305: * @ctxt: an XML parser context
4306: *
4307: * parse an XML document (and build a tree if using the standard SAX
4308: * interface).
1.21 daniel 4309: *
1.22 daniel 4310: * [1] document ::= prolog element Misc*
1.29 daniel 4311: *
4312: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.50 daniel 4313: *
1.68 daniel 4314: * Returns 0, -1 in case of error. the parser context is augmented
1.50 daniel 4315: * as a result of the parsing.
1.1 veillard 4316: */
4317:
1.55 daniel 4318: int
4319: xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.45 daniel 4320: xmlDefaultSAXHandlerInit();
4321:
1.14 veillard 4322: /*
1.44 daniel 4323: * SAX: beginning of the document processing.
4324: */
4325: if (ctxt->sax)
4326: ctxt->sax->setDocumentLocator(ctxt, &xmlDefaultSAXLocator);
4327: if (ctxt->sax)
4328: ctxt->sax->startDocument(ctxt);
4329:
4330: /*
1.14 veillard 4331: * We should check for encoding here and plug-in some
4332: * conversion code TODO !!!!
4333: */
1.1 veillard 4334:
4335: /*
4336: * Wipe out everything which is before the first '<'
4337: */
1.59 daniel 4338: if (IS_BLANK(CUR)) {
4339: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4340: ctxt->sax->error(ctxt,
4341: "Extra spaces at the beginning of the document are not allowed\n");
4342: ctxt->wellFormed = 0;
4343: SKIP_BLANKS;
4344: }
4345:
4346: if (CUR == 0) {
4347: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4348: ctxt->sax->error(ctxt, "Document is empty\n");
4349: ctxt->wellFormed = 0;
4350: }
1.1 veillard 4351:
4352: /*
4353: * Check for the XMLDecl in the Prolog.
4354: */
1.40 daniel 4355: if ((CUR == '<') && (NXT(1) == '?') &&
4356: (NXT(2) == 'x') && (NXT(3) == 'm') &&
4357: (NXT(4) == 'l')) {
1.19 daniel 4358: xmlParseXMLDecl(ctxt);
4359: /* SKIP_EOL(cur); */
1.42 daniel 4360: SKIP_BLANKS;
1.40 daniel 4361: } else if ((CUR == '<') && (NXT(1) == '?') &&
4362: (NXT(2) == 'X') && (NXT(3) == 'M') &&
4363: (NXT(4) == 'L')) {
1.19 daniel 4364: /*
4365: * The first drafts were using <?XML and the final W3C REC
4366: * now use <?xml ...
4367: */
1.16 daniel 4368: xmlParseXMLDecl(ctxt);
1.1 veillard 4369: /* SKIP_EOL(cur); */
1.42 daniel 4370: SKIP_BLANKS;
1.1 veillard 4371: } else {
1.45 daniel 4372: CHAR *version;
4373:
4374: version = xmlCharStrdup(XML_DEFAULT_VERSION);
4375: ctxt->doc = xmlNewDoc(version);
4376: free(version);
1.1 veillard 4377: }
4378:
4379: /*
4380: * The Misc part of the Prolog
4381: */
1.16 daniel 4382: xmlParseMisc(ctxt);
1.1 veillard 4383:
4384: /*
1.29 daniel 4385: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 4386: * (doctypedecl Misc*)?
4387: */
1.40 daniel 4388: if ((CUR == '<') && (NXT(1) == '!') &&
4389: (NXT(2) == 'D') && (NXT(3) == 'O') &&
4390: (NXT(4) == 'C') && (NXT(5) == 'T') &&
4391: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
4392: (NXT(8) == 'E')) {
1.22 daniel 4393: xmlParseDocTypeDecl(ctxt);
4394: xmlParseMisc(ctxt);
1.21 daniel 4395: }
4396:
4397: /*
4398: * Time to start parsing the tree itself
1.1 veillard 4399: */
1.45 daniel 4400: ctxt->doc->root = xmlParseElement(ctxt);
1.33 daniel 4401:
4402: /*
4403: * The Misc part at the end
4404: */
4405: xmlParseMisc(ctxt);
1.16 daniel 4406:
1.59 daniel 4407: if (CUR != 0) {
4408: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4409: ctxt->sax->error(ctxt,
4410: "Extra content at the end of the document\n");
4411: ctxt->wellFormed = 0;
4412: }
4413:
1.44 daniel 4414: /*
4415: * SAX: end of the document processing.
4416: */
4417: if (ctxt->sax)
4418: ctxt->sax->endDocument(ctxt);
1.59 daniel 4419: if (! ctxt->wellFormed) return(-1);
1.16 daniel 4420: return(0);
4421: }
4422:
1.50 daniel 4423: /**
1.69 daniel 4424: * xmlCreateFileParserCtxt :
1.50 daniel 4425: * @cur: a pointer to an array of CHAR
4426: *
1.69 daniel 4427: * Create a parser context for an XML in-memory document.
4428: *
4429: * Returns the new parser context or NULL
1.16 daniel 4430: */
1.69 daniel 4431: xmlParserCtxtPtr
4432: xmlCreateDocParserCtxt(CHAR *cur) {
1.16 daniel 4433: xmlParserCtxtPtr ctxt;
1.40 daniel 4434: xmlParserInputPtr input;
1.16 daniel 4435:
4436: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
4437: if (ctxt == NULL) {
4438: perror("malloc");
4439: return(NULL);
4440: }
1.40 daniel 4441: xmlInitParserCtxt(ctxt);
4442: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
4443: if (input == NULL) {
4444: perror("malloc");
4445: free(ctxt);
4446: return(NULL);
4447: }
4448:
4449: input->filename = NULL;
4450: input->line = 1;
4451: input->col = 1;
4452: input->base = cur;
4453: input->cur = cur;
1.69 daniel 4454: input->free = NULL;
1.40 daniel 4455:
4456: inputPush(ctxt, input);
1.69 daniel 4457: return(ctxt);
4458: }
4459:
4460: /**
4461: * xmlSAXParseDoc :
4462: * @sax: the SAX handler block
4463: * @cur: a pointer to an array of CHAR
4464: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
4465: * documents
4466: *
4467: * parse an XML in-memory document and build a tree.
4468: * It use the given SAX function block to handle the parsing callback.
4469: * If sax is NULL, fallback to the default DOM tree building routines.
4470: *
4471: * Returns the resulting document tree
4472: */
4473:
4474: xmlDocPtr
4475: xmlSAXParseDoc(xmlSAXHandlerPtr sax, CHAR *cur, int recovery) {
4476: xmlDocPtr ret;
4477: xmlParserCtxtPtr ctxt;
4478:
4479: if (cur == NULL) return(NULL);
1.16 daniel 4480:
4481:
1.69 daniel 4482: ctxt = xmlCreateDocParserCtxt(cur);
4483: if (ctxt == NULL) return(NULL);
4484: if (sax != NULL) ctxt->sax = sax;
4485:
1.16 daniel 4486: xmlParseDocument(ctxt);
1.59 daniel 4487: if ((ctxt->wellFormed) || recovery) ret = ctxt->doc;
4488: else {
4489: ret = NULL;
4490: xmlFreeDoc(ctxt->doc);
4491: ctxt->doc = NULL;
4492: }
1.69 daniel 4493: xmlFreeParserCtxt(ctxt);
1.16 daniel 4494:
1.1 veillard 4495: return(ret);
4496: }
4497:
1.50 daniel 4498: /**
1.55 daniel 4499: * xmlParseDoc :
4500: * @cur: a pointer to an array of CHAR
4501: *
4502: * parse an XML in-memory document and build a tree.
4503: *
1.68 daniel 4504: * Returns the resulting document tree
1.55 daniel 4505: */
4506:
1.69 daniel 4507: xmlDocPtr
4508: xmlParseDoc(CHAR *cur) {
1.59 daniel 4509: return(xmlSAXParseDoc(NULL, cur, 0));
4510: }
4511:
4512: /**
4513: * xmlRecoverDoc :
4514: * @cur: a pointer to an array of CHAR
4515: *
4516: * parse an XML in-memory document and build a tree.
4517: * In the case the document is not Well Formed, a tree is built anyway
4518: *
1.68 daniel 4519: * Returns the resulting document tree
1.59 daniel 4520: */
4521:
1.69 daniel 4522: xmlDocPtr
4523: xmlRecoverDoc(CHAR *cur) {
1.59 daniel 4524: return(xmlSAXParseDoc(NULL, cur, 1));
1.55 daniel 4525: }
4526:
4527: /**
1.69 daniel 4528: * xmlCreateFileParserCtxt :
1.50 daniel 4529: * @filename: the filename
4530: *
1.69 daniel 4531: * Create a parser context for a file content.
4532: * Automatic support for ZLIB/Compress compressed document is provided
4533: * by default if found at compile-time.
1.50 daniel 4534: *
1.69 daniel 4535: * Returns the new parser context or NULL
1.9 httpng 4536: */
1.69 daniel 4537: xmlParserCtxtPtr
4538: xmlCreateFileParserCtxt(const char *filename)
4539: {
4540: xmlParserCtxtPtr ctxt;
1.20 daniel 4541: #ifdef HAVE_ZLIB_H
4542: gzFile input;
4543: #else
1.9 httpng 4544: int input;
1.20 daniel 4545: #endif
1.9 httpng 4546: int res;
1.55 daniel 4547: int len;
1.9 httpng 4548: struct stat buf;
4549: char *buffer;
1.40 daniel 4550: xmlParserInputPtr inputStream;
1.9 httpng 4551:
1.11 veillard 4552: res = stat(filename, &buf);
1.9 httpng 4553: if (res < 0) return(NULL);
4554:
1.20 daniel 4555: #ifdef HAVE_ZLIB_H
1.55 daniel 4556: len = (buf.st_size * 8) + 1000;
1.20 daniel 4557: retry_bigger:
1.55 daniel 4558: buffer = malloc(len);
1.20 daniel 4559: #else
1.55 daniel 4560: len = buf.st_size + 100;
4561: buffer = malloc(len);
1.20 daniel 4562: #endif
1.9 httpng 4563: if (buffer == NULL) {
4564: perror("malloc");
4565: return(NULL);
4566: }
4567:
1.55 daniel 4568: memset(buffer, 0, len);
1.20 daniel 4569: #ifdef HAVE_ZLIB_H
4570: input = gzopen (filename, "r");
4571: if (input == NULL) {
4572: fprintf (stderr, "Cannot read file %s :\n", filename);
4573: perror ("gzopen failed");
4574: return(NULL);
4575: }
4576: #else
1.9 httpng 4577: input = open (filename, O_RDONLY);
4578: if (input < 0) {
4579: fprintf (stderr, "Cannot read file %s :\n", filename);
4580: perror ("open failed");
4581: return(NULL);
4582: }
1.20 daniel 4583: #endif
4584: #ifdef HAVE_ZLIB_H
1.55 daniel 4585: res = gzread(input, buffer, len);
1.20 daniel 4586: #else
1.9 httpng 4587: res = read(input, buffer, buf.st_size);
1.20 daniel 4588: #endif
1.9 httpng 4589: if (res < 0) {
4590: fprintf (stderr, "Cannot read file %s :\n", filename);
1.20 daniel 4591: #ifdef HAVE_ZLIB_H
4592: perror ("gzread failed");
4593: #else
1.9 httpng 4594: perror ("read failed");
1.20 daniel 4595: #endif
1.9 httpng 4596: return(NULL);
4597: }
1.20 daniel 4598: #ifdef HAVE_ZLIB_H
4599: gzclose(input);
1.55 daniel 4600: if (res >= len) {
1.20 daniel 4601: free(buffer);
1.55 daniel 4602: len *= 2;
1.20 daniel 4603: goto retry_bigger;
4604: }
4605: buf.st_size = res;
4606: #else
1.9 httpng 4607: close(input);
1.20 daniel 4608: #endif
4609:
1.40 daniel 4610: buffer[buf.st_size] = '\0';
1.9 httpng 4611:
1.16 daniel 4612: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
4613: if (ctxt == NULL) {
4614: perror("malloc");
4615: return(NULL);
4616: }
1.40 daniel 4617: xmlInitParserCtxt(ctxt);
4618: inputStream = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
4619: if (inputStream == NULL) {
4620: perror("malloc");
4621: free(ctxt);
4622: return(NULL);
4623: }
4624:
4625: inputStream->filename = strdup(filename);
4626: inputStream->line = 1;
4627: inputStream->col = 1;
1.45 daniel 4628:
4629: /*
4630: * TODO : plug some encoding conversion routines here. !!!
4631: */
1.40 daniel 4632: inputStream->base = buffer;
4633: inputStream->cur = buffer;
1.69 daniel 4634: inputStream->free = (xmlParserInputDeallocate) free;
1.16 daniel 4635:
1.40 daniel 4636: inputPush(ctxt, inputStream);
1.69 daniel 4637: return(ctxt);
4638: }
4639:
4640: /**
4641: * xmlSAXParseFile :
4642: * @sax: the SAX handler block
4643: * @filename: the filename
4644: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
4645: * documents
4646: *
4647: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
4648: * compressed document is provided by default if found at compile-time.
4649: * It use the given SAX function block to handle the parsing callback.
4650: * If sax is NULL, fallback to the default DOM tree building routines.
4651: *
4652: * Returns the resulting document tree
4653: */
4654:
4655: xmlDocPtr xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
4656: int recovery) {
4657: xmlDocPtr ret;
4658: xmlParserCtxtPtr ctxt;
4659:
4660: ctxt = xmlCreateFileParserCtxt(filename);
4661: if (ctxt == NULL) return(NULL);
4662: if (sax != NULL) ctxt->sax = sax;
1.16 daniel 4663:
4664: xmlParseDocument(ctxt);
1.40 daniel 4665:
1.59 daniel 4666: if ((ctxt->wellFormed) || recovery) ret = ctxt->doc;
4667: else {
4668: ret = NULL;
4669: xmlFreeDoc(ctxt->doc);
4670: ctxt->doc = NULL;
4671: }
1.69 daniel 4672: xmlFreeParserCtxt(ctxt);
1.20 daniel 4673:
4674: return(ret);
4675: }
4676:
1.55 daniel 4677: /**
4678: * xmlParseFile :
4679: * @filename: the filename
4680: *
4681: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
4682: * compressed document is provided by default if found at compile-time.
4683: *
1.68 daniel 4684: * Returns the resulting document tree
1.55 daniel 4685: */
4686:
4687: xmlDocPtr xmlParseFile(const char *filename) {
1.59 daniel 4688: return(xmlSAXParseFile(NULL, filename, 0));
4689: }
4690:
4691: /**
4692: * xmlRecoverFile :
4693: * @filename: the filename
4694: *
4695: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
4696: * compressed document is provided by default if found at compile-time.
4697: * In the case the document is not Well Formed, a tree is built anyway
4698: *
1.68 daniel 4699: * Returns the resulting document tree
1.59 daniel 4700: */
4701:
4702: xmlDocPtr xmlRecoverFile(const char *filename) {
4703: return(xmlSAXParseFile(NULL, filename, 1));
1.55 daniel 4704: }
1.32 daniel 4705:
1.50 daniel 4706: /**
1.69 daniel 4707: * xmlCreateMemoryParserCtxt :
1.68 daniel 4708: * @buffer: an pointer to a char array
1.50 daniel 4709: * @size: the siwe of the array
4710: *
1.69 daniel 4711: * Create a parser context for an XML in-memory document.
1.50 daniel 4712: *
1.69 daniel 4713: * Returns the new parser context or NULL
1.20 daniel 4714: */
1.69 daniel 4715: xmlParserCtxtPtr
4716: xmlCreateMemoryParserCtxt(char *buffer, int size) {
1.20 daniel 4717: xmlParserCtxtPtr ctxt;
1.40 daniel 4718: xmlParserInputPtr input;
4719:
4720: buffer[size - 1] = '\0';
4721:
1.20 daniel 4722: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
4723: if (ctxt == NULL) {
4724: perror("malloc");
4725: return(NULL);
4726: }
1.40 daniel 4727: xmlInitParserCtxt(ctxt);
4728: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
4729: if (input == NULL) {
4730: perror("malloc");
1.50 daniel 4731: free(ctxt->nodeTab);
4732: free(ctxt->inputTab);
1.40 daniel 4733: free(ctxt);
4734: return(NULL);
4735: }
1.20 daniel 4736:
1.40 daniel 4737: input->filename = NULL;
4738: input->line = 1;
4739: input->col = 1;
1.45 daniel 4740:
4741: /*
4742: * TODO : plug some encoding conversion routines here. !!!
4743: */
1.40 daniel 4744: input->base = buffer;
4745: input->cur = buffer;
1.69 daniel 4746: input->free = NULL;
1.20 daniel 4747:
1.40 daniel 4748: inputPush(ctxt, input);
1.69 daniel 4749: return(ctxt);
4750: }
4751:
4752: /**
4753: * xmlSAXParseMemory :
4754: * @sax: the SAX handler block
4755: * @buffer: an pointer to a char array
4756: * @size: the siwe of the array
4757: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
4758: * documents
4759: *
4760: * parse an XML in-memory block and use the given SAX function block
4761: * to handle the parsing callback. If sax is NULL, fallback to the default
4762: * DOM tree building routines.
4763: *
4764: * TODO : plug some encoding conversion routines here. !!!
4765: *
4766: * Returns the resulting document tree
4767: */
4768: xmlDocPtr
4769: xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
4770: xmlDocPtr ret;
4771: xmlParserCtxtPtr ctxt;
4772:
4773: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
4774: if (ctxt == NULL) return(NULL);
4775: if (sax != NULL) ctxt->sax = sax;
1.20 daniel 4776:
4777: xmlParseDocument(ctxt);
1.40 daniel 4778:
1.59 daniel 4779: if ((ctxt->wellFormed) || recovery) ret = ctxt->doc;
4780: else {
4781: ret = NULL;
4782: xmlFreeDoc(ctxt->doc);
4783: ctxt->doc = NULL;
4784: }
1.69 daniel 4785: xmlFreeParserCtxt(ctxt);
1.16 daniel 4786:
1.9 httpng 4787: return(ret);
1.17 daniel 4788: }
4789:
1.55 daniel 4790: /**
4791: * xmlParseMemory :
1.68 daniel 4792: * @buffer: an pointer to a char array
1.55 daniel 4793: * @size: the size of the array
4794: *
4795: * parse an XML in-memory block and build a tree.
4796: *
1.68 daniel 4797: * Returns the resulting document tree
1.55 daniel 4798: */
4799:
4800: xmlDocPtr xmlParseMemory(char *buffer, int size) {
1.59 daniel 4801: return(xmlSAXParseMemory(NULL, buffer, size, 0));
4802: }
4803:
4804: /**
4805: * xmlRecoverMemory :
1.68 daniel 4806: * @buffer: an pointer to a char array
1.59 daniel 4807: * @size: the size of the array
4808: *
4809: * parse an XML in-memory block and build a tree.
4810: * In the case the document is not Well Formed, a tree is built anyway
4811: *
1.68 daniel 4812: * Returns the resulting document tree
1.59 daniel 4813: */
4814:
4815: xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
4816: return(xmlSAXParseMemory(NULL, buffer, size, 1));
1.55 daniel 4817: }
1.17 daniel 4818:
1.50 daniel 4819: /**
4820: * xmlInitParserCtxt:
4821: * @ctxt: an XML parser context
4822: *
4823: * Initialize a parser context
4824: */
4825:
1.55 daniel 4826: void
4827: xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1.17 daniel 4828: {
1.69 daniel 4829: /* Allocate the Input stack */
4830: ctxt->inputTab = (xmlParserInputPtr *) malloc(5 * sizeof(xmlParserInputPtr));
4831: ctxt->inputNr = 0;
4832: ctxt->inputMax = 5;
4833: ctxt->input = NULL;
4834:
4835: /* Allocate the Node stack */
4836: ctxt->nodeTab = (xmlNodePtr *) malloc(10 * sizeof(xmlNodePtr));
4837: ctxt->nodeNr = 0;
4838: ctxt->nodeMax = 10;
4839: ctxt->node = NULL;
4840:
4841: ctxt->sax = &xmlDefaultSAXHandler;
4842: ctxt->doc = NULL;
4843: ctxt->wellFormed = 1;
4844: ctxt->record_info = 0;
4845: xmlInitNodeInfoSeq(&ctxt->node_seq);
4846: }
4847:
4848: /**
4849: * xmlFreeParserCtxt:
4850: * @ctxt: an XML parser context
4851: *
4852: * Free all the memory used by a parser context. However the parsed
4853: * document in ctxt->doc is not freed.
4854: */
4855:
4856: void
4857: xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
4858: {
4859: xmlParserInputPtr input;
4860:
4861: if (ctxt == NULL) return;
4862:
4863: while ((input = inputPop(ctxt)) != NULL) {
4864: xmlFreeInputStream(input);
4865: }
4866:
4867: if (ctxt->nodeTab != NULL) free(ctxt->nodeTab);
4868: if (ctxt->inputTab != NULL) free(ctxt->inputTab);
4869: free(ctxt);
1.17 daniel 4870: }
4871:
1.50 daniel 4872: /**
4873: * xmlClearParserCtxt:
4874: * @ctxt: an XML parser context
4875: *
4876: * Clear (release owned resources) and reinitialize a parser context
4877: */
1.17 daniel 4878:
1.55 daniel 4879: void
4880: xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1.17 daniel 4881: {
1.32 daniel 4882: xmlClearNodeInfoSeq(&ctxt->node_seq);
4883: xmlInitParserCtxt(ctxt);
1.17 daniel 4884: }
4885:
4886:
1.50 daniel 4887: /**
4888: * xmlSetupParserForBuffer:
4889: * @ctxt: an XML parser context
4890: * @buffer: a CHAR * buffer
4891: * @filename: a file name
4892: *
1.19 daniel 4893: * Setup the parser context to parse a new buffer; Clears any prior
4894: * contents from the parser context. The buffer parameter must not be
4895: * NULL, but the filename parameter can be
4896: */
1.55 daniel 4897: void
4898: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const CHAR* buffer,
1.17 daniel 4899: const char* filename)
4900: {
1.40 daniel 4901: xmlParserInputPtr input;
4902:
4903: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
4904: if (input == NULL) {
4905: perror("malloc");
4906: free(ctxt);
4907: exit(1);
4908: }
4909:
1.17 daniel 4910: xmlClearParserCtxt(ctxt);
1.40 daniel 4911: if (input->filename != NULL)
4912: input->filename = strdup(filename);
4913: else
4914: input->filename = NULL;
4915: input->line = 1;
4916: input->col = 1;
4917: input->base = buffer;
4918: input->cur = buffer;
4919:
4920: inputPush(ctxt, input);
1.17 daniel 4921: }
4922:
1.32 daniel 4923:
1.50 daniel 4924: /**
4925: * xmlParserFindNodeInfo:
4926: * @ctxt: an XML parser context
4927: * @node: an XML node within the tree
4928: *
4929: * Find the parser node info struct for a given node
4930: *
1.68 daniel 4931: * Returns an xmlParserNodeInfo block pointer or NULL
1.32 daniel 4932: */
4933: const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
4934: const xmlNode* node)
4935: {
4936: unsigned long pos;
4937:
4938: /* Find position where node should be at */
4939: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
4940: if ( ctx->node_seq.buffer[pos].node == node )
4941: return &ctx->node_seq.buffer[pos];
4942: else
4943: return NULL;
4944: }
4945:
4946:
1.50 daniel 4947: /**
4948: * xmlInitNodeInfoSeq :
4949: * @seq: a node info sequence pointer
4950: *
4951: * -- Initialize (set to initial state) node info sequence
1.32 daniel 4952: */
1.55 daniel 4953: void
4954: xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 4955: {
4956: seq->length = 0;
4957: seq->maximum = 0;
4958: seq->buffer = NULL;
4959: }
4960:
1.50 daniel 4961: /**
4962: * xmlClearNodeInfoSeq :
4963: * @seq: a node info sequence pointer
4964: *
4965: * -- Clear (release memory and reinitialize) node
1.32 daniel 4966: * info sequence
4967: */
1.55 daniel 4968: void
4969: xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 4970: {
4971: if ( seq->buffer != NULL )
4972: free(seq->buffer);
4973: xmlInitNodeInfoSeq(seq);
4974: }
4975:
4976:
1.50 daniel 4977: /**
4978: * xmlParserFindNodeInfoIndex:
4979: * @seq: a node info sequence pointer
4980: * @node: an XML node pointer
4981: *
4982: *
1.32 daniel 4983: * xmlParserFindNodeInfoIndex : Find the index that the info record for
4984: * the given node is or should be at in a sorted sequence
1.68 daniel 4985: *
4986: * Returns a long indicating the position of the record
1.32 daniel 4987: */
4988: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
4989: const xmlNode* node)
4990: {
4991: unsigned long upper, lower, middle;
4992: int found = 0;
4993:
4994: /* Do a binary search for the key */
4995: lower = 1;
4996: upper = seq->length;
4997: middle = 0;
4998: while ( lower <= upper && !found) {
4999: middle = lower + (upper - lower) / 2;
5000: if ( node == seq->buffer[middle - 1].node )
5001: found = 1;
5002: else if ( node < seq->buffer[middle - 1].node )
5003: upper = middle - 1;
5004: else
5005: lower = middle + 1;
5006: }
5007:
5008: /* Return position */
5009: if ( middle == 0 || seq->buffer[middle - 1].node < node )
5010: return middle;
5011: else
5012: return middle - 1;
5013: }
5014:
5015:
1.50 daniel 5016: /**
5017: * xmlParserAddNodeInfo:
5018: * @ctxt: an XML parser context
1.68 daniel 5019: * @info: a node info sequence pointer
1.50 daniel 5020: *
5021: * Insert node info record into the sorted sequence
1.32 daniel 5022: */
1.55 daniel 5023: void
5024: xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
1.68 daniel 5025: const xmlParserNodeInfo* info)
1.32 daniel 5026: {
5027: unsigned long pos;
5028: static unsigned int block_size = 5;
5029:
5030: /* Find pos and check to see if node is already in the sequence */
1.55 daniel 5031: pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
5032: if ( pos < ctxt->node_seq.length
5033: && ctxt->node_seq.buffer[pos].node == info->node ) {
5034: ctxt->node_seq.buffer[pos] = *info;
1.32 daniel 5035: }
5036:
5037: /* Otherwise, we need to add new node to buffer */
5038: else {
5039: /* Expand buffer by 5 if needed */
1.55 daniel 5040: if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
1.32 daniel 5041: xmlParserNodeInfo* tmp_buffer;
1.55 daniel 5042: unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
5043: *(ctxt->node_seq.maximum + block_size));
1.32 daniel 5044:
1.55 daniel 5045: if ( ctxt->node_seq.buffer == NULL )
1.32 daniel 5046: tmp_buffer = (xmlParserNodeInfo*)malloc(byte_size);
5047: else
1.55 daniel 5048: tmp_buffer = (xmlParserNodeInfo*)realloc(ctxt->node_seq.buffer, byte_size);
1.32 daniel 5049:
5050: if ( tmp_buffer == NULL ) {
1.55 daniel 5051: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.58 daniel 5052: ctxt->sax->error(ctxt, "Out of memory\n");
1.32 daniel 5053: return;
5054: }
1.55 daniel 5055: ctxt->node_seq.buffer = tmp_buffer;
5056: ctxt->node_seq.maximum += block_size;
1.32 daniel 5057: }
5058:
5059: /* If position is not at end, move elements out of the way */
1.55 daniel 5060: if ( pos != ctxt->node_seq.length ) {
1.32 daniel 5061: unsigned long i;
5062:
1.55 daniel 5063: for ( i = ctxt->node_seq.length; i > pos; i-- )
5064: ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
1.32 daniel 5065: }
5066:
5067: /* Copy element and increase length */
1.55 daniel 5068: ctxt->node_seq.buffer[pos] = *info;
5069: ctxt->node_seq.length++;
1.32 daniel 5070: }
5071: }
Webmaster