Annotation of XML/parser.c, revision 1.63
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.15 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.60 daniel 6: * Daniel.Veillard@w3.org
1.1 veillard 7: */
8:
1.26 daniel 9: #ifdef WIN32
10: #define HAVE_FCNTL_H
11: #include <io.h>
12: #else
1.9 httpng 13: #include <config.h>
1.26 daniel 14: #endif
1.1 veillard 15: #include <stdio.h>
16: #include <ctype.h>
1.14 veillard 17: #include <string.h> /* for memset() only */
1.50 daniel 18: #include <stdlib.h>
1.9 httpng 19: #include <sys/stat.h>
20: #ifdef HAVE_FCNTL_H
21: #include <fcntl.h>
22: #endif
1.10 httpng 23: #ifdef HAVE_UNISTD_H
24: #include <unistd.h>
25: #endif
1.20 daniel 26: #ifdef HAVE_ZLIB_H
27: #include <zlib.h>
28: #endif
1.1 veillard 29:
1.14 veillard 30: #include "tree.h"
1.1 veillard 31: #include "parser.h"
1.14 veillard 32: #include "entities.h"
1.61 daniel 33: #include "valid.h"
1.1 veillard 34:
1.45 daniel 35: /************************************************************************
36: * *
37: * Parser stacks related functions and macros *
38: * *
39: ************************************************************************/
1.1 veillard 40: /*
1.40 daniel 41: * Generic function for accessing stacks in the Parser Context
1.1 veillard 42: */
43:
1.31 daniel 44: #define PUSH_AND_POP(type, name) \
1.40 daniel 45: int name##Push(xmlParserCtxtPtr ctxt, type value) { \
1.31 daniel 46: if (ctxt->name##Nr >= ctxt->name##Max) { \
47: ctxt->name##Max *= 2; \
1.40 daniel 48: ctxt->name##Tab = (void *) realloc(ctxt->name##Tab, \
49: ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
50: if (ctxt->name##Tab == NULL) { \
1.31 daniel 51: fprintf(stderr, "realloc failed !\n"); \
52: exit(1); \
53: } \
54: } \
1.40 daniel 55: ctxt->name##Tab[ctxt->name##Nr] = value; \
56: ctxt->name = value; \
57: return(ctxt->name##Nr++); \
1.31 daniel 58: } \
1.40 daniel 59: type name##Pop(xmlParserCtxtPtr ctxt) { \
60: if (ctxt->name##Nr <= 0) return(0); \
61: ctxt->name##Nr--; \
1.50 daniel 62: if (ctxt->name##Nr > 0) \
63: ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
64: else \
65: ctxt->name = NULL; \
1.40 daniel 66: return(ctxt->name); \
1.31 daniel 67: } \
68:
1.40 daniel 69: PUSH_AND_POP(xmlParserInputPtr, input)
1.41 daniel 70: PUSH_AND_POP(xmlNodePtr, node)
1.40 daniel 71:
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. All of them expect a variable named "ctxt" (the parser
 * context) to be in scope at the point of use.
 *
 * Dirty macros, i.e. one needs to make assumptions on the context to use them
 *
 *   CUR_PTR  the current pointer to the CHAR to be parsed (an lvalue).
 *   CUR      the current CHAR value, i.e. a 8 bit value if compiled
 *            in ISO-Latin or UTF-8, and the current 16 bit value if compiled
 *            in UNICODE mode. This should be used internally by the parser
 *            only to compare to ASCII values otherwise it would break when
 *            running with UTF-8 encoding.
 *   NXT(n)   the n'th next CHAR. Same as CUR it should be used only
 *            to compare on ASCII based substring.
 *   SKIP(n)  skip n CHAR, and must also be used only to skip ASCII defined
 *            strings within the parser. No line/column accounting is done.
 *
 * Clean macros, not dependent of an ASCII context.
 *
 *   CURRENT  the current char value, with the full decoding of
 *            UTF-8 if we are using this mode.
 *   NEXT     skip to the next character, updating the line and column
 *            counters. When the current CHAR is 0 the input is not advanced;
 *            xmlPopInput() is called instead to drop a finished input.
 *            The expression evaluates to a CHAR pointer.
 *
 * SKIP, NXT and CUR_PTR are fully parenthesized so that they can be used
 * safely inside larger expressions (e.g. "*SKIP(2)").
 */

#define CUR (*ctxt->input->cur)
#define SKIP(val) (ctxt->input->cur += (val))
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR (ctxt->input->cur)

#define SKIP_BLANKS                                                     \
    while (IS_BLANK(*(ctxt->input->cur))) NEXT

#ifndef USE_UTF_8
#define CURRENT (*ctxt->input->cur)
#define NEXT ((*ctxt->input->cur) ?                                     \
                (((*(ctxt->input->cur) == '\n') ?                       \
                    (ctxt->input->line++, ctxt->input->col = 1) :       \
                    (ctxt->input->col++)), ctxt->input->cur++) :        \
                (xmlPopInput(ctxt), ctxt->input->cur))
#else
/* No UTF-8 aware CURRENT / NEXT are defined in this branch. */
#endif
1.42 daniel 115:
1.40 daniel 116:
1.50 daniel 117: /**
118: * xmlPopInput:
119: * @ctxt: an XML parser context
120: *
1.40 daniel 121: * xmlPopInput: the current input pointed by ctxt->input came to an end
122: * pop it and return the next char.
1.45 daniel 123: *
124: * TODO A deallocation of the popped Input structure is needed
1.50 daniel 125: * return values: the current CHAR in the parser context
1.40 daniel 126: */
1.55 daniel 127: CHAR
128: xmlPopInput(xmlParserCtxtPtr ctxt) {
1.40 daniel 129: if (ctxt->inputNr == 1) return(0); /* End of main Input */
130: inputPop(ctxt);
131: return(CUR);
132: }
133:
1.50 daniel 134: /**
135: * xmlPushInput:
136: * @ctxt: an XML parser context
137: * @input: an XML parser input fragment (entity, XML fragment ...).
138: *
1.40 daniel 139: * xmlPushInput: switch to a new input stream which is stacked on top
140: * of the previous one(s).
141: */
1.55 daniel 142: void
143: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1.40 daniel 144: if (input == NULL) return;
145: inputPush(ctxt, input);
146: }
147:
1.50 daniel 148: /**
149: * xmlNewEntityInputStream:
150: * @ctxt: an XML parser context
151: * @entity: an Entity pointer
152: *
1.45 daniel 153: * Create a new input stream based on a memory buffer.
1.50 daniel 154: * return vakues: the new input stream
1.45 daniel 155: */
1.50 daniel 156: xmlParserInputPtr
157: xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 158: xmlParserInputPtr input;
159:
160: if (entity == NULL) {
1.55 daniel 161: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
162: ctxt->sax->error(ctxt,
1.45 daniel 163: "internal: xmlNewEntityInputStream entity = NULL\n");
1.50 daniel 164: return(NULL);
1.45 daniel 165: }
166: if (entity->content == NULL) {
1.55 daniel 167: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
168: ctxt->sax->error(ctxt,
1.45 daniel 169: "internal: xmlNewEntityInputStream entity->input = NULL\n");
1.50 daniel 170: return(NULL);
1.45 daniel 171: }
172: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
173: if (input == NULL) {
1.55 daniel 174: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
175: ctxt->sax->error(ctxt, "malloc: couldn't allocate a new input stream\n");
1.50 daniel 176: return(NULL);
1.45 daniel 177: }
178: input->filename = entity->SystemID; /* TODO !!! char <- CHAR */
179: input->base = entity->content;
180: input->cur = entity->content;
181: input->line = 1;
182: input->col = 1;
1.50 daniel 183: return(input);
1.45 daniel 184: }
185:
1.59 daniel 186: /**
187: * xmlNewStringInputStream:
188: * @ctxt: an XML parser context
189: * @entity: an Entity pointer
190: *
191: * Create a new input stream based on a memory buffer.
192: * return vakues: the new input stream
193: */
194: xmlParserInputPtr
195: xmlNewStringInputStream(xmlParserCtxtPtr ctxt, CHAR *string) {
196: xmlParserInputPtr input;
197:
198: if (string == NULL) {
199: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
200: ctxt->sax->error(ctxt,
201: "internal: xmlNewStringInputStream string = NULL\n");
202: return(NULL);
203: }
204: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
205: if (input == NULL) {
206: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
207: ctxt->sax->error(ctxt, "malloc: couldn't allocate a new input stream\n");
208: return(NULL);
209: }
210: input->filename = NULL;
211: input->base = string;
212: input->cur = string;
213: input->line = 1;
214: input->col = 1;
215: return(input);
216: }
217:
1.45 daniel 218: /*
1.40 daniel 219: * A few macros needed to help building the parser.
220: */
221:
1.1 veillard 222: #ifdef UNICODE
1.30 daniel 223: /************************************************************************
224: * *
225: * UNICODE version of the macros. *
226: * *
227: ************************************************************************/
/*
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 *
 * The test is written as a direct transcription of the production.
 * Note that the argument is evaluated several times.
 *
 * IS_BLANK (production [3]) is not redefined here: it does not depend on
 * the character width and is defined once, after the encoding specific
 * sections.
 */
#define IS_CHAR(c)                                                      \
    (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) ||                 \
     (((c) >= 0x20) && ((c) <= 0xD7FF)) ||                              \
     (((c) >= 0xE000) && ((c) <= 0xFFFD)) ||                            \
     (((c) >= 0x10000) && ((c) <= 0x10FFFF)))
1.1 veillard 244:
1.22 daniel 245: /*
 * [85] BaseChar ::= ... long list see REC ...
 *
 * IS_BASECHAR(c) is non-zero when @c falls in one of the ranges or single
 * code points listed below. This is the variant compiled when UNICODE is
 * defined. The argument is evaluated many times, so it must not have
 * side effects.
 *
 * The table was produced mechanically from the text of the production.
 * VI is your friend !
 * :1,$ s/\[#x\([0-9A-Z]*\)-#x\([0-9A-Z]*\)\]/ (((c) >= 0x\1) \&\& ((c) <= 0x\2)) ||/
 * and
 * :1,$ s/#x\([0-9A-Z]*\)/ ((c) == 0x\1) ||/
 */
1.1 veillard 253: #define IS_BASECHAR(c) \
1.30 daniel 254: ((((c) >= 0x0041) && ((c) <= 0x005A)) || \
255: (((c) >= 0x0061) && ((c) <= 0x007A)) || \
256: (((c) >= 0x00C0) && ((c) <= 0x00D6)) || \
257: (((c) >= 0x00D8) && ((c) <= 0x00F6)) || \
258: (((c) >= 0x00F8) && ((c) <= 0x00FF)) || \
259: (((c) >= 0x0100) && ((c) <= 0x0131)) || \
260: (((c) >= 0x0134) && ((c) <= 0x013E)) || \
261: (((c) >= 0x0141) && ((c) <= 0x0148)) || \
262: (((c) >= 0x014A) && ((c) <= 0x017E)) || \
263: (((c) >= 0x0180) && ((c) <= 0x01C3)) || \
264: (((c) >= 0x01CD) && ((c) <= 0x01F0)) || \
265: (((c) >= 0x01F4) && ((c) <= 0x01F5)) || \
266: (((c) >= 0x01FA) && ((c) <= 0x0217)) || \
267: (((c) >= 0x0250) && ((c) <= 0x02A8)) || \
268: (((c) >= 0x02BB) && ((c) <= 0x02C1)) || \
269: ((c) == 0x0386) || \
270: (((c) >= 0x0388) && ((c) <= 0x038A)) || \
271: ((c) == 0x038C) || \
272: (((c) >= 0x038E) && ((c) <= 0x03A1)) || \
273: (((c) >= 0x03A3) && ((c) <= 0x03CE)) || \
274: (((c) >= 0x03D0) && ((c) <= 0x03D6)) || \
275: ((c) == 0x03DA) || \
276: ((c) == 0x03DC) || \
277: ((c) == 0x03DE) || \
278: ((c) == 0x03E0) || \
279: (((c) >= 0x03E2) && ((c) <= 0x03F3)) || \
280: (((c) >= 0x0401) && ((c) <= 0x040C)) || \
281: (((c) >= 0x040E) && ((c) <= 0x044F)) || \
282: (((c) >= 0x0451) && ((c) <= 0x045C)) || \
283: (((c) >= 0x045E) && ((c) <= 0x0481)) || \
284: (((c) >= 0x0490) && ((c) <= 0x04C4)) || \
285: (((c) >= 0x04C7) && ((c) <= 0x04C8)) || \
286: (((c) >= 0x04CB) && ((c) <= 0x04CC)) || \
287: (((c) >= 0x04D0) && ((c) <= 0x04EB)) || \
288: (((c) >= 0x04EE) && ((c) <= 0x04F5)) || \
289: (((c) >= 0x04F8) && ((c) <= 0x04F9)) || \
290: (((c) >= 0x0531) && ((c) <= 0x0556)) || \
291: ((c) == 0x0559) || \
292: (((c) >= 0x0561) && ((c) <= 0x0586)) || \
293: (((c) >= 0x05D0) && ((c) <= 0x05EA)) || \
294: (((c) >= 0x05F0) && ((c) <= 0x05F2)) || \
295: (((c) >= 0x0621) && ((c) <= 0x063A)) || \
296: (((c) >= 0x0641) && ((c) <= 0x064A)) || \
297: (((c) >= 0x0671) && ((c) <= 0x06B7)) || \
298: (((c) >= 0x06BA) && ((c) <= 0x06BE)) || \
299: (((c) >= 0x06C0) && ((c) <= 0x06CE)) || \
300: (((c) >= 0x06D0) && ((c) <= 0x06D3)) || \
301: ((c) == 0x06D5) || \
302: (((c) >= 0x06E5) && ((c) <= 0x06E6)) || \
303: (((c) >= 0x0905) && ((c) <= 0x0939)) || \
304: ((c) == 0x093D) || \
305: (((c) >= 0x0958) && ((c) <= 0x0961)) || \
306: (((c) >= 0x0985) && ((c) <= 0x098C)) || \
307: (((c) >= 0x098F) && ((c) <= 0x0990)) || \
308: (((c) >= 0x0993) && ((c) <= 0x09A8)) || \
309: (((c) >= 0x09AA) && ((c) <= 0x09B0)) || \
310: ((c) == 0x09B2) || \
311: (((c) >= 0x09B6) && ((c) <= 0x09B9)) || \
312: (((c) >= 0x09DC) && ((c) <= 0x09DD)) || \
313: (((c) >= 0x09DF) && ((c) <= 0x09E1)) || \
314: (((c) >= 0x09F0) && ((c) <= 0x09F1)) || \
315: (((c) >= 0x0A05) && ((c) <= 0x0A0A)) || \
316: (((c) >= 0x0A0F) && ((c) <= 0x0A10)) || \
317: (((c) >= 0x0A13) && ((c) <= 0x0A28)) || \
318: (((c) >= 0x0A2A) && ((c) <= 0x0A30)) || \
319: (((c) >= 0x0A32) && ((c) <= 0x0A33)) || \
320: (((c) >= 0x0A35) && ((c) <= 0x0A36)) || \
321: (((c) >= 0x0A38) && ((c) <= 0x0A39)) || \
322: (((c) >= 0x0A59) && ((c) <= 0x0A5C)) || \
323: ((c) == 0x0A5E) || \
324: (((c) >= 0x0A72) && ((c) <= 0x0A74)) || \
325: (((c) >= 0x0A85) && ((c) <= 0x0A8B)) || \
326: ((c) == 0x0A8D) || \
327: (((c) >= 0x0A8F) && ((c) <= 0x0A91)) || \
328: (((c) >= 0x0A93) && ((c) <= 0x0AA8)) || \
329: (((c) >= 0x0AAA) && ((c) <= 0x0AB0)) || \
330: (((c) >= 0x0AB2) && ((c) <= 0x0AB3)) || \
331: (((c) >= 0x0AB5) && ((c) <= 0x0AB9)) || \
332: ((c) == 0x0ABD) || \
333: ((c) == 0x0AE0) || \
334: (((c) >= 0x0B05) && ((c) <= 0x0B0C)) || \
335: (((c) >= 0x0B0F) && ((c) <= 0x0B10)) || \
336: (((c) >= 0x0B13) && ((c) <= 0x0B28)) || \
337: (((c) >= 0x0B2A) && ((c) <= 0x0B30)) || \
338: (((c) >= 0x0B32) && ((c) <= 0x0B33)) || \
339: (((c) >= 0x0B36) && ((c) <= 0x0B39)) || \
340: ((c) == 0x0B3D) || \
341: (((c) >= 0x0B5C) && ((c) <= 0x0B5D)) || \
342: (((c) >= 0x0B5F) && ((c) <= 0x0B61)) || \
343: (((c) >= 0x0B85) && ((c) <= 0x0B8A)) || \
344: (((c) >= 0x0B8E) && ((c) <= 0x0B90)) || \
345: (((c) >= 0x0B92) && ((c) <= 0x0B95)) || \
346: (((c) >= 0x0B99) && ((c) <= 0x0B9A)) || \
347: ((c) == 0x0B9C) || \
348: (((c) >= 0x0B9E) && ((c) <= 0x0B9F)) || \
349: (((c) >= 0x0BA3) && ((c) <= 0x0BA4)) || \
350: (((c) >= 0x0BA8) && ((c) <= 0x0BAA)) || \
351: (((c) >= 0x0BAE) && ((c) <= 0x0BB5)) || \
352: (((c) >= 0x0BB7) && ((c) <= 0x0BB9)) || \
353: (((c) >= 0x0C05) && ((c) <= 0x0C0C)) || \
354: (((c) >= 0x0C0E) && ((c) <= 0x0C10)) || \
355: (((c) >= 0x0C12) && ((c) <= 0x0C28)) || \
356: (((c) >= 0x0C2A) && ((c) <= 0x0C33)) || \
357: (((c) >= 0x0C35) && ((c) <= 0x0C39)) || \
358: (((c) >= 0x0C60) && ((c) <= 0x0C61)) || \
359: (((c) >= 0x0C85) && ((c) <= 0x0C8C)) || \
360: (((c) >= 0x0C8E) && ((c) <= 0x0C90)) || \
361: (((c) >= 0x0C92) && ((c) <= 0x0CA8)) || \
362: (((c) >= 0x0CAA) && ((c) <= 0x0CB3)) || \
363: (((c) >= 0x0CB5) && ((c) <= 0x0CB9)) || \
364: ((c) == 0x0CDE) || \
365: (((c) >= 0x0CE0) && ((c) <= 0x0CE1)) || \
366: (((c) >= 0x0D05) && ((c) <= 0x0D0C)) || \
367: (((c) >= 0x0D0E) && ((c) <= 0x0D10)) || \
368: (((c) >= 0x0D12) && ((c) <= 0x0D28)) || \
369: (((c) >= 0x0D2A) && ((c) <= 0x0D39)) || \
370: (((c) >= 0x0D60) && ((c) <= 0x0D61)) || \
371: (((c) >= 0x0E01) && ((c) <= 0x0E2E)) || \
372: ((c) == 0x0E30) || \
373: (((c) >= 0x0E32) && ((c) <= 0x0E33)) || \
374: (((c) >= 0x0E40) && ((c) <= 0x0E45)) || \
375: (((c) >= 0x0E81) && ((c) <= 0x0E82)) || \
376: ((c) == 0x0E84) || \
377: (((c) >= 0x0E87) && ((c) <= 0x0E88)) || \
378: ((c) == 0x0E8A) || \
379: ((c) == 0x0E8D) || \
380: (((c) >= 0x0E94) && ((c) <= 0x0E97)) || \
381: (((c) >= 0x0E99) && ((c) <= 0x0E9F)) || \
382: (((c) >= 0x0EA1) && ((c) <= 0x0EA3)) || \
383: ((c) == 0x0EA5) || \
384: ((c) == 0x0EA7) || \
385: (((c) >= 0x0EAA) && ((c) <= 0x0EAB)) || \
386: (((c) >= 0x0EAD) && ((c) <= 0x0EAE)) || \
387: ((c) == 0x0EB0) || \
388: (((c) >= 0x0EB2) && ((c) <= 0x0EB3)) || \
389: ((c) == 0x0EBD) || \
390: (((c) >= 0x0EC0) && ((c) <= 0x0EC4)) || \
391: (((c) >= 0x0F40) && ((c) <= 0x0F47)) || \
392: (((c) >= 0x0F49) && ((c) <= 0x0F69)) || \
393: (((c) >= 0x10A0) && ((c) <= 0x10C5)) || \
394: (((c) >= 0x10D0) && ((c) <= 0x10F6)) || \
395: ((c) == 0x1100) || \
396: (((c) >= 0x1102) && ((c) <= 0x1103)) || \
397: (((c) >= 0x1105) && ((c) <= 0x1107)) || \
398: ((c) == 0x1109) || \
399: (((c) >= 0x110B) && ((c) <= 0x110C)) || \
400: (((c) >= 0x110E) && ((c) <= 0x1112)) || \
401: ((c) == 0x113C) || \
402: ((c) == 0x113E) || \
403: ((c) == 0x1140) || \
404: ((c) == 0x114C) || \
405: ((c) == 0x114E) || \
406: ((c) == 0x1150) || \
407: (((c) >= 0x1154) && ((c) <= 0x1155)) || \
408: ((c) == 0x1159) || \
409: (((c) >= 0x115F) && ((c) <= 0x1161)) || \
410: ((c) == 0x1163) || \
411: ((c) == 0x1165) || \
412: ((c) == 0x1167) || \
413: ((c) == 0x1169) || \
414: (((c) >= 0x116D) && ((c) <= 0x116E)) || \
415: (((c) >= 0x1172) && ((c) <= 0x1173)) || \
416: ((c) == 0x1175) || \
417: ((c) == 0x119E) || \
418: ((c) == 0x11A8) || \
419: ((c) == 0x11AB) || \
420: (((c) >= 0x11AE) && ((c) <= 0x11AF)) || \
421: (((c) >= 0x11B7) && ((c) <= 0x11B8)) || \
422: ((c) == 0x11BA) || \
423: (((c) >= 0x11BC) && ((c) <= 0x11C2)) || \
424: ((c) == 0x11EB) || \
425: ((c) == 0x11F0) || \
426: ((c) == 0x11F9) || \
427: (((c) >= 0x1E00) && ((c) <= 0x1E9B)) || \
428: (((c) >= 0x1EA0) && ((c) <= 0x1EF9)) || \
429: (((c) >= 0x1F00) && ((c) <= 0x1F15)) || \
430: (((c) >= 0x1F18) && ((c) <= 0x1F1D)) || \
431: (((c) >= 0x1F20) && ((c) <= 0x1F45)) || \
432: (((c) >= 0x1F48) && ((c) <= 0x1F4D)) || \
433: (((c) >= 0x1F50) && ((c) <= 0x1F57)) || \
434: ((c) == 0x1F59) || \
435: ((c) == 0x1F5B) || \
436: ((c) == 0x1F5D) || \
437: (((c) >= 0x1F5F) && ((c) <= 0x1F7D)) || \
438: (((c) >= 0x1F80) && ((c) <= 0x1FB4)) || \
439: (((c) >= 0x1FB6) && ((c) <= 0x1FBC)) || \
440: ((c) == 0x1FBE) || \
441: (((c) >= 0x1FC2) && ((c) <= 0x1FC4)) || \
442: (((c) >= 0x1FC6) && ((c) <= 0x1FCC)) || \
443: (((c) >= 0x1FD0) && ((c) <= 0x1FD3)) || \
444: (((c) >= 0x1FD6) && ((c) <= 0x1FDB)) || \
445: (((c) >= 0x1FE0) && ((c) <= 0x1FEC)) || \
446: (((c) >= 0x1FF2) && ((c) <= 0x1FF4)) || \
447: (((c) >= 0x1FF6) && ((c) <= 0x1FFC)) || \
448: ((c) == 0x2126) || \
449: (((c) >= 0x212A) && ((c) <= 0x212B)) || \
450: ((c) == 0x212E) || \
451: (((c) >= 0x2180) && ((c) <= 0x2182)) || \
452: (((c) >= 0x3041) && ((c) <= 0x3094)) || \
453: (((c) >= 0x30A1) && ((c) <= 0x30FA)) || \
454: (((c) >= 0x3105) && ((c) <= 0x312C)) || \
455: (((c) >= 0xAC00) && ((c) <= 0xD7A3)))
1.1 veillard 456:
/*
 * [88] Digit ::= ... long list see REC ...
 *
 * Every decimal digit block of the production is an inclusive range,
 * expressed through the local XML_DIGIT_RANGE helper. The argument is
 * evaluated several times.
 */
#define XML_DIGIT_RANGE(c, lo, hi) (((c) >= (lo)) && ((c) <= (hi)))

#define IS_DIGIT(c)                                                     \
    (XML_DIGIT_RANGE(c, 0x0030, 0x0039) ||                              \
     XML_DIGIT_RANGE(c, 0x0660, 0x0669) ||                              \
     XML_DIGIT_RANGE(c, 0x06F0, 0x06F9) ||                              \
     XML_DIGIT_RANGE(c, 0x0966, 0x096F) ||                              \
     XML_DIGIT_RANGE(c, 0x09E6, 0x09EF) ||                              \
     XML_DIGIT_RANGE(c, 0x0A66, 0x0A6F) ||                              \
     XML_DIGIT_RANGE(c, 0x0AE6, 0x0AEF) ||                              \
     XML_DIGIT_RANGE(c, 0x0B66, 0x0B6F) ||                              \
     XML_DIGIT_RANGE(c, 0x0BE7, 0x0BEF) ||                              \
     XML_DIGIT_RANGE(c, 0x0C66, 0x0C6F) ||                              \
     XML_DIGIT_RANGE(c, 0x0CE6, 0x0CEF) ||                              \
     XML_DIGIT_RANGE(c, 0x0D66, 0x0D6F) ||                              \
     XML_DIGIT_RANGE(c, 0x0E50, 0x0E59) ||                              \
     XML_DIGIT_RANGE(c, 0x0ED0, 0x0ED9) ||                              \
     XML_DIGIT_RANGE(c, 0x0F20, 0x0F29))
1.1 veillard 476:
1.22 daniel 477: /*
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * IS_COMBINING(c) is non-zero when @c falls in one of the ranges or single
 * code points listed below. This is the variant compiled when UNICODE is
 * defined. The argument is evaluated many times, so it must not have
 * side effects.
 */
1.30 daniel 480: #define IS_COMBINING(c) \
481: ((((c) >= 0x0300) && ((c) <= 0x0345)) || \
482: (((c) >= 0x0360) && ((c) <= 0x0361)) || \
483: (((c) >= 0x0483) && ((c) <= 0x0486)) || \
484: (((c) >= 0x0591) && ((c) <= 0x05A1)) || \
485: (((c) >= 0x05A3) && ((c) <= 0x05B9)) || \
486: (((c) >= 0x05BB) && ((c) <= 0x05BD)) || \
487: ((c) == 0x05BF) || \
488: (((c) >= 0x05C1) && ((c) <= 0x05C2)) || \
489: ((c) == 0x05C4) || \
490: (((c) >= 0x064B) && ((c) <= 0x0652)) || \
491: ((c) == 0x0670) || \
492: (((c) >= 0x06D6) && ((c) <= 0x06DC)) || \
493: (((c) >= 0x06DD) && ((c) <= 0x06DF)) || \
494: (((c) >= 0x06E0) && ((c) <= 0x06E4)) || \
495: (((c) >= 0x06E7) && ((c) <= 0x06E8)) || \
496: (((c) >= 0x06EA) && ((c) <= 0x06ED)) || \
497: (((c) >= 0x0901) && ((c) <= 0x0903)) || \
498: ((c) == 0x093C) || \
499: (((c) >= 0x093E) && ((c) <= 0x094C)) || \
500: ((c) == 0x094D) || \
501: (((c) >= 0x0951) && ((c) <= 0x0954)) || \
502: (((c) >= 0x0962) && ((c) <= 0x0963)) || \
503: (((c) >= 0x0981) && ((c) <= 0x0983)) || \
504: ((c) == 0x09BC) || \
505: ((c) == 0x09BE) || \
506: ((c) == 0x09BF) || \
507: (((c) >= 0x09C0) && ((c) <= 0x09C4)) || \
508: (((c) >= 0x09C7) && ((c) <= 0x09C8)) || \
509: (((c) >= 0x09CB) && ((c) <= 0x09CD)) || \
510: ((c) == 0x09D7) || \
511: (((c) >= 0x09E2) && ((c) <= 0x09E3)) || \
512: ((c) == 0x0A02) || \
513: ((c) == 0x0A3C) || \
514: ((c) == 0x0A3E) || \
515: ((c) == 0x0A3F) || \
516: (((c) >= 0x0A40) && ((c) <= 0x0A42)) || \
517: (((c) >= 0x0A47) && ((c) <= 0x0A48)) || \
518: (((c) >= 0x0A4B) && ((c) <= 0x0A4D)) || \
519: (((c) >= 0x0A70) && ((c) <= 0x0A71)) || \
520: (((c) >= 0x0A81) && ((c) <= 0x0A83)) || \
521: ((c) == 0x0ABC) || \
522: (((c) >= 0x0ABE) && ((c) <= 0x0AC5)) || \
523: (((c) >= 0x0AC7) && ((c) <= 0x0AC9)) || \
524: (((c) >= 0x0ACB) && ((c) <= 0x0ACD)) || \
525: (((c) >= 0x0B01) && ((c) <= 0x0B03)) || \
526: ((c) == 0x0B3C) || \
527: (((c) >= 0x0B3E) && ((c) <= 0x0B43)) || \
528: (((c) >= 0x0B47) && ((c) <= 0x0B48)) || \
529: (((c) >= 0x0B4B) && ((c) <= 0x0B4D)) || \
530: (((c) >= 0x0B56) && ((c) <= 0x0B57)) || \
531: (((c) >= 0x0B82) && ((c) <= 0x0B83)) || \
532: (((c) >= 0x0BBE) && ((c) <= 0x0BC2)) || \
533: (((c) >= 0x0BC6) && ((c) <= 0x0BC8)) || \
534: (((c) >= 0x0BCA) && ((c) <= 0x0BCD)) || \
535: ((c) == 0x0BD7) || \
536: (((c) >= 0x0C01) && ((c) <= 0x0C03)) || \
537: (((c) >= 0x0C3E) && ((c) <= 0x0C44)) || \
538: (((c) >= 0x0C46) && ((c) <= 0x0C48)) || \
539: (((c) >= 0x0C4A) && ((c) <= 0x0C4D)) || \
540: (((c) >= 0x0C55) && ((c) <= 0x0C56)) || \
541: (((c) >= 0x0C82) && ((c) <= 0x0C83)) || \
542: (((c) >= 0x0CBE) && ((c) <= 0x0CC4)) || \
543: (((c) >= 0x0CC6) && ((c) <= 0x0CC8)) || \
544: (((c) >= 0x0CCA) && ((c) <= 0x0CCD)) || \
545: (((c) >= 0x0CD5) && ((c) <= 0x0CD6)) || \
546: (((c) >= 0x0D02) && ((c) <= 0x0D03)) || \
547: (((c) >= 0x0D3E) && ((c) <= 0x0D43)) || \
548: (((c) >= 0x0D46) && ((c) <= 0x0D48)) || \
549: (((c) >= 0x0D4A) && ((c) <= 0x0D4D)) || \
550: ((c) == 0x0D57) || \
551: ((c) == 0x0E31) || \
552: (((c) >= 0x0E34) && ((c) <= 0x0E3A)) || \
553: (((c) >= 0x0E47) && ((c) <= 0x0E4E)) || \
554: ((c) == 0x0EB1) || \
555: (((c) >= 0x0EB4) && ((c) <= 0x0EB9)) || \
556: (((c) >= 0x0EBB) && ((c) <= 0x0EBC)) || \
557: (((c) >= 0x0EC8) && ((c) <= 0x0ECD)) || \
558: (((c) >= 0x0F18) && ((c) <= 0x0F19)) || \
559: ((c) == 0x0F35) || \
560: ((c) == 0x0F37) || \
561: ((c) == 0x0F39) || \
562: ((c) == 0x0F3E) || \
563: ((c) == 0x0F3F) || \
564: (((c) >= 0x0F71) && ((c) <= 0x0F84)) || \
565: (((c) >= 0x0F86) && ((c) <= 0x0F8B)) || \
566: (((c) >= 0x0F90) && ((c) <= 0x0F95)) || \
567: ((c) == 0x0F97) || \
568: (((c) >= 0x0F99) && ((c) <= 0x0FAD)) || \
569: (((c) >= 0x0FB1) && ((c) <= 0x0FB7)) || \
570: ((c) == 0x0FB9) || \
571: (((c) >= 0x20D0) && ((c) <= 0x20DC)) || \
572: ((c) == 0x20E1) || \
573: (((c) >= 0x302A) && ((c) <= 0x302F)) || \
574: ((c) == 0x3099) || \
575: ((c) == 0x309A))
1.3 veillard 576:
/*
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * The argument is evaluated several times.
 */
#define IS_EXTENDER(c)                                                  \
    (((c) == 0xb7) || ((c) == 0x2d0) || ((c) == 0x2d1) ||               \
     ((c) == 0x387) || ((c) == 0x640) || ((c) == 0xe46) ||              \
     ((c) == 0xec6) || ((c) == 0x3005) ||                               \
     (((c) >= 0x3031) && ((c) <= 0x3035)) ||                            \
     (((c) >= 0x309d) && ((c) <= 0x309e)) ||                            \
     (((c) >= 0x30fc) && ((c) <= 0x30fe)))
1.3 veillard 589:
/*
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * Only the code points listed by the production are accepted; the
 * compatibility range 0xF900-0xFA2D is not part of it.
 */
#define IS_IDEOGRAPHIC(c)                                               \
    ((((c) >= 0x4e00) && ((c) <= 0x9fa5)) ||                            \
     (((c) >= 0x3021) && ((c) <= 0x3029)) ||                            \
     ((c) == 0x3007))

/*
 * [84] Letter ::= BaseChar | Ideographic
 */
#define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c))
603:
604: #else
1.55 daniel 605: #ifndef USE_UTF_8
1.30 daniel 606: /************************************************************************
607: * *
1.55 daniel 608: * 8bits / ISO-Latin version of the macros. *
1.30 daniel 609: * *
610: ************************************************************************/
/*
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 */
#define IS_CHAR(c)                                                      \
    ((((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) ||                \
      (((c) >= 0x20) && ((c) != 0xFFFE) && ((c) != 0xFFFF))) &&         \
     (((c) <= 0xD7FF) || ((c) >= 0xE000)) && ((c) <= 0x10FFFF))

/*
 * [85] BaseChar ::= ... long list see REC ...
 *
 * Restriction of the production to the 8 bits range: A-Z, a-z and the
 * accented letters 0xC0-0xFF except 0xD7 (multiplication sign) and
 * 0xF7 (division sign). 0xAA and 0xBA are not BaseChar in the production.
 */
#define IS_BASECHAR(c)                                                  \
    ((((c) >= 0x41) && ((c) <= 0x5a)) ||                                \
     (((c) >= 0x61) && ((c) <= 0x7a)) ||                                \
     (((c) >= 0xc0) && ((c) <= 0xd6)) ||                                \
     (((c) >= 0xd8) && ((c) <= 0xf6)) ||                                \
     (((c) >= 0xf8) && ((c) <= 0xff)))

/*
 * [88] Digit ::= ... long list see REC ...
 */
#define IS_DIGIT(c) (((c) >= 0x30) && ((c) <= 0x39))

/*
 * [84] Letter ::= BaseChar | Ideographic
 *
 * There is no Ideographic character in the 8 bits range.
 */
#define IS_LETTER(c) IS_BASECHAR(c)


/*
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * There is no CombiningChar in the 8 bits range.
 */
#define IS_COMBINING(c) 0

/*
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Only #x00B7 fits in the 8 bits range.
 */
#define IS_EXTENDER(c) ((c) == 0xb7)
655:
1.55 daniel 656: #else /* USE_UTF_8 */
657: /************************************************************************
658: * *
659: * 8bits / UTF-8 version of the macros. *
660: * *
661: ************************************************************************/
662:
#error "USE_UTF_8: the UTF-8 version of the character class macros is not implemented yet"
664: #endif /* USE_UTF_8 */
1.21 daniel 665: #endif /* !UNICODE */
1.1 veillard 666:
/*
 * Blank chars: space, tab, line feed and carriage return.
 *
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 */
#define IS_BLANK(c) (((c) == 0x20) || ((c) == 0x09) || ((c) == 0xa) || \
((c) == 0x0D))

/*
 * Characters allowed in a public identifier literal.
 *
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * The argument is evaluated several times.
 */
#define IS_PUBIDCHAR(c)                                                 \
    (/* [a-zA-Z0-9] */                                                  \
     (((c) >= 'a') && ((c) <= 'z')) ||                                  \
     (((c) >= 'A') && ((c) <= 'Z')) ||                                  \
     (((c) >= '0') && ((c) <= '9')) ||                                  \
     /* #x20 | #xD | #xA */                                             \
     ((c) == 0x20) || ((c) == 0x0D) || ((c) == 0x0A) ||                 \
     /* [-'()+,./:=?;!*#@$_%] */                                        \
     ((c) == '-') || ((c) == '\'') || ((c) == '(') || ((c) == ')') ||   \
     ((c) == '+') || ((c) == ',') || ((c) == '.') || ((c) == '/') ||    \
     ((c) == ':') || ((c) == '=') || ((c) == '?') || ((c) == ';') ||    \
     ((c) == '!') || ((c) == '*') || ((c) == '#') || ((c) == '@') ||    \
     ((c) == '$') || ((c) == '_') || ((c) == '%'))
1.1 veillard 688:
689: #define SKIP_EOL(p) \
690: if (*(p) == 0x13) { p++ ; if (*(p) == 0x10) p++; } \
691: if (*(p) == 0x10) { p++ ; if (*(p) == 0x13) p++; }
692:
693: #define MOVETO_ENDTAG(p) \
1.39 daniel 694: while (IS_CHAR(*p) && (*(p) != '>')) (p)++
1.1 veillard 695:
696: #define MOVETO_STARTTAG(p) \
1.39 daniel 697: while (IS_CHAR(*p) && (*(p) != '<')) (p)++
1.1 veillard 698:
1.28 daniel 699: /************************************************************************
700: * *
701: * Commodity functions to handle CHARs *
702: * *
703: ************************************************************************/
704:
1.50 daniel 705: /**
706: * xmlStrndup:
707: * @cur: the input CHAR *
708: * @len: the len of @cur
709: *
710: * a strndup for array of CHAR's
711: * return values: a new CHAR * or NULL
1.1 veillard 712: */
713:
1.55 daniel 714: CHAR *
715: xmlStrndup(const CHAR *cur, int len) {
1.1 veillard 716: CHAR *ret = malloc((len + 1) * sizeof(CHAR));
717:
718: if (ret == NULL) {
719: fprintf(stderr, "malloc of %d byte failed\n",
720: (len + 1) * sizeof(CHAR));
721: return(NULL);
722: }
723: memcpy(ret, cur, len * sizeof(CHAR));
724: ret[len] = 0;
725: return(ret);
726: }
727:
1.50 daniel 728: /**
729: * xmlStrdup:
730: * @cur: the input CHAR *
731: *
732: * a strdup for array of CHAR's
733: * return values: a new CHAR * or NULL
1.1 veillard 734: */
735:
1.55 daniel 736: CHAR *
737: xmlStrdup(const CHAR *cur) {
1.6 httpng 738: const CHAR *p = cur;
1.1 veillard 739:
740: while (IS_CHAR(*p)) p++;
741: return(xmlStrndup(cur, p - cur));
742: }
743:
1.50 daniel 744: /**
745: * xmlCharStrndup:
746: * @cur: the input char *
747: * @len: the len of @cur
748: *
749: * a strndup for char's to CHAR's
750: * return values: a new CHAR * or NULL
1.45 daniel 751: */
752:
1.55 daniel 753: CHAR *
754: xmlCharStrndup(const char *cur, int len) {
1.45 daniel 755: int i;
756: CHAR *ret = malloc((len + 1) * sizeof(CHAR));
757:
758: if (ret == NULL) {
759: fprintf(stderr, "malloc of %d byte failed\n",
760: (len + 1) * sizeof(CHAR));
761: return(NULL);
762: }
763: for (i = 0;i < len;i++)
764: ret[i] = (CHAR) cur[i];
765: ret[len] = 0;
766: return(ret);
767: }
768:
1.50 daniel 769: /**
770: * xmlCharStrdup:
771: * @cur: the input char *
772: * @len: the len of @cur
773: *
774: * a strdup for char's to CHAR's
775: * return values: a new CHAR * or NULL
1.45 daniel 776: */
777:
1.55 daniel 778: CHAR *
779: xmlCharStrdup(const char *cur) {
1.45 daniel 780: const char *p = cur;
781:
782: while (*p != '\0') p++;
783: return(xmlCharStrndup(cur, p - cur));
784: }
785:
1.50 daniel 786: /**
787: * xmlStrcmp:
788: * @str1: the first CHAR *
789: * @str2: the second CHAR *
790: *
791: * a strcmp for CHAR's
792: * return values: the integer result of the comparison
1.14 veillard 793: */
794:
1.55 daniel 795: int
796: xmlStrcmp(const CHAR *str1, const CHAR *str2) {
1.14 veillard 797: register int tmp;
798:
799: do {
800: tmp = *str1++ - *str2++;
801: if (tmp != 0) return(tmp);
802: } while ((*str1 != 0) && (*str2 != 0));
803: return (*str1 - *str2);
804: }
805:
1.50 daniel 806: /**
807: * xmlStrncmp:
808: * @str1: the first CHAR *
809: * @str2: the second CHAR *
810: * @len: the max comparison length
811: *
812: * a strncmp for CHAR's
813: * return values: the integer result of the comparison
1.14 veillard 814: */
815:
1.55 daniel 816: int
817: xmlStrncmp(const CHAR *str1, const CHAR *str2, int len) {
1.14 veillard 818: register int tmp;
819:
820: if (len <= 0) return(0);
821: do {
822: tmp = *str1++ - *str2++;
823: if (tmp != 0) return(tmp);
824: len--;
825: if (len <= 0) return(0);
826: } while ((*str1 != 0) && (*str2 != 0));
827: return (*str1 - *str2);
828: }
829:
1.50 daniel 830: /**
831: * xmlStrchr:
832: * @str: the CHAR * array
833: * @val: the CHAR to search
834: *
835: * a strchr for CHAR's
836: * return values: the CHAR * for the first occurence or NULL.
1.14 veillard 837: */
838:
1.55 daniel 839: CHAR *
840: xmlStrchr(const CHAR *str, CHAR val) {
1.14 veillard 841: while (*str != 0) {
842: if (*str == val) return((CHAR *) str);
843: str++;
844: }
845: return(NULL);
846: }
1.28 daniel 847:
1.50 daniel 848: /**
849: * xmlStrlen:
850: * @str: the CHAR * array
851: *
852: * lenght of a CHAR's string
853: * return values: the number of CHAR contained in the ARRAY.
1.45 daniel 854: */
855:
1.55 daniel 856: int
857: xmlStrlen(const CHAR *str) {
1.45 daniel 858: int len = 0;
859:
860: if (str == NULL) return(0);
861: while (*str != 0) {
862: str++;
863: len++;
864: }
865: return(len);
866: }
867:
1.50 daniel 868: /**
869: * xmlStrncat:
870: * @first: the original CHAR * array
871: * @add: the CHAR * array added
872: * @len: the length of @add
873: *
874: * a strncat for array of CHAR's
875: * return values: a new CHAR * containing the concatenated string.
1.45 daniel 876: */
877:
1.55 daniel 878: CHAR *
879: xmlStrncat(CHAR *cur, const CHAR *add, int len) {
1.45 daniel 880: int size;
881: CHAR *ret;
882:
883: if ((add == NULL) || (len == 0))
884: return(cur);
885: if (cur == NULL)
886: return(xmlStrndup(add, len));
887:
888: size = xmlStrlen(cur);
889: ret = realloc(cur, (size + len + 1) * sizeof(CHAR));
890: if (ret == NULL) {
891: fprintf(stderr, "xmlStrncat: realloc of %d byte failed\n",
892: (size + len + 1) * sizeof(CHAR));
893: return(cur);
894: }
895: memcpy(&ret[size], add, len * sizeof(CHAR));
896: ret[size + len] = 0;
897: return(ret);
898: }
899:
1.50 daniel 900: /**
901: * xmlStrcat:
902: * @first: the original CHAR * array
903: * @add: the CHAR * array added
904: *
905: * a strcat for array of CHAR's
906: * return values: a new CHAR * containing the concatenated string.
1.45 daniel 907: */
908:
1.55 daniel 909: CHAR *
910: xmlStrcat(CHAR *cur, const CHAR *add) {
1.45 daniel 911: const CHAR *p = add;
912:
913: if (add == NULL) return(cur);
914: if (cur == NULL)
915: return(xmlStrdup(add));
916:
917: while (IS_CHAR(*p)) p++;
918: return(xmlStrncat(cur, add, p - add));
919: }
920:
921: /************************************************************************
922: * *
923: * Commodity functions, cleanup needed ? *
924: * *
925: ************************************************************************/
926:
1.50 daniel 927: /**
928: * areBlanks:
929: * @ctxt: an XML parser context
930: * @str: a CHAR *
931: * @len: the size of @str
932: *
1.45 daniel 933: * Is this a sequence of blank chars that one can ignore ?
1.50 daniel 934: *
935: * TODO: to be corrected accodingly to DTD information if available
936: * return values: 1 if ignorable 0 otherwise.
1.45 daniel 937: */
938:
939: static int areBlanks(xmlParserCtxtPtr ctxt, const CHAR *str, int len) {
940: int i;
941: xmlNodePtr lastChild;
942:
943: for (i = 0;i < len;i++)
944: if (!(IS_BLANK(str[i]))) return(0);
945:
946: if (CUR != '<') return(0);
947: lastChild = xmlGetLastChild(ctxt->node);
948: if (lastChild == NULL) {
949: if (ctxt->node->content != NULL) return(0);
950: } else if (xmlNodeIsText(lastChild))
951: return(0);
952: return(1);
953: }
954:
1.50 daniel 955: /**
956: * xmlHandleEntity:
957: * @ctxt: an XML parser context
958: * @entity: an XML entity pointer.
959: *
960: * Default handling of defined entities, when should we define a new input
1.45 daniel 961: * stream ? When do we just handle that as a set of chars ?
1.50 daniel 962: * TODO: we should call the SAX handler here and have it resolve the issue
1.45 daniel 963: */
964:
1.55 daniel 965: void
966: xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 967: int len;
1.50 daniel 968: xmlParserInputPtr input;
1.45 daniel 969:
970: if (entity->content == NULL) {
1.55 daniel 971: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
972: ctxt->sax->error(ctxt, "xmlHandleEntity %s: content == NULL\n",
1.45 daniel 973: entity->name);
1.59 daniel 974: ctxt->wellFormed = 0;
1.45 daniel 975: return;
976: }
977: len = xmlStrlen(entity->content);
978: if (len <= 2) goto handle_as_char;
979:
980: /*
981: * Redefine its content as an input stream.
982: */
1.50 daniel 983: input = xmlNewEntityInputStream(ctxt, entity);
984: xmlPushInput(ctxt, input);
1.45 daniel 985: return;
986:
987: handle_as_char:
988: /*
989: * Just handle the content as a set of chars.
990: */
991: if (ctxt->sax != NULL)
992: ctxt->sax->characters(ctxt, entity->content, 0, len);
993:
994: }
995:
996: /*
997: * Forward declarations for recursive behaviour.
998: */
999: xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt);
1.50 daniel 1000: CHAR *xmlParsePEReference(xmlParserCtxtPtr ctxt);
1001: CHAR *xmlParseReference(xmlParserCtxtPtr ctxt);
1.45 daniel 1002:
1.28 daniel 1003: /************************************************************************
1004: * *
1005: * Extra stuff for namespace support *
1006: * Relates to http://www.w3.org/TR/WD-xml-names *
1007: * *
1008: ************************************************************************/
1009:
1.50 daniel 1010: /**
1011: * xmlNamespaceParseNCName:
1012: * @ctxt: an XML parser context
1013: *
1014: * parse an XML namespace name.
1.28 daniel 1015: *
1016: * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
1017: *
1018: * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
1019: * CombiningChar | Extender
1.50 daniel 1020: * return values: the namespace name or NULL
1.28 daniel 1021: */
1022:
1.55 daniel 1023: CHAR *
1024: xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
1.28 daniel 1025: const CHAR *q;
1026: CHAR *ret = NULL;
1027:
1.40 daniel 1028: if (!IS_LETTER(CUR) && (CUR != '_')) return(NULL);
1029: q = NEXT;
1.28 daniel 1030:
1.40 daniel 1031: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1032: (CUR == '.') || (CUR == '-') ||
1033: (CUR == '_') ||
1034: (IS_COMBINING(CUR)) ||
1035: (IS_EXTENDER(CUR)))
1036: NEXT;
1.28 daniel 1037:
1.40 daniel 1038: ret = xmlStrndup(q, CUR_PTR - q);
1.28 daniel 1039:
1040: return(ret);
1041: }
1042:
1.50 daniel 1043: /**
1044: * xmlNamespaceParseQName:
1045: * @ctxt: an XML parser context
1046: * @prefix: a CHAR **
1047: *
1048: * parse an XML qualified name
1.28 daniel 1049: *
1050: * [NS 5] QName ::= (Prefix ':')? LocalPart
1051: *
1052: * [NS 6] Prefix ::= NCName
1053: *
1054: * [NS 7] LocalPart ::= NCName
1.50 daniel 1055: * return values: the function returns the local part, and prefix is updated
1056: * to get the Prefix if any.
1.28 daniel 1057: */
1058:
1.55 daniel 1059: CHAR *
1060: xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, CHAR **prefix) {
1.28 daniel 1061: CHAR *ret = NULL;
1062:
1063: *prefix = NULL;
1064: ret = xmlNamespaceParseNCName(ctxt);
1.40 daniel 1065: if (CUR == ':') {
1.28 daniel 1066: *prefix = ret;
1.40 daniel 1067: NEXT;
1.28 daniel 1068: ret = xmlNamespaceParseNCName(ctxt);
1069: }
1070:
1071: return(ret);
1072: }
1073:
1.50 daniel 1074: /**
1075: * xmlNamespaceParseNSDef:
1076: * @ctxt: an XML parser context
1077: *
1078: * parse a namespace prefix declaration
1.28 daniel 1079: *
1080: * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
1081: *
1082: * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
1.50 daniel 1083: * return values: the namespace name
1.28 daniel 1084: */
1085:
1.55 daniel 1086: CHAR *
1087: xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
1.28 daniel 1088: CHAR *name = NULL;
1089:
1.40 daniel 1090: if ((CUR == 'x') && (NXT(1) == 'm') &&
1091: (NXT(2) == 'l') && (NXT(3) == 'n') &&
1092: (NXT(4) == 's')) {
1093: SKIP(5);
1094: if (CUR == ':') {
1095: NEXT;
1.28 daniel 1096: name = xmlNamespaceParseNCName(ctxt);
1097: }
1098: }
1.39 daniel 1099: return(name);
1.28 daniel 1100: }
1101:
1.50 daniel 1102: /**
1103: * xmlParseQuotedString:
1104: * @ctxt: an XML parser context
1105: *
1.45 daniel 1106: * [OLD] Parse and return a string between quotes or doublequotes
1.50 daniel 1107: * return values: the string parser or NULL.
1.45 daniel 1108: */
1.55 daniel 1109: CHAR *
1110: xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
1.45 daniel 1111: CHAR *ret = NULL;
1112: const CHAR *q;
1113:
1114: if (CUR == '"') {
1115: NEXT;
1116: q = CUR_PTR;
1117: while (IS_CHAR(CUR) && (CUR != '"')) NEXT;
1.55 daniel 1118: if (CUR != '"') {
1119: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.58 daniel 1120: ctxt->sax->error(ctxt, "String not closed \"%.50s\"\n", q);
1.59 daniel 1121: ctxt->wellFormed = 0;
1.55 daniel 1122: } else {
1.45 daniel 1123: ret = xmlStrndup(q, CUR_PTR - q);
1124: NEXT;
1125: }
1126: } else if (CUR == '\''){
1127: NEXT;
1128: q = CUR_PTR;
1129: while (IS_CHAR(CUR) && (CUR != '\'')) NEXT;
1.55 daniel 1130: if (CUR != '\'') {
1131: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.58 daniel 1132: ctxt->sax->error(ctxt, "String not closed \"%.50s\"\n", q);
1.59 daniel 1133: ctxt->wellFormed = 0;
1.55 daniel 1134: } else {
1.45 daniel 1135: ret = xmlStrndup(q, CUR_PTR - q);
1136: NEXT;
1137: }
1138: }
1139: return(ret);
1140: }
1141:
1.50 daniel 1142: /**
1143: * xmlParseNamespace:
1144: * @ctxt: an XML parser context
1145: *
1.45 daniel 1146: * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
1147: *
1148: * This is what the older xml-name Working Draft specified, a bunch of
1149: * other stuff may still rely on it, so support is still here as
1150: * if ot was declared on the root of the Tree:-(
1151: */
1152:
1.55 daniel 1153: void
1154: xmlParseNamespace(xmlParserCtxtPtr ctxt) {
1.45 daniel 1155: CHAR *href = NULL;
1156: CHAR *prefix = NULL;
1157: int garbage = 0;
1158:
1159: /*
1160: * We just skipped "namespace" or "xml:namespace"
1161: */
1162: SKIP_BLANKS;
1163:
1164: while (IS_CHAR(CUR) && (CUR != '>')) {
1165: /*
1166: * We can have "ns" or "prefix" attributes
1167: * Old encoding as 'href' or 'AS' attributes is still supported
1168: */
1169: if ((CUR == 'n') && (NXT(1) == 's')) {
1170: garbage = 0;
1171: SKIP(2);
1172: SKIP_BLANKS;
1173:
1174: if (CUR != '=') continue;
1175: NEXT;
1176: SKIP_BLANKS;
1177:
1178: href = xmlParseQuotedString(ctxt);
1179: SKIP_BLANKS;
1180: } else if ((CUR == 'h') && (NXT(1) == 'r') &&
1181: (NXT(2) == 'e') && (NXT(3) == 'f')) {
1182: garbage = 0;
1183: SKIP(4);
1184: SKIP_BLANKS;
1185:
1186: if (CUR != '=') continue;
1187: NEXT;
1188: SKIP_BLANKS;
1189:
1190: href = xmlParseQuotedString(ctxt);
1191: SKIP_BLANKS;
1192: } else if ((CUR == 'p') && (NXT(1) == 'r') &&
1193: (NXT(2) == 'e') && (NXT(3) == 'f') &&
1194: (NXT(4) == 'i') && (NXT(5) == 'x')) {
1195: garbage = 0;
1196: SKIP(6);
1197: SKIP_BLANKS;
1198:
1199: if (CUR != '=') continue;
1200: NEXT;
1201: SKIP_BLANKS;
1202:
1203: prefix = xmlParseQuotedString(ctxt);
1204: SKIP_BLANKS;
1205: } else if ((CUR == 'A') && (NXT(1) == 'S')) {
1206: garbage = 0;
1207: SKIP(2);
1208: SKIP_BLANKS;
1209:
1210: if (CUR != '=') continue;
1211: NEXT;
1212: SKIP_BLANKS;
1213:
1214: prefix = xmlParseQuotedString(ctxt);
1215: SKIP_BLANKS;
1216: } else if ((CUR == '?') && (NXT(1) == '>')) {
1217: garbage = 0;
1218: CUR_PTR ++;
1219: } else {
1220: /*
1221: * Found garbage when parsing the namespace
1222: */
1223: if (!garbage)
1.55 daniel 1224: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1225: ctxt->sax->error(ctxt, "xmlParseNamespace found garbage\n");
1.59 daniel 1226: ctxt->wellFormed = 0;
1.45 daniel 1227: NEXT;
1228: }
1229: }
1230:
1231: MOVETO_ENDTAG(CUR_PTR);
1232: NEXT;
1233:
1234: /*
1235: * Register the DTD.
1236: */
1237: if (href != NULL)
1238: xmlNewGlobalNs(ctxt->doc, href, prefix);
1239:
1240: if (prefix != NULL) free(prefix);
1241: if (href != NULL) free(href);
1242: }
1243:
1.28 daniel 1244: /************************************************************************
1245: * *
1246: * The parser itself *
1247: * Relates to http://www.w3.org/TR/REC-xml *
1248: * *
1249: ************************************************************************/
1.14 veillard 1250:
1.50 daniel 1251: /**
1252: * xmlParseName:
1253: * @ctxt: an XML parser context
1254: *
1255: * parse an XML name.
1.22 daniel 1256: *
1257: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1258: * CombiningChar | Extender
1259: *
1260: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1261: *
1262: * [6] Names ::= Name (S Name)*
1.50 daniel 1263: * return values: the Name parsed or NULL
1.1 veillard 1264: */
1265:
1.55 daniel 1266: CHAR *
1267: xmlParseName(xmlParserCtxtPtr ctxt) {
1.17 daniel 1268: const CHAR *q;
1269: CHAR *ret = NULL;
1.1 veillard 1270:
1.40 daniel 1271: if (!IS_LETTER(CUR) && (CUR != '_') &&
1272: (CUR != ':')) return(NULL);
1273: q = NEXT;
1274:
1275: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1276: (CUR == '.') || (CUR == '-') ||
1277: (CUR == '_') || (CUR == ':') ||
1278: (IS_COMBINING(CUR)) ||
1279: (IS_EXTENDER(CUR)))
1280: NEXT;
1.22 daniel 1281:
1.40 daniel 1282: ret = xmlStrndup(q, CUR_PTR - q);
1.22 daniel 1283:
1284: return(ret);
1285: }
1286:
1.50 daniel 1287: /**
1288: * xmlParseNmtoken:
1289: * @ctxt: an XML parser context
1290: *
1291: * parse an XML Nmtoken.
1.22 daniel 1292: *
1293: * [7] Nmtoken ::= (NameChar)+
1294: *
1295: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1.50 daniel 1296: * return values: the Nmtoken parsed or NULL
1.22 daniel 1297: */
1298:
1.55 daniel 1299: CHAR *
1300: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1.22 daniel 1301: const CHAR *q;
1302: CHAR *ret = NULL;
1303:
1.40 daniel 1304: q = NEXT;
1.22 daniel 1305:
1.40 daniel 1306: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1307: (CUR == '.') || (CUR == '-') ||
1308: (CUR == '_') || (CUR == ':') ||
1309: (IS_COMBINING(CUR)) ||
1310: (IS_EXTENDER(CUR)))
1311: NEXT;
1.3 veillard 1312:
1.40 daniel 1313: ret = xmlStrndup(q, CUR_PTR - q);
1.1 veillard 1314:
1.3 veillard 1315: return(ret);
1.1 veillard 1316: }
1317:
1.50 daniel 1318: /**
1319: * xmlParseEntityValue:
1320: * @ctxt: an XML parser context
1321: *
1322: * parse a value for ENTITY decl.
1.24 daniel 1323: *
1324: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1325: * "'" ([^%&'] | PEReference | Reference)* "'"
1.50 daniel 1326: * return values: the EntityValue parsed or NULL
1.24 daniel 1327: */
1328:
1.55 daniel 1329: CHAR *
1330: xmlParseEntityValue(xmlParserCtxtPtr ctxt) {
1.46 daniel 1331: CHAR *ret = NULL, *cur;
1.24 daniel 1332: const CHAR *q;
1333:
1.40 daniel 1334: if (CUR == '"') {
1335: NEXT;
1.24 daniel 1336:
1.40 daniel 1337: q = CUR_PTR;
1338: while ((IS_CHAR(CUR)) && (CUR != '"')) {
1339: if (CUR == '%') {
1.46 daniel 1340: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.50 daniel 1341: cur = xmlParsePEReference(ctxt);
1.46 daniel 1342: ret = xmlStrcat(ret, cur);
1343: q = CUR_PTR;
1.40 daniel 1344: } else if (CUR == '&') {
1.46 daniel 1345: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.50 daniel 1346: cur = xmlParseReference(ctxt);
1347: if (cur != NULL) {
1348: CHAR buf[2];
1349: buf[0] = '&';
1350: buf[1] = 0;
1351: ret = xmlStrncat(ret, buf, 1);
1352: ret = xmlStrcat(ret, cur);
1353: buf[0] = ';';
1354: buf[1] = 0;
1355: ret = xmlStrncat(ret, buf, 1);
1356: }
1.46 daniel 1357: q = CUR_PTR;
1.24 daniel 1358: } else
1.40 daniel 1359: NEXT;
1.24 daniel 1360: }
1.40 daniel 1361: if (!IS_CHAR(CUR)) {
1.55 daniel 1362: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1363: ctxt->sax->error(ctxt, "Unfinished EntityValue\n");
1.59 daniel 1364: ctxt->wellFormed = 0;
1.24 daniel 1365: } else {
1.46 daniel 1366: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.40 daniel 1367: NEXT;
1.24 daniel 1368: }
1.40 daniel 1369: } else if (CUR == '\'') {
1370: NEXT;
1371: q = CUR_PTR;
1372: while ((IS_CHAR(CUR)) && (CUR != '\'')) {
1373: if (CUR == '%') {
1.46 daniel 1374: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.50 daniel 1375: cur = xmlParsePEReference(ctxt);
1.46 daniel 1376: ret = xmlStrcat(ret, cur);
1377: q = CUR_PTR;
1.40 daniel 1378: } else if (CUR == '&') {
1.46 daniel 1379: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.50 daniel 1380: cur = xmlParseReference(ctxt);
1381: if (cur != NULL) {
1382: CHAR buf[2];
1383: buf[0] = '&';
1384: buf[1] = 0;
1385: ret = xmlStrncat(ret, buf, 1);
1386: ret = xmlStrcat(ret, cur);
1387: buf[0] = ';';
1388: buf[1] = 0;
1389: ret = xmlStrncat(ret, buf, 1);
1390: }
1.46 daniel 1391: q = CUR_PTR;
1.24 daniel 1392: } else
1.40 daniel 1393: NEXT;
1.24 daniel 1394: }
1.40 daniel 1395: if (!IS_CHAR(CUR)) {
1.55 daniel 1396: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1397: ctxt->sax->error(ctxt, "Unfinished EntityValue\n");
1.59 daniel 1398: ctxt->wellFormed = 0;
1.24 daniel 1399: } else {
1.46 daniel 1400: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.40 daniel 1401: NEXT;
1.24 daniel 1402: }
1403: } else {
1.55 daniel 1404: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1405: ctxt->sax->error(ctxt, "xmlParseEntityValue \" or ' expected\n");
1.59 daniel 1406: ctxt->wellFormed = 0;
1.24 daniel 1407: }
1408:
1409: return(ret);
1410: }
1411:
1.50 daniel 1412: /**
1413: * xmlParseAttValue:
1414: * @ctxt: an XML parser context
1415: *
1416: * parse a value for an attribute
1.29 daniel 1417: *
1418: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
1419: * "'" ([^<&'] | Reference)* "'"
1.50 daniel 1420: * return values: the AttValue parsed or NULL.
1.29 daniel 1421: */
1422:
1.55 daniel 1423: CHAR *
1424: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.46 daniel 1425: CHAR *ret = NULL, *cur;
1.29 daniel 1426: const CHAR *q;
1427:
1.40 daniel 1428: if (CUR == '"') {
1429: NEXT;
1.29 daniel 1430:
1.40 daniel 1431: q = CUR_PTR;
1432: while ((IS_CHAR(CUR)) && (CUR != '"')) {
1.59 daniel 1433: if (CUR == '<') {
1434: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1435: ctxt->sax->error(ctxt,
1436: "Unescaped '<' not allowed in attributes values\n");
1437: ctxt->wellFormed = 0;
1438: }
1.40 daniel 1439: if (CUR == '&') {
1.46 daniel 1440: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.50 daniel 1441: cur = xmlParseReference(ctxt);
1442: if (cur != NULL) {
1443: /*
1444: * Special case for '&', we don't want to
1445: * resolve it here since it will break later
1446: * when searching entities in the string.
1447: */
1448: if ((cur[0] == '&') && (cur[1] == 0)) {
1449: CHAR buf[6] = { '&', 'a', 'm', 'p', ';', 0 };
1450: ret = xmlStrncat(ret, buf, 5);
1451: } else
1452: ret = xmlStrcat(ret, cur);
1453: free(cur);
1454: }
1.46 daniel 1455: q = CUR_PTR;
1.29 daniel 1456: } else
1.40 daniel 1457: NEXT;
1.50 daniel 1458: /*
1459: * Pop out finished entity references.
1460: */
1461: while ((CUR == 0) && (ctxt->inputNr > 1)) {
1462: if (CUR_PTR != q)
1463: ret = xmlStrncat(ret, q, CUR_PTR - q);
1464: xmlPopInput(ctxt);
1465: q = CUR_PTR;
1466: }
1.29 daniel 1467: }
1.40 daniel 1468: if (!IS_CHAR(CUR)) {
1.55 daniel 1469: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1470: ctxt->sax->error(ctxt, "Unfinished AttValue\n");
1.59 daniel 1471: ctxt->wellFormed = 0;
1.29 daniel 1472: } else {
1.46 daniel 1473: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.40 daniel 1474: NEXT;
1.29 daniel 1475: }
1.40 daniel 1476: } else if (CUR == '\'') {
1477: NEXT;
1478: q = CUR_PTR;
1479: while ((IS_CHAR(CUR)) && (CUR != '\'')) {
1.59 daniel 1480: if (CUR == '<') {
1481: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1482: ctxt->sax->error(ctxt,
1483: "Unescaped '<' not allowed in attributes values\n");
1484: ctxt->wellFormed = 0;
1485: }
1.40 daniel 1486: if (CUR == '&') {
1.46 daniel 1487: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.50 daniel 1488: cur = xmlParseReference(ctxt);
1489: if (cur != NULL) {
1490: /*
1491: * Special case for '&', we don't want to
1492: * resolve it here since it will break later
1493: * when searching entities in the string.
1494: */
1495: if ((cur[0] == '&') && (cur[1] == 0)) {
1496: CHAR buf[6] = { '&', 'a', 'm', 'p', ';', 0 };
1497: ret = xmlStrncat(ret, buf, 5);
1498: } else
1499: ret = xmlStrcat(ret, cur);
1500: free(cur);
1501: }
1.46 daniel 1502: q = CUR_PTR;
1.29 daniel 1503: } else
1.40 daniel 1504: NEXT;
1.50 daniel 1505: /*
1506: * Pop out finished entity references.
1507: */
1508: while ((CUR == 0) && (ctxt->inputNr > 1)) {
1509: if (CUR_PTR != q)
1510: ret = xmlStrncat(ret, q, CUR_PTR - q);
1511: xmlPopInput(ctxt);
1512: q = CUR_PTR;
1513: }
1.29 daniel 1514: }
1.40 daniel 1515: if (!IS_CHAR(CUR)) {
1.55 daniel 1516: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1517: ctxt->sax->error(ctxt, "Unfinished AttValue\n");
1.59 daniel 1518: ctxt->wellFormed = 0;
1.29 daniel 1519: } else {
1.46 daniel 1520: ret = xmlStrncat(ret, q, CUR_PTR - q);
1.40 daniel 1521: NEXT;
1.29 daniel 1522: }
1523: } else {
1.55 daniel 1524: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1525: ctxt->sax->error(ctxt, "AttValue: \" or ' expected\n");
1.59 daniel 1526: ctxt->wellFormed = 0;
1.29 daniel 1527: }
1528:
1529: return(ret);
1530: }
1531:
1.50 daniel 1532: /**
1533: * xmlParseSystemLiteral:
1534: * @ctxt: an XML parser context
1535: *
1536: * parse an XML Literal
1.21 daniel 1537: *
1.22 daniel 1538: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.50 daniel 1539: * return values: the SystemLiteral parsed or NULL
1.21 daniel 1540: */
1541:
1.55 daniel 1542: CHAR *
1543: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
1.21 daniel 1544: const CHAR *q;
1545: CHAR *ret = NULL;
1546:
1.40 daniel 1547: if (CUR == '"') {
1548: NEXT;
1549: q = CUR_PTR;
1550: while ((IS_CHAR(CUR)) && (CUR != '"'))
1551: NEXT;
1552: if (!IS_CHAR(CUR)) {
1.55 daniel 1553: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1554: ctxt->sax->error(ctxt, "Unfinished SystemLiteral\n");
1.59 daniel 1555: ctxt->wellFormed = 0;
1.21 daniel 1556: } else {
1.40 daniel 1557: ret = xmlStrndup(q, CUR_PTR - q);
1558: NEXT;
1.21 daniel 1559: }
1.40 daniel 1560: } else if (CUR == '\'') {
1561: NEXT;
1562: q = CUR_PTR;
1563: while ((IS_CHAR(CUR)) && (CUR != '\''))
1564: NEXT;
1565: if (!IS_CHAR(CUR)) {
1.55 daniel 1566: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1567: ctxt->sax->error(ctxt, "Unfinished SystemLiteral\n");
1.59 daniel 1568: ctxt->wellFormed = 0;
1.21 daniel 1569: } else {
1.40 daniel 1570: ret = xmlStrndup(q, CUR_PTR - q);
1571: NEXT;
1.21 daniel 1572: }
1573: } else {
1.55 daniel 1574: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1575: ctxt->sax->error(ctxt, "SystemLiteral \" or ' expected\n");
1.59 daniel 1576: ctxt->wellFormed = 0;
1.21 daniel 1577: }
1578:
1579: return(ret);
1580: }
1581:
1.50 daniel 1582: /**
1583: * xmlParsePubidLiteral:
1584: * @ctxt: an XML parser context
1.21 daniel 1585: *
1.50 daniel 1586: * parse an XML public literal
1587: * return values: the PubidLiteral parsed or NULL.
1.21 daniel 1588: */
1589:
1.55 daniel 1590: CHAR *
1591: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
1.21 daniel 1592: const CHAR *q;
1593: CHAR *ret = NULL;
1594: /*
1595: * Name ::= (Letter | '_') (NameChar)*
1596: */
1.40 daniel 1597: if (CUR == '"') {
1598: NEXT;
1599: q = CUR_PTR;
1600: while (IS_PUBIDCHAR(CUR)) NEXT;
1601: if (CUR != '"') {
1.55 daniel 1602: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1603: ctxt->sax->error(ctxt, "Unfinished PubidLiteral\n");
1.59 daniel 1604: ctxt->wellFormed = 0;
1.21 daniel 1605: } else {
1.40 daniel 1606: ret = xmlStrndup(q, CUR_PTR - q);
1607: NEXT;
1.21 daniel 1608: }
1.40 daniel 1609: } else if (CUR == '\'') {
1610: NEXT;
1611: q = CUR_PTR;
1612: while ((IS_LETTER(CUR)) && (CUR != '\''))
1613: NEXT;
1614: if (!IS_LETTER(CUR)) {
1.55 daniel 1615: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1616: ctxt->sax->error(ctxt, "Unfinished PubidLiteral\n");
1.59 daniel 1617: ctxt->wellFormed = 0;
1.21 daniel 1618: } else {
1.40 daniel 1619: ret = xmlStrndup(q, CUR_PTR - q);
1620: NEXT;
1.21 daniel 1621: }
1622: } else {
1.55 daniel 1623: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1624: ctxt->sax->error(ctxt, "SystemLiteral \" or ' expected\n");
1.59 daniel 1625: ctxt->wellFormed = 0;
1.21 daniel 1626: }
1627:
1628: return(ret);
1629: }
1630:
1.50 daniel 1631: /**
1632: * xmlParseCharData:
1633: * @ctxt: an XML parser context
1634: * @cdata: int indicating whether we are within a CDATA section
1635: *
1636: * parse a CharData section.
1637: * if we are within a CDATA section ']]>' marks an end of section.
1.27 daniel 1638: *
1639: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
1.50 daniel 1640: * return values:
1.27 daniel 1641: */
1642:
1.55 daniel 1643: void
1644: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1.27 daniel 1645: const CHAR *q;
1646:
1.40 daniel 1647: q = CUR_PTR;
1648: while ((IS_CHAR(CUR)) && (CUR != '<') &&
1649: (CUR != '&')) {
1.59 daniel 1650: if ((CUR == ']') && (NXT(1) == ']') &&
1651: (NXT(2) == '>')) {
1652: if (cdata) break;
1653: else {
1654: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1655: ctxt->sax->error(ctxt,
1656: "Sequence ']]>' not allowed in content\n");
1657: ctxt->wellFormed = 0;
1658: }
1659: }
1.40 daniel 1660: NEXT;
1.27 daniel 1661: }
1.45 daniel 1662: if (q == CUR_PTR) return;
1663:
1664: /*
1665: * Ok the segment [q CUR_PTR] is to be consumed as chars.
1666: */
1667: if (ctxt->sax != NULL) {
1668: if (areBlanks(ctxt, q, CUR_PTR - q))
1669: ctxt->sax->ignorableWhitespace(ctxt, q, 0, CUR_PTR - q);
1670: else
1671: ctxt->sax->characters(ctxt, q, 0, CUR_PTR - q);
1672: }
1.27 daniel 1673: }
1674:
1.50 daniel 1675: /**
1676: * xmlParseExternalID:
1677: * @ctxt: an XML parser context
1678: * @publicID: a CHAR** receiving PubidLiteral
1679: *
1680: * Parse an External ID
1.22 daniel 1681: *
1682: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
1683: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1.50 daniel 1684: * return values: the function returns SystemLiteral and in the second
1685: * case publicID receives PubidLiteral
1.22 daniel 1686: */
1687:
1.55 daniel 1688: CHAR *
1689: xmlParseExternalID(xmlParserCtxtPtr ctxt, CHAR **publicID) {
1.39 daniel 1690: CHAR *URI = NULL;
1.22 daniel 1691:
1.40 daniel 1692: if ((CUR == 'S') && (NXT(1) == 'Y') &&
1693: (NXT(2) == 'S') && (NXT(3) == 'T') &&
1694: (NXT(4) == 'E') && (NXT(5) == 'M')) {
1695: SKIP(6);
1.59 daniel 1696: if (!IS_BLANK(CUR)) {
1697: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1698: ctxt->sax->error(ctxt,
1699: "Space required after 'SYSTEM'\n");
1700: ctxt->wellFormed = 0;
1701: }
1.42 daniel 1702: SKIP_BLANKS;
1.39 daniel 1703: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 1704: if (URI == NULL) {
1.55 daniel 1705: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1706: ctxt->sax->error(ctxt,
1.39 daniel 1707: "xmlParseExternalID: SYSTEM, no URI\n");
1.59 daniel 1708: ctxt->wellFormed = 0;
1709: }
1.40 daniel 1710: } else if ((CUR == 'P') && (NXT(1) == 'U') &&
1711: (NXT(2) == 'B') && (NXT(3) == 'L') &&
1712: (NXT(4) == 'I') && (NXT(5) == 'C')) {
1713: SKIP(6);
1.59 daniel 1714: if (!IS_BLANK(CUR)) {
1715: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1716: ctxt->sax->error(ctxt,
1717: "Space required after 'PUBLIC'\n");
1718: ctxt->wellFormed = 0;
1719: }
1.42 daniel 1720: SKIP_BLANKS;
1.39 daniel 1721: *publicID = xmlParsePubidLiteral(ctxt);
1.59 daniel 1722: if (*publicID == NULL) {
1.55 daniel 1723: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1724: ctxt->sax->error(ctxt,
1.39 daniel 1725: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.59 daniel 1726: ctxt->wellFormed = 0;
1727: }
1728: if (!IS_BLANK(CUR)) {
1729: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1730: ctxt->sax->error(ctxt,
1731: "Space required after the Public Identifier\n");
1732: ctxt->wellFormed = 0;
1733: }
1.42 daniel 1734: SKIP_BLANKS;
1.39 daniel 1735: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 1736: if (URI == NULL) {
1.55 daniel 1737: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1738: ctxt->sax->error(ctxt,
1.39 daniel 1739: "xmlParseExternalID: PUBLIC, no URI\n");
1.59 daniel 1740: ctxt->wellFormed = 0;
1741: }
1.22 daniel 1742: }
1.39 daniel 1743: return(URI);
1.22 daniel 1744: }
1745:
1.50 daniel 1746: /**
1747: * xmlParseComment:
1748: * @create: should we create a node
1749: *
1.3 veillard 1750: * Skip an XML (SGML) comment <!-- .... -->
1.31 daniel 1751: * This may or may not create a node (depending on the context)
1.38 daniel 1752: * The spec says that "For compatibility, the string "--" (double-hyphen)
1753: * must not occur within comments. "
1.22 daniel 1754: *
1755: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.50 daniel 1756: *
1757: * TODO: this should call a SAX function which will handle (or not) the
1758: * creation of the comment !
1759: * return values:
1.3 veillard 1760: */
1.31 daniel 1761: xmlNodePtr xmlParseComment(xmlParserCtxtPtr ctxt, int create) {
1762: xmlNodePtr ret = NULL;
1.17 daniel 1763: const CHAR *q, *start;
1764: const CHAR *r;
1.39 daniel 1765: CHAR *val;
1.3 veillard 1766:
1767: /*
1.22 daniel 1768: * Check that there is a comment right here.
1.3 veillard 1769: */
1.40 daniel 1770: if ((CUR != '<') || (NXT(1) != '!') ||
1771: (NXT(2) != '-') || (NXT(3) != '-')) return(NULL);
1.3 veillard 1772:
1.40 daniel 1773: SKIP(4);
1774: start = q = CUR_PTR;
1775: NEXT;
1776: r = CUR_PTR;
1777: NEXT;
1778: while (IS_CHAR(CUR) &&
1779: ((CUR == ':') || (CUR != '>') ||
1.16 daniel 1780: (*r != '-') || (*q != '-'))) {
1.59 daniel 1781: if ((*r == '-') && (*q == '-')) {
1.55 daniel 1782: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1783: ctxt->sax->error(ctxt,
1.38 daniel 1784: "Comment must not contain '--' (double-hyphen)`\n");
1.59 daniel 1785: ctxt->wellFormed = 0;
1786: }
1.40 daniel 1787: NEXT;r++;q++;
1.3 veillard 1788: }
1.40 daniel 1789: if (!IS_CHAR(CUR)) {
1.55 daniel 1790: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1791: ctxt->sax->error(ctxt, "Comment not terminated \n<!--%.50s\n", start);
1.59 daniel 1792: ctxt->wellFormed = 0;
1.3 veillard 1793: } else {
1.40 daniel 1794: NEXT;
1.31 daniel 1795: if (create) {
1.39 daniel 1796: val = xmlStrndup(start, q - start);
1.50 daniel 1797: ret = xmlNewDocComment(ctxt->doc, val);
1.39 daniel 1798: free(val);
1.31 daniel 1799: }
1.3 veillard 1800: }
1.39 daniel 1801: return(ret);
1.3 veillard 1802: }
1803:
1.50 daniel 1804: /**
1805: * xmlParsePITarget:
1806: * @ctxt: an XML parser context
1807: *
1808: * parse the name of a PI
1.22 daniel 1809: *
1810: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1.50 daniel 1811: * return values: the PITarget name or NULL
1.22 daniel 1812: */
1813:
1.55 daniel 1814: CHAR *
1815: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1.22 daniel 1816: CHAR *name;
1817:
1818: name = xmlParseName(ctxt);
1819: if ((name != NULL) && (name[3] == 0) &&
1820: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 1821: ((name[1] == 'm') || (name[1] == 'M')) &&
1822: ((name[2] == 'l') || (name[2] == 'L'))) {
1.55 daniel 1823: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1824: ctxt->sax->error(ctxt, "xmlParsePItarget: invalid name prefix 'xml'\n");
1.22 daniel 1825: return(NULL);
1826: }
1827: return(name);
1828: }
1829:
1.50 daniel 1830: /**
1831: * xmlParsePI:
1832: * @ctxt: an XML parser context
1833: *
1834: * parse an XML Processing Instruction.
1.22 daniel 1835: *
1836: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.50 daniel 1837: * return values: the PI name or NULL
1.3 veillard 1838: */
1839:
1.55 daniel 1840: void
1841: xmlParsePI(xmlParserCtxtPtr ctxt) {
1.22 daniel 1842: CHAR *target;
1843:
1.40 daniel 1844: if ((CUR == '<') && (NXT(1) == '?')) {
1.3 veillard 1845: /*
1846: * this is a Processing Instruction.
1847: */
1.40 daniel 1848: SKIP(2);
1.3 veillard 1849:
1850: /*
1.22 daniel 1851: * Parse the target name and check for special support like
1852: * namespace.
1853: *
1854: * TODO : PI handling should be dynamically redefinable using an
1855: * API. Only namespace should be in the code IMHO ...
1.3 veillard 1856: */
1.22 daniel 1857: target = xmlParsePITarget(ctxt);
1858: if (target != NULL) {
1859: /*
1.44 daniel 1860: * Support for the old Processing Instruction related to namespace.
1.22 daniel 1861: */
1862: if ((target[0] == 'n') && (target[1] == 'a') &&
1863: (target[2] == 'm') && (target[3] == 'e') &&
1864: (target[4] == 's') && (target[5] == 'p') &&
1865: (target[6] == 'a') && (target[7] == 'c') &&
1866: (target[8] == 'e')) {
1867: xmlParseNamespace(ctxt);
1868: } else if ((target[0] == 'x') && (target[1] == 'm') &&
1869: (target[2] == 'l') && (target[3] == ':') &&
1870: (target[4] == 'n') && (target[5] == 'a') &&
1871: (target[6] == 'm') && (target[7] == 'e') &&
1872: (target[8] == 's') && (target[9] == 'p') &&
1873: (target[10] == 'a') && (target[11] == 'c') &&
1874: (target[12] == 'e')) {
1875: xmlParseNamespace(ctxt);
1876: } else {
1.44 daniel 1877: const CHAR *q = CUR_PTR;
1878:
1.40 daniel 1879: while (IS_CHAR(CUR) &&
1880: ((CUR != '?') || (NXT(1) != '>')))
1881: NEXT;
1882: if (!IS_CHAR(CUR)) {
1.55 daniel 1883: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.59 daniel 1884: ctxt->sax->error(ctxt,
1885: "xmlParsePI: PI %s never end ...\n", target);
1886: ctxt->wellFormed = 0;
1.44 daniel 1887: } else {
1888: CHAR *data;
1889:
1890: data = xmlStrndup(CUR_PTR, CUR_PTR - q);
1.40 daniel 1891: SKIP(2);
1.44 daniel 1892:
1893: /*
1894: * SAX: PI detected.
1895: */
1896: if (ctxt->sax)
1897: ctxt->sax->processingInstruction(ctxt, target, data);
1898: /*
1899: * Unknown PI, ignore it !
1900: */
1901: else
1902: xmlParserWarning(ctxt,
1903: "xmlParsePI : skipping unknown PI %s\n",
1904: target);
1905: free(data);
1906: }
1.22 daniel 1907: }
1.39 daniel 1908: free(target);
1.3 veillard 1909: } else {
1.55 daniel 1910: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1911: ctxt->sax->error(ctxt, "xmlParsePI : no target name\n");
1.59 daniel 1912: ctxt->wellFormed = 0;
1913:
1.22 daniel 1914: /********* Should we try to complete parsing the PI ???
1.40 daniel 1915: while (IS_CHAR(CUR) &&
1916: (CUR != '?') && (CUR != '>'))
1917: NEXT;
1918: if (!IS_CHAR(CUR)) {
1.22 daniel 1919: fprintf(stderr, "xmlParsePI: PI %s never end ...\n",
1920: target);
1921: }
1922: ********************************************************/
1923: }
1924: }
1925: }
1926:
1.50 daniel 1927: /**
1928: * xmlParseNotationDecl:
1929: * @ctxt: an XML parser context
1930: *
1931: * parse a notation declaration
1.22 daniel 1932: *
1933: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
1934: *
1935: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
1936: *
1937: * NOTE: Actually [75] and [83] interract badly since [75] can generate
1938: * 'PUBLIC' S PubidLiteral S SystemLiteral
1939: *
1940: * Hence there is actually 3 choices:
1941: * 'PUBLIC' S PubidLiteral
1942: * 'PUBLIC' S PubidLiteral S SystemLiteral
1943: * and 'SYSTEM' S SystemLiteral
1.50 daniel 1944: *
1945: * TODO: no handling of the values parsed !
1.22 daniel 1946: */
1947:
1.55 daniel 1948: void
1949: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1.22 daniel 1950: CHAR *name;
1951:
1.40 daniel 1952: if ((CUR == '<') && (NXT(1) == '!') &&
1953: (NXT(2) == 'N') && (NXT(3) == 'O') &&
1954: (NXT(4) == 'T') && (NXT(5) == 'A') &&
1955: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1956: (NXT(8) == 'O') && (NXT(9) == 'N') &&
1957: (IS_BLANK(NXT(10)))) {
1958: SKIP(10);
1.42 daniel 1959: SKIP_BLANKS;
1.22 daniel 1960:
1961: name = xmlParseName(ctxt);
1962: if (name == NULL) {
1.55 daniel 1963: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1964: ctxt->sax->error(ctxt,
1.31 daniel 1965: "xmlParseAttributeListDecl: no name for Element\n");
1.59 daniel 1966: ctxt->wellFormed = 0;
1.22 daniel 1967: return;
1968: }
1.42 daniel 1969: SKIP_BLANKS;
1.22 daniel 1970: /*
1.31 daniel 1971: * TODO !!!
1.22 daniel 1972: */
1.40 daniel 1973: while ((IS_CHAR(CUR)) && (CUR != '>'))
1974: NEXT;
1.22 daniel 1975: free(name);
1976: }
1977: }
1978:
1.50 daniel 1979: /**
1980: * xmlParseEntityDecl:
1981: * @ctxt: an XML parser context
1982: *
1983: * parse <!ENTITY declarations
1.22 daniel 1984: *
1985: * [70] EntityDecl ::= GEDecl | PEDecl
1986: *
1987: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
1988: *
1989: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
1990: *
1991: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
1992: *
1993: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 1994: *
1995: * [76] NDataDecl ::= S 'NDATA' S Name
1.22 daniel 1996: */
1997:
1.55 daniel 1998: void
1999: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.39 daniel 2000: CHAR *name = NULL;
1.24 daniel 2001: CHAR *value = NULL;
1.39 daniel 2002: CHAR *URI = NULL, *literal = NULL;
1.24 daniel 2003: CHAR *ndata = NULL;
1.39 daniel 2004: int isParameter = 0;
1.22 daniel 2005:
1.40 daniel 2006: if ((CUR == '<') && (NXT(1) == '!') &&
2007: (NXT(2) == 'E') && (NXT(3) == 'N') &&
2008: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1.59 daniel 2009: (NXT(6) == 'T') && (NXT(7) == 'Y')) {
1.40 daniel 2010: SKIP(8);
1.59 daniel 2011: if (!IS_BLANK(CUR)) {
2012: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2013: ctxt->sax->error(ctxt, "Space required after '<!ENTITY'\n");
2014: ctxt->wellFormed = 0;
2015: }
2016: SKIP_BLANKS;
1.40 daniel 2017:
2018: if (CUR == '%') {
2019: NEXT;
1.59 daniel 2020: if (!IS_BLANK(CUR)) {
2021: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2022: ctxt->sax->error(ctxt, "Space required after '%'\n");
2023: ctxt->wellFormed = 0;
2024: }
1.42 daniel 2025: SKIP_BLANKS;
1.39 daniel 2026: isParameter = 1;
1.22 daniel 2027: }
2028:
2029: name = xmlParseName(ctxt);
1.24 daniel 2030: if (name == NULL) {
1.55 daniel 2031: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2032: ctxt->sax->error(ctxt, "xmlParseEntityDecl: no name\n");
1.59 daniel 2033: ctxt->wellFormed = 0;
1.24 daniel 2034: return;
2035: }
1.59 daniel 2036: if (!IS_BLANK(CUR)) {
2037: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2038: ctxt->sax->error(ctxt,
2039: "Space required after the entity name\n");
2040: ctxt->wellFormed = 0;
2041: }
1.42 daniel 2042: SKIP_BLANKS;
1.24 daniel 2043:
1.22 daniel 2044: /*
1.24 daniel 2045: * TODO handle the various case of definitions...
1.22 daniel 2046: */
1.39 daniel 2047: if (isParameter) {
1.40 daniel 2048: if ((CUR == '"') || (CUR == '\''))
1.24 daniel 2049: value = xmlParseEntityValue(ctxt);
1.39 daniel 2050: if (value) {
2051: xmlAddDocEntity(ctxt->doc, name,
2052: XML_INTERNAL_PARAMETER_ENTITY,
2053: NULL, NULL, value);
2054: }
1.24 daniel 2055: else {
1.39 daniel 2056: URI = xmlParseExternalID(ctxt, &literal);
2057: if (URI) {
2058: xmlAddDocEntity(ctxt->doc, name,
2059: XML_EXTERNAL_PARAMETER_ENTITY,
2060: literal, URI, NULL);
2061: }
1.24 daniel 2062: }
2063: } else {
1.40 daniel 2064: if ((CUR == '"') || (CUR == '\'')) {
1.24 daniel 2065: value = xmlParseEntityValue(ctxt);
1.39 daniel 2066: xmlAddDocEntity(ctxt->doc, name,
2067: XML_INTERNAL_GENERAL_ENTITY,
2068: NULL, NULL, value);
2069: } else {
2070: URI = xmlParseExternalID(ctxt, &literal);
1.59 daniel 2071: if ((CUR != '>') && (!IS_BLANK(CUR))) {
2072: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2073: ctxt->sax->error(ctxt,
2074: "Space required before 'NDATA'\n");
2075: ctxt->wellFormed = 0;
2076: }
1.42 daniel 2077: SKIP_BLANKS;
1.40 daniel 2078: if ((CUR == 'N') && (NXT(1) == 'D') &&
2079: (NXT(2) == 'A') && (NXT(3) == 'T') &&
2080: (NXT(4) == 'A')) {
2081: SKIP(5);
1.59 daniel 2082: if (!IS_BLANK(CUR)) {
2083: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2084: ctxt->sax->error(ctxt,
2085: "Space required after 'NDATA'\n");
2086: ctxt->wellFormed = 0;
2087: }
1.42 daniel 2088: SKIP_BLANKS;
1.24 daniel 2089: ndata = xmlParseName(ctxt);
1.39 daniel 2090: xmlAddDocEntity(ctxt->doc, name,
2091: XML_EXTERNAL_GENERAL_UNPARSED_ENTITY,
2092: literal, URI, ndata);
2093: } else {
2094: xmlAddDocEntity(ctxt->doc, name,
2095: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
2096: literal, URI, NULL);
1.24 daniel 2097: }
2098: }
2099: }
1.42 daniel 2100: SKIP_BLANKS;
1.40 daniel 2101: if (CUR != '>') {
1.55 daniel 2102: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2103: ctxt->sax->error(ctxt,
1.31 daniel 2104: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.59 daniel 2105: ctxt->wellFormed = 0;
1.24 daniel 2106: } else
1.40 daniel 2107: NEXT;
1.39 daniel 2108: if (name != NULL) free(name);
2109: if (value != NULL) free(value);
2110: if (URI != NULL) free(URI);
2111: if (literal != NULL) free(literal);
2112: if (ndata != NULL) free(ndata);
1.22 daniel 2113: }
2114: }
2115:
1.50 daniel 2116: /**
1.59 daniel 2117: * xmlParseDefaultDecl:
2118: * @ctxt: an XML parser context
2119: * @value: Receive a possible fixed default value for the attribute
2120: *
2121: * Parse an attribute default declaration
2122: *
2123: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
2124: *
2125: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
2126: * or XML_ATTRIBUTE_FIXED.
2127: */
2128:
2129: int
2130: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, CHAR **value) {
2131: int val;
2132: CHAR *ret;
2133:
2134: *value = NULL;
2135: if ((CUR == '#') && (NXT(1) == 'R') &&
2136: (NXT(2) == 'E') && (NXT(3) == 'Q') &&
2137: (NXT(4) == 'U') && (NXT(5) == 'I') &&
2138: (NXT(6) == 'R') && (NXT(7) == 'E') &&
2139: (NXT(8) == 'D')) {
2140: SKIP(9);
2141: return(XML_ATTRIBUTE_REQUIRED);
2142: }
2143: if ((CUR == '#') && (NXT(1) == 'I') &&
2144: (NXT(2) == 'M') && (NXT(3) == 'P') &&
2145: (NXT(4) == 'L') && (NXT(5) == 'I') &&
2146: (NXT(6) == 'E') && (NXT(7) == 'D')) {
2147: SKIP(8);
2148: return(XML_ATTRIBUTE_IMPLIED);
2149: }
2150: val = XML_ATTRIBUTE_NONE;
2151: if ((CUR == '#') && (NXT(1) == 'F') &&
2152: (NXT(2) == 'I') && (NXT(3) == 'X') &&
2153: (NXT(4) == 'E') && (NXT(5) == 'D')) {
2154: SKIP(6);
2155: val = XML_ATTRIBUTE_FIXED;
2156: if (!IS_BLANK(CUR)) {
2157: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2158: ctxt->sax->error(ctxt, "Space required after '#FIXED'\n");
2159: ctxt->wellFormed = 0;
2160: }
2161: SKIP_BLANKS;
2162: }
2163: ret = xmlParseAttValue(ctxt);
2164: if (ret == NULL) {
2165: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2166: ctxt->sax->error(ctxt,
2167: "Attribute default value declaration error\n");
2168: ctxt->wellFormed = 0;
2169: } else
2170: *value = ret;
2171: return(val);
2172: }
2173:
2174: /**
1.50 daniel 2175: * xmlParseEnumeratedType:
2176: * @ctxt: an XML parser context
2177: * @name: ???
2178: * @:
2179: *
2180: * parse and Enumerated attribute type.
1.22 daniel 2181: *
2182: * [57] EnumeratedType ::= NotationType | Enumeration
2183: *
2184: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
2185: *
2186: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
1.50 daniel 2187: *
2188: * TODO: not implemented !!!
1.22 daniel 2189: */
2190:
1.55 daniel 2191: void
2192: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, CHAR *name) {
1.22 daniel 2193: /*
2194: * TODO !!!
2195: */
1.59 daniel 2196: fprintf(stderr, "Production [57] EnumeratedType not yet supported\n");
1.40 daniel 2197: while ((IS_CHAR(CUR)) && (CUR != '>'))
2198: NEXT;
1.22 daniel 2199: }
2200:
1.50 daniel 2201: /**
2202: * xmlParseAttributeType:
2203: * @ctxt: an XML parser context
2204: * @name: ???
2205: *
1.59 daniel 2206: * parse the Attribute list def for an element
1.22 daniel 2207: *
2208: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
2209: *
2210: * [55] StringType ::= 'CDATA'
2211: *
2212: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
2213: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1.50 daniel 2214: *
1.59 daniel 2215: * Returns: the attribute type
1.22 daniel 2216: */
1.59 daniel 2217: int
1.55 daniel 2218: xmlParseAttributeType(xmlParserCtxtPtr ctxt, CHAR *name) {
1.45 daniel 2219: /* TODO !!! */
1.40 daniel 2220: if ((CUR == 'C') && (NXT(1) == 'D') &&
2221: (NXT(2) == 'A') && (NXT(3) == 'T') &&
2222: (NXT(4) == 'A')) {
2223: SKIP(5);
1.59 daniel 2224: return(XML_ATTRIBUTE_STRING);
1.40 daniel 2225: } else if ((CUR == 'I') && (NXT(1) == 'D')) {
2226: SKIP(2);
1.59 daniel 2227: return(XML_ATTRIBUTE_ID);
1.40 daniel 2228: } else if ((CUR == 'I') && (NXT(1) == 'D') &&
2229: (NXT(2) == 'R') && (NXT(3) == 'E') &&
2230: (NXT(4) == 'F')) {
2231: SKIP(5);
1.59 daniel 2232: return(XML_ATTRIBUTE_IDREF);
1.40 daniel 2233: } else if ((CUR == 'I') && (NXT(1) == 'D') &&
2234: (NXT(2) == 'R') && (NXT(3) == 'E') &&
2235: (NXT(4) == 'F') && (NXT(5) == 'S')) {
2236: SKIP(6);
1.59 daniel 2237: return(XML_ATTRIBUTE_IDREFS);
1.40 daniel 2238: } else if ((CUR == 'E') && (NXT(1) == 'N') &&
2239: (NXT(2) == 'T') && (NXT(3) == 'I') &&
2240: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
2241: SKIP(6);
1.59 daniel 2242: return(XML_ATTRIBUTE_ENTITY);
1.40 daniel 2243: } else if ((CUR == 'E') && (NXT(1) == 'N') &&
2244: (NXT(2) == 'T') && (NXT(3) == 'I') &&
2245: (NXT(4) == 'T') && (NXT(5) == 'I') &&
2246: (NXT(6) == 'E') && (NXT(7) == 'S')) {
2247: SKIP(8);
1.59 daniel 2248: return(XML_ATTRIBUTE_ENTITIES);
1.40 daniel 2249: } else if ((CUR == 'N') && (NXT(1) == 'M') &&
2250: (NXT(2) == 'T') && (NXT(3) == 'O') &&
2251: (NXT(4) == 'K') && (NXT(5) == 'E') &&
2252: (NXT(6) == 'N')) {
2253: SKIP(7);
1.59 daniel 2254: return(XML_ATTRIBUTE_NMTOKEN);
1.40 daniel 2255: } else if ((CUR == 'N') && (NXT(1) == 'M') &&
2256: (NXT(2) == 'T') && (NXT(3) == 'O') &&
2257: (NXT(4) == 'K') && (NXT(5) == 'E') &&
2258: (NXT(6) == 'N') && (NXT(7) == 'S')) {
1.59 daniel 2259: return(XML_ATTRIBUTE_NMTOKENS);
1.22 daniel 2260: }
1.59 daniel 2261: xmlParseEnumeratedType(ctxt, name);
2262: return(XML_ATTRIBUTE_ENUMERATED);
1.22 daniel 2263: }
2264:
1.50 daniel 2265: /**
2266: * xmlParseAttributeListDecl:
2267: * @ctxt: an XML parser context
2268: *
2269: * : parse the Attribute list def for an element
1.22 daniel 2270: *
2271: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
2272: *
2273: * [53] AttDef ::= S Name S AttType S DefaultDecl
1.50 daniel 2274: *
2275: * TODO: not implemented !!!
1.22 daniel 2276: */
1.55 daniel 2277: void
2278: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1.59 daniel 2279: CHAR *elemName;
2280: CHAR *attrName;
1.22 daniel 2281:
1.45 daniel 2282: /* TODO !!! */
1.40 daniel 2283: if ((CUR == '<') && (NXT(1) == '!') &&
2284: (NXT(2) == 'A') && (NXT(3) == 'T') &&
2285: (NXT(4) == 'T') && (NXT(5) == 'L') &&
2286: (NXT(6) == 'I') && (NXT(7) == 'S') &&
1.59 daniel 2287: (NXT(8) == 'T')) {
1.40 daniel 2288: SKIP(9);
1.59 daniel 2289: if (!IS_BLANK(CUR)) {
2290: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2291: ctxt->sax->error(ctxt, "Space required after '<!ATTLIST'\n");
2292: ctxt->wellFormed = 0;
2293: }
1.42 daniel 2294: SKIP_BLANKS;
1.59 daniel 2295: elemName = xmlParseName(ctxt);
2296: if (elemName == NULL) {
1.55 daniel 2297: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.59 daniel 2298: ctxt->sax->error(ctxt, "ATTLIST: no name for Element\n");
2299: ctxt->wellFormed = 0;
1.22 daniel 2300: return;
2301: }
1.42 daniel 2302: SKIP_BLANKS;
1.40 daniel 2303: while (CUR != '>') {
2304: const CHAR *check = CUR_PTR;
1.59 daniel 2305: int type;
2306: int def;
2307: CHAR *defaultValue = NULL;
2308:
2309: attrName = xmlParseName(ctxt);
2310: if (attrName == NULL) {
2311: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2312: ctxt->sax->error(ctxt, "ATTLIST: no name for Attribute\n");
2313: ctxt->wellFormed = 0;
2314: break;
2315: }
2316: if (!IS_BLANK(CUR)) {
2317: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2318: ctxt->sax->error(ctxt,
2319: "Space required after the attribute name\n");
2320: ctxt->wellFormed = 0;
2321: break;
2322: }
2323: SKIP_BLANKS;
2324:
2325: type = xmlParseAttributeType(ctxt, attrName);
2326: if (type <= 0) break;
1.22 daniel 2327:
1.59 daniel 2328: if (!IS_BLANK(CUR)) {
2329: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2330: ctxt->sax->error(ctxt,
2331: "Space required after the attribute type\n");
2332: ctxt->wellFormed = 0;
2333: break;
2334: }
1.42 daniel 2335: SKIP_BLANKS;
1.59 daniel 2336:
2337: def = xmlParseDefaultDecl(ctxt, &defaultValue);
2338: if (def <= 0) break;
2339:
2340: if (CUR != '>') {
2341: if (!IS_BLANK(CUR)) {
2342: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2343: ctxt->sax->error(ctxt,
2344: "Space required after the attribute default value\n");
2345: ctxt->wellFormed = 0;
2346: break;
2347: }
2348: SKIP_BLANKS;
2349: }
1.40 daniel 2350: if (check == CUR_PTR) {
1.55 daniel 2351: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2352: ctxt->sax->error(ctxt,
1.59 daniel 2353: "xmlParseAttributeListDecl: detected internal error\n");
1.22 daniel 2354: break;
2355: }
1.59 daniel 2356: if (attrName != NULL)
2357: free(attrName);
2358: if (defaultValue != NULL)
2359: free(defaultValue);
1.22 daniel 2360: }
1.40 daniel 2361: if (CUR == '>')
2362: NEXT;
1.22 daniel 2363:
1.59 daniel 2364: free(elemName);
1.22 daniel 2365: }
2366: }
2367:
1.50 daniel 2368: /**
1.61 daniel 2369: * xmlParseElementMixedContentDecl:
2370: * @ctxt: an XML parser context
2371: *
2372: * parse the declaration for a Mixed Element content
2373: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
2374: *
2375: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
2376: * '(' S? '#PCDATA' S? ')'
2377: *
2378: * returns: the list of the xmlElementContentPtr describing the element choices
2379: */
2380: xmlElementContentPtr
1.62 daniel 2381: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
1.61 daniel 2382: xmlElementContentPtr ret = NULL, cur = NULL;
2383: CHAR *elem = NULL;
2384:
2385: if ((CUR == '#') && (NXT(1) == 'P') &&
2386: (NXT(2) == 'C') && (NXT(3) == 'D') &&
2387: (NXT(4) == 'A') && (NXT(5) == 'T') &&
2388: (NXT(6) == 'A')) {
2389: SKIP(7);
2390: SKIP_BLANKS;
1.63 ! daniel 2391: if (CUR == ')') {
! 2392: NEXT;
! 2393: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
! 2394: return(ret);
! 2395: }
1.61 daniel 2396: if ((CUR == '(') || (CUR == '|')) {
2397: ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
2398: if (ret == NULL) return(NULL);
1.63 ! daniel 2399: } /********** else {
1.61 daniel 2400: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2401: ctxt->sax->error(ctxt,
2402: "xmlParseElementMixedContentDecl : '|' or ')' expected\n");
2403: ctxt->wellFormed = 0;
2404: return(NULL);
1.63 ! daniel 2405: } **********/
1.61 daniel 2406: while (CUR == '|') {
2407: if (elem == NULL) {
2408: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
2409: if (ret == NULL) return(NULL);
2410: ret->c1 = cur;
2411: } else {
2412: cur->c1 = xmlNewElementContent(elem,
2413: XML_ELEMENT_CONTENT_ELEMENT);
2414: cur->c2 = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
2415: cur = cur->c2;
2416: if (cur == NULL) return(NULL);
2417: }
2418: SKIP_BLANKS;
2419: elem = xmlParseName(ctxt);
2420: if (elem == NULL) {
2421: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2422: ctxt->sax->error(ctxt,
2423: "xmlParseElementMixedContentDecl : Name expected\n");
2424: ctxt->wellFormed = 0;
2425: xmlFreeElementContent(cur);
2426: return(NULL);
2427: }
2428: SKIP_BLANKS;
2429: }
1.63 ! daniel 2430: if ((CUR == ')') && (NXT(1) == '*')) {
1.61 daniel 2431: if (elem != NULL)
2432: cur->c2 = xmlNewElementContent(elem,
2433: XML_ELEMENT_CONTENT_ELEMENT);
2434: NEXT;
2435: } else {
2436: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2437: ctxt->sax->error(ctxt,
1.63 ! daniel 2438: "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
1.61 daniel 2439: ctxt->wellFormed = 0;
2440: xmlFreeElementContent(ret);
2441: return(NULL);
2442: }
2443:
2444: } else {
2445: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2446: ctxt->sax->error(ctxt,
2447: "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
2448: ctxt->wellFormed = 0;
2449: }
2450: return(ret);
2451: }
2452:
2453: /**
2454: * xmlParseElementChildrenContentDecl:
1.50 daniel 2455: * @ctxt: an XML parser context
2456: *
1.61 daniel 2457: * parse the declaration for a Mixed Element content
2458: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
1.22 daniel 2459: *
1.61 daniel 2460: *
1.22 daniel 2461: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
2462: *
2463: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
2464: *
2465: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
2466: *
2467: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
2468: *
1.62 daniel 2469: * returns: the tree of xmlElementContentPtr describing the element
1.61 daniel 2470: * hierarchy.
2471: */
2472: xmlElementContentPtr
1.62 daniel 2473: xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
1.63 ! daniel 2474: xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
1.62 daniel 2475: CHAR *elem;
2476: CHAR type = 0;
2477:
2478: SKIP_BLANKS;
2479: if (CUR == '(') {
1.63 ! daniel 2480: /* Recurse on first child */
1.62 daniel 2481: NEXT;
2482: SKIP_BLANKS;
2483: cur = ret = xmlParseElementChildrenContentDecl(ctxt);
2484: SKIP_BLANKS;
2485: } else {
2486: elem = xmlParseName(ctxt);
2487: if (elem == NULL) {
2488: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2489: ctxt->sax->error(ctxt,
2490: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
2491: ctxt->wellFormed = 0;
2492: return(NULL);
2493: }
2494: cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
2495: if (CUR == '?') {
2496: ret->ocur = XML_ELEMENT_CONTENT_OPT;
2497: NEXT;
2498: } else if (CUR == '*') {
2499: ret->ocur = XML_ELEMENT_CONTENT_MULT;
2500: NEXT;
2501: } else if (CUR == '+') {
2502: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
2503: NEXT;
2504: } else {
2505: ret->ocur = XML_ELEMENT_CONTENT_ONCE;
2506: }
2507: }
2508: SKIP_BLANKS;
2509: while (CUR != ')') {
1.63 ! daniel 2510: /*
! 2511: * Each loop we parse one separator and one element.
! 2512: */
1.62 daniel 2513: if (CUR == ',') {
2514: if (type == 0) type = CUR;
2515:
2516: /*
2517: * Detect "Name | Name , Name" error
2518: */
2519: else if (type != CUR) {
2520: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2521: ctxt->sax->error(ctxt,
2522: "xmlParseElementChildrenContentDecl : '%c' expected\n",
2523: type);
2524: ctxt->wellFormed = 0;
2525: xmlFreeElementContent(ret);
2526: return(NULL);
2527: }
2528:
1.63 ! daniel 2529: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
! 2530: if (op == NULL) {
! 2531: xmlFreeElementContent(ret);
! 2532: return(NULL);
! 2533: }
! 2534: if (last == NULL) {
! 2535: op->c1 = ret;
! 2536: ret = cur = op;
! 2537: } else {
! 2538: cur->c2 = op;
! 2539: op->c1 = last;
! 2540: cur =op;
! 2541: }
1.62 daniel 2542: } else if (CUR == '|') {
2543: if (type == 0) type = CUR;
2544:
2545: /*
1.63 ! daniel 2546: * Detect "Name , Name | Name" error
1.62 daniel 2547: */
2548: else if (type != CUR) {
2549: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2550: ctxt->sax->error(ctxt,
2551: "xmlParseElementChildrenContentDecl : '%c' expected\n",
2552: type);
2553: ctxt->wellFormed = 0;
2554: xmlFreeElementContent(ret);
2555: return(NULL);
2556: }
2557:
1.63 ! daniel 2558: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
! 2559: if (op == NULL) {
! 2560: xmlFreeElementContent(ret);
! 2561: return(NULL);
! 2562: }
! 2563: if (last == NULL) {
! 2564: op->c1 = ret;
! 2565: ret = cur = op;
! 2566: } else {
! 2567: cur->c2 = op;
! 2568: op->c1 = last;
! 2569: cur =op;
! 2570: }
1.62 daniel 2571: } else {
2572: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2573: ctxt->sax->error(ctxt,
2574: "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
2575: ctxt->wellFormed = 0;
2576: xmlFreeElementContent(ret);
2577: return(NULL);
2578: }
2579: SKIP_BLANKS;
2580: if (CUR == '(') {
1.63 ! daniel 2581: /* Recurse on second child */
1.62 daniel 2582: NEXT;
2583: SKIP_BLANKS;
2584: cur = xmlParseElementChildrenContentDecl(ctxt);
2585: SKIP_BLANKS;
2586: } else {
2587: elem = xmlParseName(ctxt);
2588: if (elem == NULL) {
2589: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2590: ctxt->sax->error(ctxt,
2591: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
2592: ctxt->wellFormed = 0;
2593: return(NULL);
2594: }
2595: cur = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
2596: }
1.63 ! daniel 2597: if (CUR == '?') {
! 2598: ret->ocur = XML_ELEMENT_CONTENT_OPT;
! 2599: NEXT;
! 2600: } else if (CUR == '*') {
! 2601: ret->ocur = XML_ELEMENT_CONTENT_MULT;
! 2602: NEXT;
! 2603: } else if (CUR == '+') {
! 2604: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
! 2605: NEXT;
! 2606: } else {
! 2607: ret->ocur = XML_ELEMENT_CONTENT_ONCE;
! 2608: }
! 2609: SKIP_BLANKS;
1.62 daniel 2610: }
2611: NEXT;
2612: if (CUR == '?') {
2613: ret->ocur = XML_ELEMENT_CONTENT_OPT;
2614: NEXT;
2615: } else if (CUR == '*') {
2616: ret->ocur = XML_ELEMENT_CONTENT_MULT;
2617: NEXT;
2618: } else if (CUR == '+') {
2619: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
2620: NEXT;
2621: } else {
2622: ret->ocur = XML_ELEMENT_CONTENT_ONCE;
2623: }
2624: return(ret);
1.61 daniel 2625: }
2626:
2627: /**
2628: * xmlParseElementContentDecl:
2629: * @ctxt: an XML parser context
2630: * @name: the name of the element being defined.
2631: * @result: the Element Content pointer will be stored here if any
1.22 daniel 2632: *
1.61 daniel 2633: * parse the declaration for an Element content either Mixed or Children,
2634: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
2635: *
2636: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1.50 daniel 2637: *
1.61 daniel 2638: * returns: the type of element content XML_ELEMENT_TYPE_xxx
1.22 daniel 2639: */
2640:
1.61 daniel 2641: int
2642: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, CHAR *name,
2643: xmlElementContentPtr *result) {
2644:
2645: xmlElementContentPtr tree = NULL;
2646: int res;
2647:
2648: *result = NULL;
2649:
2650: if (CUR != '(') {
2651: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2652: ctxt->sax->error(ctxt,
2653: "xmlParseElementContentDecl : '(' expected\n");
2654: ctxt->wellFormed = 0;
2655: return(-1);
2656: }
2657: NEXT;
2658: SKIP_BLANKS;
2659: if ((CUR == '#') && (NXT(1) == 'P') &&
2660: (NXT(2) == 'C') && (NXT(3) == 'D') &&
2661: (NXT(4) == 'A') && (NXT(5) == 'T') &&
2662: (NXT(6) == 'A')) {
1.62 daniel 2663: tree = xmlParseElementMixedContentDecl(ctxt);
1.61 daniel 2664: res = XML_ELEMENT_TYPE_MIXED;
2665: } else {
1.62 daniel 2666: tree = xmlParseElementChildrenContentDecl(ctxt);
1.61 daniel 2667: res = XML_ELEMENT_TYPE_ELEMENT;
2668: }
2669: SKIP_BLANKS;
1.63 ! daniel 2670: /****************************
1.61 daniel 2671: if (CUR != ')') {
2672: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2673: ctxt->sax->error(ctxt,
2674: "xmlParseElementContentDecl : ')' expected\n");
2675: ctxt->wellFormed = 0;
2676: return(-1);
2677: }
1.63 ! daniel 2678: ****************************/
! 2679: *result = tree;
1.61 daniel 2680: return(res);
1.22 daniel 2681: }
2682:
1.50 daniel 2683: /**
2684: * xmlParseElementDecl:
2685: * @ctxt: an XML parser context
2686: *
2687: * parse an Element declaration.
1.22 daniel 2688: *
2689: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
2690: *
2691: * TODO There is a check [ VC: Unique Element Type Declaration ]
2692: */
1.59 daniel 2693: int
1.55 daniel 2694: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1.22 daniel 2695: CHAR *name;
1.59 daniel 2696: int ret = -1;
1.61 daniel 2697: xmlElementContentPtr content = NULL;
1.22 daniel 2698:
1.40 daniel 2699: if ((CUR == '<') && (NXT(1) == '!') &&
2700: (NXT(2) == 'E') && (NXT(3) == 'L') &&
2701: (NXT(4) == 'E') && (NXT(5) == 'M') &&
2702: (NXT(6) == 'E') && (NXT(7) == 'N') &&
1.59 daniel 2703: (NXT(8) == 'T')) {
1.40 daniel 2704: SKIP(9);
1.59 daniel 2705: if (!IS_BLANK(CUR)) {
2706: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2707: ctxt->sax->error(ctxt,
2708: "Space required after 'ELEMENT'\n");
2709: ctxt->wellFormed = 0;
2710: }
1.42 daniel 2711: SKIP_BLANKS;
1.22 daniel 2712: name = xmlParseName(ctxt);
2713: if (name == NULL) {
1.55 daniel 2714: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.59 daniel 2715: ctxt->sax->error(ctxt,
2716: "xmlParseElementDecl: no name for Element\n");
2717: ctxt->wellFormed = 0;
2718: return(-1);
2719: }
2720: if (!IS_BLANK(CUR)) {
2721: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2722: ctxt->sax->error(ctxt,
2723: "Space required after the element name\n");
2724: ctxt->wellFormed = 0;
1.22 daniel 2725: }
1.42 daniel 2726: SKIP_BLANKS;
1.40 daniel 2727: if ((CUR == 'E') && (NXT(1) == 'M') &&
2728: (NXT(2) == 'P') && (NXT(3) == 'T') &&
2729: (NXT(4) == 'Y')) {
2730: SKIP(5);
1.22 daniel 2731: /*
2732: * Element must always be empty.
2733: */
1.59 daniel 2734: ret = XML_ELEMENT_TYPE_EMPTY;
1.40 daniel 2735: } else if ((CUR == 'A') && (NXT(1) == 'N') &&
2736: (NXT(2) == 'Y')) {
2737: SKIP(3);
1.22 daniel 2738: /*
2739: * Element is a generic container.
2740: */
1.59 daniel 2741: ret = XML_ELEMENT_TYPE_ANY;
1.61 daniel 2742: } else if (CUR == '(') {
2743: ret = xmlParseElementContentDecl(ctxt, name, &content);
1.22 daniel 2744: } else {
1.61 daniel 2745: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2746: ctxt->sax->error(ctxt,
2747: "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
2748: ctxt->wellFormed = 0;
2749: if (name != NULL) free(name);
2750: return(-1);
1.22 daniel 2751: }
1.42 daniel 2752: SKIP_BLANKS;
1.40 daniel 2753: if (CUR != '>') {
1.55 daniel 2754: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2755: ctxt->sax->error(ctxt,
1.31 daniel 2756: "xmlParseElementDecl: expected '>' at the end\n");
1.59 daniel 2757: ctxt->wellFormed = 0;
1.61 daniel 2758: } else {
1.40 daniel 2759: NEXT;
1.61 daniel 2760: xmlAddElementDecl(ctxt->doc->intSubset, name, ret, content);
2761: }
2762: if (name != NULL) {
2763: free(name);
2764: }
1.22 daniel 2765: }
1.59 daniel 2766: return(ret);
1.22 daniel 2767: }
2768:
1.50 daniel 2769: /**
2770: * xmlParseMarkupDecl:
2771: * @ctxt: an XML parser context
2772: *
2773: * parse Markup declarations
1.22 daniel 2774: *
2775: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
2776: * NotationDecl | PI | Comment
2777: *
2778: * TODO There is a check [ VC: Proper Declaration/PE Nesting ]
2779: */
1.55 daniel 2780: void
2781: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1.22 daniel 2782: xmlParseElementDecl(ctxt);
2783: xmlParseAttributeListDecl(ctxt);
2784: xmlParseEntityDecl(ctxt);
2785: xmlParseNotationDecl(ctxt);
2786: xmlParsePI(ctxt);
1.31 daniel 2787: xmlParseComment(ctxt, 0);
1.22 daniel 2788: }
2789:
1.50 daniel 2790: /**
2791: * xmlParseCharRef:
2792: * @ctxt: an XML parser context
2793: *
2794: * parse Reference declarations
1.24 daniel 2795: *
2796: * [66] CharRef ::= '&#' [0-9]+ ';' |
2797: * '&#x' [0-9a-fA-F]+ ';'
1.50 daniel 2798: * return values: the value parsed
1.24 daniel 2799: */
1.55 daniel 2800: CHAR *
2801: xmlParseCharRef(xmlParserCtxtPtr ctxt) {
1.29 daniel 2802: int val = 0;
1.44 daniel 2803: CHAR buf[2];
1.24 daniel 2804:
1.40 daniel 2805: if ((CUR == '&') && (NXT(1) == '#') &&
2806: (NXT(2) == 'x')) {
2807: SKIP(3);
2808: while (CUR != ';') {
2809: if ((CUR >= '0') && (CUR <= '9'))
2810: val = val * 16 + (CUR - '0');
2811: else if ((CUR >= 'a') && (CUR <= 'f'))
2812: val = val * 16 + (CUR - 'a') + 10;
2813: else if ((CUR >= 'A') && (CUR <= 'F'))
2814: val = val * 16 + (CUR - 'A') + 10;
1.24 daniel 2815: else {
1.55 daniel 2816: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.58 daniel 2817: ctxt->sax->error(ctxt,
1.59 daniel 2818: "xmlParseCharRef: invalid hexadecimal value\n");
2819: ctxt->wellFormed = 0;
1.29 daniel 2820: val = 0;
1.24 daniel 2821: break;
2822: }
1.47 daniel 2823: NEXT;
1.24 daniel 2824: }
1.55 daniel 2825: if (CUR == ';')
1.40 daniel 2826: NEXT;
2827: } else if ((CUR == '&') && (NXT(1) == '#')) {
2828: SKIP(2);
2829: while (CUR != ';') {
2830: if ((CUR >= '0') && (CUR <= '9'))
1.55 daniel 2831: val = val * 10 + (CUR - '0');
1.24 daniel 2832: else {
1.55 daniel 2833: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.58 daniel 2834: ctxt->sax->error(ctxt,
2835: "xmlParseCharRef: invalid decimal value\n");
1.59 daniel 2836: ctxt->wellFormed = 0;
1.29 daniel 2837: val = 0;
1.24 daniel 2838: break;
2839: }
1.47 daniel 2840: NEXT;
1.24 daniel 2841: }
1.55 daniel 2842: if (CUR == ';')
1.40 daniel 2843: NEXT;
1.24 daniel 2844: } else {
1.55 daniel 2845: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2846: ctxt->sax->error(ctxt, "xmlParseCharRef: invalid value\n");
1.59 daniel 2847: ctxt->wellFormed = 0;
1.24 daniel 2848: }
1.29 daniel 2849: /*
2850: * Check the value IS_CHAR ...
2851: */
1.44 daniel 2852: if (IS_CHAR(val)) {
2853: buf[0] = (CHAR) val;
2854: buf[1] = 0;
1.50 daniel 2855: return(xmlStrndup(buf, 1));
1.44 daniel 2856: } else {
1.55 daniel 2857: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.58 daniel 2858: ctxt->sax->error(ctxt, "xmlParseCharRef: invalid CHAR value %d\n",
2859: val);
1.59 daniel 2860: ctxt->wellFormed = 0;
1.29 daniel 2861: }
1.46 daniel 2862: return(NULL);
1.24 daniel 2863: }
2864:
1.50 daniel 2865: /**
2866: * xmlParseEntityRef:
2867: * @ctxt: an XML parser context
2868: *
2869: * parse ENTITY references declarations
1.24 daniel 2870: *
2871: * [68] EntityRef ::= '&' Name ';'
1.52 daniel 2872: * return values: the entity ref string or NULL if directly as input stream.
1.24 daniel 2873: */
1.55 daniel 2874: CHAR *
2875: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1.46 daniel 2876: CHAR *ret = NULL;
1.50 daniel 2877: const CHAR *q;
1.24 daniel 2878: CHAR *name;
1.59 daniel 2879: xmlEntityPtr ent;
1.50 daniel 2880: xmlParserInputPtr input = NULL;
1.24 daniel 2881:
1.50 daniel 2882: q = CUR_PTR;
1.40 daniel 2883: if (CUR == '&') {
2884: NEXT;
1.24 daniel 2885: name = xmlParseName(ctxt);
2886: if (name == NULL) {
1.55 daniel 2887: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2888: ctxt->sax->error(ctxt, "xmlParseEntityRef: no name\n");
1.59 daniel 2889: ctxt->wellFormed = 0;
1.24 daniel 2890: } else {
1.40 daniel 2891: if (CUR == ';') {
2892: NEXT;
1.24 daniel 2893: /*
1.59 daniel 2894: * Well Formedness Constraint if:
2895: * - standalone
2896: * or
2897: * - no external subset and no external parameter entities
2898: * referenced
2899: * then
2900: * the entity referenced must have been declared
2901: *
2902: * TODO: to be double checked !!!
2903: */
2904: ent = xmlGetDocEntity(ctxt->doc, name);
2905: if ((ctxt->doc->standalone) ||
2906: ((ctxt->doc->intSubset == NULL) &&
2907: (ctxt->doc->extSubset == NULL))) {
2908: if (ent == NULL) {
2909: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2910: ctxt->sax->error(ctxt,
2911: "Entity '%s' not defined\n", name);
2912: ctxt->wellFormed = 0;
2913: }
2914: }
2915:
2916: /*
2917: * Well Formedness Constraint :
2918: * The referenced entity must be a parsed entity.
2919: */
2920: if (ent != NULL) {
2921: switch (ent->type) {
2922: case XML_INTERNAL_PARAMETER_ENTITY:
2923: case XML_EXTERNAL_PARAMETER_ENTITY:
2924: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2925: ctxt->sax->error(ctxt,
2926: "Attempt to reference the parameter entity '%s'\n", name);
2927: ctxt->wellFormed = 0;
2928: break;
2929:
2930: case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
2931: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2932: ctxt->sax->error(ctxt,
2933: "Attempt to reference unparsed entity '%s'\n", name);
2934: ctxt->wellFormed = 0;
2935: break;
2936: }
2937: }
2938:
2939: /*
2940: * Well Formedness Constraint :
2941: * The referenced entity must not lead to recursion !
2942: */
2943:
2944: /*
1.52 daniel 2945: * We parsed the entity reference correctly, call SAX
2946: * interface for the proper behaviour:
2947: * - get a new input stream
2948: * - or keep the reference inline
1.24 daniel 2949: */
1.52 daniel 2950: if (ctxt->sax)
2951: input = ctxt->sax->resolveEntity(ctxt, NULL, name);
2952: if (input != NULL)
2953: xmlPushInput(ctxt, input);
2954: else {
2955: ret = xmlStrndup(q, CUR_PTR - q);
2956: }
1.24 daniel 2957: } else {
1.46 daniel 2958: char cst[2] = { '&', 0 };
2959:
1.55 daniel 2960: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.59 daniel 2961: ctxt->sax->error(ctxt,
2962: "xmlParseEntityRef: expecting ';'\n");
2963: ctxt->wellFormed = 0;
1.46 daniel 2964: ret = xmlStrndup(cst, 1);
2965: ret = xmlStrcat(ret, name);
1.24 daniel 2966: }
1.45 daniel 2967: free(name);
1.24 daniel 2968: }
2969: }
1.46 daniel 2970: return(ret);
1.24 daniel 2971: }
2972:
1.50 daniel 2973: /**
2974: * xmlParseReference:
2975: * @ctxt: an XML parser context
2976: *
2977: * parse Reference declarations
1.24 daniel 2978: *
2979: * [67] Reference ::= EntityRef | CharRef
1.52 daniel 2980: * return values: the entity string or NULL if handled directly by pushing
2981: * the entity value as the input.
1.24 daniel 2982: */
1.55 daniel 2983: CHAR *
2984: xmlParseReference(xmlParserCtxtPtr ctxt) {
1.44 daniel 2985: if ((CUR == '&') && (NXT(1) == '#')) {
1.59 daniel 2986: CHAR *val = xmlParseCharRef(ctxt);
2987: xmlParserInputPtr in;
2988:
2989: if (val != NULL) {
2990: in = xmlNewStringInputStream(ctxt, val);
2991: xmlPushInput(ctxt, in);
2992: }
2993: return(NULL);
1.44 daniel 2994: } else if (CUR == '&') {
1.50 daniel 2995: return(xmlParseEntityRef(ctxt));
1.24 daniel 2996: }
1.46 daniel 2997: return(NULL);
1.24 daniel 2998: }
2999:
1.50 daniel 3000: /**
3001: * xmlParsePEReference:
3002: * @ctxt: an XML parser context
3003: *
3004: * parse PEReference declarations
1.22 daniel 3005: *
3006: * [69] PEReference ::= '%' Name ';'
1.50 daniel 3007: * return values: the entity content or NULL if handled directly.
1.22 daniel 3008: */
1.55 daniel 3009: CHAR *
3010: xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.46 daniel 3011: CHAR *ret = NULL;
1.22 daniel 3012: CHAR *name;
1.45 daniel 3013: xmlEntityPtr entity;
1.50 daniel 3014: xmlParserInputPtr input;
1.22 daniel 3015:
1.40 daniel 3016: if (CUR == '%') {
3017: NEXT;
1.22 daniel 3018: name = xmlParseName(ctxt);
3019: if (name == NULL) {
1.55 daniel 3020: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3021: ctxt->sax->error(ctxt, "xmlParsePEReference: no name\n");
1.59 daniel 3022: ctxt->wellFormed = 0;
1.22 daniel 3023: } else {
1.40 daniel 3024: if (CUR == ';') {
3025: NEXT;
1.45 daniel 3026: entity = xmlGetDtdEntity(ctxt->doc, name);
3027: if (entity == NULL) {
1.55 daniel 3028: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
3029: ctxt->sax->warning(ctxt,
1.59 daniel 3030: "xmlParsePEReference: %%%s; not found\n", name);
1.50 daniel 3031: } else {
3032: input = xmlNewEntityInputStream(ctxt, entity);
3033: xmlPushInput(ctxt, input);
1.45 daniel 3034: }
1.22 daniel 3035: } else {
1.50 daniel 3036: char cst[2] = { '%', 0 };
1.46 daniel 3037:
1.55 daniel 3038: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.59 daniel 3039: ctxt->sax->error(ctxt,
3040: "xmlParsePEReference: expecting ';'\n");
3041: ctxt->wellFormed = 0;
1.46 daniel 3042: ret = xmlStrndup(cst, 1);
3043: ret = xmlStrcat(ret, name);
1.22 daniel 3044: }
1.45 daniel 3045: free(name);
1.3 veillard 3046: }
3047: }
1.46 daniel 3048: return(ret);
1.3 veillard 3049: }
3050:
1.50 daniel 3051: /**
3052: * xmlParseDocTypeDecl :
3053: * @ctxt: an XML parser context
3054: *
3055: * parse a DOCTYPE declaration
1.21 daniel 3056: *
1.22 daniel 3057: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
3058: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.21 daniel 3059: */
3060:
1.55 daniel 3061: void
3062: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.36 daniel 3063: xmlDtdPtr dtd;
1.21 daniel 3064: CHAR *name;
3065: CHAR *ExternalID = NULL;
1.39 daniel 3066: CHAR *URI = NULL;
1.21 daniel 3067:
3068: /*
3069: * We know that '<!DOCTYPE' has been detected.
3070: */
1.40 daniel 3071: SKIP(9);
1.21 daniel 3072:
1.42 daniel 3073: SKIP_BLANKS;
1.21 daniel 3074:
3075: /*
3076: * Parse the DOCTYPE name.
3077: */
3078: name = xmlParseName(ctxt);
3079: if (name == NULL) {
1.55 daniel 3080: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3081: ctxt->sax->error(ctxt, "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.59 daniel 3082: ctxt->wellFormed = 0;
1.21 daniel 3083: }
3084:
1.42 daniel 3085: SKIP_BLANKS;
1.21 daniel 3086:
3087: /*
1.22 daniel 3088: * Check for SystemID and ExternalID
3089: */
1.39 daniel 3090: URI = xmlParseExternalID(ctxt, &ExternalID);
1.42 daniel 3091: SKIP_BLANKS;
1.36 daniel 3092:
1.59 daniel 3093: dtd = xmlCreateIntSubset(ctxt->doc, name, ExternalID, URI);
1.22 daniel 3094:
3095: /*
3096: * Is there any DTD definition ?
3097: */
1.40 daniel 3098: if (CUR == '[') {
3099: NEXT;
1.22 daniel 3100: /*
3101: * Parse the succession of Markup declarations and
3102: * PEReferences.
3103: * Subsequence (markupdecl | PEReference | S)*
3104: */
1.40 daniel 3105: while (CUR != ']') {
3106: const CHAR *check = CUR_PTR;
1.22 daniel 3107:
1.42 daniel 3108: SKIP_BLANKS;
1.22 daniel 3109: xmlParseMarkupDecl(ctxt);
1.50 daniel 3110: xmlParsePEReference(ctxt);
1.22 daniel 3111:
1.40 daniel 3112: if (CUR_PTR == check) {
1.55 daniel 3113: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3114: ctxt->sax->error(ctxt,
1.31 daniel 3115: "xmlParseDocTypeDecl: error detected in Markup declaration\n");
1.59 daniel 3116: ctxt->wellFormed = 0;
1.22 daniel 3117: break;
3118: }
3119: }
1.40 daniel 3120: if (CUR == ']') NEXT;
1.22 daniel 3121: }
3122:
3123: /*
3124: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 3125: */
1.40 daniel 3126: if (CUR != '>') {
1.55 daniel 3127: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3128: ctxt->sax->error(ctxt, "DOCTYPE unproperly terminated\n");
1.59 daniel 3129: ctxt->wellFormed = 0;
1.22 daniel 3130: /* We shouldn't try to resynchronize ... */
1.21 daniel 3131: }
1.40 daniel 3132: NEXT;
1.22 daniel 3133:
3134: /*
3135: * Cleanup, since we don't use all those identifiers
3136: * TODO : the DOCTYPE if available should be stored !
3137: */
1.39 daniel 3138: if (URI != NULL) free(URI);
1.22 daniel 3139: if (ExternalID != NULL) free(ExternalID);
3140: if (name != NULL) free(name);
1.21 daniel 3141: }
3142:
1.50 daniel 3143: /**
3144: * xmlParseAttribute:
3145: * @ctxt: an XML parser context
3146: * @node: the node carrying the attribute
3147: *
3148: * parse an attribute
1.3 veillard 3149: *
1.22 daniel 3150: * [41] Attribute ::= Name Eq AttValue
3151: *
3152: * [25] Eq ::= S? '=' S?
3153: *
1.29 daniel 3154: * With namespace:
3155: *
3156: * [NS 11] Attribute ::= QName Eq AttValue
1.43 daniel 3157: *
3158: * Also the case QName == xmlns:??? is handled independently as a namespace
3159: * definition.
1.3 veillard 3160: */
3161:
1.52 daniel 3162: xmlAttrPtr xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlNodePtr node) {
1.59 daniel 3163: CHAR *name, *val;
1.29 daniel 3164: CHAR *ns;
1.52 daniel 3165: CHAR *value = NULL;
3166: xmlAttrPtr ret;
1.3 veillard 3167:
1.29 daniel 3168: name = xmlNamespaceParseQName(ctxt, &ns);
1.22 daniel 3169: if (name == NULL) {
1.55 daniel 3170: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3171: ctxt->sax->error(ctxt, "error parsing attribute name\n");
1.59 daniel 3172: ctxt->wellFormed = 0;
1.52 daniel 3173: return(NULL);
1.3 veillard 3174: }
3175:
3176: /*
1.29 daniel 3177: * read the value
1.3 veillard 3178: */
1.42 daniel 3179: SKIP_BLANKS;
1.40 daniel 3180: if (CUR == '=') {
3181: NEXT;
1.42 daniel 3182: SKIP_BLANKS;
1.29 daniel 3183: value = xmlParseAttValue(ctxt);
3184: } else {
1.55 daniel 3185: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.59 daniel 3186: ctxt->sax->error(ctxt,
3187: "Specification mandate value for attribute %s\n", name);
3188: ctxt->wellFormed = 0;
1.3 veillard 3189: }
3190:
3191: /*
1.43 daniel 3192: * Check whether it's a namespace definition
3193: */
3194: if ((ns == NULL) &&
3195: (name[0] == 'x') && (name[1] == 'm') && (name[2] == 'l') &&
3196: (name[3] == 'n') && (name[4] == 's') && (name[5] == 0)) {
3197: /* a default namespace definition */
3198: xmlNewNs(node, value, NULL);
3199: if (name != NULL)
3200: free(name);
3201: if (value != NULL)
3202: free(value);
1.52 daniel 3203: return(NULL);
1.43 daniel 3204: }
3205: if ((ns != NULL) && (ns[0] == 'x') && (ns[1] == 'm') && (ns[2] == 'l') &&
3206: (ns[3] == 'n') && (ns[4] == 's') && (ns[5] == 0)) {
3207: /* a standard namespace definition */
3208: xmlNewNs(node, value, name);
1.50 daniel 3209: free(ns);
1.43 daniel 3210: if (name != NULL)
3211: free(name);
3212: if (value != NULL)
3213: free(value);
1.52 daniel 3214: return(NULL);
1.43 daniel 3215: }
3216:
1.59 daniel 3217: /*
3218: * Well formedness requires at most one declaration of an attribute
3219: */
3220: if ((val = xmlGetProp(ctxt->node, name)) != NULL) {
3221: free(val);
3222: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3223: ctxt->sax->error(ctxt, "Attribute %s redefined\n", name);
3224: ctxt->wellFormed = 0;
3225: ret = NULL;
3226: } else {
3227: ret = xmlNewProp(ctxt->node, name, NULL);
3228: if (ret != NULL)
3229: ret->val = xmlStringGetNodeList(ctxt->doc, value);
3230: }
1.53 daniel 3231:
3232: if (ns != NULL)
3233: free(ns);
3234: if (value != NULL)
3235: free(value);
3236: free(name);
1.52 daniel 3237: return(ret);
1.3 veillard 3238: }
3239:
1.50 daniel 3240: /**
3241: * xmlParseStartTag:
3242: * @ctxt: an XML parser context
3243: *
3244: * parse a start of tag either for rule element or
3245: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 3246: *
3247: * [40] STag ::= '<' Name (S Attribute)* S? '>'
3248: *
1.29 daniel 3249: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
3250: *
3251: * With namespace:
3252: *
3253: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
3254: *
3255: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.50 daniel 3256: *
3257: * return values: the XML new node or NULL.
1.2 veillard 3258: */
3259:
1.16 daniel 3260: xmlNodePtr xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.34 daniel 3261: CHAR *namespace, *name;
3262: xmlNsPtr ns = NULL;
1.2 veillard 3263: xmlNodePtr ret = NULL;
1.50 daniel 3264: xmlNodePtr parent = ctxt->node;
1.2 veillard 3265:
1.40 daniel 3266: if (CUR != '<') return(NULL);
3267: NEXT;
1.3 veillard 3268:
1.34 daniel 3269: name = xmlNamespaceParseQName(ctxt, &namespace);
1.59 daniel 3270: if (name == NULL) {
3271: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3272: ctxt->sax->error(ctxt,
3273: "xmlParseStartTag: invalid element name\n");
3274: ctxt->wellFormed = 0;
3275: return(NULL);
3276: }
1.3 veillard 3277:
1.43 daniel 3278: /*
3279: * Note : the namespace resolution is deferred until the end of the
3280: * attributes parsing, since local namespace can be defined as
3281: * an attribute at this level.
3282: */
1.50 daniel 3283: ret = xmlNewDocNode(ctxt->doc, ns, name, NULL);
3284: if (ret == NULL) {
3285: if (namespace != NULL)
3286: free(namespace);
3287: free(name);
3288: return(NULL);
3289: }
3290:
3291: /*
3292: * We are parsing a new node.
3293: */
3294: nodePush(ctxt, ret);
1.2 veillard 3295:
1.3 veillard 3296: /*
3297: * Now parse the attributes, it ends up with the ending
3298: *
3299: * (S Attribute)* S?
3300: */
1.42 daniel 3301: SKIP_BLANKS;
1.40 daniel 3302: while ((IS_CHAR(CUR)) &&
3303: (CUR != '>') &&
3304: ((CUR != '/') || (NXT(1) != '>'))) {
3305: const CHAR *q = CUR_PTR;
1.29 daniel 3306:
3307: xmlParseAttribute(ctxt, ret);
1.42 daniel 3308: SKIP_BLANKS;
1.29 daniel 3309:
1.40 daniel 3310: if (q == CUR_PTR) {
1.55 daniel 3311: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3312: ctxt->sax->error(ctxt,
1.31 daniel 3313: "xmlParseStartTag: problem parsing attributes\n");
1.59 daniel 3314: ctxt->wellFormed = 0;
1.29 daniel 3315: break;
1.3 veillard 3316: }
3317: }
3318:
1.43 daniel 3319: /*
3320: * Search the namespace
3321: */
3322: ns = xmlSearchNs(ctxt->doc, ret, namespace);
3323: if (ns == NULL) /* ret still doesn't have a parent yet ! */
1.50 daniel 3324: ns = xmlSearchNs(ctxt->doc, parent, namespace);
1.43 daniel 3325: xmlSetNs(ret, ns);
3326: if (namespace != NULL)
3327: free(namespace);
3328:
1.44 daniel 3329: /*
3330: * SAX: Start of Element !
3331: */
3332: if (ctxt->sax != NULL)
3333: ctxt->sax->startElement(ctxt, name);
1.52 daniel 3334: free(name);
3335:
3336: /*
3337: * Link the child element
3338: */
3339: if (ctxt->nodeNr < 2) return(ret);
3340: parent = ctxt->nodeTab[ctxt->nodeNr - 2];
3341: if (parent != NULL)
3342: xmlAddChild(parent, ctxt->node);
1.44 daniel 3343:
1.3 veillard 3344: return(ret);
3345: }
3346:
1.50 daniel 3347: /**
3348: * xmlParseEndTag:
3349: * @ctxt: an XML parser context
3350: * @nsPtr: the current node namespace definition
3351: * @tagPtr: CHAR** receive the tag value
3352: *
3353: * parse an end of tag
1.27 daniel 3354: *
3355: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 3356: *
3357: * With namespace
3358: *
3359: * [9] ETag ::= '</' QName S? '>'
1.50 daniel 3360: *
3361: * return values: tagPtr receive the tag name just read
1.7 veillard 3362: */
3363:
1.55 daniel 3364: void
3365: xmlParseEndTag(xmlParserCtxtPtr ctxt, xmlNsPtr *nsPtr, CHAR **tagPtr) {
1.34 daniel 3366: CHAR *namespace, *name;
3367: xmlNsPtr ns = NULL;
1.7 veillard 3368:
1.34 daniel 3369: *nsPtr = NULL;
1.7 veillard 3370: *tagPtr = NULL;
3371:
1.40 daniel 3372: if ((CUR != '<') || (NXT(1) != '/')) {
1.55 daniel 3373: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3374: ctxt->sax->error(ctxt, "xmlParseEndTag: '</' not found\n");
1.59 daniel 3375: ctxt->wellFormed = 0;
1.27 daniel 3376: return;
3377: }
1.40 daniel 3378: SKIP(2);
1.7 veillard 3379:
1.34 daniel 3380: name = xmlNamespaceParseQName(ctxt, &namespace);
1.43 daniel 3381:
3382: /*
3383: * Search the namespace
3384: */
3385: ns = xmlSearchNs(ctxt->doc, ctxt->node, namespace);
3386: if (namespace != NULL)
1.34 daniel 3387: free(namespace);
1.7 veillard 3388:
1.34 daniel 3389: *nsPtr = ns;
1.7 veillard 3390: *tagPtr = name;
3391:
3392: /*
3393: * We should definitely be at the ending "S? '>'" part
3394: */
1.42 daniel 3395: SKIP_BLANKS;
1.40 daniel 3396: if ((!IS_CHAR(CUR)) || (CUR != '>')) {
1.55 daniel 3397: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3398: ctxt->sax->error(ctxt, "End tag : expected '>'\n");
1.59 daniel 3399: ctxt->wellFormed = 0;
1.7 veillard 3400: } else
1.40 daniel 3401: NEXT;
1.7 veillard 3402:
3403: return;
3404: }
3405:
1.50 daniel 3406: /**
3407: * xmlParseCDSect:
3408: * @ctxt: an XML parser context
3409: *
3410: * Parse escaped pure raw content.
1.29 daniel 3411: *
3412: * [18] CDSect ::= CDStart CData CDEnd
3413: *
3414: * [19] CDStart ::= '<![CDATA['
3415: *
3416: * [20] Data ::= (Char* - (Char* ']]>' Char*))
3417: *
3418: * [21] CDEnd ::= ']]>'
1.3 veillard 3419: */
1.55 daniel 3420: void
3421: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.17 daniel 3422: const CHAR *r, *s, *base;
1.3 veillard 3423:
1.40 daniel 3424: if ((CUR == '<') && (NXT(1) == '!') &&
3425: (NXT(2) == '[') && (NXT(3) == 'C') &&
3426: (NXT(4) == 'D') && (NXT(5) == 'A') &&
3427: (NXT(6) == 'T') && (NXT(7) == 'A') &&
3428: (NXT(8) == '[')) {
3429: SKIP(9);
1.29 daniel 3430: } else
1.45 daniel 3431: return;
1.40 daniel 3432: base = CUR_PTR;
3433: if (!IS_CHAR(CUR)) {
1.55 daniel 3434: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3435: ctxt->sax->error(ctxt, "CData section not finished\n%.50s\n", base);
1.59 daniel 3436: ctxt->wellFormed = 0;
1.45 daniel 3437: return;
1.3 veillard 3438: }
1.40 daniel 3439: r = NEXT;
3440: if (!IS_CHAR(CUR)) {
1.55 daniel 3441: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3442: ctxt->sax->error(ctxt, "CData section not finished\n%.50s\n", base);
1.59 daniel 3443: ctxt->wellFormed = 0;
1.45 daniel 3444: return;
1.3 veillard 3445: }
1.40 daniel 3446: s = NEXT;
3447: while (IS_CHAR(CUR) &&
3448: ((*r != ']') || (*s != ']') || (CUR != '>'))) {
3449: r++;s++;NEXT;
1.3 veillard 3450: }
1.40 daniel 3451: if (!IS_CHAR(CUR)) {
1.55 daniel 3452: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3453: ctxt->sax->error(ctxt, "CData section not finished\n%.50s\n", base);
1.59 daniel 3454: ctxt->wellFormed = 0;
1.45 daniel 3455: return;
1.3 veillard 3456: }
1.16 daniel 3457:
1.45 daniel 3458: /*
3459: * Ok the segment [base CUR_PTR] is to be consumed as chars.
3460: */
3461: if (ctxt->sax != NULL) {
3462: if (areBlanks(ctxt, base, CUR_PTR - base))
1.59 daniel 3463: ctxt->sax->ignorableWhitespace(ctxt, base, 0, (CUR_PTR - base) - 2);
1.45 daniel 3464: else
1.59 daniel 3465: ctxt->sax->characters(ctxt, base, 0, (CUR_PTR - base) - 2);
1.45 daniel 3466: }
1.2 veillard 3467: }
3468:
1.50 daniel 3469: /**
3470: * xmlParseContent:
3471: * @ctxt: an XML parser context
3472: *
3473: * Parse a content:
1.2 veillard 3474: *
1.27 daniel 3475: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 3476: */
3477:
1.55 daniel 3478: void
3479: xmlParseContent(xmlParserCtxtPtr ctxt) {
1.2 veillard 3480: xmlNodePtr ret = NULL;
3481:
1.40 daniel 3482: while ((CUR != '<') || (NXT(1) != '/')) {
3483: const CHAR *test = CUR_PTR;
1.27 daniel 3484: ret = NULL;
3485:
3486: /*
3487: * First case : a Processing Instruction.
3488: */
1.40 daniel 3489: if ((CUR == '<') && (NXT(1) == '?')) {
1.27 daniel 3490: xmlParsePI(ctxt);
3491: }
3492: /*
3493: * Second case : a CDSection
3494: */
1.40 daniel 3495: else if ((CUR == '<') && (NXT(1) == '!') &&
3496: (NXT(2) == '[') && (NXT(3) == 'C') &&
3497: (NXT(4) == 'D') && (NXT(5) == 'A') &&
3498: (NXT(6) == 'T') && (NXT(7) == 'A') &&
3499: (NXT(8) == '[')) {
1.45 daniel 3500: xmlParseCDSect(ctxt);
1.27 daniel 3501: }
3502: /*
3503: * Third case : a comment
3504: */
1.40 daniel 3505: else if ((CUR == '<') && (NXT(1) == '!') &&
3506: (NXT(2) == '-') && (NXT(3) == '-')) {
1.31 daniel 3507: ret = xmlParseComment(ctxt, 1);
1.27 daniel 3508: }
3509: /*
3510: * Fourth case : a sub-element.
3511: */
1.40 daniel 3512: else if (CUR == '<') {
1.45 daniel 3513: ret = xmlParseElement(ctxt);
3514: }
3515: /*
1.50 daniel 3516: * Fifth case : a reference. If if has not been resolved,
3517: * parsing returns it's Name, create the node
1.45 daniel 3518: */
3519: else if (CUR == '&') {
1.50 daniel 3520: CHAR *val = xmlParseReference(ctxt);
3521: if (val != NULL) {
3522: if (val[0] != '&') {
3523: /*
3524: * inline predefined entity.
3525: */
3526: if (ctxt->sax != NULL)
3527: ctxt->sax->characters(ctxt, val, 0, xmlStrlen(val));
3528: } else {
3529: /*
3530: * user defined entity, create a node.
3531: */
3532: ret = xmlNewReference(ctxt->doc, val);
3533: xmlAddChild(ctxt->node, ret);
3534: }
3535: free(val);
3536: }
1.27 daniel 3537: }
3538: /*
3539: * Last case, text. Note that References are handled directly.
3540: */
3541: else {
1.45 daniel 3542: xmlParseCharData(ctxt, 0);
1.3 veillard 3543: }
1.14 veillard 3544:
3545: /*
1.45 daniel 3546: * Pop-up of finished entities.
1.14 veillard 3547: */
1.45 daniel 3548: while ((CUR == 0) && (ctxt->inputNr > 1)) xmlPopInput(ctxt);
3549:
1.40 daniel 3550: if (test == CUR_PTR) {
1.55 daniel 3551: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.59 daniel 3552: ctxt->sax->error(ctxt,
3553: "detected an error in element content\n");
3554: ctxt->wellFormed = 0;
1.29 daniel 3555: break;
3556: }
1.3 veillard 3557: }
1.2 veillard 3558: }
3559:
1.50 daniel 3560: /**
3561: * xmlParseElement:
3562: * @ctxt: an XML parser context
3563: *
3564: * parse an XML element, this is highly recursive
1.26 daniel 3565: *
3566: * [39] element ::= EmptyElemTag | STag content ETag
3567: *
3568: * [41] Attribute ::= Name Eq AttValue
1.50 daniel 3569: * return values: the XML new node or NULL
1.2 veillard 3570: */
1.26 daniel 3571:
1.2 veillard 3572:
1.45 daniel 3573: xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt) {
1.27 daniel 3574: xmlNodePtr ret;
1.40 daniel 3575: const CHAR *openTag = CUR_PTR;
1.32 daniel 3576: xmlParserNodeInfo node_info;
1.27 daniel 3577: CHAR *endTag;
1.34 daniel 3578: xmlNsPtr endNs;
1.2 veillard 3579:
1.32 daniel 3580: /* Capture start position */
1.40 daniel 3581: node_info.begin_pos = CUR_PTR - ctxt->input->base;
3582: node_info.begin_line = ctxt->input->line;
1.32 daniel 3583:
1.16 daniel 3584: ret = xmlParseStartTag(ctxt);
1.3 veillard 3585: if (ret == NULL) {
3586: return(NULL);
3587: }
1.2 veillard 3588:
3589: /*
3590: * Check for an Empty Element.
3591: */
1.40 daniel 3592: if ((CUR == '/') && (NXT(1) == '>')) {
3593: SKIP(2);
1.45 daniel 3594: if (ctxt->sax != NULL)
3595: ctxt->sax->endElement(ctxt, ret->name);
3596:
3597: /*
3598: * end of parsing of this node.
3599: */
3600: nodePop(ctxt);
3601:
1.2 veillard 3602: return(ret);
3603: }
1.40 daniel 3604: if (CUR == '>') NEXT;
1.2 veillard 3605: else {
1.55 daniel 3606: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.57 daniel 3607: ctxt->sax->error(ctxt, "Couldn't find end of Start Tag\n%.30s\n",
3608: openTag);
1.59 daniel 3609: ctxt->wellFormed = 0;
1.45 daniel 3610:
3611: /*
3612: * end of parsing of this node.
3613: */
3614: nodePop(ctxt);
3615:
1.16 daniel 3616: return(NULL);
1.2 veillard 3617: }
3618:
3619: /*
3620: * Parse the content of the element:
3621: */
1.45 daniel 3622: xmlParseContent(ctxt);
1.40 daniel 3623: if (!IS_CHAR(CUR)) {
1.55 daniel 3624: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.57 daniel 3625: ctxt->sax->error(ctxt,
3626: "Premature end of data in tag %.30s\n", openTag);
1.59 daniel 3627: ctxt->wellFormed = 0;
1.45 daniel 3628:
3629: /*
3630: * end of parsing of this node.
3631: */
3632: nodePop(ctxt);
3633:
1.16 daniel 3634: return(NULL);
1.2 veillard 3635: }
3636:
3637: /*
1.27 daniel 3638: * parse the end of tag: '</' should be here.
1.2 veillard 3639: */
1.34 daniel 3640: xmlParseEndTag(ctxt, &endNs, &endTag);
1.7 veillard 3641:
1.27 daniel 3642: /*
3643: * Check that the Name in the ETag is the same as in the STag.
3644: */
1.34 daniel 3645: if (endNs != ret->ns) {
1.55 daniel 3646: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3647: ctxt->sax->error(ctxt,
1.43 daniel 3648: "Start and End tags don't use the same namespace\n%.30s\n%.30s\n",
1.31 daniel 3649: openTag, endTag);
1.59 daniel 3650: ctxt->wellFormed = 0;
1.27 daniel 3651: }
1.32 daniel 3652: if (endTag == NULL ) {
1.55 daniel 3653: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3654: ctxt->sax->error(ctxt, "The End tag has no name\n%.30s\n", openTag);
1.59 daniel 3655: ctxt->wellFormed = 0;
1.45 daniel 3656: } else if (xmlStrcmp(ret->name, endTag)) {
1.55 daniel 3657: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3658: ctxt->sax->error(ctxt,
1.31 daniel 3659: "Start and End tags don't use the same name\n%.30s\n%.30s\n",
3660: openTag, endTag);
1.59 daniel 3661: ctxt->wellFormed = 0;
1.27 daniel 3662: }
1.44 daniel 3663: /*
3664: * SAX: End of Tag
3665: */
3666: else if (ctxt->sax != NULL)
3667: ctxt->sax->endElement(ctxt, endTag);
1.7 veillard 3668:
1.44 daniel 3669: if (endTag != NULL)
3670: free(endTag);
1.2 veillard 3671:
1.32 daniel 3672: /* Capture end position and add node */
3673: if ( ret != NULL && ctxt->record_info ) {
1.40 daniel 3674: node_info.end_pos = CUR_PTR - ctxt->input->base;
3675: node_info.end_line = ctxt->input->line;
1.32 daniel 3676: node_info.node = ret;
3677: xmlParserAddNodeInfo(ctxt, &node_info);
3678: }
1.43 daniel 3679:
3680: /*
3681: * end of parsing of this node.
3682: */
3683: nodePop(ctxt);
3684:
1.2 veillard 3685: return(ret);
3686: }
3687:
1.50 daniel 3688: /**
3689: * xmlParseVersionNum:
3690: * @ctxt: an XML parser context
3691: *
3692: * parse the XML version value.
1.29 daniel 3693: *
3694: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
1.50 daniel 3695: * return values: the string giving the XML version number, or NULL
1.29 daniel 3696: */
1.55 daniel 3697: CHAR *
3698: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.40 daniel 3699: const CHAR *q = CUR_PTR;
1.29 daniel 3700: CHAR *ret;
3701:
1.40 daniel 3702: while (IS_CHAR(CUR) &&
3703: (((CUR >= 'a') && (CUR <= 'z')) ||
3704: ((CUR >= 'A') && (CUR <= 'Z')) ||
3705: ((CUR >= '0') && (CUR <= '9')) ||
3706: (CUR == '_') || (CUR == '.') ||
3707: (CUR == ':') || (CUR == '-'))) NEXT;
3708: ret = xmlStrndup(q, CUR_PTR - q);
1.29 daniel 3709: return(ret);
3710: }
3711:
1.50 daniel 3712: /**
3713: * xmlParseVersionInfo:
3714: * @ctxt: an XML parser context
3715: *
3716: * parse the XML version.
1.29 daniel 3717: *
3718: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
3719: *
3720: * [25] Eq ::= S? '=' S?
1.50 daniel 3721: *
3722: * return values: the version string, e.g. "1.0"
1.29 daniel 3723: */
3724:
1.55 daniel 3725: CHAR *
3726: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
1.29 daniel 3727: CHAR *version = NULL;
3728: const CHAR *q;
3729:
1.40 daniel 3730: if ((CUR == 'v') && (NXT(1) == 'e') &&
3731: (NXT(2) == 'r') && (NXT(3) == 's') &&
3732: (NXT(4) == 'i') && (NXT(5) == 'o') &&
3733: (NXT(6) == 'n')) {
3734: SKIP(7);
1.42 daniel 3735: SKIP_BLANKS;
1.40 daniel 3736: if (CUR != '=') {
1.55 daniel 3737: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3738: ctxt->sax->error(ctxt, "xmlParseVersionInfo : expected '='\n");
1.59 daniel 3739: ctxt->wellFormed = 0;
1.31 daniel 3740: return(NULL);
3741: }
1.40 daniel 3742: NEXT;
1.42 daniel 3743: SKIP_BLANKS;
1.40 daniel 3744: if (CUR == '"') {
3745: NEXT;
3746: q = CUR_PTR;
1.29 daniel 3747: version = xmlParseVersionNum(ctxt);
1.55 daniel 3748: if (CUR != '"') {
3749: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3750: ctxt->sax->error(ctxt, "String not closed\n%.50s\n", q);
1.59 daniel 3751: ctxt->wellFormed = 0;
1.55 daniel 3752: } else
1.40 daniel 3753: NEXT;
3754: } else if (CUR == '\''){
3755: NEXT;
3756: q = CUR_PTR;
1.29 daniel 3757: version = xmlParseVersionNum(ctxt);
1.55 daniel 3758: if (CUR != '\'') {
3759: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3760: ctxt->sax->error(ctxt, "String not closed\n%.50s\n", q);
1.59 daniel 3761: ctxt->wellFormed = 0;
1.55 daniel 3762: } else
1.40 daniel 3763: NEXT;
1.31 daniel 3764: } else {
1.55 daniel 3765: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.59 daniel 3766: ctxt->sax->error(ctxt,
3767: "xmlParseVersionInfo : expected ' or \"\n");
3768: ctxt->wellFormed = 0;
1.29 daniel 3769: }
3770: }
3771: return(version);
3772: }
3773:
1.50 daniel 3774: /**
3775: * xmlParseEncName:
3776: * @ctxt: an XML parser context
3777: *
3778: * parse the XML encoding name
1.29 daniel 3779: *
3780: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1.50 daniel 3781: *
3782: * return values: the encoding name value or NULL
1.29 daniel 3783: */
1.55 daniel 3784: CHAR *
3785: xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.40 daniel 3786: const CHAR *q = CUR_PTR;
1.29 daniel 3787: CHAR *ret = NULL;
3788:
1.40 daniel 3789: if (((CUR >= 'a') && (CUR <= 'z')) ||
3790: ((CUR >= 'A') && (CUR <= 'Z'))) {
3791: NEXT;
3792: while (IS_CHAR(CUR) &&
3793: (((CUR >= 'a') && (CUR <= 'z')) ||
3794: ((CUR >= 'A') && (CUR <= 'Z')) ||
3795: ((CUR >= '0') && (CUR <= '9')) ||
3796: (CUR == '-'))) NEXT;
3797: ret = xmlStrndup(q, CUR_PTR - q);
1.29 daniel 3798: } else {
1.55 daniel 3799: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3800: ctxt->sax->error(ctxt, "Invalid XML encoding name\n");
1.59 daniel 3801: ctxt->wellFormed = 0;
1.29 daniel 3802: }
3803: return(ret);
3804: }
3805:
1.50 daniel 3806: /**
3807: * xmlParseEncodingDecl:
3808: * @ctxt: an XML parser context
3809: *
3810: * parse the XML encoding declaration
1.29 daniel 3811: *
3812: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
1.50 daniel 3813: *
3814: * TODO: this should setup the conversion filters.
3815: *
3816: * return values: the encoding value or NULL
1.29 daniel 3817: */
3818:
1.55 daniel 3819: CHAR *
3820: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 3821: CHAR *encoding = NULL;
3822: const CHAR *q;
3823:
1.42 daniel 3824: SKIP_BLANKS;
1.40 daniel 3825: if ((CUR == 'e') && (NXT(1) == 'n') &&
3826: (NXT(2) == 'c') && (NXT(3) == 'o') &&
3827: (NXT(4) == 'd') && (NXT(5) == 'i') &&
3828: (NXT(6) == 'n') && (NXT(7) == 'g')) {
3829: SKIP(8);
1.42 daniel 3830: SKIP_BLANKS;
1.40 daniel 3831: if (CUR != '=') {
1.55 daniel 3832: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3833: ctxt->sax->error(ctxt, "xmlParseEncodingDecl : expected '='\n");
1.59 daniel 3834: ctxt->wellFormed = 0;
1.31 daniel 3835: return(NULL);
3836: }
1.40 daniel 3837: NEXT;
1.42 daniel 3838: SKIP_BLANKS;
1.40 daniel 3839: if (CUR == '"') {
3840: NEXT;
3841: q = CUR_PTR;
1.29 daniel 3842: encoding = xmlParseEncName(ctxt);
1.55 daniel 3843: if (CUR != '"') {
3844: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3845: ctxt->sax->error(ctxt, "String not closed\n%.50s\n", q);
1.59 daniel 3846: ctxt->wellFormed = 0;
1.55 daniel 3847: } else
1.40 daniel 3848: NEXT;
3849: } else if (CUR == '\''){
3850: NEXT;
3851: q = CUR_PTR;
1.29 daniel 3852: encoding = xmlParseEncName(ctxt);
1.55 daniel 3853: if (CUR != '\'') {
3854: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3855: ctxt->sax->error(ctxt, "String not closed\n%.50s\n", q);
1.59 daniel 3856: ctxt->wellFormed = 0;
1.55 daniel 3857: } else
1.40 daniel 3858: NEXT;
3859: } else if (CUR == '"'){
1.55 daniel 3860: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.59 daniel 3861: ctxt->sax->error(ctxt,
3862: "xmlParseEncodingDecl : expected ' or \"\n");
3863: ctxt->wellFormed = 0;
1.29 daniel 3864: }
3865: }
3866: return(encoding);
3867: }
3868:
1.50 daniel 3869: /**
3870: * xmlParseSDDecl:
3871: * @ctxt: an XML parser context
3872: *
3873: * parse the XML standalone declaration
1.29 daniel 3874: *
3875: * [32] SDDecl ::= S 'standalone' Eq
3876: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
1.50 daniel 3877: * return values: 1 if standalone, 0 otherwise
1.29 daniel 3878: */
3879:
1.55 daniel 3880: int
3881: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 3882: int standalone = -1;
3883:
1.42 daniel 3884: SKIP_BLANKS;
1.40 daniel 3885: if ((CUR == 's') && (NXT(1) == 't') &&
3886: (NXT(2) == 'a') && (NXT(3) == 'n') &&
3887: (NXT(4) == 'd') && (NXT(5) == 'a') &&
3888: (NXT(6) == 'l') && (NXT(7) == 'o') &&
3889: (NXT(8) == 'n') && (NXT(9) == 'e')) {
3890: SKIP(10);
3891: if (CUR != '=') {
1.55 daniel 3892: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.59 daniel 3893: ctxt->sax->error(ctxt,
3894: "XML standalone declaration : expected '='\n");
3895: ctxt->wellFormed = 0;
1.32 daniel 3896: return(standalone);
3897: }
1.40 daniel 3898: NEXT;
1.42 daniel 3899: SKIP_BLANKS;
1.40 daniel 3900: if (CUR == '\''){
3901: NEXT;
3902: if ((CUR == 'n') && (NXT(1) == 'o')) {
1.29 daniel 3903: standalone = 0;
1.40 daniel 3904: SKIP(2);
3905: } else if ((CUR == 'y') && (NXT(1) == 'e') &&
3906: (NXT(2) == 's')) {
1.29 daniel 3907: standalone = 1;
1.40 daniel 3908: SKIP(3);
1.29 daniel 3909: } else {
1.55 daniel 3910: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3911: ctxt->sax->error(ctxt, "standalone accepts only 'yes' or 'no'\n");
1.59 daniel 3912: ctxt->wellFormed = 0;
1.29 daniel 3913: }
1.55 daniel 3914: if (CUR != '\'') {
3915: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3916: ctxt->sax->error(ctxt, "String not closed\n");
1.59 daniel 3917: ctxt->wellFormed = 0;
1.55 daniel 3918: } else
1.40 daniel 3919: NEXT;
3920: } else if (CUR == '"'){
3921: NEXT;
3922: if ((CUR == 'n') && (NXT(1) == 'o')) {
1.29 daniel 3923: standalone = 0;
1.40 daniel 3924: SKIP(2);
3925: } else if ((CUR == 'y') && (NXT(1) == 'e') &&
3926: (NXT(2) == 's')) {
1.29 daniel 3927: standalone = 1;
1.40 daniel 3928: SKIP(3);
1.29 daniel 3929: } else {
1.55 daniel 3930: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.59 daniel 3931: ctxt->sax->error(ctxt,
3932: "standalone accepts only 'yes' or 'no'\n");
3933: ctxt->wellFormed = 0;
1.29 daniel 3934: }
1.55 daniel 3935: if (CUR != '"') {
3936: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3937: ctxt->sax->error(ctxt, "String not closed\n");
1.59 daniel 3938: ctxt->wellFormed = 0;
1.55 daniel 3939: } else
1.40 daniel 3940: NEXT;
1.37 daniel 3941: } else {
1.55 daniel 3942: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3943: ctxt->sax->error(ctxt, "Standalone value not found\n");
1.59 daniel 3944: ctxt->wellFormed = 0;
1.37 daniel 3945: }
1.29 daniel 3946: }
3947: return(standalone);
3948: }
3949:
1.50 daniel 3950: /**
3951: * xmlParseXMLDecl:
3952: * @ctxt: an XML parser context
3953: *
3954: * parse an XML declaration header
1.29 daniel 3955: *
3956: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 3957: */
3958:
1.55 daniel 3959: void
3960: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.1 veillard 3961: CHAR *version;
3962:
3963: /*
1.19 daniel 3964: * We know that '<?xml' is here.
1.1 veillard 3965: */
1.40 daniel 3966: SKIP(5);
1.1 veillard 3967:
1.59 daniel 3968: if (!IS_BLANK(CUR)) {
3969: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3970: ctxt->sax->error(ctxt, "Blank needed after '<?xml'\n");
3971: ctxt->wellFormed = 0;
3972: }
1.42 daniel 3973: SKIP_BLANKS;
1.1 veillard 3974:
3975: /*
1.29 daniel 3976: * We should have the VersionInfo here.
1.1 veillard 3977: */
1.29 daniel 3978: version = xmlParseVersionInfo(ctxt);
3979: if (version == NULL)
1.45 daniel 3980: version = xmlCharStrdup(XML_DEFAULT_VERSION);
3981: ctxt->doc = xmlNewDoc(version);
3982: free(version);
1.29 daniel 3983:
3984: /*
3985: * We may have the encoding declaration
3986: */
1.59 daniel 3987: if (!IS_BLANK(CUR)) {
3988: if ((CUR == '?') && (NXT(1) == '>')) {
3989: SKIP(2);
3990: return;
3991: }
3992: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3993: ctxt->sax->error(ctxt, "Blank needed here\n");
3994: ctxt->wellFormed = 0;
3995: }
1.32 daniel 3996: ctxt->doc->encoding = xmlParseEncodingDecl(ctxt);
1.1 veillard 3997:
3998: /*
1.29 daniel 3999: * We may have the standalone status.
1.1 veillard 4000: */
1.59 daniel 4001: if ((ctxt->doc->encoding != NULL) && (!IS_BLANK(CUR))) {
4002: if ((CUR == '?') && (NXT(1) == '>')) {
4003: SKIP(2);
4004: return;
4005: }
4006: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4007: ctxt->sax->error(ctxt, "Blank needed here\n");
4008: ctxt->wellFormed = 0;
4009: }
4010: SKIP_BLANKS;
1.32 daniel 4011: ctxt->doc->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 4012:
1.42 daniel 4013: SKIP_BLANKS;
1.40 daniel 4014: if ((CUR == '?') && (NXT(1) == '>')) {
4015: SKIP(2);
4016: } else if (CUR == '>') {
1.31 daniel 4017: /* Deprecated old WD ... */
1.55 daniel 4018: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4019: ctxt->sax->error(ctxt, "XML declaration must end-up with '?>'\n");
1.59 daniel 4020: ctxt->wellFormed = 0;
1.40 daniel 4021: NEXT;
1.29 daniel 4022: } else {
1.55 daniel 4023: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4024: ctxt->sax->error(ctxt, "parsing XML declaration: '?>' expected\n");
1.59 daniel 4025: ctxt->wellFormed = 0;
1.40 daniel 4026: MOVETO_ENDTAG(CUR_PTR);
4027: NEXT;
1.29 daniel 4028: }
1.1 veillard 4029: }
4030:
1.50 daniel 4031: /**
4032: * xmlParseMisc:
4033: * @ctxt: an XML parser context
4034: *
4035: * parse an XML Misc* optionnal field.
1.21 daniel 4036: *
1.22 daniel 4037: * [27] Misc ::= Comment | PI | S
1.1 veillard 4038: */
4039:
1.55 daniel 4040: void
4041: xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.40 daniel 4042: while (((CUR == '<') && (NXT(1) == '?')) ||
4043: ((CUR == '<') && (NXT(1) == '!') &&
4044: (NXT(2) == '-') && (NXT(3) == '-')) ||
4045: IS_BLANK(CUR)) {
4046: if ((CUR == '<') && (NXT(1) == '?')) {
1.16 daniel 4047: xmlParsePI(ctxt);
1.40 daniel 4048: } else if (IS_BLANK(CUR)) {
4049: NEXT;
1.1 veillard 4050: } else
1.31 daniel 4051: xmlParseComment(ctxt, 0);
1.1 veillard 4052: }
4053: }
4054:
1.50 daniel 4055: /**
4056: * xmlParseDocument :
4057: * @ctxt: an XML parser context
4058: *
4059: * parse an XML document (and build a tree if using the standard SAX
4060: * interface).
1.21 daniel 4061: *
1.22 daniel 4062: * [1] document ::= prolog element Misc*
1.29 daniel 4063: *
4064: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.50 daniel 4065: *
4066: * return values: 0, -1 in case of error. the parser context is augmented
4067: * as a result of the parsing.
1.1 veillard 4068: */
4069:
1.55 daniel 4070: int
4071: xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.45 daniel 4072: xmlDefaultSAXHandlerInit();
4073:
1.14 veillard 4074: /*
1.44 daniel 4075: * SAX: beginning of the document processing.
4076: */
4077: if (ctxt->sax)
4078: ctxt->sax->setDocumentLocator(ctxt, &xmlDefaultSAXLocator);
4079: if (ctxt->sax)
4080: ctxt->sax->startDocument(ctxt);
4081:
4082: /*
1.14 veillard 4083: * We should check for encoding here and plug-in some
4084: * conversion code TODO !!!!
4085: */
1.1 veillard 4086:
4087: /*
4088: * Wipe out everything which is before the first '<'
4089: */
1.59 daniel 4090: if (IS_BLANK(CUR)) {
4091: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4092: ctxt->sax->error(ctxt,
4093: "Extra spaces at the beginning of the document are not allowed\n");
4094: ctxt->wellFormed = 0;
4095: SKIP_BLANKS;
4096: }
4097:
4098: if (CUR == 0) {
4099: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4100: ctxt->sax->error(ctxt, "Document is empty\n");
4101: ctxt->wellFormed = 0;
4102: }
1.1 veillard 4103:
4104: /*
4105: * Check for the XMLDecl in the Prolog.
4106: */
1.40 daniel 4107: if ((CUR == '<') && (NXT(1) == '?') &&
4108: (NXT(2) == 'x') && (NXT(3) == 'm') &&
4109: (NXT(4) == 'l')) {
1.19 daniel 4110: xmlParseXMLDecl(ctxt);
4111: /* SKIP_EOL(cur); */
1.42 daniel 4112: SKIP_BLANKS;
1.40 daniel 4113: } else if ((CUR == '<') && (NXT(1) == '?') &&
4114: (NXT(2) == 'X') && (NXT(3) == 'M') &&
4115: (NXT(4) == 'L')) {
1.19 daniel 4116: /*
4117: * The first drafts were using <?XML and the final W3C REC
4118: * now use <?xml ...
4119: */
1.16 daniel 4120: xmlParseXMLDecl(ctxt);
1.1 veillard 4121: /* SKIP_EOL(cur); */
1.42 daniel 4122: SKIP_BLANKS;
1.1 veillard 4123: } else {
1.45 daniel 4124: CHAR *version;
4125:
4126: version = xmlCharStrdup(XML_DEFAULT_VERSION);
4127: ctxt->doc = xmlNewDoc(version);
4128: free(version);
1.1 veillard 4129: }
4130:
4131: /*
4132: * The Misc part of the Prolog
4133: */
1.16 daniel 4134: xmlParseMisc(ctxt);
1.1 veillard 4135:
4136: /*
1.29 daniel 4137: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 4138: * (doctypedecl Misc*)?
4139: */
1.40 daniel 4140: if ((CUR == '<') && (NXT(1) == '!') &&
4141: (NXT(2) == 'D') && (NXT(3) == 'O') &&
4142: (NXT(4) == 'C') && (NXT(5) == 'T') &&
4143: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
4144: (NXT(8) == 'E')) {
1.22 daniel 4145: xmlParseDocTypeDecl(ctxt);
4146: xmlParseMisc(ctxt);
1.21 daniel 4147: }
4148:
4149: /*
4150: * Time to start parsing the tree itself
1.1 veillard 4151: */
1.45 daniel 4152: ctxt->doc->root = xmlParseElement(ctxt);
1.33 daniel 4153:
4154: /*
4155: * The Misc part at the end
4156: */
4157: xmlParseMisc(ctxt);
1.16 daniel 4158:
1.59 daniel 4159: if (CUR != 0) {
4160: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4161: ctxt->sax->error(ctxt,
4162: "Extra content at the end of the document\n");
4163: ctxt->wellFormed = 0;
4164: }
4165:
1.44 daniel 4166: /*
4167: * SAX: end of the document processing.
4168: */
4169: if (ctxt->sax)
4170: ctxt->sax->endDocument(ctxt);
1.59 daniel 4171: if (! ctxt->wellFormed) return(-1);
1.16 daniel 4172: return(0);
4173: }
4174:
1.50 daniel 4175: /**
1.55 daniel 4176: * xmlSAXParseDoc :
4177: * @sax: the SAX handler block
1.50 daniel 4178: * @cur: a pointer to an array of CHAR
1.59 daniel 4179: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
4180: * documents
1.50 daniel 4181: *
4182: * parse an XML in-memory document and build a tree.
1.55 daniel 4183: * It use the given SAX function block to handle the parsing callback.
4184: * If sax is NULL, fallback to the default DOM tree building routines.
1.50 daniel 4185: *
4186: * return values: the resulting document tree
1.16 daniel 4187: */
4188:
1.59 daniel 4189: xmlDocPtr xmlSAXParseDoc(xmlSAXHandlerPtr sax, CHAR *cur, int recovery) {
1.16 daniel 4190: xmlDocPtr ret;
4191: xmlParserCtxtPtr ctxt;
1.40 daniel 4192: xmlParserInputPtr input;
1.16 daniel 4193:
4194: if (cur == NULL) return(NULL);
1.1 veillard 4195:
1.16 daniel 4196: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
4197: if (ctxt == NULL) {
4198: perror("malloc");
4199: return(NULL);
4200: }
1.40 daniel 4201: xmlInitParserCtxt(ctxt);
1.56 daniel 4202: if (sax != NULL) ctxt->sax = sax;
1.40 daniel 4203: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
4204: if (input == NULL) {
4205: perror("malloc");
4206: free(ctxt);
4207: return(NULL);
4208: }
4209:
4210: input->filename = NULL;
4211: input->line = 1;
4212: input->col = 1;
4213: input->base = cur;
4214: input->cur = cur;
4215:
4216: inputPush(ctxt, input);
1.16 daniel 4217:
4218:
4219: xmlParseDocument(ctxt);
1.59 daniel 4220: if ((ctxt->wellFormed) || recovery) ret = ctxt->doc;
4221: else {
4222: ret = NULL;
4223: xmlFreeDoc(ctxt->doc);
4224: ctxt->doc = NULL;
4225: }
1.50 daniel 4226: free(ctxt->nodeTab);
4227: free(ctxt->inputTab);
4228: if (input->filename != NULL)
1.51 daniel 4229: free((char *)input->filename);
1.50 daniel 4230: free(input);
1.16 daniel 4231: free(ctxt);
4232:
1.1 veillard 4233: return(ret);
4234: }
4235:
1.50 daniel 4236: /**
1.55 daniel 4237: * xmlParseDoc :
4238: * @cur: a pointer to an array of CHAR
4239: *
4240: * parse an XML in-memory document and build a tree.
4241: *
4242: * return values: the resulting document tree
4243: */
4244:
4245: xmlDocPtr xmlParseDoc(CHAR *cur) {
1.59 daniel 4246: return(xmlSAXParseDoc(NULL, cur, 0));
4247: }
4248:
4249: /**
4250: * xmlRecoverDoc :
4251: * @cur: a pointer to an array of CHAR
4252: *
4253: * parse an XML in-memory document and build a tree.
4254: * In the case the document is not Well Formed, a tree is built anyway
4255: *
4256: * return values: the resulting document tree
4257: */
4258:
4259: xmlDocPtr xmlRecoverDoc(CHAR *cur) {
4260: return(xmlSAXParseDoc(NULL, cur, 1));
1.55 daniel 4261: }
4262:
4263: /**
4264: * xmlSAXParseFile :
4265: * @sax: the SAX handler block
1.50 daniel 4266: * @filename: the filename
1.59 daniel 4267: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
4268: * documents
1.50 daniel 4269: *
4270: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
4271: * compressed document is provided by default if found at compile-time.
1.55 daniel 4272: * It use the given SAX function block to handle the parsing callback.
4273: * If sax is NULL, fallback to the default DOM tree building routines.
1.50 daniel 4274: *
4275: * return values: the resulting document tree
1.9 httpng 4276: */
4277:
1.59 daniel 4278: xmlDocPtr xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
4279: int recovery) {
1.9 httpng 4280: xmlDocPtr ret;
1.20 daniel 4281: #ifdef HAVE_ZLIB_H
4282: gzFile input;
4283: #else
1.9 httpng 4284: int input;
1.20 daniel 4285: #endif
1.9 httpng 4286: int res;
1.55 daniel 4287: int len;
1.9 httpng 4288: struct stat buf;
4289: char *buffer;
1.16 daniel 4290: xmlParserCtxtPtr ctxt;
1.40 daniel 4291: xmlParserInputPtr inputStream;
1.9 httpng 4292:
1.11 veillard 4293: res = stat(filename, &buf);
1.9 httpng 4294: if (res < 0) return(NULL);
4295:
1.20 daniel 4296: #ifdef HAVE_ZLIB_H
1.55 daniel 4297: len = (buf.st_size * 8) + 1000;
1.20 daniel 4298: retry_bigger:
1.55 daniel 4299: buffer = malloc(len);
1.20 daniel 4300: #else
1.55 daniel 4301: len = buf.st_size + 100;
4302: buffer = malloc(len);
1.20 daniel 4303: #endif
1.9 httpng 4304: if (buffer == NULL) {
4305: perror("malloc");
4306: return(NULL);
4307: }
4308:
1.55 daniel 4309: memset(buffer, 0, len);
1.20 daniel 4310: #ifdef HAVE_ZLIB_H
4311: input = gzopen (filename, "r");
4312: if (input == NULL) {
4313: fprintf (stderr, "Cannot read file %s :\n", filename);
4314: perror ("gzopen failed");
4315: return(NULL);
4316: }
4317: #else
1.9 httpng 4318: input = open (filename, O_RDONLY);
4319: if (input < 0) {
4320: fprintf (stderr, "Cannot read file %s :\n", filename);
4321: perror ("open failed");
4322: return(NULL);
4323: }
1.20 daniel 4324: #endif
4325: #ifdef HAVE_ZLIB_H
1.55 daniel 4326: res = gzread(input, buffer, len);
1.20 daniel 4327: #else
1.9 httpng 4328: res = read(input, buffer, buf.st_size);
1.20 daniel 4329: #endif
1.9 httpng 4330: if (res < 0) {
4331: fprintf (stderr, "Cannot read file %s :\n", filename);
1.20 daniel 4332: #ifdef HAVE_ZLIB_H
4333: perror ("gzread failed");
4334: #else
1.9 httpng 4335: perror ("read failed");
1.20 daniel 4336: #endif
1.9 httpng 4337: return(NULL);
4338: }
1.20 daniel 4339: #ifdef HAVE_ZLIB_H
4340: gzclose(input);
1.55 daniel 4341: if (res >= len) {
1.20 daniel 4342: free(buffer);
1.55 daniel 4343: len *= 2;
1.20 daniel 4344: goto retry_bigger;
4345: }
4346: buf.st_size = res;
4347: #else
1.9 httpng 4348: close(input);
1.20 daniel 4349: #endif
4350:
1.40 daniel 4351: buffer[buf.st_size] = '\0';
1.9 httpng 4352:
1.16 daniel 4353: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
4354: if (ctxt == NULL) {
4355: perror("malloc");
4356: return(NULL);
4357: }
1.40 daniel 4358: xmlInitParserCtxt(ctxt);
1.56 daniel 4359: if (sax != NULL) ctxt->sax = sax;
1.40 daniel 4360: inputStream = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
4361: if (inputStream == NULL) {
4362: perror("malloc");
4363: free(ctxt);
4364: return(NULL);
4365: }
4366:
4367: inputStream->filename = strdup(filename);
4368: inputStream->line = 1;
4369: inputStream->col = 1;
1.45 daniel 4370:
4371: /*
4372: * TODO : plug some encoding conversion routines here. !!!
4373: */
1.40 daniel 4374: inputStream->base = buffer;
4375: inputStream->cur = buffer;
1.16 daniel 4376:
1.40 daniel 4377: inputPush(ctxt, inputStream);
1.16 daniel 4378:
4379: xmlParseDocument(ctxt);
1.40 daniel 4380:
1.59 daniel 4381: if ((ctxt->wellFormed) || recovery) ret = ctxt->doc;
4382: else {
4383: ret = NULL;
4384: xmlFreeDoc(ctxt->doc);
4385: ctxt->doc = NULL;
4386: }
1.9 httpng 4387: free(buffer);
1.50 daniel 4388: free(ctxt->nodeTab);
4389: free(ctxt->inputTab);
4390: if (inputStream->filename != NULL)
1.51 daniel 4391: free((char *)inputStream->filename);
1.50 daniel 4392: free(inputStream);
1.20 daniel 4393: free(ctxt);
4394:
4395: return(ret);
4396: }
4397:
1.55 daniel 4398: /**
4399: * xmlParseFile :
4400: * @filename: the filename
4401: *
4402: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
4403: * compressed document is provided by default if found at compile-time.
4404: *
4405: * return values: the resulting document tree
4406: */
4407:
4408: xmlDocPtr xmlParseFile(const char *filename) {
1.59 daniel 4409: return(xmlSAXParseFile(NULL, filename, 0));
4410: }
4411:
4412: /**
4413: * xmlRecoverFile :
4414: * @filename: the filename
4415: *
4416: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
4417: * compressed document is provided by default if found at compile-time.
4418: * In the case the document is not Well Formed, a tree is built anyway
4419: *
4420: * return values: the resulting document tree
4421: */
4422:
4423: xmlDocPtr xmlRecoverFile(const char *filename) {
4424: return(xmlSAXParseFile(NULL, filename, 1));
1.55 daniel 4425: }
1.32 daniel 4426:
1.50 daniel 4427: /**
1.55 daniel 4428: * xmlSAXParseMemory :
4429: * @sax: the SAX handler block
1.50 daniel 4430: * @cur: an pointer to a char array
4431: * @size: the siwe of the array
1.59 daniel 4432: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
4433: * documents
1.50 daniel 4434: *
1.55 daniel 4435: * parse an XML in-memory block and use the given SAX function block
4436: * to handle the parsing callback. If sax is NULL, fallback to the default
4437: * DOM tree building routines.
1.50 daniel 4438: *
4439: * TODO : plug some encoding conversion routines here. !!!
4440: *
4441: * return values: the resulting document tree
1.20 daniel 4442: */
1.50 daniel 4443:
1.59 daniel 4444: xmlDocPtr xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size,
4445: int recovery) {
1.20 daniel 4446: xmlDocPtr ret;
4447: xmlParserCtxtPtr ctxt;
1.40 daniel 4448: xmlParserInputPtr input;
4449:
4450: buffer[size - 1] = '\0';
4451:
1.20 daniel 4452: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
4453: if (ctxt == NULL) {
4454: perror("malloc");
4455: return(NULL);
4456: }
1.40 daniel 4457: xmlInitParserCtxt(ctxt);
1.56 daniel 4458: if (sax != NULL) ctxt->sax = sax;
1.40 daniel 4459: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
4460: if (input == NULL) {
4461: perror("malloc");
1.50 daniel 4462: free(ctxt->nodeTab);
4463: free(ctxt->inputTab);
1.40 daniel 4464: free(ctxt);
4465: return(NULL);
4466: }
1.20 daniel 4467:
1.40 daniel 4468: input->filename = NULL;
4469: input->line = 1;
4470: input->col = 1;
1.45 daniel 4471:
4472: /*
4473: * TODO : plug some encoding conversion routines here. !!!
4474: */
1.40 daniel 4475: input->base = buffer;
4476: input->cur = buffer;
1.20 daniel 4477:
1.40 daniel 4478: inputPush(ctxt, input);
1.20 daniel 4479:
4480: xmlParseDocument(ctxt);
1.40 daniel 4481:
1.59 daniel 4482: if ((ctxt->wellFormed) || recovery) ret = ctxt->doc;
4483: else {
4484: ret = NULL;
4485: xmlFreeDoc(ctxt->doc);
4486: ctxt->doc = NULL;
4487: }
1.50 daniel 4488: free(ctxt->nodeTab);
4489: free(ctxt->inputTab);
4490: if (input->filename != NULL)
1.51 daniel 4491: free((char *)input->filename);
1.50 daniel 4492: free(input);
1.16 daniel 4493: free(ctxt);
4494:
1.9 httpng 4495: return(ret);
1.17 daniel 4496: }
4497:
1.55 daniel 4498: /**
4499: * xmlParseMemory :
4500: * @cur: an pointer to a char array
4501: * @size: the size of the array
4502: *
4503: * parse an XML in-memory block and build a tree.
4504: *
4505: * return values: the resulting document tree
4506: */
4507:
4508: xmlDocPtr xmlParseMemory(char *buffer, int size) {
1.59 daniel 4509: return(xmlSAXParseMemory(NULL, buffer, size, 0));
4510: }
4511:
4512: /**
4513: * xmlRecoverMemory :
4514: * @cur: an pointer to a char array
4515: * @size: the size of the array
4516: *
4517: * parse an XML in-memory block and build a tree.
4518: * In the case the document is not Well Formed, a tree is built anyway
4519: *
4520: * return values: the resulting document tree
4521: */
4522:
4523: xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
4524: return(xmlSAXParseMemory(NULL, buffer, size, 1));
1.55 daniel 4525: }
1.17 daniel 4526:
1.50 daniel 4527: /**
4528: * xmlInitParserCtxt:
4529: * @ctxt: an XML parser context
4530: *
4531: * Initialize a parser context
4532: */
4533:
1.55 daniel 4534: void
4535: xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1.17 daniel 4536: {
1.40 daniel 4537: /* Allocate the Input stack */
4538: ctxt->inputTab = (xmlParserInputPtr *) malloc(5 * sizeof(xmlParserInputPtr));
4539: ctxt->inputNr = 0;
4540: ctxt->inputMax = 5;
4541: ctxt->input = NULL;
4542:
1.43 daniel 4543: /* Allocate the Node stack */
4544: ctxt->nodeTab = (xmlNodePtr *) malloc(10 * sizeof(xmlNodePtr));
4545: ctxt->nodeNr = 0;
4546: ctxt->nodeMax = 10;
4547: ctxt->node = NULL;
4548:
1.45 daniel 4549: ctxt->sax = &xmlDefaultSAXHandler;
1.32 daniel 4550: ctxt->doc = NULL;
1.59 daniel 4551: ctxt->wellFormed = 1;
1.32 daniel 4552: ctxt->record_info = 0;
4553: xmlInitNodeInfoSeq(&ctxt->node_seq);
1.17 daniel 4554: }
4555:
1.50 daniel 4556: /**
4557: * xmlClearParserCtxt:
4558: * @ctxt: an XML parser context
4559: *
4560: * Clear (release owned resources) and reinitialize a parser context
4561: */
1.17 daniel 4562:
1.55 daniel 4563: void
4564: xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1.17 daniel 4565: {
1.32 daniel 4566: xmlClearNodeInfoSeq(&ctxt->node_seq);
4567: xmlInitParserCtxt(ctxt);
1.17 daniel 4568: }
4569:
4570:
1.50 daniel 4571: /**
4572: * xmlSetupParserForBuffer:
4573: * @ctxt: an XML parser context
4574: * @buffer: a CHAR * buffer
4575: * @filename: a file name
4576: *
1.19 daniel 4577: * Setup the parser context to parse a new buffer; Clears any prior
4578: * contents from the parser context. The buffer parameter must not be
4579: * NULL, but the filename parameter can be
4580: */
1.50 daniel 4581:
1.55 daniel 4582: void
4583: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const CHAR* buffer,
1.17 daniel 4584: const char* filename)
4585: {
1.40 daniel 4586: xmlParserInputPtr input;
4587:
4588: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
4589: if (input == NULL) {
4590: perror("malloc");
4591: free(ctxt);
4592: exit(1);
4593: }
4594:
1.17 daniel 4595: xmlClearParserCtxt(ctxt);
1.40 daniel 4596: if (input->filename != NULL)
4597: input->filename = strdup(filename);
4598: else
4599: input->filename = NULL;
4600: input->line = 1;
4601: input->col = 1;
4602: input->base = buffer;
4603: input->cur = buffer;
4604:
4605: inputPush(ctxt, input);
1.17 daniel 4606: }
4607:
1.32 daniel 4608:
1.50 daniel 4609: /**
4610: * xmlParserFindNodeInfo:
4611: * @ctxt: an XML parser context
4612: * @node: an XML node within the tree
4613: *
4614: * Find the parser node info struct for a given node
4615: *
4616: * return values: an xmlParserNodeInfo block pointer or NULL
1.32 daniel 4617: */
4618: const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
4619: const xmlNode* node)
4620: {
4621: unsigned long pos;
4622:
4623: /* Find position where node should be at */
4624: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
4625: if ( ctx->node_seq.buffer[pos].node == node )
4626: return &ctx->node_seq.buffer[pos];
4627: else
4628: return NULL;
4629: }
4630:
4631:
1.50 daniel 4632: /**
4633: * xmlInitNodeInfoSeq :
4634: * @seq: a node info sequence pointer
4635: *
4636: * -- Initialize (set to initial state) node info sequence
1.32 daniel 4637: */
1.55 daniel 4638: void
4639: xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 4640: {
4641: seq->length = 0;
4642: seq->maximum = 0;
4643: seq->buffer = NULL;
4644: }
4645:
1.50 daniel 4646: /**
4647: * xmlClearNodeInfoSeq :
4648: * @seq: a node info sequence pointer
4649: *
4650: * -- Clear (release memory and reinitialize) node
1.32 daniel 4651: * info sequence
4652: */
1.55 daniel 4653: void
4654: xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 4655: {
4656: if ( seq->buffer != NULL )
4657: free(seq->buffer);
4658: xmlInitNodeInfoSeq(seq);
4659: }
4660:
4661:
1.50 daniel 4662: /**
4663: * xmlParserFindNodeInfoIndex:
4664: * @seq: a node info sequence pointer
4665: * @node: an XML node pointer
4666: *
4667: *
1.32 daniel 4668: * xmlParserFindNodeInfoIndex : Find the index that the info record for
4669: * the given node is or should be at in a sorted sequence
1.50 daniel 4670: * return values: a long indicating the position of the record
1.32 daniel 4671: */
4672: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
4673: const xmlNode* node)
4674: {
4675: unsigned long upper, lower, middle;
4676: int found = 0;
4677:
4678: /* Do a binary search for the key */
4679: lower = 1;
4680: upper = seq->length;
4681: middle = 0;
4682: while ( lower <= upper && !found) {
4683: middle = lower + (upper - lower) / 2;
4684: if ( node == seq->buffer[middle - 1].node )
4685: found = 1;
4686: else if ( node < seq->buffer[middle - 1].node )
4687: upper = middle - 1;
4688: else
4689: lower = middle + 1;
4690: }
4691:
4692: /* Return position */
4693: if ( middle == 0 || seq->buffer[middle - 1].node < node )
4694: return middle;
4695: else
4696: return middle - 1;
4697: }
4698:
4699:
1.50 daniel 4700: /**
4701: * xmlParserAddNodeInfo:
4702: * @ctxt: an XML parser context
4703: * @seq: a node info sequence pointer
4704: *
4705: * Insert node info record into the sorted sequence
1.32 daniel 4706: */
1.55 daniel 4707: void
4708: xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
1.32 daniel 4709: const xmlParserNodeInfo* info)
4710: {
4711: unsigned long pos;
4712: static unsigned int block_size = 5;
4713:
4714: /* Find pos and check to see if node is already in the sequence */
1.55 daniel 4715: pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
4716: if ( pos < ctxt->node_seq.length
4717: && ctxt->node_seq.buffer[pos].node == info->node ) {
4718: ctxt->node_seq.buffer[pos] = *info;
1.32 daniel 4719: }
4720:
4721: /* Otherwise, we need to add new node to buffer */
4722: else {
4723: /* Expand buffer by 5 if needed */
1.55 daniel 4724: if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
1.32 daniel 4725: xmlParserNodeInfo* tmp_buffer;
1.55 daniel 4726: unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
4727: *(ctxt->node_seq.maximum + block_size));
1.32 daniel 4728:
1.55 daniel 4729: if ( ctxt->node_seq.buffer == NULL )
1.32 daniel 4730: tmp_buffer = (xmlParserNodeInfo*)malloc(byte_size);
4731: else
1.55 daniel 4732: tmp_buffer = (xmlParserNodeInfo*)realloc(ctxt->node_seq.buffer, byte_size);
1.32 daniel 4733:
4734: if ( tmp_buffer == NULL ) {
1.55 daniel 4735: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.58 daniel 4736: ctxt->sax->error(ctxt, "Out of memory\n");
1.32 daniel 4737: return;
4738: }
1.55 daniel 4739: ctxt->node_seq.buffer = tmp_buffer;
4740: ctxt->node_seq.maximum += block_size;
1.32 daniel 4741: }
4742:
4743: /* If position is not at end, move elements out of the way */
1.55 daniel 4744: if ( pos != ctxt->node_seq.length ) {
1.32 daniel 4745: unsigned long i;
4746:
1.55 daniel 4747: for ( i = ctxt->node_seq.length; i > pos; i-- )
4748: ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
1.32 daniel 4749: }
4750:
4751: /* Copy element and increase length */
1.55 daniel 4752: ctxt->node_seq.buffer[pos] = *info;
4753: ctxt->node_seq.length++;
1.32 daniel 4754: }
4755: }
Webmaster